1336 files changed, 55509 insertions, 33794 deletions
diff --git a/Documentation/ABI/stable/sysfs-driver-usb-usbtmc b/Documentation/ABI/stable/sysfs-driver-usb-usbtmc
index e960cd027e1e..a9e123ba32cd 100644
--- a/Documentation/ABI/stable/sysfs-driver-usb-usbtmc
+++ b/Documentation/ABI/stable/sysfs-driver-usb-usbtmc
@@ -25,38 +25,3 @@ Description:
 		4.2.2.
 
 		The files are read only.
-
-
-What:		/sys/bus/usb/drivers/usbtmc/*/TermChar
-Date:		August 2008
-Contact:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Description:
-		This file is the TermChar value to be sent to the USB TMC
-		device as described by the document, "Universal Serial Bus Test
-		and Measurement Class Specification
-		(USBTMC) Revision 1.0" as published by the USB-IF.
-
-		Note that the TermCharEnabled file determines if this value is
-		sent to the device or not.
-
-
-What:		/sys/bus/usb/drivers/usbtmc/*/TermCharEnabled
-Date:		August 2008
-Contact:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Description:
-		This file determines if the TermChar is to be sent to the
-		device on every transaction or not.  For more details about
-		this, please see the document, "Universal Serial Bus Test and
-		Measurement Class Specification (USBTMC) Revision 1.0" as
-		published by the USB-IF.
-
-
-What:		/sys/bus/usb/drivers/usbtmc/*/auto_abort
-Date:		August 2008
-Contact:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Description:
-		This file determines if the transaction of the USB TMC
-		device is to be automatically aborted if there is any error.
-		For more details about this, please see the document,
-		"Universal Serial Bus Test and Measurement Class Specification
-		(USBTMC) Revision 1.0" as published by the USB-IF.
diff --git a/Documentation/ABI/testing/configfs-stp-policy-p_sys-t b/Documentation/ABI/testing/configfs-stp-policy-p_sys-t
new file mode 100644
index 000000000000..b290d1c00dcf
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-stp-policy-p_sys-t
@@ -0,0 +1,41 @@
+What:		/config/stp-policy/<device>:p_sys-t.<policy>/<node>/uuid
+Date:		June 2018
+KernelVersion:	4.19
+Description:
+		UUID source identifier string, RW.
+		Default value is randomly generated at the mkdir <node> time.
+		Data coming from trace sources that use this <node> will be
+		tagged with this UUID in the MIPI SyS-T packet stream, to
+		allow the decoder to discern between different sources
+		within the same master/channel range, and identify the
+		higher level decoders that may be needed for each source.
+
+What:		/config/stp-policy/<device>:p_sys-t.<policy>/<node>/do_len
+Date:		June 2018
+KernelVersion:	4.19
+Description:
+		Include payload length in the MIPI SyS-T header, boolean.
+		If enabled, the SyS-T protocol encoder will include payload
+		length in each packet's metadata. This is normally redundant
+		if the underlying transport protocol supports marking message
+		boundaries (which STP does), so this is off by default.
+
+What:		/config/stp-policy/<device>:p_sys-t.<policy>/<node>/ts_interval
+Date:		June 2018
+KernelVersion:	4.19
+Description:
+		Time interval in milliseconds. Include a timestamp in the
+		MIPI SyS-T packet metadata, if this many milliseconds have
+		passed since the previous packet from this source. Zero is
+		the default and stands for "never send the timestamp".
+
+What:		/config/stp-policy/<device>:p_sys-t.<policy>/<node>/clocksync_interval
+Date:		June 2018
+KernelVersion:	4.19
+Description:
+		Time interval in milliseconds. Send a CLOCKSYNC packet if
+		this many milliseconds have passed since the previous
+		CLOCKSYNC packet from this source. Zero is the default and
+		stands for "never send the CLOCKSYNC". It makes sense to
+		use this option with sources that generate constant and/or
+		periodic data, like stm_heartbeat.
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uvc b/Documentation/ABI/testing/configfs-usb-gadget-uvc
index 9281e2aa38df..809765bd9573 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-uvc
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc
@@ -12,6 +12,10 @@ Date:		Dec 2014
 KernelVersion:	4.0
 Description:	Control descriptors
 
+		All attributes read only:
+		bInterfaceNumber	- USB interface number for this
+					  streaming interface
+
 What:		/config/usb-gadget/gadget/functions/uvc.name/control/class
 Date:		Dec 2014
 KernelVersion:	4.0
@@ -109,6 +113,10 @@ Date:		Dec 2014
 KernelVersion:	4.0
 Description:	Streaming descriptors
 
+		All attributes read only:
+		bInterfaceNumber	- USB interface number for this
+					  streaming interface
+
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/class
 Date:		Dec 2014
 KernelVersion:	4.0
@@ -160,6 +168,10 @@ Description:	Specific MJPEG format descriptors
 
 		All attributes read only,
 		except bmaControls and bDefaultFrameIndex:
+		bFormatIndex		- unique id for this format descriptor;
+					only defined after parent header is
+					linked into the streaming class;
+					read-only
 		bmaControls		- this format's data for bmaControls in
 					the streaming header
 		bmInterfaceFlags	- specifies interlace information,
@@ -177,6 +189,10 @@ Date:		Dec 2014
 KernelVersion:	4.0
 Description:	Specific MJPEG frame descriptors
 
+		bFrameIndex		- unique id for this framedescriptor;
+					only defined after parent format is
+					linked into the streaming header;
+					read-only
 		dwFrameInterval		- indicates how frame interval can be
 					programmed; a number of values
 					separated by newline can be specified
@@ -204,6 +220,10 @@ Date:		Dec 2014
 KernelVersion:	4.0
 Description:	Specific uncompressed format descriptors
 
+		bFormatIndex		- unique id for this format descriptor;
+					only defined after parent header is
+					linked into the streaming class;
+					read-only
 		bmaControls		- this format's data for bmaControls in
 					the streaming header
 		bmInterfaceFlags	- specifies interlace information,
@@ -224,6 +244,10 @@ Date:		Dec 2014
 KernelVersion:	4.0
 Description:	Specific uncompressed frame descriptors
 
+		bFrameIndex		- unique id for this framedescriptor;
+					only defined after parent format is
+					linked into the streaming header;
+					read-only
 		dwFrameInterval		- indicates how frame interval can be
 					programmed; a number of values
 					separated by newline can be specified
diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb
index 08d456e07b53..559baa5c418c 100644
--- a/Documentation/ABI/testing/sysfs-bus-usb
+++ b/Documentation/ABI/testing/sysfs-bus-usb
@@ -189,6 +189,16 @@ Description:
 		The file will read "hotplug", "wired" and "not used" if the
 		information is available, and "unknown" otherwise.
 
+What:		/sys/bus/usb/devices/.../(hub interface)/portX/location
+Date:		October 2018
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		Some platforms provide usb port physical location through
+		firmware. This is used by the kernel to pair up logical ports
+		mapping to the same physical connector. The attribute exposes the
+		raw location value as a hex integer.
+
+
 What:		/sys/bus/usb/devices/.../(hub interface)/portX/quirks
 Date:		May 2018
 Contact:	Nicolas Boichat <drinkcat@chromium.org>
@@ -219,7 +229,14 @@ Description:
 		ports and report them to the kernel. This attribute is to expose
 		the number of over-current situation occurred on a specific port
 		to user space. This file will contain an unsigned 32 bit value
-		which wraps to 0 after its maximum is reached.
+		which wraps to 0 after its maximum is reached. This file supports
+		poll() for monitoring changes to this value in user space.
+
+		Any time this value changes the corresponding hub device will send a
+		udev event with the following attributes:
+
+		OVER_CURRENT_PORT=/sys/bus/usb/devices/.../(hub interface)/portX
+		OVER_CURRENT_COUNT=[current value of this sysfs attribute]
 
 What:		/sys/bus/usb/devices/.../(hub interface)/portX/usb3_lpm_permit
 Date:		November 2015
diff --git a/Documentation/ABI/testing/sysfs-bus-vmbus b/Documentation/ABI/testing/sysfs-bus-vmbus
new file mode 100644
index 000000000000..91e6c065973c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-vmbus
@@ -0,0 +1,21 @@
+What:		/sys/bus/vmbus/devices/.../driver_override
+Date:		August 2019
+Contact:	Stephen Hemminger <sthemmin@microsoft.com>
+Description:
+		This file allows the driver for a device to be specified which
+		will override standard static and dynamic ID matching.  When
+		specified, only a driver with a name matching the value written
+		to driver_override will have an opportunity to bind to the
+		device.  The override is specified by writing a string to the
+		driver_override file (echo uio_hv_generic > driver_override) and
+		may be cleared with an empty string (echo > driver_override).
+		This returns the device to standard matching rules binding.
+		Writing to driver_override does not automatically unbind the
+		device from its current driver or make any attempt to
+		automatically load the specified driver.  If no driver with a
+		matching name is currently loaded in the kernel, the device
+		will not bind to any driver.  This also allows devices to
+		opt-out of driver binding using a driver_override name such as
+		"none".  Only a single driver may be specified in the override,
+		there is no support for parsing delimiters.
+
diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index e2e0fe553ad8..664a8f6a634f 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -91,6 +91,24 @@ Description:
 		stacked (e.g: VLAN interfaces) but still have the same MAC
 		address as their parent device.
 
+What:		/sys/class/net/<iface>/dev_port
+Date:		February 2014
+KernelVersion:	3.15
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the port number of this network device, formatted
+		as a decimal value. Some NICs have multiple independent ports
+		on the same PCI bus, device and function. This attribute allows
+		userspace to distinguish the respective interfaces.
+
+		Note: some device drivers started to use 'dev_id' for this
+		purpose since long before 3.15 and have not adopted the new
+		attribute ever since. To query the port number, some tools look
+		exclusively at 'dev_port', while others only consult 'dev_id'.
+		If a network device has multiple client adapter ports as
+		described in the previous paragraph and does not set this
+		attribute to its port number, it's a kernel bug.
+
 What:		/sys/class/net/<iface>/dormant
 Date:		March 2006
 KernelVersion:	2.6.17
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e129cd8a6dcc..5b5f1ba76bba 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1759,6 +1759,18 @@
 		nobypass	[PPC/POWERNV]
 			Disable IOMMU bypass, using IOMMU for PCI devices.
 
+	iommu.strict=	[ARM64] Configure TLB invalidation behaviour
+			Format: { "0" | "1" }
+			0 - Lazy mode.
+			  Request that DMA unmap operations use deferred
+			  invalidation of hardware TLBs, for increased
+			  throughput at the cost of reduced device isolation.
+			  Will fall back to strict mode if not supported by
+			  the relevant IOMMU driver.
+			1 - Strict mode (default).
+			  DMA unmap operations invalidate IOMMU hardware TLBs
+			  synchronously.
+
 	iommu.passthrough=
 			[ARM64] Configure DMA to bypass the IOMMU by default.
 			Format: { "0" | "1" }
@@ -4623,7 +4635,8 @@
 
 	usbcore.old_scheme_first=
 			[USB] Start with the old device initialization
-			scheme (default 0 = off).
+			scheme,  applies only to low and full-speed devices
+			 (default 0 = off).
 
 	usbcore.usbfs_memory_mb=
 			[USB] Memory limit (in MB) for buffers allocated by
diff --git a/Documentation/admin-guide/security-bugs.rst b/Documentation/admin-guide/security-bugs.rst
index 30491d91e93d..164bf71149fd 100644
--- a/Documentation/admin-guide/security-bugs.rst
+++ b/Documentation/admin-guide/security-bugs.rst
@@ -26,23 +26,34 @@ information is helpful.  Any exploit code is very helpful and will not
 be released without consent from the reporter unless it has already been
 made public.
 
-Disclosure
-----------
-
-The goal of the Linux kernel security team is to work with the bug
-submitter to understand and fix the bug.  We prefer to publish the fix as
-soon as possible, but try to avoid public discussion of the bug itself
-and leave that to others.
-
-Publishing the fix may be delayed when the bug or the fix is not yet
-fully understood, the solution is not well-tested or for vendor
-coordination.  However, we expect these delays to be short, measurable in
-days, not weeks or months.  A release date is negotiated by the security
-team working with the bug submitter as well as vendors.  However, the
-kernel security team holds the final say when setting a timeframe.  The
-timeframe varies from immediate (esp. if it's already publicly known bug)
-to a few weeks.  As a basic default policy, we expect report date to
-release date to be on the order of 7 days.
+Disclosure and embargoed information
+------------------------------------
+
+The security list is not a disclosure channel.  For that, see Coordination
+below.
+
+Once a robust fix has been developed, our preference is to release the
+fix in a timely fashion, treating it no differently than any of the other
+thousands of changes and fixes the Linux kernel project releases every
+month.
+
+However, at the request of the reporter, we will postpone releasing the
+fix for up to 5 business days after the date of the report or after the
+embargo has lifted; whichever comes first.  The only exception to that
+rule is if the bug is publicly known, in which case the preference is to
+release the fix as soon as it's available.
+
+Whilst embargoed information may be shared with trusted individuals in
+order to develop a fix, such information will not be published alongside
+the fix or on any other disclosure channel without the permission of the
+reporter.  This includes but is not limited to the original bug report
+and followup discussions (if any), exploits, CVE information or the
+identity of the reporter.
+
+In other words our only interest is in getting bugs fixed.  All other
+information submitted to the security list and any followup discussions
+of the report are treated confidentially even after the embargo has been
+lifted, in perpetuity.
 
 Coordination
 ------------
@@ -68,7 +79,7 @@ may delay the bug handling. If a reporter wishes to have a CVE identifier
 assigned ahead of public disclosure, they will need to contact the private
 linux-distros list, described above. When such a CVE identifier is known
 before a patch is provided, it is desirable to mention it in the commit
-message, though.
+message if the reporter agrees.
 
 Non-disclosure agreements
 -------------------------
diff --git a/Documentation/cgroup-v1/rdma.txt b/Documentation/cgroup-v1/rdma.txt
index af618171e0eb..9bdb7fd03f83 100644
--- a/Documentation/cgroup-v1/rdma.txt
+++ b/Documentation/cgroup-v1/rdma.txt
@@ -27,7 +27,7 @@ cgroup.
 Currently user space applications can easily take away all the rdma verb
 specific resources such as AH, CQ, QP, MR etc. Due to which other applications
 in other cgroup or kernel space ULPs may not even get chance to allocate any
-rdma resources. This can leads to service unavailability.
+rdma resources. This can lead to service unavailability.
 
 Therefore RDMA controller is needed through which resource consumption
 of processes can be limited. Through this controller different rdma
diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index 86023c33906f..ff48b55040ef 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -420,9 +420,8 @@ struct clk
 	%pC	pll1
 	%pCn	pll1
 
-For printing struct clk structures. %pC and %pCn print the name
-(Common Clock Framework) or address (legacy clock framework) of the
-structure.
+For printing struct clk structures. %pC and %pCn print the name of the clock
+(Common Clock Framework) or a unique 32-bit ID (legacy clock framework).
 
 Passed by reference.
 
diff --git a/Documentation/devicetree/bindings/arm/al,alpine.txt b/Documentation/devicetree/bindings/arm/al,alpine.txt
index f404a4f9b165..d00debe2e86f 100644
--- a/Documentation/devicetree/bindings/arm/al,alpine.txt
+++ b/Documentation/devicetree/bindings/arm/al,alpine.txt
@@ -14,75 +14,3 @@ compatible: must contain "al,alpine"
 
 	...
 }
-
-* CPU node:
-
-The Alpine platform includes cortex-a15 cores.
-enable-method: must be "al,alpine-smp" to allow smp  [1]
-
-Example:
-
-cpus {
-	#address-cells = <1>;
-	#size-cells = <0>;
-	enable-method = "al,alpine-smp";
-
-	cpu@0 {
-		compatible = "arm,cortex-a15";
-		device_type = "cpu";
-		reg = <0>;
-	};
-
-	cpu@1 {
-		compatible = "arm,cortex-a15";
-		device_type = "cpu";
-		reg = <1>;
-	};
-
-	cpu@2 {
-		compatible = "arm,cortex-a15";
-		device_type = "cpu";
-		reg = <2>;
-	};
-
-	cpu@3 {
-		compatible = "arm,cortex-a15";
-		device_type = "cpu";
-		reg = <3>;
-	};
-};
-
-
-* Alpine CPU resume registers
-
-The CPU resume register are used to define required resume address after
-reset.
-
-Properties:
-- compatible : Should contain "al,alpine-cpu-resume".
-- reg : Offset and length of the register set for the device
-
-Example:
-
-cpu_resume {
-	compatible = "al,alpine-cpu-resume";
-	reg = <0xfbff5ed0 0x30>;
-};
-
-* Alpine System-Fabric Service Registers
-
-The System-Fabric Service Registers allow various operation on CPU and
-system fabric, like powering CPUs off.
-
-Properties:
-- compatible : Should contain "al,alpine-sysfabric-service" and "syscon".
-- reg : Offset and length of the register set for the device
-
-Example:
-
-nb_service {
-        compatible = "al,alpine-sysfabric-service", "syscon";
-        reg = <0xfb070000 0x10000>;
-};
-
-[1] arm/cpu-enable-method/al,alpine-smp
diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.txt b/Documentation/devicetree/bindings/arm/atmel-at91.txt
index 31220b54d85d..4bf1b4da7659 100644
--- a/Documentation/devicetree/bindings/arm/atmel-at91.txt
+++ b/Documentation/devicetree/bindings/arm/atmel-at91.txt
@@ -70,173 +70,3 @@ compatible: must be one of:
        - "atmel,samv71q19"
        - "atmel,samv71q20"
        - "atmel,samv71q21"
-
-Chipid required properties:
-- compatible: Should be "atmel,sama5d2-chipid"
-- reg : Should contain registers location and length
-
-PIT Timer required properties:
-- compatible: Should be "atmel,at91sam9260-pit"
-- reg: Should contain registers location and length
-- interrupts: Should contain interrupt for the PIT which is the IRQ line
-  shared across all System Controller members.
-
-System Timer (ST) required properties:
-- compatible: Should be "atmel,at91rm9200-st", "syscon", "simple-mfd"
-- reg: Should contain registers location and length
-- interrupts: Should contain interrupt for the ST which is the IRQ line
-  shared across all System Controller members.
-- clocks: phandle to input clock.
-Its subnodes can be:
-- watchdog: compatible should be "atmel,at91rm9200-wdt"
-
-RSTC Reset Controller required properties:
-- compatible: Should be "atmel,<chip>-rstc".
-  <chip> can be "at91sam9260" or "at91sam9g45" or "sama5d3"
-- reg: Should contain registers location and length
-- clocks: phandle to input clock.
-
-Example:
-
-	rstc@fffffd00 {
-		compatible = "atmel,at91sam9260-rstc";
-		reg = <0xfffffd00 0x10>;
-		clocks = <&clk32k>;
-	};
-
-RAMC SDRAM/DDR Controller required properties:
-- compatible: Should be "atmel,at91rm9200-sdramc", "syscon"
-			"atmel,at91sam9260-sdramc",
-			"atmel,at91sam9g45-ddramc",
-			"atmel,sama5d3-ddramc",
-- reg: Should contain registers location and length
-
-Examples:
-
-	ramc0: ramc@ffffe800 {
-		compatible = "atmel,at91sam9g45-ddramc";
-		reg = <0xffffe800 0x200>;
-	};
-
-SHDWC Shutdown Controller
-
-required properties:
-- compatible: Should be "atmel,<chip>-shdwc".
-  <chip> can be "at91sam9260", "at91sam9rl" or "at91sam9x5".
-- reg: Should contain registers location and length
-- clocks: phandle to input clock.
-
-optional properties:
-- atmel,wakeup-mode: String, operation mode of the wakeup mode.
-  Supported values are: "none", "high", "low", "any".
-- atmel,wakeup-counter: Counter on Wake-up 0 (between 0x0 and 0xf).
-
-optional at91sam9260 properties:
-- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
-
-optional at91sam9rl properties:
-- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
-- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
-
-optional at91sam9x5 properties:
-- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
-
-Example:
-
-	shdwc@fffffd10 {
-		compatible = "atmel,at91sam9260-shdwc";
-		reg = <0xfffffd10 0x10>;
-		clocks = <&clk32k>;
-	};
-
-SHDWC SAMA5D2-Compatible Shutdown Controller
-
-1) shdwc node
-
-required properties:
-- compatible: should be "atmel,sama5d2-shdwc".
-- reg: should contain registers location and length
-- clocks: phandle to input clock.
-- #address-cells: should be one. The cell is the wake-up input index.
-- #size-cells: should be zero.
-
-optional properties:
-
-- debounce-delay-us: minimum wake-up inputs debouncer period in
-  microseconds. It's usually a board-related property.
-- atmel,wakeup-rtc-timer: boolean to enable Real-Time Clock wake-up.
-
-The node contains child nodes for each wake-up input that the platform uses.
-
-2) input nodes
-
-Wake-up input nodes are usually described in the "board" part of the Device
-Tree. Note also that input 0 is linked to the wake-up pin and is frequently
-used.
-
-Required properties:
-- reg: should contain the wake-up input index [0 - 15].
-
-Optional properties:
-- atmel,wakeup-active-high: boolean, the corresponding wake-up input described
-  by the child, forces the wake-up of the core power supply on a high level.
-  The default is to be active low.
-
-Example:
-
-On the SoC side:
-	shdwc@f8048010 {
-		compatible = "atmel,sama5d2-shdwc";
-		reg = <0xf8048010 0x10>;
-		clocks = <&clk32k>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-		atmel,wakeup-rtc-timer;
-	};
-
-On the board side:
-	shdwc@f8048010 {
-		debounce-delay-us = <976>;
-
-		input@0 {
-			reg = <0>;
-		};
-
-		input@1 {
-			reg = <1>;
-			atmel,wakeup-active-high;
-		};
-	};
-
-Special Function Registers (SFR)
-
-Special Function Registers (SFR) manage specific aspects of the integrated
-memory, bridge implementations, processor and other functionality not controlled
-elsewhere.
-
-required properties:
-- compatible: Should be "atmel,<chip>-sfr", "syscon" or
-	"atmel,<chip>-sfrbu", "syscon"
-  <chip> can be "sama5d3", "sama5d4" or "sama5d2".
-- reg: Should contain registers location and length
-
-	sfr@f0038000 {
-		compatible = "atmel,sama5d3-sfr", "syscon";
-		reg = <0xf0038000 0x60>;
-	};
-
-Security Module (SECUMOD)
-
-The Security Module macrocell provides all necessary secure functions to avoid
-voltage, temperature, frequency and mechanical attacks on the chip. It also
-embeds secure memories that can be scrambled
-
-required properties:
-- compatible: Should be "atmel,<chip>-secumod", "syscon".
-  <chip> can be "sama5d2".
-- reg: Should contain registers location and length
-
-	secumod@fc040000 {
-		compatible = "atmel,sama5d2-secumod", "syscon";
-		reg = <0xfc040000 0x100>;
-	};
diff --git a/Documentation/devicetree/bindings/arm/atmel-sysregs.txt b/Documentation/devicetree/bindings/arm/atmel-sysregs.txt
new file mode 100644
index 000000000000..4b96608ad692
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/atmel-sysregs.txt
@@ -0,0 +1,171 @@
+Atmel system registers
+
+Chipid required properties:
+- compatible: Should be "atmel,sama5d2-chipid"
+- reg : Should contain registers location and length
+
+PIT Timer required properties:
+- compatible: Should be "atmel,at91sam9260-pit"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt for the PIT which is the IRQ line
+  shared across all System Controller members.
+
+System Timer (ST) required properties:
+- compatible: Should be "atmel,at91rm9200-st", "syscon", "simple-mfd"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt for the ST which is the IRQ line
+  shared across all System Controller members.
+- clocks: phandle to input clock.
+Its subnodes can be:
+- watchdog: compatible should be "atmel,at91rm9200-wdt"
+
+RSTC Reset Controller required properties:
+- compatible: Should be "atmel,<chip>-rstc".
+  <chip> can be "at91sam9260" or "at91sam9g45" or "sama5d3"
+- reg: Should contain registers location and length
+- clocks: phandle to input clock.
+
+Example:
+
+	rstc@fffffd00 {
+		compatible = "atmel,at91sam9260-rstc";
+		reg = <0xfffffd00 0x10>;
+		clocks = <&clk32k>;
+	};
+
+RAMC SDRAM/DDR Controller required properties:
+- compatible: Should be "atmel,at91rm9200-sdramc", "syscon"
+			"atmel,at91sam9260-sdramc",
+			"atmel,at91sam9g45-ddramc",
+			"atmel,sama5d3-ddramc",
+- reg: Should contain registers location and length
+
+Examples:
+
+	ramc0: ramc@ffffe800 {
+		compatible = "atmel,at91sam9g45-ddramc";
+		reg = <0xffffe800 0x200>;
+	};
+
+SHDWC Shutdown Controller
+
+required properties:
+- compatible: Should be "atmel,<chip>-shdwc".
+  <chip> can be "at91sam9260", "at91sam9rl" or "at91sam9x5".
+- reg: Should contain registers location and length
+- clocks: phandle to input clock.
+
+optional properties:
+- atmel,wakeup-mode: String, operation mode of the wakeup mode.
+  Supported values are: "none", "high", "low", "any".
+- atmel,wakeup-counter: Counter on Wake-up 0 (between 0x0 and 0xf).
+
+optional at91sam9260 properties:
+- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
+
+optional at91sam9rl properties:
+- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
+- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
+
+optional at91sam9x5 properties:
+- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
+
+Example:
+
+	shdwc@fffffd10 {
+		compatible = "atmel,at91sam9260-shdwc";
+		reg = <0xfffffd10 0x10>;
+		clocks = <&clk32k>;
+	};
+
+SHDWC SAMA5D2-Compatible Shutdown Controller
+
+1) shdwc node
+
+required properties:
+- compatible: should be "atmel,sama5d2-shdwc".
+- reg: should contain registers location and length
+- clocks: phandle to input clock.
+- #address-cells: should be one. The cell is the wake-up input index.
+- #size-cells: should be zero.
+
+optional properties:
+
+- debounce-delay-us: minimum wake-up inputs debouncer period in
+  microseconds. It's usually a board-related property.
+- atmel,wakeup-rtc-timer: boolean to enable Real-Time Clock wake-up.
+
+The node contains child nodes for each wake-up input that the platform uses.
+
+2) input nodes
+
+Wake-up input nodes are usually described in the "board" part of the Device
+Tree. Note also that input 0 is linked to the wake-up pin and is frequently
+used.
+
+Required properties:
+- reg: should contain the wake-up input index [0 - 15].
+
+Optional properties:
+- atmel,wakeup-active-high: boolean, the corresponding wake-up input described
+  by the child, forces the wake-up of the core power supply on a high level.
+  The default is to be active low.
+
+Example:
+
+On the SoC side:
+	shdwc@f8048010 {
+		compatible = "atmel,sama5d2-shdwc";
+		reg = <0xf8048010 0x10>;
+		clocks = <&clk32k>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		atmel,wakeup-rtc-timer;
+	};
+
+On the board side:
+	shdwc@f8048010 {
+		debounce-delay-us = <976>;
+
+		input@0 {
+			reg = <0>;
+		};
+
+		input@1 {
+			reg = <1>;
+			atmel,wakeup-active-high;
+		};
+	};
+
+Special Function Registers (SFR)
+
+Special Function Registers (SFR) manage specific aspects of the integrated
+memory, bridge implementations, processor and other functionality not controlled
+elsewhere.
+
+required properties:
+- compatible: Should be "atmel,<chip>-sfr", "syscon" or
+	"atmel,<chip>-sfrbu", "syscon"
+  <chip> can be "sama5d3", "sama5d4" or "sama5d2".
+- reg: Should contain registers location and length
+
+	sfr@f0038000 {
+		compatible = "atmel,sama5d3-sfr", "syscon";
+		reg = <0xf0038000 0x60>;
+	};
+
+Security Module (SECUMOD)
+
+The Security Module macrocell provides all necessary secure functions to avoid
+voltage, temperature, frequency and mechanical attacks on the chip. It also
+embeds secure memories that can be scrambled
+
+required properties:
+- compatible: Should be "atmel,<chip>-secumod", "syscon".
+  <chip> can be "sama5d2".
+- reg: Should contain registers location and length
+
+	secumod@fc040000 {
+		compatible = "atmel,sama5d2-secumod", "syscon";
+		reg = <0xfc040000 0x100>;
+	};
diff --git a/Documentation/devicetree/bindings/arm/coresight.txt b/Documentation/devicetree/bindings/arm/coresight.txt
index 5d1ad09bafb4..f8aff65ab921 100644
--- a/Documentation/devicetree/bindings/arm/coresight.txt
+++ b/Documentation/devicetree/bindings/arm/coresight.txt
@@ -54,9 +54,7 @@ its hardware characteristcs.
 	  clocks the core of that coresight component. The latter clock
 	  is optional.
 
-	* port or ports: The representation of the component's port
-	  layout using the generic DT graph presentation found in
-	  "bindings/graph.txt".
+	* port or ports: see "Graph bindings for Coresight" below.
 
 * Additional required properties for System Trace Macrocells (STM):
 	* reg: along with the physical base address and length of the register
@@ -73,7 +71,7 @@ its hardware characteristcs.
 	  AMBA markee):
 		- "arm,coresight-replicator"
 
-	* port or ports: same as above.
+	* port or ports: see "Graph bindings for Coresight" below.
 
 * Optional properties for ETM/PTMs:
 
@@ -96,6 +94,20 @@ its hardware characteristcs.
 	* interrupts : Exactly one SPI may be listed for reporting the address
 	  error
 
+Graph bindings for Coresight
+-------------------------------
+
+Coresight components are interconnected to create a data path for the flow of
+trace data generated from the "sources" to their collection points "sink".
+Each coresight component must describe the "input" and "output" connections.
+The connections must be described via generic DT graph bindings as described
+by the "bindings/graph.txt", where each "port" along with an "endpoint"
+component represents a hardware port and the connection.
+
+ * All output ports must be listed inside a child node named "out-ports"
+ * All input ports must be listed inside a child node named "in-ports".
+ * Port address must match the hardware port number.
+
 Example:
 
 1. Sinks
@@ -105,10 +117,11 @@ Example:
 
 		clocks = <&oscclk6a>;
 		clock-names = "apb_pclk";
-		port {
-			etb_in_port: endpoint@0 {
-				slave-mode;
-				remote-endpoint = <&replicator_out_port0>;
+		in-ports {
+			port {
+				etb_in_port: endpoint@0 {
+					remote-endpoint = <&replicator_out_port0>;
+				};
 			};
 		};
 	};
@@ -119,10 +132,11 @@ Example:
 
 		clocks = <&oscclk6a>;
 		clock-names = "apb_pclk";
-		port {
-			tpiu_in_port: endpoint@0 {
-				slave-mode;
-				remote-endpoint = <&replicator_out_port1>;
+		in-ports {
+			port {
+				tpiu_in_port: endpoint@0 {
+					remote-endpoint = <&replicator_out_port1>;
+				};
 			};
 		};
 	};
@@ -133,22 +147,16 @@ Example:
 
 		clocks = <&oscclk6a>;
 		clock-names = "apb_pclk";
-		ports {
-			#address-cells = <1>;
-			#size-cells = <0>;
-
-			/* input port */
-			port@0 {
-				reg =  <0>;
+		in-ports {
+			port {
 				etr_in_port: endpoint {
-					slave-mode;
 					remote-endpoint = <&replicator2_out_port0>;
 				};
 			};
+		};
 
-			/* CATU link represented by output port */
-			port@1 {
-				reg = <1>;
+		out-ports {
+			port {
 				etr_out_port: endpoint {
 					remote-endpoint = <&catu_in_port>;
 				};
@@ -163,7 +171,7 @@ Example:
 		 */
 		compatible = "arm,coresight-replicator";
 
-		ports {
+		out-ports {
 			#address-cells = <1>;
 			#size-cells = <0>;
 
@@ -181,12 +189,11 @@ Example:
 					remote-endpoint = <&tpiu_in_port>;
 				};
 			};
+		};
 
-			/* replicator input port */
-			port@2 {
-				reg = <0>;
+		in-ports {
+			port {
 				replicator_in_port0: endpoint {
-					slave-mode;
 					remote-endpoint = <&funnel_out_port0>;
 				};
 			};
@@ -199,40 +206,36 @@ Example:
 
 		clocks = <&oscclk6a>;
 		clock-names = "apb_pclk";
-		ports {
-			#address-cells = <1>;
-			#size-cells = <0>;
-
-			/* funnel output port */
-			port@0 {
-				reg = <0>;
+		out-ports {
+			port {
 				funnel_out_port0: endpoint {
 					remote-endpoint =
 							<&replicator_in_port0>;
 				};
 			};
+		};
 
-			/* funnel input ports */
-			port@1 {
+		in-ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
 				reg = <0>;
 				funnel_in_port0: endpoint {
-					slave-mode;
 					remote-endpoint = <&ptm0_out_port>;
 				};
 			};
 
-			port@2 {
+			port@1 {
 				reg = <1>;
 				funnel_in_port1: endpoint {
-					slave-mode;
 					remote-endpoint = <&ptm1_out_port>;
 				};
 			};
 
-			port@3 {
+			port@2 {
 				reg = <2>;
 				funnel_in_port2: endpoint {
-					slave-mode;
 					remote-endpoint = <&etm0_out_port>;
 				};
 			};
@@ -248,9 +251,11 @@ Example:
 		cpu = <&cpu0>;
 		clocks = <&oscclk6a>;
 		clock-names = "apb_pclk";
-		port {
-			ptm0_out_port: endpoint {
-				remote-endpoint = <&funnel_in_port0>;
+		out-ports {
+			port {
+				ptm0_out_port: endpoint {
+					remote-endpoint = <&funnel_in_port0>;
+				};
 			};
 		};
 	};
@@ -262,9 +267,11 @@ Example:
 		cpu = <&cpu1>;
 		clocks = <&oscclk6a>;
 		clock-names = "apb_pclk";
-		port {
-			ptm1_out_port: endpoint {
-				remote-endpoint = <&funnel_in_port1>;
+		out-ports {
+			port {
+				ptm1_out_port: endpoint {
+					remote-endpoint = <&funnel_in_port1>;
+				};
 			};
 		};
 	};
@@ -278,9 +285,11 @@ Example:
 
 		clocks = <&soc_smc50mhz>;
 		clock-names = "apb_pclk";
-		port {
-			stm_out_port: endpoint {
-				remote-endpoint = <&main_funnel_in_port2>;
+		out-ports {
+			port {
+				stm_out_port: endpoint {
+					remote-endpoint = <&main_funnel_in_port2>;
+				};
 			};
 		};
 	};
@@ -295,10 +304,11 @@ Example:
 		clock-names = "apb_pclk";
 
 		interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
-		port {
-			catu_in_port: endpoint {
-				slave-mode;
-				remote-endpoint = <&etr_out_port>;
+		in-ports {
+			port {
+				catu_in_port: endpoint {
+					remote-endpoint = <&etr_out_port>;
+				};
 			};
 		};
 	};
diff --git a/Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp b/Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp
index c2e0cc5e4cfd..35e5afb6d9ad 100644
--- a/Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp
+++ b/Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp
@@ -14,7 +14,28 @@ Related properties:	(none)
 
 Note:
 This enable method requires valid nodes compatible with
-"al,alpine-cpu-resume" and "al,alpine-nb-service"[1].
+"al,alpine-cpu-resume" and "al,alpine-nb-service".
+
+
+* Alpine CPU resume registers
+
+The CPU resume register are used to define required resume address after
+reset.
+
+Properties:
+- compatible : Should contain "al,alpine-cpu-resume".
+- reg : Offset and length of the register set for the device
+
+
+* Alpine System-Fabric Service Registers
+
+The System-Fabric Service Registers allow various operation on CPU and
+system fabric, like powering CPUs off.
+
+Properties:
+- compatible : Should contain "al,alpine-sysfabric-service" and "syscon".
+- reg : Offset and length of the register set for the device
+
 
 Example:
 
@@ -48,5 +69,12 @@ cpus {
 	};
 };
 
---
-[1] arm/al,alpine.txt
+cpu_resume {
+	compatible = "al,alpine-cpu-resume";
+	reg = <0xfbff5ed0 0x30>;
+};
+
+nb_service {
+        compatible = "al,alpine-sysfabric-service", "syscon";
+        reg = <0xfb070000 0x10000>;
+};
diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt
index 96dfccc0faa8..b0198a1cf403 100644
--- a/Documentation/devicetree/bindings/arm/cpus.txt
+++ b/Documentation/devicetree/bindings/arm/cpus.txt
@@ -276,7 +276,7 @@ described below.
 		Usage: optional
 		Value type: <prop-encoded-array>
 		Definition: A u32 value that represents the running time dynamic
-			    power coefficient in units of mW/MHz/uV^2. The
+			    power coefficient in units of uW/MHz/V^2. The
 			    coefficient can either be calculated from power
 			    measurements or derived by analysis.
 
@@ -287,7 +287,7 @@ described below.
 
 			    Pdyn = dynamic-power-coefficient * V^2 * f
 
-			    where voltage is in uV, frequency is in MHz.
+			    where voltage is in V, frequency is in MHz.
 
 Example 1 (dual-cluster big.LITTLE system 32-bit):
 
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt
new file mode 100644
index 000000000000..b5cb374dc47d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt
@@ -0,0 +1,19 @@
+Freescale DCFG
+
+DCFG is the device configuration unit, that provides general purpose
+configuration and status for the device. Such as setting the secondary
+core start address and release the secondary core from holdoff and startup.
+
+Required properties:
+  - compatible: Should contain a chip-specific compatible string,
+	Chip-specific strings are of the form "fsl,<chip>-dcfg",
+	The following <chip>s are known to be supported:
+	ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
+
+  - reg : should contain base address and length of DCFG memory-mapped registers
+
+Example:
+	dcfg: dcfg@1ee0000 {
+		compatible = "fsl,ls1021a-dcfg";
+		reg = <0x0 0x1ee0000 0x0 0x10000>;
+	};
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-scfg.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-scfg.txt
new file mode 100644
index 000000000000..0ab67b0b216d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-scfg.txt
@@ -0,0 +1,19 @@
+Freescale SCFG
+
+SCFG is the supplemental configuration unit, that provides SoC specific
+configuration and status registers for the chip. Such as getting PEX port
+status.
+
+Required properties:
+  - compatible: Should contain a chip-specific compatible string,
+	Chip-specific strings are of the form "fsl,<chip>-scfg",
+	The following <chip>s are known to be supported:
+	ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
+
+  - reg: should contain base address and length of SCFG memory-mapped registers
+
+Example:
+	scfg: scfg@1570000 {
+		compatible = "fsl,ls1021a-scfg";
+		reg = <0x0 0x1570000 0x0 0x10000>;
+	};
diff --git a/Documentation/devicetree/bindings/arm/fsl.txt b/Documentation/devicetree/bindings/arm/fsl.txt
index 8a1baa2b9723..1e775aaa5c5b 100644
--- a/Documentation/devicetree/bindings/arm/fsl.txt
+++ b/Documentation/devicetree/bindings/arm/fsl.txt
@@ -101,45 +101,6 @@ Freescale LS1021A Platform Device Tree Bindings
 Required root node compatible properties:
   - compatible = "fsl,ls1021a";
 
-Freescale SoC-specific Device Tree Bindings
--------------------------------------------
-
-Freescale SCFG
-  SCFG is the supplemental configuration unit, that provides SoC specific
-configuration and status registers for the chip. Such as getting PEX port
-status.
-  Required properties:
-  - compatible: Should contain a chip-specific compatible string,
-	Chip-specific strings are of the form "fsl,<chip>-scfg",
-	The following <chip>s are known to be supported:
-	ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
-
-  - reg: should contain base address and length of SCFG memory-mapped registers
-
-Example:
-	scfg: scfg@1570000 {
-		compatible = "fsl,ls1021a-scfg";
-		reg = <0x0 0x1570000 0x0 0x10000>;
-	};
-
-Freescale DCFG
-  DCFG is the device configuration unit, that provides general purpose
-configuration and status for the device. Such as setting the secondary
-core start address and release the secondary core from holdoff and startup.
-  Required properties:
-  - compatible: Should contain a chip-specific compatible string,
-	Chip-specific strings are of the form "fsl,<chip>-dcfg",
-	The following <chip>s are known to be supported:
-	ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
-
-  - reg : should contain base address and length of DCFG memory-mapped registers
-
-Example:
-	dcfg: dcfg@1ee0000 {
-		compatible = "fsl,ls1021a-dcfg";
-		reg = <0x0 0x1ee0000 0x0 0x10000>;
-	};
-
 Freescale ARMv8 based Layerscape SoC family Device Tree Bindings
 ----------------------------------------------------------------
 
diff --git a/Documentation/devicetree/bindings/arm/secure.txt b/Documentation/devicetree/bindings/arm/secure.txt
index e31303fb233a..f27bbff2c780 100644
--- a/Documentation/devicetree/bindings/arm/secure.txt
+++ b/Documentation/devicetree/bindings/arm/secure.txt
@@ -32,7 +32,8 @@ describe the view of Secure world using the standard bindings. These
 secure- bindings only need to be used where both the Secure and Normal
 world views need to be described in a single device tree.
 
-Valid Secure world properties:
+Valid Secure world properties
+-----------------------------
 
 - secure-status : specifies whether the device is present and usable
   in the secure world. The combination of this with "status" allows
@@ -51,3 +52,19 @@ Valid Secure world properties:
    status = "disabled"; secure-status = "okay";     /* S-only */
    status = "disabled";                             /* disabled in both */
    status = "disabled"; secure-status = "disabled"; /* disabled in both */
+
+The secure-chosen node
+----------------------
+
+Similar to the /chosen node which serves as a place for passing data
+between firmware and the operating system, the /secure-chosen node may
+be used to pass data to the Secure OS. Only the properties defined
+below may appear in the /secure-chosen node.
+
+- stdout-path : specifies the device to be used by the Secure OS for
+  its console output. The syntax is the same as for /chosen/stdout-path.
+  If the /secure-chosen node exists but the stdout-path property is not
+  present, the Secure OS should not perform any console output. If
+  /secure-chosen does not exist, the Secure OS should use the value of
+  /chosen/stdout-path instead (that is, use the same device as the
+  Normal world OS).
diff --git a/Documentation/devicetree/bindings/arm/zte,sysctrl.txt b/Documentation/devicetree/bindings/arm/zte,sysctrl.txt
new file mode 100644
index 000000000000..7e66b7f7ba96
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/zte,sysctrl.txt
@@ -0,0 +1,30 @@
+ZTE sysctrl Registers
+
+Registers for 'zte,zx296702' SoC:
+
+System management required properties:
+      - compatible = "zte,sysctrl"
+
+Low power management required properties:
+      - compatible = "zte,zx296702-pcu"
+
+Bus matrix required properties:
+      - compatible = "zte,zx-bus-matrix"
+
+
+Registers for 'zte,zx296718' SoC:
+
+System management required properties:
+      - compatible = "zte,zx296718-aon-sysctrl"
+      - compatible = "zte,zx296718-sysctrl"
+
+Example:
+aon_sysctrl: aon-sysctrl@116000 {
+	compatible = "zte,zx296718-aon-sysctrl", "syscon";
+	reg = <0x116000 0x1000>;
+};
+
+sysctrl: sysctrl@1463000 {
+	compatible = "zte,zx296718-sysctrl", "syscon";
+	reg = <0x1463000 0x1000>;
+};
diff --git a/Documentation/devicetree/bindings/arm/zte.txt b/Documentation/devicetree/bindings/arm/zte.txt
index 83369785d29c..340612794a37 100644
--- a/Documentation/devicetree/bindings/arm/zte.txt
+++ b/Documentation/devicetree/bindings/arm/zte.txt
@@ -1,20 +1,10 @@
 ZTE platforms device tree bindings
----------------------------------------
 
+---------------------------------------
 -  ZX296702 board:
     Required root node properties:
       - compatible = "zte,zx296702-ad1", "zte,zx296702"
 
-System management required properties:
-      - compatible = "zte,sysctrl"
-
-Low power management required properties:
-      - compatible = "zte,zx296702-pcu"
-
-Bus matrix required properties:
-      - compatible = "zte,zx-bus-matrix"
-
-
 ---------------------------------------
 -  ZX296718 SoC:
     Required root node properties:
@@ -22,18 +12,3 @@ Bus matrix required properties:
 
 ZX296718 EVB board:
       - "zte,zx296718-evb"
-
-System management required properties:
-      - compatible = "zte,zx296718-aon-sysctrl"
-      - compatible = "zte,zx296718-sysctrl"
-
-Example:
-aon_sysctrl: aon-sysctrl@116000 {
-	compatible = "zte,zx296718-aon-sysctrl", "syscon";
-	reg = <0x116000 0x1000>;
-};
-
-sysctrl: sysctrl@1463000 {
-	compatible = "zte,zx296718-sysctrl", "syscon";
-	reg = <0x1463000 0x1000>;
-};
diff --git a/Documentation/devicetree/bindings/connector/usb-connector.txt b/Documentation/devicetree/bindings/connector/usb-connector.txt
index 8855bfcfd778..d90e17e2428b 100644
--- a/Documentation/devicetree/bindings/connector/usb-connector.txt
+++ b/Documentation/devicetree/bindings/connector/usb-connector.txt
@@ -29,15 +29,15 @@ Required properties for usb-c-connector with power delivery support:
   in "Universal Serial Bus Power Delivery Specification" chapter 6.4.1.2
   Source_Capabilities Message, the order of each entry(PDO) should follow
   the PD spec chapter 6.4.1. Required for power source and power dual role.
-  User can specify the source PDO array via PDO_FIXED/BATT/VAR() defined in
-  dt-bindings/usb/pd.h.
+  User can specify the source PDO array via PDO_FIXED/BATT/VAR/PPS_APDO()
+  defined in dt-bindings/usb/pd.h.
 - sink-pdos: An array of u32 with each entry providing supported power
   sink data object(PDO), the detailed bit definitions of PDO can be found
   in "Universal Serial Bus Power Delivery Specification" chapter 6.4.1.3
   Sink Capabilities Message, the order of each entry(PDO) should follow
   the PD spec chapter 6.4.1. Required for power sink and power dual role.
-  User can specify the sink PDO array via PDO_FIXED/BATT/VAR() defined in
-  dt-bindings/usb/pd.h.
+  User can specify the sink PDO array via PDO_FIXED/BATT/VAR/PPS_APDO() defined
+  in dt-bindings/usb/pd.h.
 - op-sink-microwatt: Sink required operating power in microwatt, if source
   can't offer the power, Capability Mismatch is set. Required for power
   sink and power dual role.
diff --git a/Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt b/Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt
index 78d2db9d4de5..d28fd1af01b4 100644
--- a/Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt
+++ b/Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt
@@ -24,7 +24,7 @@ Optional properties:
 
 Example:
 
-p1_sec_a: crypto@400,d2000000 {
+p1_sec_a: crypto@400d2000000 {
 	compatible = "hisilicon,hip07-sec";
 	reg = <0x400 0xd0000000 0x0 0x10000
 	       0x400 0xd2000000 0x0 0x10000
diff --git a/Documentation/devicetree/bindings/fpga/fpga-region.txt b/Documentation/devicetree/bindings/fpga/fpga-region.txt
index 6db8aeda461a..90c44694a30b 100644
--- a/Documentation/devicetree/bindings/fpga/fpga-region.txt
+++ b/Documentation/devicetree/bindings/fpga/fpga-region.txt
@@ -415,7 +415,7 @@ DT Overlay contains:
 			firmware-name = "base.rbf";
 
 			fpga-bridge@4400 {
-				compatible = "altr,freeze-bridge";
+				compatible = "altr,freeze-bridge-controller";
 				reg = <0x4400 0x10>;
 
 				fpga_region1: fpga-region1 {
@@ -427,7 +427,7 @@ DT Overlay contains:
 			};
 
 			fpga-bridge@4420 {
-				compatible = "altr,freeze-bridge";
+				compatible = "altr,freeze-bridge-controller";
 				reg = <0x4420 0x10>;
 
 				fpga_region2: fpga-region2 {
diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt b/Documentation/devicetree/bindings/i2c/i2c.txt
index 11263982470e..44efafdfd7f5 100644
--- a/Documentation/devicetree/bindings/i2c/i2c.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c.txt
@@ -84,7 +84,7 @@ Binding may contain optional "interrupts" property, describing interrupts
 used by the device. I2C core will assign "irq" interrupt (or the very first
 interrupt if not using interrupt names) as primary interrupt for the slave.
 
-Alternatively, devices supporting SMbus Host Notify, and connected to
+Alternatively, devices supporting SMBus Host Notify, and connected to
 adapters that support this feature, may use "host-notify" property. I2C
 core will create a virtual interrupt for Host Notify and assign it as
 primary interrupt for the slave.
diff --git a/Documentation/devicetree/bindings/interrupt-controller/marvell,icu.txt b/Documentation/devicetree/bindings/interrupt-controller/marvell,icu.txt
index aa8bf2ec8905..1c94a57a661e 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/marvell,icu.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/marvell,icu.txt
@@ -5,6 +5,8 @@ The Marvell ICU (Interrupt Consolidation Unit) controller is
 responsible for collecting all wired-interrupt sources in the CP and
 communicating them to the GIC in the AP, the unit translates interrupt
 requests on input wires to MSG memory mapped transactions to the GIC.
+These messages will access a different GIC memory area depending on
+their type (NSR, SR, SEI, REI, etc).
 
 Required properties:
 
@@ -12,20 +14,23 @@ Required properties:
 
 - reg: Should contain ICU registers location and length.
 
-- #interrupt-cells: Specifies the number of cells needed to encode an
-  interrupt source. The value shall be 3.
+Subnodes: Each group of interrupt is declared as a subnode of the ICU,
+with their own compatible.
+
+Required properties for the icu_nsr/icu_sei subnodes:
 
-  The 1st cell is the group type of the ICU interrupt. Possible group
-  types are:
+- compatible: Should be one of:
+              * "marvell,cp110-icu-nsr"
+	      * "marvell,cp110-icu-sr"
+	      * "marvell,cp110-icu-sei"
+	      * "marvell,cp110-icu-rei"
 
-   ICU_GRP_NSR (0x0) : Shared peripheral interrupt, non-secure
-   ICU_GRP_SR  (0x1) : Shared peripheral interrupt, secure
-   ICU_GRP_SEI (0x4) : System error interrupt
-   ICU_GRP_REI (0x5) : RAM error interrupt
+- #interrupt-cells: Specifies the number of cells needed to encode an
+  interrupt source. The value shall be 2.
 
-  The 2nd cell is the index of the interrupt in the ICU unit.
+  The 1st cell is the index of the interrupt in the ICU unit.
 
-  The 3rd cell is the type of the interrupt. See arm,gic.txt for
+  The 2nd cell is the type of the interrupt. See arm,gic.txt for
   details.
 
 - interrupt-controller: Identifies the node as an interrupt
@@ -35,17 +40,73 @@ Required properties:
   that allows to trigger interrupts using MSG memory mapped
   transactions.
 
+Note: each 'interrupts' property referring to any 'icu_xxx' node shall
+      have a different number within [0:206].
+
 Example:
 
 icu: interrupt-controller@1e0000 {
 	compatible = "marvell,cp110-icu";
-	reg = <0x1e0000 0x10>;
+	reg = <0x1e0000 0x440>;
+
+	CP110_LABEL(icu_nsr): interrupt-controller@10 {
+		compatible = "marvell,cp110-icu-nsr";
+		reg = <0x10 0x20>;
+		#interrupt-cells = <2>;
+		interrupt-controller;
+		msi-parent = <&gicp>;
+	};
+
+	CP110_LABEL(icu_sei): interrupt-controller@50 {
+		compatible = "marvell,cp110-icu-sei";
+		reg = <0x50 0x10>;
+		#interrupt-cells = <2>;
+		interrupt-controller;
+		msi-parent = <&sei>;
+	};
+};
+
+node1 {
+	interrupt-parent = <&icu_nsr>;
+	interrupts = <106 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+node2 {
+	interrupt-parent = <&icu_sei>;
+	interrupts = <107 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+/* Would not work with the above nodes */
+node3 {
+	interrupt-parent = <&icu_nsr>;
+	interrupts = <107 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+The legacy bindings were different in this way:
+
+- #interrupt-cells: The value was 3.
+	The 1st cell was the group type of the ICU interrupt. Possible
+	group types were:
+	ICU_GRP_NSR (0x0) : Shared peripheral interrupt, non-secure
+	ICU_GRP_SR  (0x1) : Shared peripheral interrupt, secure
+	ICU_GRP_SEI (0x4) : System error interrupt
+	ICU_GRP_REI (0x5) : RAM error interrupt
+	The 2nd cell was the index of the interrupt in the ICU unit.
+	The 3rd cell was the type of the interrupt. See arm,gic.txt for
+	details.
+
+Example:
+
+icu: interrupt-controller@1e0000 {
+	compatible = "marvell,cp110-icu";
+	reg = <0x1e0000 0x440>;
+
 	#interrupt-cells = <3>;
 	interrupt-controller;
 	msi-parent = <&gicp>;
 };
 
-usb3h0: usb3@500000 {
+node1 {
 	interrupt-parent = <&icu>;
 	interrupts = <ICU_GRP_NSR 106 IRQ_TYPE_LEVEL_HIGH>;
 };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/marvell,sei.txt b/Documentation/devicetree/bindings/interrupt-controller/marvell,sei.txt
new file mode 100644
index 000000000000..0beafed502f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/marvell,sei.txt
@@ -0,0 +1,36 @@
+Marvell SEI (System Error Interrupt) Controller
+-----------------------------------------------
+
+Marvell SEI (System Error Interrupt) controller is an interrupt
+aggregator. It receives interrupts from several sources and aggregates
+them to a single interrupt line (an SPI) on the parent interrupt
+controller.
+
+This interrupt controller can handle up to 64 SEIs, a set comes from the
+AP and is wired while a second set comes from the CPs by the mean of
+MSIs.
+
+Required properties:
+
+- compatible: should be one of:
+              * "marvell,ap806-sei"
+- reg: SEI registers location and length.
+- interrupts: identifies the parent IRQ that will be triggered.
+- #interrupt-cells: number of cells to define an SEI wired interrupt
+                    coming from the AP, should be 1. The cell is the IRQ
+                    number.
+- interrupt-controller: identifies the node as an interrupt controller
+                        for AP interrupts.
+- msi-controller: identifies the node as an MSI controller for the CPs
+                  interrupts.
+
+Example:
+
+        sei: interrupt-controller@3f0200 {
+                compatible = "marvell,ap806-sei";
+                reg = <0x3f0200 0x40>;
+                interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>;
+                #interrupt-cells = <1>;
+                interrupt-controller;
+                msi-controller;
+        };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
index a046ed374d80..8de96a4fb2d5 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
@@ -2,10 +2,12 @@ DT bindings for the R-Mobile/R-Car/RZ/G interrupt controller
 
 Required properties:
 
-- compatible: has to be "renesas,irqc-<soctype>", "renesas,irqc" as fallback.
+- compatible: must be "renesas,irqc-<soctype>" or "renesas,intc-ex-<soctype>",
+	      and "renesas,irqc" as fallback.
   Examples with soctypes are:
     - "renesas,irqc-r8a73a4" (R-Mobile APE6)
     - "renesas,irqc-r8a7743" (RZ/G1M)
+    - "renesas,irqc-r8a7744" (RZ/G1N)
     - "renesas,irqc-r8a7745" (RZ/G1E)
     - "renesas,irqc-r8a77470" (RZ/G1C)
     - "renesas,irqc-r8a7790" (R-Car H2)
@@ -19,6 +21,7 @@ Required properties:
     - "renesas,intc-ex-r8a77965" (R-Car M3-N)
     - "renesas,intc-ex-r8a77970" (R-Car V3M)
     - "renesas,intc-ex-r8a77980" (R-Car V3H)
+    - "renesas,intc-ex-r8a77990" (R-Car E3)
     - "renesas,intc-ex-r8a77995" (R-Car D3)
 - #interrupt-cells: has to be <2>: an interrupt index and flags, as defined in
   interrupts.txt in this directory
diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
index c6e2d855fe13..377ee639d103 100644
--- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
+++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
@@ -12,6 +12,7 @@ Required Properties:
 
     - "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU.
     - "renesas,ipmmu-r8a7743" for the R8A7743 (RZ/G1M) IPMMU.
+    - "renesas,ipmmu-r8a7744" for the R8A7744 (RZ/G1N) IPMMU.
     - "renesas,ipmmu-r8a7745" for the R8A7745 (RZ/G1E) IPMMU.
     - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU.
     - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU.
diff --git a/Documentation/devicetree/bindings/mfd/arizona.txt b/Documentation/devicetree/bindings/mfd/arizona.txt
index 9b62831fdf3e..148ef621a5e5 100644
--- a/Documentation/devicetree/bindings/mfd/arizona.txt
+++ b/Documentation/devicetree/bindings/mfd/arizona.txt
@@ -76,7 +76,7 @@ Deprecated properties:
 Also see child specific device properties:
   Regulator - ../regulator/arizona-regulator.txt
   Extcon    - ../extcon/extcon-arizona.txt
-  Sound     - ../sound/arizona.txt
+  Sound     - ../sound/wlf,arizona.txt
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
index 6611a7c2053a..01fdc33a41d0 100644
--- a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
+++ b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
@@ -9,6 +9,25 @@ blocks that can be used to create functional hardware objects/devices
 such as network interfaces, crypto accelerator instances, L2 switches,
 etc.
 
+For an overview of the DPAA2 architecture and fsl-mc bus see:
+Documentation/networking/dpaa2/overview.rst
+
+As described in the above overview, all DPAA2 objects in a DPRC share the
+same hardware "isolation context" and a 10-bit value called an ICID
+(isolation context id) is expressed by the hardware to identify
+the requester.
+
+The generic 'iommus' property is insufficient to describe the relationship
+between ICIDs and IOMMUs, so an iommu-map property is used to define
+the set of possible ICIDs under a root DPRC and how they map to
+an IOMMU.
+
+For generic IOMMU bindings, see
+Documentation/devicetree/bindings/iommu/iommu.txt.
+
+For arm-smmu binding, see:
+Documentation/devicetree/bindings/iommu/arm,smmu.txt.
+
 Required properties:
 
     - compatible
@@ -88,14 +107,34 @@ Sub-nodes:
               Value type: <phandle>
               Definition: Specifies the phandle to the PHY device node associated
                           with the this dpmac.
+Optional properties:
+
+- iommu-map: Maps an ICID to an IOMMU and associated iommu-specifier
+  data.
+
+  The property is an arbitrary number of tuples of
+  (icid-base,iommu,iommu-base,length).
+
+  Any ICID i in the interval [icid-base, icid-base + length) is
+  associated with the listed IOMMU, with the iommu-specifier
+  (i - icid-base + iommu-base).
 
 Example:
 
+        smmu: iommu@5000000 {
+               compatible = "arm,mmu-500";
+               #iommu-cells = <1>;
+               stream-match-mask = <0x7C00>;
+               ...
+        };
+
         fsl_mc: fsl-mc@80c000000 {
                 compatible = "fsl,qoriq-mc";
                 reg = <0x00000008 0x0c000000 0 0x40>,    /* MC portal base */
                       <0x00000000 0x08340000 0 0x40000>; /* MC control reg */
                 msi-parent = <&its>;
+                /* define map for ICIDs 23-64 */
+                iommu-map = <23 &smmu 23 41>;
                 #address-cells = <3>;
                 #size-cells = <1>;
 
diff --git a/Documentation/devicetree/bindings/misc/lwn-bk4.txt b/Documentation/devicetree/bindings/misc/lwn-bk4.txt
new file mode 100644
index 000000000000..d6a8c188c087
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/lwn-bk4.txt
@@ -0,0 +1,26 @@
+* Liebherr's BK4 controller external SPI
+
+A device which handles data acquisition from compatible industrial
+peripherals.
+The SPI is used for data and management purposes in both master and
+slave modes.
+
+Required properties:
+
+- compatible : Should be "lwn,bk4"
+
+Required SPI properties:
+
+- reg : Should be address of the device chip select within
+  the controller.
+
+- spi-max-frequency : Maximum SPI clocking speed of device in Hz, should be
+  30MHz at most for the Liebherr's BK4 external bus.
+
+Example:
+
+spidev0: spi@0 {
+	compatible = "lwn,bk4";
+	spi-max-frequency = <30000000>;
+	reg = <0>;
+};
diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt
index 94a7f33ac5e9..cc4372842bf3 100644
--- a/Documentation/devicetree/bindings/net/can/rcar_can.txt
+++ b/Documentation/devicetree/bindings/net/can/rcar_can.txt
@@ -3,6 +3,7 @@ Renesas R-Car CAN controller Device Tree Bindings
 
 Required properties:
 - compatible: "renesas,can-r8a7743" if CAN controller is a part of R8A7743 SoC.
+	      "renesas,can-r8a7744" if CAN controller is a part of R8A7744 SoC.
 	      "renesas,can-r8a7745" if CAN controller is a part of R8A7745 SoC.
 	      "renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC.
 	      "renesas,can-r8a7779" if CAN controller is a part of R8A7779 SoC.
diff --git a/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt b/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt
index 0aced97d8092..b640845fec67 100644
--- a/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt
+++ b/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt
@@ -8,6 +8,7 @@ Required properties:
      "brcm,iproc-nsp-sata-phy"
      "brcm,phy-sata3"
      "brcm,iproc-sr-sata-phy"
+     "brcm,bcm63138-sata-phy"
 - address-cells: should be 1
 - size-cells: should be 0
 - reg: register ranges for the PHY PCB interface
diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-dp.txt b/Documentation/devicetree/bindings/phy/phy-cadence-dp.txt
new file mode 100644
index 000000000000..7f49fd54ebc1
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-cadence-dp.txt
@@ -0,0 +1,30 @@
+Cadence MHDP DisplayPort SD0801 PHY binding
+===========================================
+
+This binding describes the Cadence SD0801 PHY hardware included with
+the Cadence MHDP DisplayPort controller.
+
+-------------------------------------------------------------------------------
+Required properties (controller (parent) node):
+- compatible	: Should be "cdns,dp-phy"
+- reg		: Defines the following sets of registers in the parent
+		  mhdp device:
+			- Offset of the DPTX PHY configuration registers
+			- Offset of the SD0801 PHY configuration registers
+- #phy-cells	: from the generic PHY bindings, must be 0.
+
+Optional properties:
+- num_lanes	: Number of DisplayPort lanes to use (1, 2 or 4)
+- max_bit_rate	: Maximum DisplayPort link bit rate to use, in Mbps (2160,
+		  2430, 2700, 3240, 4320, 5400 or 8100)
+-------------------------------------------------------------------------------
+
+Example:
+	dp_phy: phy@f0fb030a00 {
+		compatible = "cdns,dp-phy";
+		reg = <0xf0 0xfb030a00 0x0 0x00000040>,
+		      <0xf0 0xfb500000 0x0 0x00100000>;
+		num_lanes = <4>;
+		max_bit_rate = <8100>;
+		#phy-cells = <0>;
+	};
diff --git a/Documentation/devicetree/bindings/phy/phy-rockchip-inno-hdmi.txt b/Documentation/devicetree/bindings/phy/phy-rockchip-inno-hdmi.txt
new file mode 100644
index 000000000000..710cccd5ee56
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-rockchip-inno-hdmi.txt
@@ -0,0 +1,43 @@
+ROCKCHIP HDMI PHY WITH INNO IP BLOCK
+
+Required properties:
+ - compatible : should be one of the listed compatibles:
+	* "rockchip,rk3228-hdmi-phy",
+	* "rockchip,rk3328-hdmi-phy";
+ - reg : Address and length of the hdmi phy control register set
+ - clocks : phandle + clock specifier for the phy clocks
+ - clock-names : string, clock name, must contain "sysclk" for system
+	  control and register configuration, "refoclk" for crystal-
+	  oscillator reference PLL clock input and "refpclk" for pclk-
+	  based refeference PLL clock input.
+ - #clock-cells: should be 0.
+ - clock-output-names : shall be the name for the output clock.
+ - interrupts : phandle + interrupt specified for the hdmiphy interrupt
+ - #phy-cells : must be 0. See ./phy-bindings.txt for details.
+
+Optional properties for rk3328-hdmi-phy:
+ - nvmem-cells = phandle + nvmem specifier for the cpu-version efuse
+ - nvmem-cell-names : "cpu-version" to read the chip version, required
+	  for adjustment to some frequency settings
+
+Example:
+	hdmi_phy: hdmi-phy@12030000 {
+		compatible = "rockchip,rk3228-hdmi-phy";
+		reg = <0x12030000 0x10000>;
+		#phy-cells = <0>;
+		clocks = <&cru PCLK_HDMI_PHY>, <&xin24m>, <&cru DCLK_HDMIPHY>;
+		clock-names = "sysclk", "refoclk", "refpclk";
+		#clock-cells = <0>;
+		clock-output-names = "hdmi_phy";
+		status = "disabled";
+	};
+
+Then the PHY can be used in other nodes such as:
+
+	hdmi: hdmi@200a0000 {
+		compatible = "rockchip,rk3228-dw-hdmi";
+		...
+		phys = <&hdmi_phy>;
+		phy-names = "hdmi";
+		...
+	};
diff --git a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
index 0c7629e88bf3..adf20b2bdf71 100644
--- a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
+++ b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
@@ -10,16 +10,20 @@ Required properties:
 	       "qcom,msm8996-qmp-pcie-phy" for 14nm PCIe phy on msm8996,
 	       "qcom,msm8996-qmp-usb3-phy" for 14nm USB3 phy on msm8996,
 	       "qcom,sdm845-qmp-usb3-phy" for USB3 QMP V3 phy on sdm845,
-	       "qcom,sdm845-qmp-usb3-uni-phy" for USB3 QMP V3 UNI phy on sdm845.
+	       "qcom,sdm845-qmp-usb3-uni-phy" for USB3 QMP V3 UNI phy on sdm845,
+	       "qcom,sdm845-qmp-ufs-phy" for UFS QMP phy on sdm845.
 
- - reg:
-   - For "qcom,sdm845-qmp-usb3-phy":
-     - index 0: address and length of register set for PHY's common serdes
-       block.
-     - named register "dp_com" (using reg-names): address and length of the
-       DP_COM control block.
-   - For all others:
-     - offset and length of register set for PHY's common serdes block.
+- reg:
+  - index 0: address and length of register set for PHY's common
+             serdes block.
+  - index 1: address and length of the DP_COM control block (for
+             "qcom,sdm845-qmp-usb3-phy" only).
+
+- reg-names:
+  - For "qcom,sdm845-qmp-usb3-phy":
+    - Should be: "reg-base", "dp_com"
+  - For all others:
+    - The reg-names property shouldn't be defined.
 
  - #clock-cells: must be 1
     - Phy pll outputs a bunch of clocks for Tx, Rx and Pipe
@@ -35,6 +39,7 @@ Required properties:
 		"aux" for phy aux clock,
 		"ref" for 19.2 MHz ref clk,
 		"com_aux" for phy common block aux clock,
+		"ref_aux" for phy reference aux clock,
 		For "qcom,msm8996-qmp-pcie-phy" must contain:
 			"aux", "cfg_ahb", "ref".
 		For "qcom,msm8996-qmp-usb3-phy" must contain:
diff --git a/Documentation/devicetree/bindings/phy/rcar-gen2-phy.txt b/Documentation/devicetree/bindings/phy/rcar-gen2-phy.txt
index eeb9e1874ea6..4f0879a0ca12 100644
--- a/Documentation/devicetree/bindings/phy/rcar-gen2-phy.txt
+++ b/Documentation/devicetree/bindings/phy/rcar-gen2-phy.txt
@@ -5,6 +5,7 @@ This file provides information on what the device node for the R-Car generation
 
 Required properties:
 - compatible: "renesas,usb-phy-r8a7743" if the device is a part of R8A7743 SoC.
+	      "renesas,usb-phy-r8a7744" if the device is a part of R8A7744 SoC.
 	      "renesas,usb-phy-r8a7745" if the device is a part of R8A7745 SoC.
 	      "renesas,usb-phy-r8a7790" if the device is a part of R8A7790 SoC.
 	      "renesas,usb-phy-r8a7791" if the device is a part of R8A7791 SoC.
diff --git a/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt
index fb4a204da2bf..de7b5393c163 100644
--- a/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt
+++ b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt
@@ -1,10 +1,12 @@
 * Renesas R-Car generation 3 USB 2.0 PHY
 
 This file provides information on what the device node for the R-Car generation
-3 USB 2.0 PHY contains.
+3 and RZ/G2 USB 2.0 PHY contain.
 
 Required properties:
-- compatible: "renesas,usb2-phy-r8a7795" if the device is a part of an R8A7795
+- compatible: "renesas,usb2-phy-r8a774a1" if the device is a part of an R8A774A1
+	      SoC.
+	      "renesas,usb2-phy-r8a7795" if the device is a part of an R8A7795
 	      SoC.
 	      "renesas,usb2-phy-r8a7796" if the device is a part of an R8A7796
 	      SoC.
@@ -14,7 +16,8 @@ Required properties:
 	      R8A77990 SoC.
 	      "renesas,usb2-phy-r8a77995" if the device is a part of an
 	      R8A77995 SoC.
-	      "renesas,rcar-gen3-usb2-phy" for a generic R-Car Gen3 compatible device.
+	      "renesas,rcar-gen3-usb2-phy" for a generic R-Car Gen3 or RZ/G2
+	      compatible device.
 
 	      When compatible with the generic version, nodes must list the
 	      SoC-specific version corresponding to the platform first
@@ -31,6 +34,8 @@ channel as USB OTG:
 - interrupts: interrupt specifier for the PHY.
 - vbus-supply: Phandle to a regulator that provides power to the VBUS. This
 	       regulator will be managed during the PHY power on/off sequence.
+- renesas,no-otg-pins: boolean, specify when a board does not provide proper
+		       otg pins.
 
 Example (R-Car H3):
 
diff --git a/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb3.txt b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb3.txt
index 47dd296ecead..9d9826609c2f 100644
--- a/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb3.txt
+++ b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb3.txt
@@ -1,20 +1,22 @@
 * Renesas R-Car generation 3 USB 3.0 PHY
 
 This file provides information on what the device node for the R-Car generation
-3 USB 3.0 PHY contains.
+3 and RZ/G2 USB 3.0 PHY contain.
 If you want to enable spread spectrum clock (ssc), you should use USB_EXTAL
 instead of USB3_CLK. However, if you don't want to these features, you don't
 need this driver.
 
 Required properties:
-- compatible: "renesas,r8a7795-usb3-phy" if the device is a part of an R8A7795
+- compatible: "renesas,r8a774a1-usb3-phy" if the device is a part of an R8A774A1
+	      SoC.
+	      "renesas,r8a7795-usb3-phy" if the device is a part of an R8A7795
 	      SoC.
 	      "renesas,r8a7796-usb3-phy" if the device is a part of an R8A7796
 	      SoC.
 	      "renesas,r8a77965-usb3-phy" if the device is a part of an
 	      R8A77965 SoC.
-	      "renesas,rcar-gen3-usb3-phy" for a generic R-Car Gen3 compatible
-	      device.
+	      "renesas,rcar-gen3-usb3-phy" for a generic R-Car Gen3 or RZ/G2
+	      compatible device.
 
 	      When compatible with the generic version, nodes must list the
 	      SoC-specific version corresponding to the platform first
diff --git a/Documentation/devicetree/bindings/phy/uniphier-pcie-phy.txt b/Documentation/devicetree/bindings/phy/uniphier-pcie-phy.txt
new file mode 100644
index 000000000000..1889d3b89d68
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/uniphier-pcie-phy.txt
@@ -0,0 +1,31 @@
+Socionext UniPhier PCIe PHY bindings
+
+This describes the devicetree bindings for PHY interface built into
+PCIe controller implemented on Socionext UniPhier SoCs.
+
+Required properties:
+- compatible: Should contain one of the following:
+    "socionext,uniphier-ld20-pcie-phy" - for LD20 PHY
+    "socionext,uniphier-pxs3-pcie-phy" - for PXs3 PHY
+- reg: Specifies offset and length of the register set for the device.
+- #phy-cells: Must be zero.
+- clocks: A phandle to the clock gate for PCIe glue layer including
+	this phy.
+- resets: A phandle to the reset line for PCIe glue layer including
+	this phy.
+
+Optional properties:
+- socionext,syscon: A phandle to system control to set configurations
+	for phy.
+
+Refer to phy/phy-bindings.txt for the generic PHY binding properties.
+
+Example:
+	pcie_phy: phy@66038000 {
+		compatible = "socionext,uniphier-ld20-pcie-phy";
+		reg = <0x66038000 0x4000>;
+		#phy-cells = <0>;
+		clocks = <&sys_clk 24>;
+		resets = <&sys_rst 24>;
+		socionext,syscon = <&soc_glue>;
+	};
diff --git a/Documentation/devicetree/bindings/phy/uniphier-usb2-phy.txt b/Documentation/devicetree/bindings/phy/uniphier-usb2-phy.txt
new file mode 100644
index 000000000000..b43b28250cc0
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/uniphier-usb2-phy.txt
@@ -0,0 +1,45 @@
+Socionext UniPhier USB2 PHY
+
+This describes the devicetree bindings for PHY interface built into
+USB2 controller implemented on Socionext UniPhier SoCs.
+
+Pro4 SoC has both USB2 and USB3 host controllers, however, this USB3
+controller doesn't include its own High-Speed PHY. This needs to specify
+USB2 PHY instead of USB3 HS-PHY.
+
+Required properties:
+- compatible: Should contain one of the following:
+    "socionext,uniphier-pro4-usb2-phy" - for Pro4 SoC
+    "socionext,uniphier-ld11-usb2-phy" - for LD11 SoC
+
+Sub-nodes:
+Each PHY should be represented as a sub-node.
+
+Sub-nodes required properties:
+- #phy-cells: Should be 0.
+- reg: The number of the PHY.
+
+Sub-nodes optional properties:
+- vbus-supply: A phandle to the regulator for USB VBUS.
+
+Refer to phy/phy-bindings.txt for the generic PHY binding properties.
+
+Example:
+	soc-glue@5f800000 {
+		...
+		usb-phy {
+			compatible = "socionext,uniphier-ld11-usb2-phy";
+			usb_phy0: phy@0 {
+				reg = <0>;
+				#phy-cells = <0>;
+			};
+			...
+		};
+	};
+
+	usb@5a800100 {
+		compatible = "socionext,uniphier-ehci", "generic-ehci";
+		...
+		phy-names = "usb";
+		phys = <&usb_phy0>;
+	};
diff --git a/Documentation/devicetree/bindings/phy/uniphier-usb3-hsphy.txt b/Documentation/devicetree/bindings/phy/uniphier-usb3-hsphy.txt
new file mode 100644
index 000000000000..e8d8086a7ae9
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/uniphier-usb3-hsphy.txt
@@ -0,0 +1,69 @@
+Socionext UniPhier USB3 High-Speed (HS) PHY
+
+This describes the devicetree bindings for PHY interfaces built into
+USB3 controller implemented on Socionext UniPhier SoCs.
+Although the controller includes High-Speed PHY and Super-Speed PHY,
+this describes about High-Speed PHY.
+
+Required properties:
+- compatible: Should contain one of the following:
+    "socionext,uniphier-pro4-usb3-hsphy" - for Pro4 SoC
+    "socionext,uniphier-pxs2-usb3-hsphy" - for PXs2 SoC
+    "socionext,uniphier-ld20-usb3-hsphy" - for LD20 SoC
+    "socionext,uniphier-pxs3-usb3-hsphy" - for PXs3 SoC
+- reg: Specifies offset and length of the register set for the device.
+- #phy-cells: Should be 0.
+- clocks: A list of phandles to the clock gate for USB3 glue layer.
+	According to the clock-names, appropriate clocks are required.
+- clock-names: Should contain the following:
+    "gio", "link" - for Pro4 SoC
+    "phy", "phy-ext", "link" - for PXs3 SoC, "phy-ext" is optional.
+    "phy", "link" - for others
+- resets: A list of phandles to the reset control for USB3 glue layer.
+	According to the reset-names, appropriate resets are required.
+- reset-names: Should contain the following:
+    "gio", "link" - for Pro4 SoC
+    "phy", "link" - for others
+
+Optional properties:
+- vbus-supply: A phandle to the regulator for USB VBUS.
+- nvmem-cells: Phandles to nvmem cell that contains the trimming data.
+	Available only for HS-PHY implemented on LD20 and PXs3, and
+	if unspecified, default value is used.
+- nvmem-cell-names: Should be the following names, which correspond to
+	each nvmem-cells.
+	All of the 3 parameters associated with the following names are
+	required for each port, if any one is omitted, the trimming data
+	of the port will not be set at all.
+    "rterm", "sel_t", "hs_i" - Each cell name for phy parameters
+
+Refer to phy/phy-bindings.txt for the generic PHY binding properties.
+
+Example:
+
+	usb-glue@65b00000 {
+		compatible = "socionext,uniphier-ld20-dwc3-glue",
+			     "simple-mfd";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0 0x65b00000 0x400>;
+
+		usb_vbus0: regulator {
+			...
+		};
+
+		usb_hsphy0: hs-phy@200 {
+			compatible = "socionext,uniphier-ld20-usb3-hsphy";
+			reg = <0x200 0x10>;
+			#phy-cells = <0>;
+			clock-names = "link", "phy";
+			clocks = <&sys_clk 14>, <&sys_clk 16>;
+			reset-names = "link", "phy";
+			resets = <&sys_rst 14>, <&sys_rst 16>;
+			vbus-supply = <&usb_vbus0>;
+			nvmem-cell-names = "rterm", "sel_t", "hs_i";
+			nvmem-cells = <&usb_rterm0>, <&usb_sel_t0>,
+				      <&usb_hs_i0>;
+		};
+		...
+	};
diff --git a/Documentation/devicetree/bindings/phy/uniphier-usb3-ssphy.txt b/Documentation/devicetree/bindings/phy/uniphier-usb3-ssphy.txt
new file mode 100644
index 000000000000..490b815445e8
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/uniphier-usb3-ssphy.txt
@@ -0,0 +1,57 @@
+Socionext UniPhier USB3 Super-Speed (SS) PHY
+
+This describes the devicetree bindings for PHY interfaces built into
+USB3 controller implemented on Socionext UniPhier SoCs.
+Although the controller includes High-Speed PHY and Super-Speed PHY,
+this describes about Super-Speed PHY.
+
+Required properties:
+- compatible: Should contain one of the following:
+    "socionext,uniphier-pro4-usb3-ssphy" - for Pro4 SoC
+    "socionext,uniphier-pxs2-usb3-ssphy" - for PXs2 SoC
+    "socionext,uniphier-ld20-usb3-ssphy" - for LD20 SoC
+    "socionext,uniphier-pxs3-usb3-ssphy" - for PXs3 SoC
+- reg: Specifies offset and length of the register set for the device.
+- #phy-cells: Should be 0.
+- clocks: A list of phandles to the clock gate for USB3 glue layer.
+	According to the clock-names, appropriate clocks are required.
+- clock-names:
+    "gio", "link" - for Pro4 SoC
+    "phy", "phy-ext", "link" - for PXs3 SoC, "phy-ext" is optional.
+    "phy", "link" - for others
+- resets: A list of phandles to the reset control for USB3 glue layer.
+	According to the reset-names, appropriate resets are required.
+- reset-names:
+    "gio", "link" - for Pro4 SoC
+    "phy", "link" - for others
+
+Optional properties:
+- vbus-supply: A phandle to the regulator for USB VBUS.
+
+Refer to phy/phy-bindings.txt for the generic PHY binding properties.
+
+Example:
+
+	usb-glue@65b00000 {
+		compatible = "socionext,uniphier-ld20-dwc3-glue",
+			     "simple-mfd";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0 0x65b00000 0x400>;
+
+		usb_vbus0: regulator {
+			...
+		};
+
+		usb_ssphy0: ss-phy@300 {
+			compatible = "socionext,uniphier-ld20-usb3-ssphy";
+			reg = <0x300 0x10>;
+			#phy-cells = <0>;
+			clock-names = "link", "phy";
+			clocks = <&sys_clk 14>, <&sys_clk 16>;
+			reset-names = "link", "phy";
+			resets = <&sys_rst 14>, <&sys_rst 16>;
+			vbus-supply = <&usb_vbus0>;
+		};
+		...
+	};
diff --git a/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt b/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt
index 5e1afc3d8480..1ab1d109318e 100644
--- a/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt
+++ b/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt
@@ -5,7 +5,7 @@ Please also refer to reset.txt in this directory for common reset
 controller binding usage.
 
 Required properties:
-- compatible: Should be "fsl,imx7-src", "syscon"
+- compatible: Should be "fsl,imx7d-src", "syscon"
 - reg: should be register base and length as documented in the
   datasheet
 - interrupts: Should contain SRC interrupt
diff --git a/Documentation/devicetree/bindings/thermal/qcom-spmi-temp-alarm.txt b/Documentation/devicetree/bindings/thermal/qcom-spmi-temp-alarm.txt
index 290ec06fa33a..0273a92a2a84 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-spmi-temp-alarm.txt
+++ b/Documentation/devicetree/bindings/thermal/qcom-spmi-temp-alarm.txt
@@ -6,8 +6,7 @@ interrupt signal and status register to identify high PMIC die temperature.
 
 Required properties:
 - compatible:      Should contain "qcom,spmi-temp-alarm".
-- reg:             Specifies the SPMI address and length of the controller's
-                   registers.
+- reg:             Specifies the SPMI address.
 - interrupts:      PMIC temperature alarm interrupt.
 - #thermal-sensor-cells: Should be 0. See thermal.txt for a description.
 
@@ -20,7 +19,7 @@ Example:
 
 	pm8941_temp: thermal-alarm@2400 {
 		compatible = "qcom,spmi-temp-alarm";
-		reg = <0x2400 0x100>;
+		reg = <0x2400>;
 		interrupts = <0 0x24 0 IRQ_TYPE_EDGE_RISING>;
 		#thermal-sensor-cells = <0>;
 
@@ -36,19 +35,14 @@ Example:
 			thermal-sensors = <&pm8941_temp>;
 
 			trips {
-				passive {
-					temperature = <1050000>;
+				stage1 {
+					temperature = <105000>;
 					hysteresis = <2000>;
 					type = "passive";
 				};
-				alert {
+				stage2 {
 					temperature = <125000>;
 					hysteresis = <2000>;
-					type = "hot";
-				};
-				crit {
-					temperature = <145000>;
-					hysteresis = <2000>;
 					type = "critical";
 				};
 			};
diff --git a/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt b/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt
index 20ca4ef9d776..04cbb90a5d3e 100644
--- a/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt
@@ -1,9 +1,9 @@
 * Thermal Monitoring Unit (TMU) on Freescale QorIQ SoCs
 
 Required properties:
-- compatible : Must include "fsl,qoriq-tmu". The version of the device is
-	determined by the TMU IP Block Revision Register (IPBRR0) at
-	offset 0x0BF8.
+- compatible : Must include "fsl,qoriq-tmu" or "fsl,imx8mq-tmu". The
+	version of the device is determined by the TMU IP Block Revision
+	Register (IPBRR0) at offset 0x0BF8.
 	Table of correspondences between IPBRR0 values and example  chips:
 		Value           Device
 		----------      -----
diff --git a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt
index cfa154bb0fa7..ad9a435afef4 100644
--- a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt
@@ -7,9 +7,11 @@ inside the LSI.
 Required properties:
 - compatible		: "renesas,<soctype>-thermal",
 			  Examples with soctypes are:
+			    - "renesas,r8a774a1-thermal" (RZ/G2M)
 			    - "renesas,r8a7795-thermal" (R-Car H3)
 			    - "renesas,r8a7796-thermal" (R-Car M3-W)
 			    - "renesas,r8a77965-thermal" (R-Car M3-N)
+			    - "renesas,r8a77980-thermal" (R-Car V3H)
 - reg			: Address ranges of the thermal registers. Each sensor
 			  needs one address range. Sorting must be done in
 			  increasing order according to datasheet, i.e.
@@ -19,7 +21,8 @@ Required properties:
 
 Optional properties:
 
-- interrupts           : interrupts routed to the TSC (3 for H3, M3-W and M3-N)
+- interrupts		: interrupts routed to the TSC (3 for H3, M3-W, M3-N,
+			  and V3H)
 - power-domain		: Must contain a reference to the power domain. This
 			  property is mandatory if the thermal sensor instance
 			  is part of a controllable power domain.
diff --git a/Documentation/devicetree/bindings/thermal/rcar-thermal.txt b/Documentation/devicetree/bindings/thermal/rcar-thermal.txt
index 67c563f1b4c4..73e1613d2cb0 100644
--- a/Documentation/devicetree/bindings/thermal/rcar-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/rcar-thermal.txt
@@ -4,15 +4,17 @@ Required properties:
 - compatible		: "renesas,thermal-<soctype>",
 			   "renesas,rcar-gen2-thermal" (with thermal-zone) or
 			   "renesas,rcar-thermal" (without thermal-zone) as
-                           fallback except R-Car D3.
+                           fallback except R-Car V3M/D3.
 			  Examples with soctypes are:
 			    - "renesas,thermal-r8a73a4" (R-Mobile APE6)
 			    - "renesas,thermal-r8a7743" (RZ/G1M)
+			    - "renesas,thermal-r8a7744" (RZ/G1N)
 			    - "renesas,thermal-r8a7779" (R-Car H1)
 			    - "renesas,thermal-r8a7790" (R-Car H2)
 			    - "renesas,thermal-r8a7791" (R-Car M2-W)
 			    - "renesas,thermal-r8a7792" (R-Car V2H)
 			    - "renesas,thermal-r8a7793" (R-Car M2-N)
+			    - "renesas,thermal-r8a77970" (R-Car V3M)
 			    - "renesas,thermal-r8a77995" (R-Car D3)
 - reg			: Address range of the thermal registers.
 			  The 1st reg will be recognized as common register
@@ -21,7 +23,7 @@ Required properties:
 Option properties:
 
 - interrupts		: If present should contain 3 interrupts for
-                          R-Car D3 or 1 interrupt otherwise.
+                          R-Car V3M/D3 or 1 interrupt otherwise.
 
 Example (non interrupt support):
 
diff --git a/Documentation/devicetree/bindings/thermal/stm32-thermal.txt b/Documentation/devicetree/bindings/thermal/stm32-thermal.txt
new file mode 100644
index 000000000000..8c0d5a4d8031
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/stm32-thermal.txt
@@ -0,0 +1,61 @@
+Binding for Thermal Sensor for STMicroelectronics STM32 series of SoCs.
+
+On STM32 SoCs, the Digital Temperature Sensor (DTS) is in charge of managing an
+analog block which delivers a frequency depending on the internal SoC's
+temperature. By using a reference frequency, DTS is able to provide a sample
+number which can be translated into a temperature by the user.
+
+DTS provides interrupt notification mechanism by threshold. This mechanism
+offers two temperature trip points: passive and critical. The first is intended
+for passive cooling notification while the second is used for over-temperature
+reset.
+
+Required parameters:
+-------------------
+
+compatible: 	Should be "st,stm32-thermal"
+reg: 		This should be the physical base address and length of the
+		sensor's registers.
+clocks: 	Phandle of the clock used by the thermal sensor.
+		  See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+clock-names: 	Should be "pclk" for register access clock and reference clock.
+		  See: Documentation/devicetree/bindings/resource-names.txt
+#thermal-sensor-cells: Should be 0. See ./thermal.txt for a description.
+interrupts:	Standard way to define interrupt number.
+
+Example:
+
+	thermal-zones {
+		cpu_thermal: cpu-thermal {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+
+			thermal-sensors = <&thermal>;
+
+			trips {
+				cpu_alert1: cpu-alert1 {
+					temperature = <85000>;
+					hysteresis = <0>;
+					type = "passive";
+				};
+
+				cpu-crit: cpu-crit {
+					temperature = <120000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+			};
+		};
+	};
+
+	thermal: thermal@50028000 {
+		compatible = "st,stm32-thermal";
+		reg = <0x50028000 0x100>;
+		clocks = <&rcc TMPSENS>;
+		clock-names = "pclk";
+		#thermal-sensor-cells = <0>;
+		interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+	};
diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt
index eb7ee91556a5..ca14ba959e0d 100644
--- a/Documentation/devicetree/bindings/thermal/thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/thermal.txt
@@ -152,7 +152,7 @@ Optional property:
   Elem size: one cell	the sensors listed in the thermal-sensors property.
   Elem type: signed	Coefficients defaults to 1, in case this property
 			is not specified. A simple linear polynomial is used:
-			Z = c0 * x0 + c1 + x1 + ... + c(n-1) * x(n-1) + cn.
+			Z = c0 * x0 + c1 * x1 + ... + c(n-1) * x(n-1) + cn.
 
 			The coefficients are ordered and they match with sensors
 			by means of sensor ID. Additional coefficients are
diff --git a/Documentation/devicetree/bindings/timer/renesas,cmt.txt b/Documentation/devicetree/bindings/timer/renesas,cmt.txt
index b40add2d9bb4..33992679a8bd 100644
--- a/Documentation/devicetree/bindings/timer/renesas,cmt.txt
+++ b/Documentation/devicetree/bindings/timer/renesas,cmt.txt
@@ -24,6 +24,8 @@ Required Properties:
     - "renesas,r8a73a4-cmt1" for the 48-bit CMT1 device included in r8a73a4.
     - "renesas,r8a7743-cmt0" for the 32-bit CMT0 device included in r8a7743.
     - "renesas,r8a7743-cmt1" for the 48-bit CMT1 device included in r8a7743.
+    - "renesas,r8a7744-cmt0" for the 32-bit CMT0 device included in r8a7744.
+    - "renesas,r8a7744-cmt1" for the 48-bit CMT1 device included in r8a7744.
     - "renesas,r8a7745-cmt0" for the 32-bit CMT0 device included in r8a7745.
     - "renesas,r8a7745-cmt1" for the 48-bit CMT1 device included in r8a7745.
     - "renesas,r8a7790-cmt0" for the 32-bit CMT0 device included in r8a7790.
@@ -34,6 +36,10 @@ Required Properties:
     - "renesas,r8a7793-cmt1" for the 48-bit CMT1 device included in r8a7793.
     - "renesas,r8a7794-cmt0" for the 32-bit CMT0 device included in r8a7794.
     - "renesas,r8a7794-cmt1" for the 48-bit CMT1 device included in r8a7794.
+    - "renesas,r8a77970-cmt0" for the 32-bit CMT0 device included in r8a77970.
+    - "renesas,r8a77970-cmt1" for the 48-bit CMT1 device included in r8a77970.
+    - "renesas,r8a77980-cmt0" for the 32-bit CMT0 device included in r8a77980.
+    - "renesas,r8a77980-cmt1" for the 48-bit CMT1 device included in r8a77980.
 
     - "renesas,rcar-gen2-cmt0" for 32-bit CMT0 devices included in R-Car Gen2
 		and RZ/G1.
@@ -41,6 +47,9 @@ Required Properties:
 		and RZ/G1.
 		These are fallbacks for r8a73a4, R-Car Gen2 and RZ/G1 entries
 		listed above.
+    - "renesas,rcar-gen3-cmt0" for 32-bit CMT0 devices included in R-Car Gen3.
+    - "renesas,rcar-gen3-cmt1" for 48-bit CMT1 devices included in R-Car Gen3.
+		These are fallbacks for R-Car Gen3 entries listed above.
 
   - reg: base address and length of the registers block for the timer module.
   - interrupts: interrupt-specifier for the timer, one per channel.
diff --git a/Documentation/devicetree/bindings/timer/renesas,ostm.txt b/Documentation/devicetree/bindings/timer/renesas,ostm.txt
index be3ae0fdf775..81a78f8bcf17 100644
--- a/Documentation/devicetree/bindings/timer/renesas,ostm.txt
+++ b/Documentation/devicetree/bindings/timer/renesas,ostm.txt
@@ -9,7 +9,8 @@ Channels are independent from each other.
 Required Properties:
 
   - compatible: must be one or more of the following:
-    - "renesas,r7s72100-ostm" for the r7s72100 OSTM
+    - "renesas,r7s72100-ostm" for the R7S72100 (RZ/A1) OSTM
+    - "renesas,r7s9210-ostm" for the R7S9210 (RZ/A2) OSTM
     - "renesas,ostm" for any OSTM
 		This is a fallback for the above renesas,*-ostm entries
 
diff --git a/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt
index 2e9318151df7..529e51879fb2 100644
--- a/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt
+++ b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt
@@ -80,6 +80,8 @@ Optional properties:
   controller. It's expected that a mux state of 0 indicates device mode and a
   mux state of 1 indicates host mode.
 - mux-control-names: Shall be "usb_switch" if mux-controls is specified.
+- pinctrl-names: Names for optional pin modes in "default", "host", "device"
+- pinctrl-n: alternate pin modes
 
 i.mx specific properties
 - fsl,usbmisc: phandler of non-core register device, with one
diff --git a/Documentation/devicetree/bindings/usb/dwc3.txt b/Documentation/devicetree/bindings/usb/dwc3.txt
index 3e4c38b806ac..636630fb92d7 100644
--- a/Documentation/devicetree/bindings/usb/dwc3.txt
+++ b/Documentation/devicetree/bindings/usb/dwc3.txt
@@ -19,6 +19,7 @@ Exception for clocks:
     "cavium,octeon-7130-usb-uctl"
     "qcom,dwc3"
     "samsung,exynos5250-dwusb3"
+    "samsung,exynos5433-dwusb3"
     "samsung,exynos7-dwusb3"
     "sprd,sc9860-dwc3"
     "st,stih407-dwc3"
diff --git a/Documentation/devicetree/bindings/usb/ehci-mv.txt b/Documentation/devicetree/bindings/usb/ehci-mv.txt
new file mode 100644
index 000000000000..335589895763
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ehci-mv.txt
@@ -0,0 +1,23 @@
+* Marvell PXA/MMP EHCI controller.
+
+Required properties:
+
+- compatible: must be "marvell,pxau2o-ehci"
+- reg: physical base addresses of the controller and length of memory mapped region
+- interrupts: one EHCI controller interrupt should be described here
+- clocks: phandle list of usb clocks
+- clock-names: should be "USBCLK"
+- phys: phandle for the PHY device
+- phy-names: should be "usb"
+
+Example:
+
+	ehci0: usb-ehci@d4208000 {
+		compatible = "marvell,pxau2o-ehci";
+		reg = <0xd4208000 0x200>;
+		interrupts = <44>;
+		clocks = <&soc_clocks MMP2_CLK_USB>;
+		clock-names = "USBCLK";
+		phys = <&usb_otg_phy>;
+		phy-names = "usb";
+	};
diff --git a/Documentation/devicetree/bindings/usb/exynos-usb.txt b/Documentation/devicetree/bindings/usb/exynos-usb.txt
index c97374315049..b7111f43fa59 100644
--- a/Documentation/devicetree/bindings/usb/exynos-usb.txt
+++ b/Documentation/devicetree/bindings/usb/exynos-usb.txt
@@ -83,6 +83,8 @@ Required properties:
  - compatible: should be one of the following -
 	       "samsung,exynos5250-dwusb3": for USB 3.0 DWC3 controller on
 					    Exynos5250/5420.
+	       "samsung,exynos5433-dwusb3": for USB 3.0 DWC3 controller on
+					    Exynos5433.
 	       "samsung,exynos7-dwusb3": for USB 3.0 DWC3 controller on Exynos7.
  - #address-cells, #size-cells : should be '1' if the device has sub-nodes
 				 with 'reg' property.
diff --git a/Documentation/devicetree/bindings/usb/faraday,fotg210.txt b/Documentation/devicetree/bindings/usb/faraday,fotg210.txt
new file mode 100644
index 000000000000..06a2286e2054
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/faraday,fotg210.txt
@@ -0,0 +1,35 @@
+Faraday FOTG Host controller
+
+This OTG-capable USB host controller is found in Cortina Systems
+Gemini and other SoC products.
+
+Required properties:
+- compatible: should be one of:
+  "faraday,fotg210"
+  "cortina,gemini-usb", "faraday,fotg210"
+- reg: should contain one register range i.e. start and length
+- interrupts: description of the interrupt line
+
+Optional properties:
+- clocks: should contain the IP block clock
+- clock-names: should be "PCLK" for the IP block clock
+
+Required properties for "cortina,gemini-usb" compatible:
+- syscon: a phandle to the system controller to access PHY registers
+
+Optional properties for "cortina,gemini-usb" compatible:
+- cortina,gemini-mini-b: boolean property that indicates that a Mini-B
+  OTG connector is in use
+- wakeup-source: see power/wakeup-source.txt
+
+Example for Gemini:
+
+usb@68000000 {
+	compatible = "cortina,gemini-usb", "faraday,fotg210";
+	reg = <0x68000000 0x1000>;
+	interrupts = <10 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&cc 12>;
+	clock-names = "PCLK";
+	syscon = <&syscon>;
+	wakeup-source;
+};
diff --git a/Documentation/devicetree/bindings/usb/fcs,fusb302.txt b/Documentation/devicetree/bindings/usb/fcs,fusb302.txt
index 6087dc7f209e..a5d011d2efc8 100644
--- a/Documentation/devicetree/bindings/usb/fcs,fusb302.txt
+++ b/Documentation/devicetree/bindings/usb/fcs,fusb302.txt
@@ -5,10 +5,19 @@ Required properties :
 - reg                    : I2C slave address
 - interrupts             : Interrupt specifier
 
-Optional properties :
-- fcs,operating-sink-microwatt :
-			   Minimum amount of power accepted from a sink
-			   when negotiating
+Required sub-node:
+- connector : The "usb-c-connector" attached to the FUSB302 IC. The bindings
+  of the connector node are specified in:
+
+	Documentation/devicetree/bindings/connector/usb-connector.txt
+
+Deprecated properties :
+- fcs,max-sink-microvolt : Maximum sink voltage accepted by port controller
+- fcs,max-sink-microamp : Maximum sink current accepted by port controller
+- fcs,max-sink-microwatt : Maximum sink power accepted by port controller
+- fcs,operating-sink-microwatt : Minimum amount of power accepted from a sink
+  when negotiating
+
 
 Example:
 
@@ -17,7 +26,16 @@ fusb302: typec-portc@54 {
 	reg = <0x54>;
 	interrupt-parent = <&nmi_intc>;
 	interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
-	fcs,max-sink-microvolt = <12000000>;
-	fcs,max-sink-microamp = <3000000>;
-	fcs,max-sink-microwatt = <36000000>;
+
+	usb_con: connector {
+		compatible = "usb-c-connector";
+		label = "USB-C";
+		power-role = "dual";
+		try-power-role = "sink";
+		source-pdos = <PDO_FIXED(5000, 3000, PDO_FIXED_USB_COMM)>;
+		sink-pdos = <PDO_FIXED(5000, 3000, PDO_FIXED_USB_COMM)
+			     PDO_VAR(3000, 12000, 3000)
+			     PDO_PPS_APDO(3000, 11000, 3000)>;
+		op-sink-microwatt = <10000000>;
+	};
 };
diff --git a/Documentation/devicetree/bindings/usb/renesas_usb3.txt b/Documentation/devicetree/bindings/usb/renesas_usb3.txt
index 2c071bb5801e..d366555166d0 100644
--- a/Documentation/devicetree/bindings/usb/renesas_usb3.txt
+++ b/Documentation/devicetree/bindings/usb/renesas_usb3.txt
@@ -2,11 +2,13 @@ Renesas Electronics USB3.0 Peripheral driver
 
 Required properties:
   - compatible: Must contain one of the following:
+	- "renesas,r8a774a1-usb3-peri"
 	- "renesas,r8a7795-usb3-peri"
 	- "renesas,r8a7796-usb3-peri"
 	- "renesas,r8a77965-usb3-peri"
-	- "renesas,rcar-gen3-usb3-peri" for a generic R-Car Gen3 compatible
-	  device
+	- "renesas,r8a77990-usb3-peri"
+	- "renesas,rcar-gen3-usb3-peri" for a generic R-Car Gen3 or RZ/G2
+	  compatible device
 
     When compatible with the generic version, nodes must list the
     SoC-specific version corresponding to the platform first
diff --git a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
index 43960faf5a88..90719f501852 100644
--- a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
+++ b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
@@ -4,7 +4,9 @@ Required properties:
   - compatible: Must contain one or more of the following:
 
 	- "renesas,usbhs-r8a7743" for r8a7743 (RZ/G1M) compatible device
+	- "renesas,usbhs-r8a7744" for r8a7744 (RZ/G1N) compatible device
 	- "renesas,usbhs-r8a7745" for r8a7745 (RZ/G1E) compatible device
+	- "renesas,usbhs-r8a774a1" for r8a774a1 (RZ/G2M) compatible device
 	- "renesas,usbhs-r8a7790" for r8a7790 (R-Car H2) compatible device
 	- "renesas,usbhs-r8a7791" for r8a7791 (R-Car M2-W) compatible device
 	- "renesas,usbhs-r8a7792" for r8a7792 (R-Car V2H) compatible device
@@ -13,10 +15,11 @@ Required properties:
 	- "renesas,usbhs-r8a7795" for r8a7795 (R-Car H3) compatible device
 	- "renesas,usbhs-r8a7796" for r8a7796 (R-Car M3-W) compatible device
 	- "renesas,usbhs-r8a77965" for r8a77965 (R-Car M3-N) compatible device
+	- "renesas,usbhs-r8a77990" for r8a77990 (R-Car E3) compatible device
 	- "renesas,usbhs-r8a77995" for r8a77995 (R-Car D3) compatible device
 	- "renesas,usbhs-r7s72100" for r7s72100 (RZ/A1) compatible device
 	- "renesas,rcar-gen2-usbhs" for R-Car Gen2 or RZ/G1 compatible devices
-	- "renesas,rcar-gen3-usbhs" for R-Car Gen3 compatible device
+	- "renesas,rcar-gen3-usbhs" for R-Car Gen3 or RZ/G2 compatible devices
 	- "renesas,rza1-usbhs" for RZ/A1 compatible device
 
 	When compatible with the generic version, nodes must list the
@@ -25,7 +28,11 @@ Required properties:
 
   - reg: Base address and length of the register for the USBHS
   - interrupts: Interrupt specifier for the USBHS
-  - clocks: A list of phandle + clock specifier pairs
+  - clocks: A list of phandle + clock specifier pairs.
+	    - In case of "renesas,rcar-gen3-usbhs", two clocks are required.
+	      First clock should be peripheral and second one should be host.
+	    - In case of except above, one clock is required. First clock
+	      should be peripheral.
 
 Optional properties:
   - renesas,buswait: Integer to use BUSWAIT register
diff --git a/Documentation/devicetree/bindings/usb/usb-ehci.txt b/Documentation/devicetree/bindings/usb/usb-ehci.txt
index 0f1b75386207..406252d14c6b 100644
--- a/Documentation/devicetree/bindings/usb/usb-ehci.txt
+++ b/Documentation/devicetree/bindings/usb/usb-ehci.txt
@@ -15,7 +15,11 @@ Optional properties:
  - needs-reset-on-resume : boolean, set this to force EHCI reset after resume
  - has-transaction-translator : boolean, set this if EHCI have a Transaction
 				Translator built into the root hub.
- - clocks : a list of phandle + clock specifier pairs
+ - clocks : a list of phandle + clock specifier pairs. In case of Renesas
+	    R-Car Gen3 SoCs:
+	    - if a host only channel: first clock should be host.
+	    - if a USB DRD channel: first clock should be host and second one
+				    should be peripheral.
  - phys : see usb-hcd.txt in the current directory
  - resets : phandle + reset specifier pair
 
diff --git a/Documentation/devicetree/bindings/usb/usb-ohci.txt b/Documentation/devicetree/bindings/usb/usb-ohci.txt
index a8d2103d1f3d..aaaa5255c972 100644
--- a/Documentation/devicetree/bindings/usb/usb-ohci.txt
+++ b/Documentation/devicetree/bindings/usb/usb-ohci.txt
@@ -12,7 +12,11 @@ Optional properties:
 - no-big-frame-no : boolean, set if frame_no lives in bits [15:0] of HCCA
 - remote-wakeup-connected: remote wakeup is wired on the platform
 - num-ports : u32, to override the detected port count
-- clocks : a list of phandle + clock specifier pairs
+- clocks : a list of phandle + clock specifier pairs. In case of Renesas
+	   R-Car Gen3 SoCs:
+	   - if a host only channel: first clock should be host.
+	   - if a USB DRD channel: first clock should be host and second one
+				   should be peripheral.
 - phys : see usb-hcd.txt in the current directory
 - resets : a list of phandle + reset specifier pairs
 
diff --git a/Documentation/devicetree/bindings/usb/usb-xhci.txt b/Documentation/devicetree/bindings/usb/usb-xhci.txt
index ac4cd0d6195a..fea8b1545751 100644
--- a/Documentation/devicetree/bindings/usb/usb-xhci.txt
+++ b/Documentation/devicetree/bindings/usb/usb-xhci.txt
@@ -8,6 +8,8 @@ Required properties:
     - "marvell,armada-375-xhci" for Armada 375 SoCs
     - "marvell,armada-380-xhci" for Armada 38x SoCs
     - "renesas,xhci-r8a7743" for r8a7743 SoC
+    - "renesas,xhci-r8a7744" for r8a7744 SoC
+    - "renesas,xhci-r8a774a1" for r8a774a1 SoC
     - "renesas,xhci-r8a7790" for r8a7790 SoC
     - "renesas,xhci-r8a7791" for r8a7791 SoC
     - "renesas,xhci-r8a7793" for r8a7793 SoC
@@ -17,7 +19,8 @@ Required properties:
     - "renesas,xhci-r8a77990" for r8a77990 SoC
     - "renesas,rcar-gen2-xhci" for a generic R-Car Gen2 or RZ/G1 compatible
       device
-    - "renesas,rcar-gen3-xhci" for a generic R-Car Gen3 compatible device
+    - "renesas,rcar-gen3-xhci" for a generic R-Car Gen3 or RZ/G2 compatible
+      device
     - "xhci-platform" (deprecated)
 
     When compatible with the generic version, nodes must list the
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index f26bf667e530..376f24484182 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -127,6 +127,7 @@ everspin	Everspin Technologies, Inc.
 exar	Exar Corporation
 excito	Excito
 ezchip	EZchip Semiconductor
+facebook	Facebook
 fairphone	Fairphone B.V.
 faraday	Faraday Technology Corporation
 fastrax	Fastrax Oy
@@ -275,6 +276,7 @@ nxp	NXP Semiconductors
 okaya	Okaya Electric America, Inc.
 oki	Oki Electric Industry Co., Ltd.
 olimex	OLIMEX Ltd.
+olpc	One Laptop Per Child
 onion	Onion Corporation
 onnn	ON Semiconductor Corp.
 ontat	On Tat Industrial Company
diff --git a/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt b/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt
index 9407212a85a8..d72d1181ec62 100644
--- a/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt
@@ -6,6 +6,7 @@ Required properties:
 		version.
 	       Examples with soctypes are:
 		 - "renesas,r8a7743-wdt" (RZ/G1M)
+		 - "renesas,r8a7744-wdt" (RZ/G1N)
 		 - "renesas,r8a7745-wdt" (RZ/G1E)
 		 - "renesas,r8a774a1-wdt" (RZ/G2M)
 	         - "renesas,r8a7790-wdt" (R-Car H2)
diff --git a/Documentation/driver-api/fpga/fpga-bridge.rst b/Documentation/driver-api/fpga/fpga-bridge.rst
index 2c2aaca894bf..71c5a40da320 100644
--- a/Documentation/driver-api/fpga/fpga-bridge.rst
+++ b/Documentation/driver-api/fpga/fpga-bridge.rst
@@ -4,6 +4,12 @@ FPGA Bridge
 API to implement a new FPGA bridge
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+* struct :c:type:`fpga_bridge` — The FPGA Bridge structure
+* struct :c:type:`fpga_bridge_ops` — Low level Bridge driver ops
+* :c:func:`devm_fpga_bridge_create()` — Allocate and init a bridge struct
+* :c:func:`fpga_bridge_register()` — Register a bridge
+* :c:func:`fpga_bridge_unregister()` — Unregister a bridge
+
 .. kernel-doc:: include/linux/fpga/fpga-bridge.h
    :functions: fpga_bridge
 
@@ -11,39 +17,10 @@ API to implement a new FPGA bridge
    :functions: fpga_bridge_ops
 
 .. kernel-doc:: drivers/fpga/fpga-bridge.c
-   :functions: fpga_bridge_create
-
-.. kernel-doc:: drivers/fpga/fpga-bridge.c
-   :functions: fpga_bridge_free
+   :functions: devm_fpga_bridge_create
 
 .. kernel-doc:: drivers/fpga/fpga-bridge.c
    :functions: fpga_bridge_register
 
 .. kernel-doc:: drivers/fpga/fpga-bridge.c
    :functions: fpga_bridge_unregister
-
-API to control an FPGA bridge
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-You probably won't need these directly.  FPGA regions should handle this.
-
-.. kernel-doc:: drivers/fpga/fpga-bridge.c
-   :functions: of_fpga_bridge_get
-
-.. kernel-doc:: drivers/fpga/fpga-bridge.c
-   :functions: fpga_bridge_get
-
-.. kernel-doc:: drivers/fpga/fpga-bridge.c
-   :functions: fpga_bridge_put
-
-.. kernel-doc:: drivers/fpga/fpga-bridge.c
-   :functions: fpga_bridge_get_to_list
-
-.. kernel-doc:: drivers/fpga/fpga-bridge.c
-   :functions: of_fpga_bridge_get_to_list
-
-.. kernel-doc:: drivers/fpga/fpga-bridge.c
-   :functions: fpga_bridge_enable
-
-.. kernel-doc:: drivers/fpga/fpga-bridge.c
-   :functions: fpga_bridge_disable
diff --git a/Documentation/driver-api/fpga/fpga-mgr.rst b/Documentation/driver-api/fpga/fpga-mgr.rst
index 82b6dbbd31cd..576f1945eacd 100644
--- a/Documentation/driver-api/fpga/fpga-mgr.rst
+++ b/Documentation/driver-api/fpga/fpga-mgr.rst
@@ -49,18 +49,14 @@ probe function calls fpga_mgr_register(), such as::
 		 * them in priv
 		 */
 
-		mgr = fpga_mgr_create(dev, "Altera SOCFPGA FPGA Manager",
-				      &socfpga_fpga_ops, priv);
+		mgr = devm_fpga_mgr_create(dev, "Altera SOCFPGA FPGA Manager",
+					   &socfpga_fpga_ops, priv);
 		if (!mgr)
 			return -ENOMEM;
 
 		platform_set_drvdata(pdev, mgr);
 
-		ret = fpga_mgr_register(mgr);
-		if (ret)
-			fpga_mgr_free(mgr);
-
-		return ret;
+		return fpga_mgr_register(mgr);
 	}
 
 	static int socfpga_fpga_remove(struct platform_device *pdev)
@@ -102,67 +98,19 @@ The ops include a .state function which will determine the state the FPGA is in
 and return a code of type enum fpga_mgr_states.  It doesn't result in a change
 in state.
 
-How to write an image buffer to a supported FPGA
-------------------------------------------------
-
-Some sample code::
-
-	#include <linux/fpga/fpga-mgr.h>
-
-	struct fpga_manager *mgr;
-	struct fpga_image_info *info;
-	int ret;
-
-	/*
-	 * Get a reference to FPGA manager.  The manager is not locked, so you can
-	 * hold onto this reference without it preventing programming.
-	 *
-	 * This example uses the device node of the manager.  Alternatively, use
-	 * fpga_mgr_get(dev) instead if you have the device.
-	 */
-	mgr = of_fpga_mgr_get(mgr_node);
-
-	/* struct with information about the FPGA image to program. */
-	info = fpga_image_info_alloc(dev);
-
-	/* flags indicates whether to do full or partial reconfiguration */
-	info->flags = FPGA_MGR_PARTIAL_RECONFIG;
-
-	/*
-	 * At this point, indicate where the image is. This is pseudo-code; you're
-	 * going to use one of these three.
-	 */
-	if (image is in a scatter gather table) {
-
-		info->sgt = [your scatter gather table]
-
-	} else if (image is in a buffer) {
-
-		info->buf = [your image buffer]
-		info->count = [image buffer size]
-
-	} else if (image is in a firmware file) {
-
-		info->firmware_name = devm_kstrdup(dev, firmware_name, GFP_KERNEL);
-
-	}
-
-	/* Get exclusive control of FPGA manager */
-	ret = fpga_mgr_lock(mgr);
-
-	/* Load the buffer to the FPGA */
-	ret = fpga_mgr_buf_load(mgr, &info, buf, count);
-
-	/* Release the FPGA manager */
-	fpga_mgr_unlock(mgr);
-	fpga_mgr_put(mgr);
-
-	/* Deallocate the image info if you're done with it */
-	fpga_image_info_free(info);
-
 API for implementing a new FPGA Manager driver
 ----------------------------------------------
 
+* ``fpga_mgr_states`` —  Values for :c:member:`fpga_manager->state`.
+* struct :c:type:`fpga_manager` —  the FPGA manager struct
+* struct :c:type:`fpga_manager_ops` —  Low level FPGA manager driver ops
+* :c:func:`devm_fpga_mgr_create` —  Allocate and init a manager struct
+* :c:func:`fpga_mgr_register` —  Register an FPGA manager
+* :c:func:`fpga_mgr_unregister` —  Unregister an FPGA manager
+
+.. kernel-doc:: include/linux/fpga/fpga-mgr.h
+   :functions: fpga_mgr_states
+
 .. kernel-doc:: include/linux/fpga/fpga-mgr.h
    :functions: fpga_manager
 
@@ -170,56 +118,10 @@ API for implementing a new FPGA Manager driver
    :functions: fpga_manager_ops
 
 .. kernel-doc:: drivers/fpga/fpga-mgr.c
-   :functions: fpga_mgr_create
-
-.. kernel-doc:: drivers/fpga/fpga-mgr.c
-   :functions: fpga_mgr_free
+   :functions: devm_fpga_mgr_create
 
 .. kernel-doc:: drivers/fpga/fpga-mgr.c
    :functions: fpga_mgr_register
 
 .. kernel-doc:: drivers/fpga/fpga-mgr.c
    :functions: fpga_mgr_unregister
-
-API for programming an FPGA
----------------------------
-
-FPGA Manager flags
-
-.. kernel-doc:: include/linux/fpga/fpga-mgr.h
-   :doc: FPGA Manager flags
-
-.. kernel-doc:: include/linux/fpga/fpga-mgr.h
-   :functions: fpga_image_info
-
-.. kernel-doc:: include/linux/fpga/fpga-mgr.h
-   :functions: fpga_mgr_states
-
-.. kernel-doc:: drivers/fpga/fpga-mgr.c
-   :functions: fpga_image_info_alloc
-
-.. kernel-doc:: drivers/fpga/fpga-mgr.c
-   :functions: fpga_image_info_free
-
-.. kernel-doc:: drivers/fpga/fpga-mgr.c
-   :functions: of_fpga_mgr_get
-
-.. kernel-doc:: drivers/fpga/fpga-mgr.c
-   :functions: fpga_mgr_get
-
-.. kernel-doc:: drivers/fpga/fpga-mgr.c
-   :functions: fpga_mgr_put
-
-.. kernel-doc:: drivers/fpga/fpga-mgr.c
-   :functions: fpga_mgr_lock
-
-.. kernel-doc:: drivers/fpga/fpga-mgr.c
-   :functions: fpga_mgr_unlock
-
-.. kernel-doc:: include/linux/fpga/fpga-mgr.h
-   :functions: fpga_mgr_states
-
-Note - use :c:func:`fpga_region_program_fpga()` instead of :c:func:`fpga_mgr_load()`
-
-.. kernel-doc:: drivers/fpga/fpga-mgr.c
-   :functions: fpga_mgr_load
diff --git a/Documentation/driver-api/fpga/fpga-programming.rst b/Documentation/driver-api/fpga/fpga-programming.rst
new file mode 100644
index 000000000000..b5484df6ff0f
--- /dev/null
+++ b/Documentation/driver-api/fpga/fpga-programming.rst
@@ -0,0 +1,107 @@
+In-kernel API for FPGA Programming
+==================================
+
+Overview
+--------
+
+The in-kernel API for FPGA programming is a combination of APIs from
+FPGA manager, bridge, and regions.  The actual function used to
+trigger FPGA programming is :c:func:`fpga_region_program_fpga()`.
+
+:c:func:`fpga_region_program_fpga()` uses functionality supplied by
+the FPGA manager and bridges.  It will:
+
+ * lock the region's mutex
+ * lock the mutex of the region's FPGA manager
+ * build a list of FPGA bridges if a method has been specified to do so
+ * disable the bridges
+ * program the FPGA using info passed in :c:member:`fpga_region->info`.
+ * re-enable the bridges
+ * release the locks
+
+The struct fpga_image_info specifies what FPGA image to program.  It is
+allocated/freed by :c:func:`fpga_image_info_alloc()` and freed with
+:c:func:`fpga_image_info_free()`
+
+How to program an FPGA using a region
+-------------------------------------
+
+When the FPGA region driver probed, it was given a pointer to an FPGA manager
+driver so it knows which manager to use.  The region also either has a list of
+bridges to control during programming or it has a pointer to a function that
+will generate that list.  Here's some sample code of what to do next::
+
+	#include <linux/fpga/fpga-mgr.h>
+	#include <linux/fpga/fpga-region.h>
+
+	struct fpga_image_info *info;
+	int ret;
+
+	/*
+	 * First, alloc the struct with information about the FPGA image to
+	 * program.
+	 */
+	info = fpga_image_info_alloc(dev);
+	if (!info)
+		return -ENOMEM;
+
+	/* Set flags as needed, such as: */
+	info->flags = FPGA_MGR_PARTIAL_RECONFIG;
+
+	/*
+	 * Indicate where the FPGA image is. This is pseudo-code; you're
+	 * going to use one of these three.
+	 */
+	if (image is in a scatter gather table) {
+
+		info->sgt = [your scatter gather table]
+
+	} else if (image is in a buffer) {
+
+		info->buf = [your image buffer]
+		info->count = [image buffer size]
+
+	} else if (image is in a firmware file) {
+
+		info->firmware_name = devm_kstrdup(dev, firmware_name,
+						   GFP_KERNEL);
+
+	}
+
+	/* Add info to region and do the programming */
+	region->info = info;
+	ret = fpga_region_program_fpga(region);
+
+	/* Deallocate the image info if you're done with it */
+	region->info = NULL;
+	fpga_image_info_free(info);
+
+	if (ret)
+		return ret;
+
+	/* Now enumerate whatever hardware has appeared in the FPGA. */
+
+API for programming an FPGA
+---------------------------
+
+* :c:func:`fpga_region_program_fpga` —  Program an FPGA
+* :c:type:`fpga_image_info` —  Specifies what FPGA image to program
+* :c:func:`fpga_image_info_alloc()` —  Allocate an FPGA image info struct
+* :c:func:`fpga_image_info_free()` —  Free an FPGA image info struct
+
+.. kernel-doc:: drivers/fpga/fpga-region.c
+   :functions: fpga_region_program_fpga
+
+FPGA Manager flags
+
+.. kernel-doc:: include/linux/fpga/fpga-mgr.h
+   :doc: FPGA Manager flags
+
+.. kernel-doc:: include/linux/fpga/fpga-mgr.h
+   :functions: fpga_image_info
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: fpga_image_info_alloc
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: fpga_image_info_free
diff --git a/Documentation/driver-api/fpga/fpga-region.rst b/Documentation/driver-api/fpga/fpga-region.rst
index f30333ce828e..0529b2d2231a 100644
--- a/Documentation/driver-api/fpga/fpga-region.rst
+++ b/Documentation/driver-api/fpga/fpga-region.rst
@@ -34,41 +34,6 @@ fpga_image_info including:
  * flags indicating specifics such as whether the image is for partial
    reconfiguration.
 
-How to program an FPGA using a region
--------------------------------------
-
-First, allocate the info struct::
-
-	info = fpga_image_info_alloc(dev);
-	if (!info)
-		return -ENOMEM;
-
-Set flags as needed, i.e.::
-
-	info->flags |= FPGA_MGR_PARTIAL_RECONFIG;
-
-Point to your FPGA image, such as::
-
-	info->sgt = &sgt;
-
-Add info to region and do the programming::
-
-	region->info = info;
-	ret = fpga_region_program_fpga(region);
-
-:c:func:`fpga_region_program_fpga()` operates on info passed in the
-fpga_image_info (region->info).  This function will attempt to:
-
- * lock the region's mutex
- * lock the region's FPGA manager
- * build a list of FPGA bridges if a method has been specified to do so
- * disable the bridges
- * program the FPGA
- * re-enable the bridges
- * release the locks
-
-Then you will want to enumerate whatever hardware has appeared in the FPGA.
-
 How to add a new FPGA region
 ----------------------------
 
@@ -77,26 +42,62 @@ An example of usage can be seen in the probe function of [#f2]_.
 .. [#f1] ../devicetree/bindings/fpga/fpga-region.txt
 .. [#f2] ../../drivers/fpga/of-fpga-region.c
 
-API to program an FPGA
-----------------------
-
-.. kernel-doc:: drivers/fpga/fpga-region.c
-   :functions: fpga_region_program_fpga
-
 API to add a new FPGA region
 ----------------------------
 
+* struct :c:type:`fpga_region` — The FPGA region struct
+* :c:func:`devm_fpga_region_create` — Allocate and init a region struct
+* :c:func:`fpga_region_register` —  Register an FPGA region
+* :c:func:`fpga_region_unregister` —  Unregister an FPGA region
+
+The FPGA region's probe function will need to get a reference to the FPGA
+Manager it will be using to do the programming.  This usually would happen
+during the region's probe function.
+
+* :c:func:`fpga_mgr_get` — Get a reference to an FPGA manager, raise ref count
+* :c:func:`of_fpga_mgr_get` —  Get a reference to an FPGA manager, raise ref count,
+  given a device node.
+* :c:func:`fpga_mgr_put` — Put an FPGA manager
+
+The FPGA region will need to specify which bridges to control while programming
+the FPGA.  The region driver can build a list of bridges during probe time
+(:c:member:`fpga_region->bridge_list`) or it can have a function that creates
+the list of bridges to program just before programming
+(:c:member:`fpga_region->get_bridges`).  The FPGA bridge framework supplies the
+following APIs to handle building or tearing down that list.
+
+* :c:func:`fpga_bridge_get_to_list` — Get a ref of an FPGA bridge, add it to a
+  list
+* :c:func:`of_fpga_bridge_get_to_list` — Get a ref of an FPGA bridge, add it to a
+  list, given a device node
+* :c:func:`fpga_bridges_put` — Given a list of bridges, put them
+
 .. kernel-doc:: include/linux/fpga/fpga-region.h
    :functions: fpga_region
 
 .. kernel-doc:: drivers/fpga/fpga-region.c
-   :functions: fpga_region_create
-
-.. kernel-doc:: drivers/fpga/fpga-region.c
-   :functions: fpga_region_free
+   :functions: devm_fpga_region_create
 
 .. kernel-doc:: drivers/fpga/fpga-region.c
    :functions: fpga_region_register
 
 .. kernel-doc:: drivers/fpga/fpga-region.c
    :functions: fpga_region_unregister
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: fpga_mgr_get
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: of_fpga_mgr_get
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: fpga_mgr_put
+
+.. kernel-doc:: drivers/fpga/fpga-bridge.c
+   :functions: fpga_bridge_get_to_list
+
+.. kernel-doc:: drivers/fpga/fpga-bridge.c
+   :functions: of_fpga_bridge_get_to_list
+
+.. kernel-doc:: drivers/fpga/fpga-bridge.c
+   :functions: fpga_bridges_put
diff --git a/Documentation/driver-api/fpga/index.rst b/Documentation/driver-api/fpga/index.rst
index c51e5ebd544a..31a4773bd2e6 100644
--- a/Documentation/driver-api/fpga/index.rst
+++ b/Documentation/driver-api/fpga/index.rst
@@ -11,3 +11,5 @@ FPGA Subsystem
    fpga-mgr
    fpga-bridge
    fpga-region
+   fpga-programming
+
diff --git a/Documentation/driver-api/fpga/intro.rst b/Documentation/driver-api/fpga/intro.rst
index 50d1cab84950..f54c7dabcc7d 100644
--- a/Documentation/driver-api/fpga/intro.rst
+++ b/Documentation/driver-api/fpga/intro.rst
@@ -44,7 +44,7 @@ FPGA Region
 -----------
 
 If you are adding a new interface to the FPGA framework, add it on top
-of an FPGA region to allow the most reuse of your interface.
+of an FPGA region.
 
 The FPGA Region framework (fpga-region.c) associates managers and
 bridges as reconfigurable regions.  A region may refer to the whole
diff --git a/Documentation/driver-api/soundwire/stream.rst b/Documentation/driver-api/soundwire/stream.rst
index 29121aa55fb9..26a6064503fd 100644
--- a/Documentation/driver-api/soundwire/stream.rst
+++ b/Documentation/driver-api/soundwire/stream.rst
@@ -101,6 +101,34 @@ interface. ::
 	+--------------------+                             |                |
 							   +----------------+
 
+Example 5: Stereo Stream with L and R channel is rendered by 2 Masters, each
+rendering one channel, and is received by two different Slaves, each
+receiving one channel. Both Masters and both Slaves are using single port. ::
+
+	+---------------+                    Clock Signal  +---------------+
+	|    Master     +----------------------------------+     Slave     |
+	|   Interface   |                                  |   Interface   |
+	|       1       |                                  |       1       |
+	|               |                     Data Signal  |               |
+	|       L       +----------------------------------+       L       |
+	|     (Data)    |     Data Direction               |     (Data)    |
+	+---------------+  +----------------------->       +---------------+
+
+	+---------------+                    Clock Signal  +---------------+
+	|    Master     +----------------------------------+     Slave     |
+	|   Interface   |                                  |   Interface   |
+	|       2       |                                  |       2       |
+	|               |                     Data Signal  |               |
+	|       R       +----------------------------------+       R       |
+	|     (Data)    |     Data Direction               |     (Data)    |
+	+---------------+  +----------------------->       +---------------+
+
+Note: In multi-link cases like above, to lock, one would acquire a global
+lock and then go on locking bus instances. But, in this case the caller
+framework(ASoC DPCM) guarantees that stream operations on a card are
+always serialized. So, there is no race condition and hence no need for
+global lock.
+
 SoundWire Stream Management flow
 ================================
 
@@ -174,6 +202,7 @@ per stream. From ASoC DPCM framework, this stream state maybe linked to
 .startup() operation.
 
   .. code-block:: c
+
   int sdw_alloc_stream(char * stream_name);
 
 
@@ -200,6 +229,7 @@ only be invoked once by respective Master(s) and Slave(s). From ASoC DPCM
 framework, this stream state is linked to .hw_params() operation.
 
   .. code-block:: c
+
   int sdw_stream_add_master(struct sdw_bus * bus,
 		struct sdw_stream_config * stream_config,
 		struct sdw_ports_config * ports_config,
@@ -245,6 +275,7 @@ stream. From ASoC DPCM framework, this stream state is linked to
 .prepare() operation.
 
   .. code-block:: c
+
   int sdw_prepare_stream(struct sdw_stream_runtime * stream);
 
 
@@ -274,6 +305,7 @@ stream. From ASoC DPCM framework, this stream state is linked to
 .trigger() start operation.
 
   .. code-block:: c
+
   int sdw_enable_stream(struct sdw_stream_runtime * stream);
 
 SDW_STREAM_DISABLED
@@ -301,6 +333,7 @@ per stream. From ASoC DPCM framework, this stream state is linked to
 .trigger() stop operation.
 
   .. code-block:: c
+
   int sdw_disable_stream(struct sdw_stream_runtime * stream);
 
 
@@ -325,6 +358,7 @@ per stream. From ASoC DPCM framework, this stream state is linked to
 .trigger() stop operation.
 
   .. code-block:: c
+
   int sdw_deprepare_stream(struct sdw_stream_runtime * stream);
 
 
@@ -349,6 +383,7 @@ all the Master(s) and Slave(s) associated with stream. From ASoC DPCM
 framework, this stream state is linked to .hw_free() operation.
 
   .. code-block:: c
+
   int sdw_stream_remove_master(struct sdw_bus * bus,
 		struct sdw_stream_runtime * stream);
   int sdw_stream_remove_slave(struct sdw_slave * slave,
@@ -361,6 +396,7 @@ stream assigned as part of ALLOCATED state.
 In .shutdown() the data structure maintaining stream state are freed up.
 
   .. code-block:: c
+
   void sdw_release_stream(struct sdw_stream_runtime * stream);
 
 Not Supported
diff --git a/Documentation/driver-api/uio-howto.rst b/Documentation/driver-api/uio-howto.rst
index fb2eb73be4a3..25f50eace28b 100644
--- a/Documentation/driver-api/uio-howto.rst
+++ b/Documentation/driver-api/uio-howto.rst
@@ -463,8 +463,8 @@ Getting information about your UIO device
 
 Information about all UIO devices is available in sysfs. The first thing
 you should do in your driver is check ``name`` and ``version`` to make
-sure your talking to the right device and that its kernel driver has the
-version you expect.
+sure you're talking to the right device and that its kernel driver has
+the version you expect.
 
 You should also make sure that the memory mapping you need exists and
 has the size you expect.
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 48b424de85bb..cfbc18f0d9c9 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -191,21 +191,11 @@ Currently, the following pairs of encryption modes are supported:
 
 - AES-256-XTS for contents and AES-256-CTS-CBC for filenames
 - AES-128-CBC for contents and AES-128-CTS-CBC for filenames
-- Speck128/256-XTS for contents and Speck128/256-CTS-CBC for filenames
 
 It is strongly recommended to use AES-256-XTS for contents encryption.
 AES-128-CBC was added only for low-powered embedded devices with
 crypto accelerators such as CAAM or CESA that do not support XTS.
 
-Similarly, Speck128/256 support was only added for older or low-end
-CPUs which cannot do AES fast enough -- especially ARM CPUs which have
-NEON instructions but not the Cryptography Extensions -- and for which
-it would not otherwise be feasible to use encryption at all.  It is
-not recommended to use Speck on CPUs that have AES instructions.
-Speck support is only available if it has been enabled in the crypto
-API via CONFIG_CRYPTO_SPECK.  Also, on ARM platforms, to get
-acceptable performance CONFIG_CRYPTO_SPECK_NEON must be enabled.
-
 New encryption modes can be added relatively easily, without changes
 to individual filesystems.  However, authenticated encryption (AE)
 modes are not currently supported because of the difficulty of dealing
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 7b7b845c490a..321d74b73937 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -622,3 +622,14 @@ in your dentry operations instead.
 	alloc_file_clone(file, flags, ops) does not affect any caller's references.
 	On success you get a new struct file sharing the mount/dentry with the
 	original, on failure - ERR_PTR().
+--
+[recommended]
+	->lookup() instances doing an equivalent of
+		if (IS_ERR(inode))
+			return ERR_CAST(inode);
+		return d_splice_alias(inode, dentry);
+	don't need to bother with the check - d_splice_alias() will do the
+	right thing when given ERR_PTR(...) as inode.  Moreover, passing NULL
+	inode to d_splice_alias() will also do the right thing (equivalent of
+	d_add(dentry, NULL); return NULL;), so that kind of special cases
+	also doesn't need a separate treatment.
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 13a7c999c04a..d05d93761653 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -201,7 +201,7 @@ Code  Seq#(hex)	Include File		Comments
 'X'	01	linux/pktcdvd.h		conflict!
 'Y'	all	linux/cyclades.h
 'Z'	14-15	drivers/message/fusion/mptctl.h
-'['	00-07	linux/usb/tmc.h		USB Test and Measurement Devices
+'['	00-3F	linux/usb/tmc.h		USB Test and Measurement Devices
 					<mailto:gregkh@linuxfoundation.org>
 'a'	all	linux/atm*.h, linux/sonet.h	ATM on linux
 					<http://lrcwww.epfl.ch/>
diff --git a/Documentation/nvmem/nvmem.txt b/Documentation/nvmem/nvmem.txt
index 8d8d8f58f96f..fc2fe4b18655 100644
--- a/Documentation/nvmem/nvmem.txt
+++ b/Documentation/nvmem/nvmem.txt
@@ -58,6 +58,37 @@ static int qfprom_probe(struct platform_device *pdev)
 It is mandatory that the NVMEM provider has a regmap associated with its
 struct device. Failure to do would return error code from nvmem_register().
 
+Users of board files can define and register nvmem cells using the
+nvmem_cell_table struct:
+
+static struct nvmem_cell_info foo_nvmem_cells[] = {
+	{
+		.name		= "macaddr",
+		.offset		= 0x7f00,
+		.bytes		= ETH_ALEN,
+	}
+};
+
+static struct nvmem_cell_table foo_nvmem_cell_table = {
+	.nvmem_name		= "i2c-eeprom",
+	.cells			= foo_nvmem_cells,
+	.ncells			= ARRAY_SIZE(foo_nvmem_cells),
+};
+
+nvmem_add_cell_table(&foo_nvmem_cell_table);
+
+Additionally it is possible to create nvmem cell lookup entries and register
+them with the nvmem framework from machine code as shown in the example below:
+
+static struct nvmem_cell_lookup foo_nvmem_lookup = {
+	.nvmem_name		= "i2c-eeprom",
+	.cell_name		= "macaddr",
+	.dev_id			= "foo_mac.0",
+	.con_id			= "mac-address",
+};
+
+nvmem_add_cell_lookups(&foo_nvmem_lookup, 1);
+
 NVMEM Consumers
 +++++++++++++++
 
diff --git a/Documentation/s390/vfio-ap.txt b/Documentation/s390/vfio-ap.txt
new file mode 100644
index 000000000000..65167cfe4485
--- /dev/null
+++ b/Documentation/s390/vfio-ap.txt
@@ -0,0 +1,837 @@
+Introduction:
+============
+The Adjunct Processor (AP) facility is an IBM Z cryptographic facility comprised
+of three AP instructions and from 1 up to 256 PCIe cryptographic adapter cards.
+The AP devices provide cryptographic functions to all CPUs assigned to a
+linux system running in an IBM Z system LPAR.
+
+The AP adapter cards are exposed via the AP bus. The motivation for vfio-ap
+is to make AP cards available to KVM guests using the VFIO mediated device
+framework. This implementation relies considerably on the s390 virtualization
+facilities which do most of the hard work of providing direct access to AP
+devices.
+
+AP Architectural Overview:
+=========================
+To facilitate the comprehension of the design, let's start with some
+definitions:
+
+* AP adapter
+
+  An AP adapter is an IBM Z adapter card that can perform cryptographic
+  functions. There can be from 0 to 256 adapters assigned to an LPAR. Adapters
+  assigned to the LPAR in which a linux host is running will be available to
+  the linux host. Each adapter is identified by a number from 0 to 255; however,
+  the maximum adapter number is determined by machine model and/or adapter type.
+  When installed, an AP adapter is accessed by AP instructions executed by any
+  CPU.
+
+  The AP adapter cards are assigned to a given LPAR via the system's Activation
+  Profile which can be edited via the HMC. When the linux host system is IPL'd
+  in the LPAR, the AP bus detects the AP adapter cards assigned to the LPAR and
+  creates a sysfs device for each assigned adapter. For example, if AP adapters
+  4 and 10 (0x0a) are assigned to the LPAR, the AP bus will create the following
+  sysfs device entries:
+
+    /sys/devices/ap/card04
+    /sys/devices/ap/card0a
+
+  Symbolic links to these devices will also be created in the AP bus devices
+  sub-directory:
+
+    /sys/bus/ap/devices/[card04]
+    /sys/bus/ap/devices/[card04]
+
+* AP domain
+
+  An adapter is partitioned into domains. An adapter can hold up to 256 domains
+  depending upon the adapter type and hardware configuration. A domain is
+  identified by a number from 0 to 255; however, the maximum domain number is
+  determined by machine model and/or adapter type.. A domain can be thought of
+  as a set of hardware registers and memory used for processing AP commands. A
+  domain can be configured with a secure private key used for clear key
+  encryption. A domain is classified in one of two ways depending upon how it
+  may be accessed:
+
+    * Usage domains are domains that are targeted by an AP instruction to
+      process an AP command.
+
+    * Control domains are domains that are changed by an AP command sent to a
+      usage domain; for example, to set the secure private key for the control
+      domain.
+
+  The AP usage and control domains are assigned to a given LPAR via the system's
+  Activation Profile which can be edited via the HMC. When a linux host system
+  is IPL'd in the LPAR, the AP bus module detects the AP usage and control
+  domains assigned to the LPAR. The domain number of each usage domain and
+  adapter number of each AP adapter are combined to create AP queue devices
+  (see AP Queue section below). The domain number of each control domain will be
+  represented in a bitmask and stored in a sysfs file
+  /sys/bus/ap/ap_control_domain_mask. The bits in the mask, from most to least
+  significant bit, correspond to domains 0-255.
+
+* AP Queue
+
+  An AP queue is the means by which an AP command is sent to a usage domain
+  inside a specific adapter. An AP queue is identified by a tuple
+  comprised of an AP adapter ID (APID) and an AP queue index (APQI). The
+  APQI corresponds to a given usage domain number within the adapter. This tuple
+  forms an AP Queue Number (APQN) uniquely identifying an AP queue. AP
+  instructions include a field containing the APQN to identify the AP queue to
+  which the AP command is to be sent for processing.
+
+  The AP bus will create a sysfs device for each APQN that can be derived from
+  the cross product of the AP adapter and usage domain numbers detected when the
+  AP bus module is loaded. For example, if adapters 4 and 10 (0x0a) and usage
+  domains 6 and 71 (0x47) are assigned to the LPAR, the AP bus will create the
+  following sysfs entries:
+
+    /sys/devices/ap/card04/04.0006
+    /sys/devices/ap/card04/04.0047
+    /sys/devices/ap/card0a/0a.0006
+    /sys/devices/ap/card0a/0a.0047
+
+  The following symbolic links to these devices will be created in the AP bus
+  devices subdirectory:
+
+    /sys/bus/ap/devices/[04.0006]
+    /sys/bus/ap/devices/[04.0047]
+    /sys/bus/ap/devices/[0a.0006]
+    /sys/bus/ap/devices/[0a.0047]
+
+* AP Instructions:
+
+  There are three AP instructions:
+
+  * NQAP: to enqueue an AP command-request message to a queue
+  * DQAP: to dequeue an AP command-reply message from a queue
+  * PQAP: to administer the queues
+
+  AP instructions identify the domain that is targeted to process the AP
+  command; this must be one of the usage domains. An AP command may modify a
+  domain that is not one of the usage domains, but the modified domain
+  must be one of the control domains.
+
+AP and SIE:
+==========
+Let's now take a look at how AP instructions executed on a guest are interpreted
+by the hardware.
+
+A satellite control block called the Crypto Control Block (CRYCB) is attached to
+our main hardware virtualization control block. The CRYCB contains three fields
+to identify the adapters, usage domains and control domains assigned to the KVM
+guest:
+
+* The AP Mask (APM) field is a bit mask that identifies the AP adapters assigned
+  to the KVM guest. Each bit in the mask, from left to right (i.e. from most
+  significant to least significant bit in big endian order), corresponds to
+  an APID from 0-255. If a bit is set, the corresponding adapter is valid for
+  use by the KVM guest.
+
+* The AP Queue Mask (AQM) field is a bit mask identifying the AP usage domains
+  assigned to the KVM guest. Each bit in the mask, from left to right (i.e. from
+  most significant to least significant bit in big endian order), corresponds to
+  an AP queue index (APQI) from 0-255. If a bit is set, the corresponding queue
+  is valid for use by the KVM guest.
+
+* The AP Domain Mask field is a bit mask that identifies the AP control domains
+  assigned to the KVM guest. The ADM bit mask controls which domains can be
+  changed by an AP command-request message sent to a usage domain from the
+  guest. Each bit in the mask, from left to right (i.e. from most significant to
+  least significant bit in big endian order), corresponds to a domain from
+  0-255. If a bit is set, the corresponding domain can be modified by an AP
+  command-request message sent to a usage domain.
+
+If you recall from the description of an AP Queue, AP instructions include
+an APQN to identify the AP queue to which an AP command-request message is to be
+sent (NQAP and PQAP instructions), or from which a command-reply message is to
+be received (DQAP instruction). The validity of an APQN is defined by the matrix
+calculated from the APM and AQM; it is the cross product of all assigned adapter
+numbers (APM) with all assigned queue indexes (AQM). For example, if adapters 1
+and 2 and usage domains 5 and 6 are assigned to a guest, the APQNs (1,5), (1,6),
+(2,5) and (2,6) will be valid for the guest.
+
+The APQNs can provide secure key functionality - i.e., a private key is stored
+on the adapter card for each of its domains - so each APQN must be assigned to
+at most one guest or to the linux host.
+
+   Example 1: Valid configuration:
+   ------------------------------
+   Guest1: adapters 1,2  domains 5,6
+   Guest2: adapter  1,2  domain 7
+
+   This is valid because both guests have a unique set of APQNs:
+      Guest1 has APQNs (1,5), (1,6), (2,5), (2,6);
+      Guest2 has APQNs (1,7), (2,7)
+
+   Example 2: Valid configuration:
+   ------------------------------
+   Guest1: adapters 1,2 domains 5,6
+   Guest2: adapters 3,4 domains 5,6
+
+   This is also valid because both guests have a unique set of APQNs:
+      Guest1 has APQNs (1,5), (1,6), (2,5), (2,6);
+      Guest2 has APQNs (3,5), (3,6), (4,5), (4,6)
+
+   Example 3: Invalid configuration:
+   --------------------------------
+   Guest1: adapters 1,2  domains 5,6
+   Guest2: adapter  1    domains 6,7
+
+   This is an invalid configuration because both guests have access to
+   APQN (1,6).
+
+The Design:
+===========
+The design introduces three new objects:
+
+1. AP matrix device
+2. VFIO AP device driver (vfio_ap.ko)
+3. VFIO AP mediated matrix pass-through device
+
+The VFIO AP device driver
+-------------------------
+The VFIO AP (vfio_ap) device driver serves the following purposes:
+
+1. Provides the interfaces to secure APQNs for exclusive use of KVM guests.
+
+2. Sets up the VFIO mediated device interfaces to manage a mediated matrix
+   device and creates the sysfs interfaces for assigning adapters, usage
+   domains, and control domains comprising the matrix for a KVM guest.
+
+3. Configures the APM, AQM and ADM in the CRYCB referenced by a KVM guest's
+   SIE state description to grant the guest access to a matrix of AP devices
+
+Reserve APQNs for exclusive use of KVM guests
+---------------------------------------------
+The following block diagram illustrates the mechanism by which APQNs are
+reserved:
+
+                              +------------------+
+               7 remove       |                  |
+         +--------------------> cex4queue driver |
+         |                    |                  |
+         |                    +------------------+
+         |
+         |
+         |                    +------------------+          +-----------------+
+         |  5 register driver |                  | 3 create |                 |
+         |   +---------------->   Device core    +---------->  matrix device  |
+         |   |                |                  |          |                 |
+         |   |                +--------^---------+          +-----------------+
+         |   |                         |
+         |   |                         +-------------------+
+         |   | +-----------------------------------+       |
+         |   | |      4 register AP driver         |       | 2 register device
+         |   | |                                   |       |
++--------+---+-v---+                      +--------+-------+-+
+|                  |                      |                  |
+|      ap_bus      +--------------------- >  vfio_ap driver  |
+|                  |       8 probe        |                  |
++--------^---------+                      +--^--^------------+
+6 edit   |                                   |  |
+  apmask |     +-----------------------------+  | 9 mdev create
+  aqmask |     |           1 modprobe           |
++--------+-----+---+           +----------------+-+         +------------------+
+|                  |           |                  |8 create |     mediated     |
+|      admin       |           | VFIO device core |--------->     matrix       |
+|                  +           |                  |         |     device       |
++------+-+---------+           +--------^---------+         +--------^---------+
+       | |                              |                            |
+       | | 9 create vfio_ap-passthrough |                            |
+       | +------------------------------+                            |
+       +-------------------------------------------------------------+
+                   10  assign adapter/domain/control domain
+
+The process for reserving an AP queue for use by a KVM guest is:
+
+1. The administrator loads the vfio_ap device driver
+2. The vfio-ap driver during its initialization will register a single 'matrix'
+   device with the device core. This will serve as the parent device for
+   all mediated matrix devices used to configure an AP matrix for a guest.
+3. The /sys/devices/vfio_ap/matrix device is created by the device core
+4  The vfio_ap device driver will register with the AP bus for AP queue devices
+   of type 10 and higher (CEX4 and newer). The driver will provide the vfio_ap
+   driver's probe and remove callback interfaces. Devices older than CEX4 queues
+   are not supported to simplify the implementation by not needlessly
+   complicating the design by supporting older devices that will go out of
+   service in the relatively near future, and for which there are few older
+   systems around on which to test.
+5. The AP bus registers the vfio_ap device driver with the device core
+6. The administrator edits the AP adapter and queue masks to reserve AP queues
+   for use by the vfio_ap device driver.
+7. The AP bus removes the AP queues reserved for the vfio_ap driver from the
+   default zcrypt cex4queue driver.
+8. The AP bus probes the vfio_ap device driver to bind the queues reserved for
+   it.
+9. The administrator creates a passthrough type mediated matrix device to be
+   used by a guest
+10 The administrator assigns the adapters, usage domains and control domains
+   to be exclusively used by a guest.
+
+Set up the VFIO mediated device interfaces
+------------------------------------------
+The VFIO AP device driver utilizes the common interface of the VFIO mediated
+device core driver to:
+* Register an AP mediated bus driver to add a mediated matrix device to and
+  remove it from a VFIO group.
+* Create and destroy a mediated matrix device
+* Add a mediated matrix device to and remove it from the AP mediated bus driver
+* Add a mediated matrix device to and remove it from an IOMMU group
+
+The following high-level block diagram shows the main components and interfaces
+of the VFIO AP mediated matrix device driver:
+
+ +-------------+
+ |             |
+ | +---------+ | mdev_register_driver() +--------------+
+ | |  Mdev   | +<-----------------------+              |
+ | |  bus    | |                        | vfio_mdev.ko |
+ | | driver  | +----------------------->+              |<-> VFIO user
+ | +---------+ |    probe()/remove()    +--------------+    APIs
+ |             |
+ |  MDEV CORE  |
+ |   MODULE    |
+ |   mdev.ko   |
+ | +---------+ | mdev_register_device() +--------------+
+ | |Physical | +<-----------------------+              |
+ | | device  | |                        |  vfio_ap.ko  |<-> matrix
+ | |interface| +----------------------->+              |    device
+ | +---------+ |       callback         +--------------+
+ +-------------+
+
+During initialization of the vfio_ap module, the matrix device is registered
+with an 'mdev_parent_ops' structure that provides the sysfs attribute
+structures, mdev functions and callback interfaces for managing the mediated
+matrix device.
+
+* sysfs attribute structures:
+  * supported_type_groups
+    The VFIO mediated device framework supports creation of user-defined
+    mediated device types. These mediated device types are specified
+    via the 'supported_type_groups' structure when a device is registered
+    with the mediated device framework. The registration process creates the
+    sysfs structures for each mediated device type specified in the
+    'mdev_supported_types' sub-directory of the device being registered. Along
+    with the device type, the sysfs attributes of the mediated device type are
+    provided.
+
+    The VFIO AP device driver will register one mediated device type for
+    passthrough devices:
+      /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough
+    Only the read-only attributes required by the VFIO mdev framework will
+    be provided:
+        ... name
+        ... device_api
+        ... available_instances
+        ... device_api
+        Where:
+        * name: specifies the name of the mediated device type
+        * device_api: the mediated device type's API
+        * available_instances: the number of mediated matrix passthrough devices
+                               that can be created
+        * device_api: specifies the VFIO API
+  * mdev_attr_groups
+    This attribute group identifies the user-defined sysfs attributes of the
+    mediated device. When a device is registered with the VFIO mediated device
+    framework, the sysfs attribute files identified in the 'mdev_attr_groups'
+    structure will be created in the mediated matrix device's directory. The
+    sysfs attributes for a mediated matrix device are:
+    * assign_adapter:
+    * unassign_adapter:
+      Write-only attributes for assigning/unassigning an AP adapter to/from the
+      mediated matrix device. To assign/unassign an adapter, the APID of the
+      adapter is echoed to the respective attribute file.
+    * assign_domain:
+    * unassign_domain:
+      Write-only attributes for assigning/unassigning an AP usage domain to/from
+      the mediated matrix device. To assign/unassign a domain, the domain
+      number of the the usage domain is echoed to the respective attribute
+      file.
+    * matrix:
+      A read-only file for displaying the APQNs derived from the cross product
+      of the adapter and domain numbers assigned to the mediated matrix device.
+    * assign_control_domain:
+    * unassign_control_domain:
+      Write-only attributes for assigning/unassigning an AP control domain
+      to/from the mediated matrix device. To assign/unassign a control domain,
+      the ID of the domain to be assigned/unassigned is echoed to the respective
+      attribute file.
+    * control_domains:
+      A read-only file for displaying the control domain numbers assigned to the
+      mediated matrix device.
+
+* functions:
+  * create:
+    allocates the ap_matrix_mdev structure used by the vfio_ap driver to:
+    * Store the reference to the KVM structure for the guest using the mdev
+    * Store the AP matrix configuration for the adapters, domains, and control
+      domains assigned via the corresponding sysfs attributes files
+  * remove:
+    deallocates the mediated matrix device's ap_matrix_mdev structure. This will
+    be allowed only if a running guest is not using the mdev.
+
+* callback interfaces
+  * open:
+    The vfio_ap driver uses this callback to register a
+    VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the mdev matrix
+    device. The open is invoked when QEMU connects the VFIO iommu group
+    for the mdev matrix device to the MDEV bus. Access to the KVM structure used
+    to configure the KVM guest is provided via this callback. The KVM structure,
+    is used to configure the guest's access to the AP matrix defined via the
+    mediated matrix device's sysfs attribute files.
+  * release:
+    unregisters the VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the
+    mdev matrix device and deconfigures the guest's AP matrix.
+
+Configure the APM, AQM and ADM in the CRYCB:
+-------------------------------------------
+Configuring the AP matrix for a KVM guest will be performed when the
+VFIO_GROUP_NOTIFY_SET_KVM notifier callback is invoked. The notifier
+function is called when QEMU connects to KVM. The guest's AP matrix is
+configured via it's CRYCB by:
+* Setting the bits in the APM corresponding to the APIDs assigned to the
+  mediated matrix device via its 'assign_adapter' interface.
+* Setting the bits in the AQM corresponding to the domains assigned to the
+  mediated matrix device via its 'assign_domain' interface.
+* Setting the bits in the ADM corresponding to the domain dIDs assigned to the
+  mediated matrix device via its 'assign_control_domains' interface.
+
+The CPU model features for AP
+-----------------------------
+The AP stack relies on the presence of the AP instructions as well as two
+facilities: The AP Facilities Test (APFT) facility; and the AP Query
+Configuration Information (QCI) facility. These features/facilities are made
+available to a KVM guest via the following CPU model features:
+
+1. ap: Indicates whether the AP instructions are installed on the guest. This
+   feature will be enabled by KVM only if the AP instructions are installed
+   on the host.
+
+2. apft: Indicates the APFT facility is available on the guest. This facility
+   can be made available to the guest only if it is available on the host (i.e.,
+   facility bit 15 is set).
+
+3. apqci: Indicates the AP QCI facility is available on the guest. This facility
+   can be made available to the guest only if it is available on the host (i.e.,
+   facility bit 12 is set).
+
+Note: If the user chooses to specify a CPU model different than the 'host'
+model to QEMU, the CPU model features and facilities need to be turned on
+explicitly; for example:
+
+     /usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on
+
+A guest can be precluded from using AP features/facilities by turning them off
+explicitly; for example:
+
+     /usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off
+
+Note: If the APFT facility is turned off (apft=off) for the guest, the guest
+will not see any AP devices. The zcrypt device drivers that register for type 10
+and newer AP devices - i.e., the cex4card and cex4queue device drivers - need
+the APFT facility to ascertain the facilities installed on a given AP device. If
+the APFT facility is not installed on the guest, then the probe of device
+drivers will fail since only type 10 and newer devices can be configured for
+guest use.
+
+Example:
+=======
+Let's now provide an example to illustrate how KVM guests may be given
+access to AP facilities. For this example, we will show how to configure
+three guests such that executing the lszcrypt command on the guests would
+look like this:
+
+Guest1
+------
+CARD.DOMAIN TYPE  MODE
+------------------------------
+05          CEX5C CCA-Coproc
+05.0004     CEX5C CCA-Coproc
+05.00ab     CEX5C CCA-Coproc
+06          CEX5A Accelerator
+06.0004     CEX5A Accelerator
+06.00ab     CEX5C CCA-Coproc
+
+Guest2
+------
+CARD.DOMAIN TYPE  MODE
+------------------------------
+05          CEX5A Accelerator
+05.0047     CEX5A Accelerator
+05.00ff     CEX5A Accelerator
+
+Guest2
+------
+CARD.DOMAIN TYPE  MODE
+------------------------------
+06          CEX5A Accelerator
+06.0047     CEX5A Accelerator
+06.00ff     CEX5A Accelerator
+
+These are the steps:
+
+1. Install the vfio_ap module on the linux host. The dependency chain for the
+   vfio_ap module is:
+   * iommu
+   * s390
+   * zcrypt
+   * vfio
+   * vfio_mdev
+   * vfio_mdev_device
+   * KVM
+
+   To build the vfio_ap module, the kernel build must be configured with the
+   following Kconfig elements selected:
+   * IOMMU_SUPPORT
+   * S390
+   * ZCRYPT
+   * S390_AP_IOMMU
+   * VFIO
+   * VFIO_MDEV
+   * VFIO_MDEV_DEVICE
+   * KVM
+
+   If using make menuconfig select the following to build the vfio_ap module:
+   -> Device Drivers
+      -> IOMMU Hardware Support
+         select S390 AP IOMMU Support
+      -> VFIO Non-Privileged userspace driver framework
+         -> Mediated device driver frramework
+            -> VFIO driver for Mediated devices
+   -> I/O subsystem
+      -> VFIO support for AP devices
+
+2. Secure the AP queues to be used by the three guests so that the host can not
+   access them. To secure them, there are two sysfs files that specify
+   bitmasks marking a subset of the APQN range as 'usable by the default AP
+   queue device drivers' or 'not usable by the default device drivers' and thus
+   available for use by the vfio_ap device driver'. The location of the sysfs
+   files containing the masks are:
+
+   /sys/bus/ap/apmask
+   /sys/bus/ap/aqmask
+
+   The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs
+   (APID). Each bit in the mask, from left to right (i.e., from most significant
+   to least significant bit in big endian order), corresponds to an APID from
+   0-255. If a bit is set, the APID is marked as usable only by the default AP
+   queue device drivers; otherwise, the APID is usable by the vfio_ap
+   device driver.
+
+   The 'aqmask' is a 256-bit mask that identifies a set of AP queue indexes
+   (APQI). Each bit in the mask, from left to right (i.e., from most significant
+   to least significant bit in big endian order), corresponds to an APQI from
+   0-255. If a bit is set, the APQI is marked as usable only by the default AP
+   queue device drivers; otherwise, the APQI is usable by the vfio_ap device
+   driver.
+
+   Take, for example, the following mask:
+
+      0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+
+    It indicates:
+
+      1, 2, 3, 4, 5, and 7-255 belong to the default drivers' pool, and 0 and 6
+      belong to the vfio_ap device driver's pool.
+
+   The APQN of each AP queue device assigned to the linux host is checked by the
+   AP bus against the set of APQNs derived from the cross product of APIDs
+   and APQIs marked as usable only by the default AP queue device drivers. If a
+   match is detected,  only the default AP queue device drivers will be probed;
+   otherwise, the vfio_ap device driver will be probed.
+
+   By default, the two masks are set to reserve all APQNs for use by the default
+   AP queue device drivers. There are two ways the default masks can be changed:
+
+   1. The sysfs mask files can be edited by echoing a string into the
+      respective sysfs mask file in one of two formats:
+
+      * An absolute hex string starting with 0x - like "0x12345678" - sets
+        the mask. If the given string is shorter than the mask, it is padded
+        with 0s on the right; for example, specifying a mask value of 0x41 is
+        the same as specifying:
+
+           0x4100000000000000000000000000000000000000000000000000000000000000
+
+        Keep in mind that the mask reads from left to right (i.e., most
+        significant to least significant bit in big endian order), so the mask
+        above identifies device numbers 1 and 7 (01000001).
+
+        If the string is longer than the mask, the operation is terminated with
+        an error (EINVAL).
+
+      * Individual bits in the mask can be switched on and off by specifying
+        each bit number to be switched in a comma separated list. Each bit
+        number string must be prepended with a ('+') or minus ('-') to indicate
+        the corresponding bit is to be switched on ('+') or off ('-'). Some
+        valid values are:
+
+           "+0"    switches bit 0 on
+           "-13"   switches bit 13 off
+           "+0x41" switches bit 65 on
+           "-0xff" switches bit 255 off
+
+           The following example:
+              +0,-6,+0x47,-0xf0
+
+              Switches bits 0 and 71 (0x47) on
+              Switches bits 6 and 240 (0xf0) off
+
+        Note that the bits not specified in the list remain as they were before
+        the operation.
+
+   2. The masks can also be changed at boot time via parameters on the kernel
+      command line like this:
+
+         ap.apmask=0xffff ap.aqmask=0x40
+
+         This would create the following masks:
+
+            apmask:
+            0xffff000000000000000000000000000000000000000000000000000000000000
+
+            aqmask:
+            0x4000000000000000000000000000000000000000000000000000000000000000
+
+         Resulting in these two pools:
+
+            default drivers pool:    adapter 0-15, domain 1
+            alternate drivers pool:  adapter 16-255, domains 0, 2-255
+
+   Securing the APQNs for our example:
+   ----------------------------------
+   To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 06.0047,
+   06.00ab, and 06.00ff for use by the vfio_ap device driver, the corresponding
+   APQNs can either be removed from the default masks:
+
+      echo -5,-6 > /sys/bus/ap/apmask
+
+      echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask
+
+   Or the masks can be set as follows:
+
+      echo 0xf9ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff \
+      > apmask
+
+      echo 0xf7fffffffffffffffeffffffffffffffffffffffffeffffffffffffffffffffe \
+      > aqmask
+
+   This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004,
+   06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The
+   sysfs directory for the vfio_ap device driver will now contain symbolic links
+   to the AP queue devices bound to it:
+
+   /sys/bus/ap
+   ... [drivers]
+   ...... [vfio_ap]
+   ......... [05.0004]
+   ......... [05.0047]
+   ......... [05.00ab]
+   ......... [05.00ff]
+   ......... [06.0004]
+   ......... [06.0047]
+   ......... [06.00ab]
+   ......... [06.00ff]
+
+   Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later)
+   can be bound to the vfio_ap device driver. The reason for this is to
+   simplify the implementation by not needlessly complicating the design by
+   supporting older devices that will go out of service in the relatively near
+   future and for which there are few older systems on which to test.
+
+   The administrator, therefore, must take care to secure only AP queues that
+   can be bound to the vfio_ap device driver. The device type for a given AP
+   queue device can be read from the parent card's sysfs directory. For example,
+   to see the hardware type of the queue 05.0004:
+
+   cat /sys/bus/ap/devices/card05/hwtype
+
+   The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the
+   vfio_ap device driver.
+
+3. Create the mediated devices needed to configure the AP matrixes for the
+   three guests and to provide an interface to the vfio_ap driver for
+   use by the guests:
+
+   /sys/devices/vfio_ap/matrix/
+   --- [mdev_supported_types]
+   ------ [vfio_ap-passthrough] (passthrough mediated matrix device type)
+   --------- create
+   --------- [devices]
+
+   To create the mediated devices for the three guests:
+
+	uuidgen > create
+	uuidgen > create
+	uuidgen > create
+
+        or
+
+        echo $uuid1 > create
+        echo $uuid2 > create
+        echo $uuid3 > create
+
+   This will create three mediated devices in the [devices] subdirectory named
+   after the UUID written to the create attribute file. We call them $uuid1,
+   $uuid2 and $uuid3 and this is the sysfs directory structure after creation:
+
+   /sys/devices/vfio_ap/matrix/
+   --- [mdev_supported_types]
+   ------ [vfio_ap-passthrough]
+   --------- [devices]
+   ------------ [$uuid1]
+   --------------- assign_adapter
+   --------------- assign_control_domain
+   --------------- assign_domain
+   --------------- matrix
+   --------------- unassign_adapter
+   --------------- unassign_control_domain
+   --------------- unassign_domain
+
+   ------------ [$uuid2]
+   --------------- assign_adapter
+   --------------- assign_control_domain
+   --------------- assign_domain
+   --------------- matrix
+   --------------- unassign_adapter
+   ----------------unassign_control_domain
+   ----------------unassign_domain
+
+   ------------ [$uuid3]
+   --------------- assign_adapter
+   --------------- assign_control_domain
+   --------------- assign_domain
+   --------------- matrix
+   --------------- unassign_adapter
+   ----------------unassign_control_domain
+   ----------------unassign_domain
+
+4. The administrator now needs to configure the matrixes for the mediated
+   devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3).
+
+   This is how the matrix is configured for Guest1:
+
+      echo 5 > assign_adapter
+      echo 6 > assign_adapter
+      echo 4 > assign_domain
+      echo 0xab > assign_domain
+
+      Control domains can similarly be assigned using the assign_control_domain
+      sysfs file.
+
+      If a mistake is made configuring an adapter, domain or control domain,
+      you can use the unassign_xxx files to unassign the adapter, domain or
+      control domain.
+
+      To display the matrix configuration for Guest1:
+
+         cat matrix
+
+   This is how the matrix is configured for Guest2:
+
+      echo 5 > assign_adapter
+      echo 0x47 > assign_domain
+      echo 0xff > assign_domain
+
+   This is how the matrix is configured for Guest3:
+
+      echo 6 > assign_adapter
+      echo 0x47 > assign_domain
+      echo 0xff > assign_domain
+
+   In order to successfully assign an adapter:
+
+   * The adapter number specified must represent a value from 0 up to the
+     maximum adapter number configured for the system. If an adapter number
+     higher than the maximum is specified, the operation will terminate with
+     an error (ENODEV).
+
+   * All APQNs that can be derived from the adapter ID and the IDs of
+     the previously assigned domains must be bound to the vfio_ap device
+     driver. If no domains have yet been assigned, then there must be at least
+     one APQN with the specified APID bound to the vfio_ap driver. If no such
+     APQNs are bound to the driver, the operation will terminate with an
+     error (EADDRNOTAVAIL).
+
+     No APQN that can be derived from the adapter ID and the IDs of the
+     previously assigned domains can be assigned to another mediated matrix
+     device. If an APQN is assigned to another mediated matrix device, the
+     operation will terminate with an error (EADDRINUSE).
+
+   In order to successfully assign a domain:
+
+   * The domain number specified must represent a value from 0 up to the
+     maximum domain number configured for the system. If a domain number
+     higher than the maximum is specified, the operation will terminate with
+     an error (ENODEV).
+
+   * All APQNs that can be derived from the domain ID and the IDs of
+     the previously assigned adapters must be bound to the vfio_ap device
+     driver. If no domains have yet been assigned, then there must be at least
+     one APQN with the specified APQI bound to the vfio_ap driver. If no such
+     APQNs are bound to the driver, the operation will terminate with an
+     error (EADDRNOTAVAIL).
+
+     No APQN that can be derived from the domain ID and the IDs of the
+     previously assigned adapters can be assigned to another mediated matrix
+     device. If an APQN is assigned to another mediated matrix device, the
+     operation will terminate with an error (EADDRINUSE).
+
+   In order to successfully assign a control domain, the domain number
+   specified must represent a value from 0 up to the maximum domain number
+   configured for the system. If a control domain number higher than the maximum
+   is specified, the operation will terminate with an error (ENODEV).
+
+5. Start Guest1:
+
+   /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+      -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
+
+7. Start Guest2:
+
+   /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+      -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
+
+7. Start Guest3:
+
+   /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+      -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
+
+When the guest is shut down, the mediated matrix devices may be removed.
+
+Using our example again, to remove the mediated matrix device $uuid1:
+
+   /sys/devices/vfio_ap/matrix/
+      --- [mdev_supported_types]
+      ------ [vfio_ap-passthrough]
+      --------- [devices]
+      ------------ [$uuid1]
+      --------------- remove
+
+
+   echo 1 > remove
+
+   This will remove all of the mdev matrix device's sysfs structures including
+   the mdev device itself. To recreate and reconfigure the mdev matrix device,
+   all of the steps starting with step 3 will have to be performed again. Note
+   that the remove will fail if a guest using the mdev is still running.
+
+   It is not necessary to remove an mdev matrix device, but one may want to
+   remove it if no guest will use it during the remaining lifetime of the linux
+   host. If the mdev matrix device is removed, one may want to also reconfigure
+   the pool of adapters and queues reserved for use by the default drivers.
+
+Limitations
+===========
+* The KVM/kernel interfaces do not provide a way to prevent restoring an APQN
+  to the default drivers pool of a queue that is still assigned to a mediated
+  device in use by a guest. It is incumbent upon the administrator to
+  ensure there is no mediated device in use by a guest to which the APQN is
+  assigned lest the host be given access to the private data of the AP queue
+  device such as a private key configured specifically for the guest.
+
+* Dynamically modifying the AP matrix for a running guest (which would amount to
+  hot(un)plug of AP devices for the guest) is currently not supported
+
+* Live guest migration is not supported for guests using AP devices.
diff --git a/Documentation/trace/stm.rst b/Documentation/trace/stm.rst
index 2c22ddb7fd3e..99f99963e5e7 100644
--- a/Documentation/trace/stm.rst
+++ b/Documentation/trace/stm.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ===================
 System Trace Module
 ===================
@@ -53,12 +55,30 @@ under "user" directory from the example above and this new rule will
 be used for trace sources with the id string of "user/dummy".
 
 Trace sources have to open the stm class device's node and write their
-trace data into its file descriptor. In order to identify themselves
-to the policy, they need to do a STP_POLICY_ID_SET ioctl on this file
-descriptor providing their id string. Otherwise, they will be
-automatically allocated a master/channel pair upon first write to this
-file descriptor according to the "default" rule of the policy, if such
-exists.
+trace data into its file descriptor.
+
+In order to find an appropriate policy node for a given trace source,
+several mechanisms can be used. First, a trace source can explicitly
+identify itself by calling an STP_POLICY_ID_SET ioctl on the character
+device's file descriptor, providing their id string, before they write
+any data there. Secondly, if they chose not to perform the explicit
+identification (because you may not want to patch existing software
+to do this), they can just start writing the data, at which point the
+stm core will try to find a policy node with the name matching the
+task's name (e.g., "syslogd") and if one exists, it will be used.
+Thirdly, if the task name can't be found among the policy nodes, the
+catch-all entry "default" will be used, if it exists. This entry also
+needs to be created and configured by the system administrator or
+whatever tools are taking care of the policy configuration. Finally,
+if all the above steps failed, the write() to an stm file descriptor
+will return a error (EINVAL).
+
+Previously, if no policy nodes were found for a trace source, the stm
+class would silently fall back to allocating the first available
+contiguous range of master/channels from the beginning of the device's
+master/channel range. The new requirement for a policy node to exist
+will help programmers and sysadmins identify gaps in configuration
+and have better control over the un-identified sources.
 
 Some STM devices may allow direct mapping of the channel mmio regions
 to userspace for zero-copy writing. One mappable page (in terms of
@@ -92,9 +112,9 @@ allocated for the device according to the policy configuration. If
 there's a node in the root of the policy directory that matches the
 stm_source device's name (for example, "console"), this node will be
 used to allocate master and channel numbers. If there's no such policy
-node, the stm core will pick the first contiguous chunk of channels
-within the first available master. Note that the node must exist
-before the stm_source device is connected to its stm device.
+node, the stm core will use the catch-all entry "default", if one
+exists. If neither policy nodes exist, the write() to stm_source_link
+will return an error.
 
 stm_console
 ===========
diff --git a/Documentation/trace/sys-t.rst b/Documentation/trace/sys-t.rst
new file mode 100644
index 000000000000..3d8eb92735e9
--- /dev/null
+++ b/Documentation/trace/sys-t.rst
@@ -0,0 +1,62 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
+MIPI SyS-T over STP
+===================
+
+The MIPI SyS-T protocol driver can be used with STM class devices to
+generate standardized trace stream. Aside from being a standard, it
+provides better trace source identification and timestamp correlation.
+
+In order to use the MIPI SyS-T protocol driver with your STM device,
+first, you'll need CONFIG_STM_PROTO_SYS_T.
+
+Now, you can select which protocol driver you want to use when you create
+a policy for your STM device, by specifying it in the policy name:
+
+# mkdir /config/stp-policy/dummy_stm.0:p_sys-t.my-policy/
+
+In other words, the policy name format is extended like this:
+
+  <device_name>:<protocol_name>.<policy_name>
+
+With Intel TH, therefore it can look like "0-sth:p_sys-t.my-policy".
+
+If the protocol name is omitted, the STM class will chose whichever
+protocol driver was loaded first.
+
+You can also double check that everything is working as expected by
+
+# cat /config/stp-policy/dummy_stm.0:p_sys-t.my-policy/protocol
+p_sys-t
+
+Now, with the MIPI SyS-T protocol driver, each policy node in the
+configfs gets a few additional attributes, which determine per-source
+parameters specific to the protocol:
+
+# mkdir /config/stp-policy/dummy_stm.0:p_sys-t.my-policy/default
+# ls /config/stp-policy/dummy_stm.0:p_sys-t.my-policy/default
+channels
+clocksync_interval
+do_len
+masters
+ts_interval
+uuid
+
+The most important one here is the "uuid", which determines the UUID
+that will be used to tag all data coming from this source. It is
+automatically generated when a new node is created, but it is likely
+that you would want to change it.
+
+do_len switches on/off the additional "payload length" field in the
+MIPI SyS-T message header. It is off by default as the STP already
+marks message boundaries.
+
+ts_interval and clocksync_interval determine how much time in milliseconds
+can pass before we need to include a protocol (not transport, aka STP)
+timestamp in a message header or send a CLOCKSYNC packet, respectively.
+
+See Documentation/ABI/testing/configfs-stp-policy-p_sys-t for more
+details.
+
+* [1] https://www.mipi.org/specifications/sys-t
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 647f94128a85..cd209f7730af 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -123,6 +123,37 @@ memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the
 flag KVM_VM_MIPS_VZ.
 
 
+On arm64, the physical address size for a VM (IPA Size limit) is limited
+to 40bits by default. The limit can be configured if the host supports the
+extension KVM_CAP_ARM_VM_IPA_SIZE. When supported, use
+KVM_VM_TYPE_ARM_IPA_SIZE(IPA_Bits) to set the size in the machine type
+identifier, where IPA_Bits is the maximum width of any physical
+address used by the VM. The IPA_Bits is encoded in bits[7-0] of the
+machine type identifier.
+
+e.g, to configure a guest to use 48bit physical address size :
+
+    vm_fd = ioctl(dev_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(48));
+
+The requested size (IPA_Bits) must be :
+  0 - Implies default size, 40bits (for backward compatibility)
+
+  or
+
+  N - Implies N bits, where N is a positive integer such that,
+      32 <= N <= Host_IPA_Limit
+
+Host_IPA_Limit is the maximum possible value for IPA_Bits on the host and
+is dependent on the CPU capability and the kernel configuration. The limit can
+be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION
+ioctl() at run-time.
+
+Please note that configuring the IPA size does not affect the capability
+exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
+size of the address translated by the stage2 level (guest physical to
+host physical address translations).
+
+
 4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST
 
 Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST
@@ -850,7 +881,7 @@ struct kvm_vcpu_events {
 		__u8 injected;
 		__u8 nr;
 		__u8 has_error_code;
-		__u8 pad;
+		__u8 pending;
 		__u32 error_code;
 	} exception;
 	struct {
@@ -873,15 +904,23 @@ struct kvm_vcpu_events {
 		__u8 smm_inside_nmi;
 		__u8 latched_init;
 	} smi;
+	__u8 reserved[27];
+	__u8 exception_has_payload;
+	__u64 exception_payload;
 };
 
-Only two fields are defined in the flags field:
+The following bits are defined in the flags field:
 
-- KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that
+- KVM_VCPUEVENT_VALID_SHADOW may be set to signal that
   interrupt.shadow contains a valid state.
 
-- KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that
-  smi contains a valid state.
+- KVM_VCPUEVENT_VALID_SMM may be set to signal that smi contains a
+  valid state.
+
+- KVM_VCPUEVENT_VALID_PAYLOAD may be set to signal that the
+  exception_has_payload, exception_payload, and exception.pending
+  fields contain a valid state. This bit will be set whenever
+  KVM_CAP_EXCEPTION_PAYLOAD is enabled.
 
 ARM/ARM64:
 
@@ -961,6 +1000,11 @@ shall be written into the VCPU.
 
 KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available.
 
+If KVM_CAP_EXCEPTION_PAYLOAD is enabled, KVM_VCPUEVENT_VALID_PAYLOAD
+can be set in the flags field to signal that the
+exception_has_payload, exception_payload, and exception.pending fields
+contain a valid state and shall be written into the VCPU.
+
 ARM/ARM64:
 
 Set the pending SError exception state for this VCPU. It is not possible to
@@ -1922,6 +1966,7 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_TIDR              | 64
   PPC   | KVM_REG_PPC_PSSCR             | 64
   PPC   | KVM_REG_PPC_DEC_EXPIRY        | 64
+  PPC   | KVM_REG_PPC_PTCR              | 64
   PPC   | KVM_REG_PPC_TM_GPR0           | 64
           ...
   PPC   | KVM_REG_PPC_TM_GPR31          | 64
@@ -2269,6 +2314,10 @@ The supported flags are:
         The emulated MMU supports 1T segments in addition to the
         standard 256M ones.
 
+    - KVM_PPC_NO_HASH
+	This flag indicates that HPT guests are not supported by KVM,
+	thus all guests must use radix MMU mode.
+
 The "slb_size" field indicates how many SLB entries are supported
 
 The "sps" array contains 8 entries indicating the supported base
@@ -3676,6 +3725,34 @@ Returns: 0 on success, -1 on error
 This copies the vcpu's kvm_nested_state struct from userspace to the kernel.  For
 the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
 
+4.116 KVM_(UN)REGISTER_COALESCED_MMIO
+
+Capability: KVM_CAP_COALESCED_MMIO (for coalesced mmio)
+	    KVM_CAP_COALESCED_PIO (for coalesced pio)
+Architectures: all
+Type: vm ioctl
+Parameters: struct kvm_coalesced_mmio_zone
+Returns: 0 on success, < 0 on error
+
+Coalesced I/O is a performance optimization that defers hardware
+register write emulation so that userspace exits are avoided.  It is
+typically used to reduce the overhead of emulating frequently accessed
+hardware registers.
+
+When a hardware register is configured for coalesced I/O, write accesses
+do not exit to userspace and their value is recorded in a ring buffer
+that is shared between kernel and userspace.
+
+Coalesced I/O is used if one or more write accesses to a hardware
+register can be deferred until a read or a write to another hardware
+register on the same device.  This last access will cause a vmexit and
+userspace will process accesses from the ring buffer before emulating
+it. That will avoid exiting to userspace on repeated writes.
+
+Coalesced pio is based on coalesced mmio. There is little difference
+between coalesced mmio and pio except that coalesced pio records accesses
+to I/O ports.
+
 5. The kvm_run structure
 ------------------------
 
@@ -4522,7 +4599,7 @@ hpage module parameter is not set to 1, -EINVAL is returned.
 While it is generally possible to create a huge page backed VM without
 this capability, the VM will not be able to run.
 
-7.14 KVM_CAP_MSR_PLATFORM_INFO
+7.15 KVM_CAP_MSR_PLATFORM_INFO
 
 Architectures: x86
 Parameters: args[0] whether feature should be enabled or not
@@ -4531,6 +4608,45 @@ With this capability, a guest may read the MSR_PLATFORM_INFO MSR. Otherwise,
 a #GP would be raised when the guest tries to access. Currently, this
 capability does not enable write permissions of this MSR for the guest.
 
+7.16 KVM_CAP_PPC_NESTED_HV
+
+Architectures: ppc
+Parameters: none
+Returns: 0 on success, -EINVAL when the implementation doesn't support
+	 nested-HV virtualization.
+
+HV-KVM on POWER9 and later systems allows for "nested-HV"
+virtualization, which provides a way for a guest VM to run guests that
+can run using the CPU's supervisor mode (privileged non-hypervisor
+state).  Enabling this capability on a VM depends on the CPU having
+the necessary functionality and on the facility being enabled with a
+kvm-hv module parameter.
+
+7.17 KVM_CAP_EXCEPTION_PAYLOAD
+
+Architectures: x86
+Parameters: args[0] whether feature should be enabled or not
+
+With this capability enabled, CR2 will not be modified prior to the
+emulated VM-exit when L1 intercepts a #PF exception that occurs in
+L2. Similarly, for kvm-intel only, DR6 will not be modified prior to
+the emulated VM-exit when L1 intercepts a #DB exception that occurs in
+L2. As a result, when KVM_GET_VCPU_EVENTS reports a pending #PF (or
+#DB) exception for L2, exception.has_payload will be set and the
+faulting address (or the new DR6 bits*) will be reported in the
+exception_payload field. Similarly, when userspace injects a #PF (or
+#DB) into L2 using KVM_SET_VCPU_EVENTS, it is expected to set
+exception.has_payload and to put the faulting address (or the new DR6
+bits*) in the exception_payload field.
+
+This capability also enables exception.pending in struct
+kvm_vcpu_events, which allows userspace to distinguish between pending
+and injected exceptions.
+
+
+* For the new DR6 bits, note that bit 16 is set iff the #DB exception
+  will clear DR6.RTM.
+
 8. Other capabilities.
 ----------------------
 
@@ -4772,3 +4888,10 @@ CPU when the exception is taken. If this virtual SError is taken to EL1 using
 AArch64, this value will be reported in the ISS field of ESR_ELx.
 
 See KVM_CAP_VCPU_EVENTS for more details.
+8.20 KVM_CAP_HYPERV_SEND_IPI
+
+Architectures: x86
+
+This capability indicates that KVM supports paravirtualized Hyper-V IPI send
+hypercalls:
+HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
diff --git a/MAINTAINERS b/MAINTAINERS
index 2b928aa857ce..fdb6a298c7e7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -932,6 +932,7 @@ M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 M:	Arve Hjønnevåg <arve@android.com>
 M:	Todd Kjos <tkjos@android.com>
 M:	Martijn Coenen <maco@android.com>
+M:	Joel Fernandes <joel@joelfernandes.org>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git
 L:	devel@driverdev.osuosl.org
 S:	Supported
@@ -1180,7 +1181,7 @@ N:	owl
 F:	arch/arm/mach-actions/
 F:	arch/arm/boot/dts/owl-*
 F:	arch/arm64/boot/dts/actions/
-F:	drivers/clocksource/owl-*
+F:	drivers/clocksource/timer-owl*
 F:	drivers/pinctrl/actions/*
 F:	drivers/soc/actions/
 F:	include/dt-bindings/power/owl-*
@@ -1603,7 +1604,7 @@ L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/boot/dts/lpc43*
 F:	drivers/clk/nxp/clk-lpc18xx*
-F:	drivers/clocksource/time-lpc32xx.c
+F:	drivers/clocksource/timer-lpc32xx.c
 F:	drivers/i2c/busses/i2c-lpc2k.c
 F:	drivers/memory/pl172.c
 F:	drivers/mtd/spi-nor/nxp-spifi.c
@@ -2220,7 +2221,7 @@ F:	arch/arm/mach-vexpress/
 F:	*/*/vexpress*
 F:	*/*/*/vexpress*
 F:	drivers/clk/versatile/clk-vexpress-osc.c
-F:	drivers/clocksource/versatile.c
+F:	drivers/clocksource/timer-versatile.c
 N:	mps2
 
 ARM/VFP SUPPORT
@@ -2242,7 +2243,7 @@ M:	Tony Prisk <linux@prisktech.co.nz>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-vt8500/
-F:	drivers/clocksource/vt8500_timer.c
+F:	drivers/clocksource/timer-vt8500.c
 F:	drivers/i2c/busses/i2c-wmt.c
 F:	drivers/mmc/host/wmt-sdmmc.c
 F:	drivers/pwm/pwm-vt8500.c
@@ -2307,7 +2308,7 @@ F:	drivers/cpuidle/cpuidle-zynq.c
 F:	drivers/block/xsysace.c
 N:	zynq
 N:	xilinx
-F:	drivers/clocksource/cadence_ttc_timer.c
+F:	drivers/clocksource/timer-cadence-ttc.c
 F:	drivers/i2c/busses/i2c-cadence.c
 F:	drivers/mmc/host/sdhci-of-arasan.c
 F:	drivers/edac/synopsys_edac.c
@@ -7578,14 +7579,6 @@ S:	Supported
 F:	drivers/infiniband/hw/i40iw/
 F:	include/uapi/rdma/i40iw-abi.h
 
-INTEL SHA MULTIBUFFER DRIVER
-M:	Megha Dey <megha.dey@linux.intel.com>
-R:	Tim Chen <tim.c.chen@linux.intel.com>
-L:	linux-crypto@vger.kernel.org
-S:	Supported
-F:	arch/x86/crypto/sha*-mb/
-F:	crypto/mcryptd.c
-
 INTEL TELEMETRY DRIVER
 M:	Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
 L:	platform-driver-x86@vger.kernel.org
@@ -8165,6 +8158,7 @@ F:	security/keys/encrypted-keys/
 
 KEYS-TRUSTED
 M:	James Bottomley <jejb@linux.vnet.ibm.com>
+M:      Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
 M:	Mimi Zohar <zohar@linux.vnet.ibm.com>
 L:	linux-integrity@vger.kernel.org
 L:	keyrings@vger.kernel.org
@@ -12807,6 +12801,18 @@ W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
 F:	drivers/s390/crypto/
 
+S390 VFIO AP DRIVER
+M:	Tony Krowiak <akrowiak@linux.ibm.com>
+M:	Pierre Morel <pmorel@linux.ibm.com>
+M:	Halil Pasic <pasic@linux.ibm.com>
+L:	linux-s390@vger.kernel.org
+W:	http://www.ibm.com/developerworks/linux/linux390/
+S:	Supported
+F:	drivers/s390/crypto/vfio_ap_drv.c
+F:	drivers/s390/crypto/vfio_ap_private.h
+F:	drivers/s390/crypto/vfio_ap_ops.c
+F:	Documentation/s390/vfio-ap.txt
+
 S390 ZFCP DRIVER
 M:	Steffen Maier <maier@linux.ibm.com>
 M:	Benjamin Block <bblock@linux.ibm.com>
@@ -13752,7 +13758,7 @@ F:	sound/soc/
 F:	include/sound/soc*
 
 SOUNDWIRE SUBSYSTEM
-M:	Vinod Koul <vinod.koul@intel.com>
+M:	Vinod Koul <vkoul@kernel.org>
 M:	Sanyog Kale <sanyog.r.kale@intel.com>
 R:	Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
@@ -15438,6 +15444,12 @@ F:	Documentation/driver-api/usb/typec_bus.rst
 F:	drivers/usb/typec/altmodes/
 F:	include/linux/usb/typec_altmode.h
 
+USB TYPEC PORT CONTROLLER DRIVERS
+M:	Guenter Roeck <linux@roeck-us.net>
+L:	linux-usb@vger.kernel.org
+S:	Maintained
+F:	drivers/usb/typec/tcpm/
+
 USB UHCI DRIVER
 M:	Alan Stern <stern@rowland.harvard.edu>
 L:	linux-usb@vger.kernel.org
@@ -15512,13 +15524,19 @@ F:	arch/x86/um/
 F:	fs/hostfs/
 F:	fs/hppfs/
 
+USERSPACE COPYIN/COPYOUT (UIOVEC)
+M:	Alexander Viro <viro@zeniv.linux.org.uk>
+S:	Maintained
+F:	lib/iov_iter.c
+F:	include/linux/uio.h
+
 USERSPACE I/O (UIO)
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
 F:	Documentation/driver-api/uio-howto.rst
 F:	drivers/uio/
-F:	include/linux/uio*.h
+F:	include/linux/uio_driver.h
 
 UTIL-LINUX PACKAGE
 M:	Karel Zak <kzak@redhat.com>
diff --git a/Makefile b/Makefile
index 2fc5732a4f9e..7d4ba5196010 100644
--- a/Makefile
+++ b/Makefile
@@ -1063,7 +1063,7 @@ include/config/kernel.release: $(srctree)/Makefile FORCE
 # Carefully list dependencies so we do not try to build scripts twice
 # in parallel
 PHONY += scripts
-scripts: scripts_basic asm-generic gcc-plugins $(autoksyms_h)
+scripts: scripts_basic scripts_dtc asm-generic gcc-plugins $(autoksyms_h)
 	$(Q)$(MAKE) $(build)=$(@)
 
 # Things we need to do before we recursively start building the kernel
@@ -1213,6 +1213,35 @@ kselftest-merge:
 	+$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
 
 # ---------------------------------------------------------------------------
+# Devicetree files
+
+ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/boot/dts/),)
+dtstree := arch/$(SRCARCH)/boot/dts
+endif
+
+ifneq ($(dtstree),)
+
+%.dtb: prepare3 scripts_dtc
+	$(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@
+
+PHONY += dtbs dtbs_install
+dtbs: prepare3 scripts_dtc
+	$(Q)$(MAKE) $(build)=$(dtstree)
+
+dtbs_install:
+	$(Q)$(MAKE) $(dtbinst)=$(dtstree)
+
+ifdef CONFIG_OF_EARLY_FLATTREE
+all: dtbs
+endif
+
+endif
+
+PHONY += scripts_dtc
+scripts_dtc: scripts_basic
+	$(Q)$(MAKE) $(build)=scripts/dtc
+
+# ---------------------------------------------------------------------------
 # Modules
 
 ifdef CONFIG_MODULES
@@ -1421,6 +1450,12 @@ help:
 	@echo  '  kselftest-merge - Merge all the config dependencies of kselftest to existing'
 	@echo  '                    .config.'
 	@echo  ''
+	@$(if $(dtstree), \
+		echo 'Devicetree:'; \
+		echo '* dtbs            - Build device tree blobs for enabled boards'; \
+		echo '  dtbs_install    - Install dtbs to $(INSTALL_DTBS_PATH)'; \
+		echo '')
+
 	@echo 'Userspace tools targets:'
 	@echo '  use "make tools/help"'
 	@echo '  or  "cd tools; make help"'
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index d6e29a1de4cc..9ff37aa1165f 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -6,6 +6,7 @@
 
 #define NR_SYSCALLS			523
 
+#define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_GETHOSTNAME
@@ -13,6 +14,7 @@
 #define __ARCH_WANT_SYS_GETPGRP
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
+#define __ARCH_WANT_SYS_UTIME
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index c64806a2daf5..2e09248f8324 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -473,7 +473,7 @@ entSys:
 	bne     $3, strace
 	beq	$4, 1f
 	ldq	$27, 0($5)
-1:	jsr	$26, ($27), alpha_ni_syscall
+1:	jsr	$26, ($27), sys_ni_syscall
 	ldgp	$gp, 0($26)
 	blt	$0, $syscall_error	/* the call failed */
 	stq	$0, 0($sp)
@@ -587,7 +587,7 @@ strace:
 	/* get the system call pointer.. */
 	lda	$1, NR_SYSCALLS($31)
 	lda	$2, sys_call_table
-	lda	$27, alpha_ni_syscall
+	lda	$27, sys_ni_syscall
 	cmpult	$0, $1, $1
 	s8addq	$0, $2, $2
 	beq	$1, 1f
@@ -791,7 +791,7 @@ ret_from_kernel_thread:
 
 /*
  * Special system calls.  Most of these are special in that they either
- * have to play switch_stack games or in some way use the pt_regs struct.
+ * have to play switch_stack games.
  */
 
 .macro	fork_like name
@@ -812,46 +812,41 @@ fork_like fork
 fork_like vfork
 fork_like clone
 
+.macro	sigreturn_like name
 	.align	4
-	.globl	sys_sigreturn
-	.ent	sys_sigreturn
-sys_sigreturn:
+	.globl	sys_\name
+	.ent	sys_\name
+sys_\name:
 	.prologue 0
 	lda	$9, ret_from_straced
 	cmpult	$26, $9, $9
 	lda	$sp, -SWITCH_STACK_SIZE($sp)
-	jsr	$26, do_sigreturn
+	jsr	$26, do_\name
 	bne	$9, 1f
 	jsr	$26, syscall_trace_leave
 1:	br	$1, undo_switch_stack
 	br	ret_from_sys_call
-.end sys_sigreturn
+.end sys_\name
+.endm
 
-	.align	4
-	.globl	sys_rt_sigreturn
-	.ent	sys_rt_sigreturn
-sys_rt_sigreturn:
-	.prologue 0
-	lda	$9, ret_from_straced
-	cmpult	$26, $9, $9
-	lda	$sp, -SWITCH_STACK_SIZE($sp)
-	jsr	$26, do_rt_sigreturn
-	bne	$9, 1f
-	jsr	$26, syscall_trace_leave
-1:	br	$1, undo_switch_stack
-	br	ret_from_sys_call
-.end sys_rt_sigreturn
+sigreturn_like sigreturn
+sigreturn_like rt_sigreturn
 
 	.align	4
-	.globl	alpha_ni_syscall
-	.ent	alpha_ni_syscall
-alpha_ni_syscall:
+	.globl	alpha_syscall_zero
+	.ent	alpha_syscall_zero
+alpha_syscall_zero:
 	.prologue 0
-	/* Special because it also implements overflow handling via
-	   syscall number 0.  And if you recall, zero is a special
-	   trigger for "not an error".  Store large non-zero there.  */
+	/* Special because it needs to do something opposite to
+	   force_successful_syscall_return().  We use the saved
+	   syscall number for that, zero meaning "not an error".
+	   That works nicely, but for real syscall 0 we need to
+	   make sure that this logics doesn't get confused.
+	   Store a non-zero there - -ENOSYS we need in register
+	   for our return value will do just fine.
+	  */
 	lda	$0, -ENOSYS
 	unop
 	stq	$0, 0($sp)
 	ret
-.end alpha_ni_syscall
+.end alpha_syscall_zero
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 1374e591511f..5b2e8ecb7ce3 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -11,93 +11,93 @@
 	.align 3
 	.globl sys_call_table
 sys_call_table:
-	.quad alpha_ni_syscall			/* 0 */
+	.quad alpha_syscall_zero		/* 0 */
 	.quad sys_exit
 	.quad alpha_fork
 	.quad sys_read
 	.quad sys_write
-	.quad alpha_ni_syscall			/* 5 */
+	.quad sys_ni_syscall			/* 5 */
 	.quad sys_close
 	.quad sys_osf_wait4
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_link
 	.quad sys_unlink			/* 10 */
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_chdir
 	.quad sys_fchdir
 	.quad sys_mknod
 	.quad sys_chmod				/* 15 */
 	.quad sys_chown
 	.quad sys_osf_brk
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_lseek
 	.quad sys_getxpid			/* 20 */
 	.quad sys_osf_mount
 	.quad sys_umount
 	.quad sys_setuid
 	.quad sys_getxuid
-	.quad alpha_ni_syscall			/* 25 */
+	.quad sys_ni_syscall			/* 25 */
 	.quad sys_ptrace
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 30 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 30 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_access
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 35 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 35 */
 	.quad sys_sync
 	.quad sys_kill
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_setpgid
-	.quad alpha_ni_syscall			/* 40 */
+	.quad sys_ni_syscall			/* 40 */
 	.quad sys_dup
 	.quad sys_alpha_pipe
 	.quad sys_osf_set_program_attributes
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_open				/* 45 */
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_getxgid
 	.quad sys_osf_sigprocmask
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 50 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 50 */
 	.quad sys_acct
 	.quad sys_sigpending
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_ioctl
-	.quad alpha_ni_syscall			/* 55 */
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall			/* 55 */
+	.quad sys_ni_syscall
 	.quad sys_symlink
 	.quad sys_readlink
 	.quad sys_execve
 	.quad sys_umask				/* 60 */
 	.quad sys_chroot
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_getpgrp
 	.quad sys_getpagesize
-	.quad alpha_ni_syscall			/* 65 */
+	.quad sys_ni_syscall			/* 65 */
 	.quad alpha_vfork
 	.quad sys_newstat
 	.quad sys_newlstat
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 70 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 70 */
 	.quad sys_osf_mmap
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_munmap
 	.quad sys_mprotect
 	.quad sys_madvise			/* 75 */
 	.quad sys_vhangup
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_getgroups
 	/* map BSD's setpgrp to sys_setpgid for binary compatibility: */
 	.quad sys_setgroups			/* 80 */
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_setpgid
 	.quad sys_osf_setitimer
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 85 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 85 */
 	.quad sys_osf_getitimer
 	.quad sys_gethostname
 	.quad sys_sethostname
@@ -119,19 +119,19 @@ sys_call_table:
 	.quad sys_bind
 	.quad sys_setsockopt			/* 105 */
 	.quad sys_listen
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 110 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 110 */
 	.quad sys_sigsuspend
 	.quad sys_osf_sigstack
 	.quad sys_recvmsg
 	.quad sys_sendmsg
-	.quad alpha_ni_syscall			/* 115 */
+	.quad sys_ni_syscall			/* 115 */
 	.quad sys_osf_gettimeofday
 	.quad sys_osf_getrusage
 	.quad sys_getsockopt
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
 #ifdef CONFIG_OSF4_COMPAT
 	.quad sys_osf_readv			/* 120 */
 	.quad sys_osf_writev
@@ -156,66 +156,66 @@ sys_call_table:
 	.quad sys_mkdir
 	.quad sys_rmdir
 	.quad sys_osf_utimes
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 140 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 140 */
 	.quad sys_getpeername
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_getrlimit
 	.quad sys_setrlimit			/* 145 */
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_setsid
 	.quad sys_quotactl
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_getsockname			/* 150 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 155 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 155 */
 	.quad sys_osf_sigaction
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_osf_getdirentries
 	.quad sys_osf_statfs			/* 160 */
 	.quad sys_osf_fstatfs
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_osf_getdomainname		/* 165 */
 	.quad sys_setdomainname
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 170 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 175 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 180 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 185 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 190 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 195 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 170 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 175 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 180 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 185 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 190 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 195 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
 	/* The OSF swapon has two extra arguments, but we ignore them.  */
 	.quad sys_swapon
 	.quad sys_msgctl			/* 200 */
@@ -231,93 +231,93 @@ sys_call_table:
 	.quad sys_shmctl			/* 210 */
 	.quad sys_shmdt
 	.quad sys_shmget
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 215 */
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 215 */
+	.quad sys_ni_syscall
 	.quad sys_msync
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 220 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 220 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_osf_stat
 	.quad sys_osf_lstat			/* 225 */
 	.quad sys_osf_fstat
 	.quad sys_osf_statfs64
 	.quad sys_osf_fstatfs64
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 230 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 230 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_getpgid
 	.quad sys_getsid
 	.quad sys_sigaltstack			/* 235 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 240 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 240 */
 	.quad sys_osf_sysinfo
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_osf_proplist_syscall
-	.quad alpha_ni_syscall			/* 245 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 250 */
+	.quad sys_ni_syscall			/* 245 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 250 */
 	.quad sys_osf_usleep_thread
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
 	.quad sys_sysfs
-	.quad alpha_ni_syscall			/* 255 */
+	.quad sys_ni_syscall			/* 255 */
 	.quad sys_osf_getsysinfo
 	.quad sys_osf_setsysinfo
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 260 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 265 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 270 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 275 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 280 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 285 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 290 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall			/* 295 */
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
-	.quad alpha_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 260 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 265 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 270 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 275 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 280 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 285 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 290 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall			/* 295 */
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
+	.quad sys_ni_syscall
 /* linux-specific system calls start at 300 */
 	.quad sys_bdflush			/* 300 */
 	.quad sys_sethae
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 644815c0516e..c64c505d966c 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -102,11 +102,5 @@ boot_targets += uImage uImage.bin uImage.gz
 $(boot_targets): vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
-%.dtb %.dtb.S %.dtb.o: scripts
-	$(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
-
-dtbs: scripts
-	$(Q)$(MAKE) $(build)=$(boot)/dts
-
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h
index 517178b1daef..3b3543fd151c 100644
--- a/arch/arc/include/uapi/asm/unistd.h
+++ b/arch/arc/include/uapi/asm/unistd.h
@@ -17,6 +17,7 @@
 #define _UAPI_ASM_ARC_UNISTD_H
 
 #define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_VFORK
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 5c91e0093ee8..05a91d8b89f3 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -303,12 +303,7 @@ else
 KBUILD_IMAGE := $(boot)/zImage
 endif
 
-# Build the DT binary blobs if we have OF configured
-ifeq ($(CONFIG_USE_OF),y)
-KBUILD_DTBS := dtbs
-endif
-
-all:	$(notdir $(KBUILD_IMAGE)) $(KBUILD_DTBS)
+all:	$(notdir $(KBUILD_IMAGE))
 
 
 archheaders:
@@ -335,17 +330,6 @@ $(BOOT_TARGETS): vmlinux
 $(INSTALL_TARGETS):
 	$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@
 
-%.dtb: | scripts
-	$(Q)$(MAKE) $(build)=$(boot)/dts MACHINE=$(MACHINE) $(boot)/dts/$@
-
-PHONY += dtbs dtbs_install
-
-dtbs: prepare scripts
-	$(Q)$(MAKE) $(build)=$(boot)/dts
-
-dtbs_install:
-	$(Q)$(MAKE) $(dtbinst)=$(boot)/dts
-
 PHONY += vdso_install
 vdso_install:
 ifeq ($(CONFIG_VDSO),y)
@@ -367,8 +351,6 @@ define archhelp
   echo  '  uImage        - U-Boot wrapped zImage'
   echo  '  bootpImage    - Combined zImage and initial RAM disk'
   echo  '                  (supply initrd image via make variable INITRD=<path>)'
-  echo  '* dtbs          - Build device tree blobs for enabled boards'
-  echo  '  dtbs_install  - Install dtbs to $(INSTALL_DTBS_PATH)'
   echo  '  install       - Install uncompressed kernel'
   echo  '  zinstall      - Install compressed kernel'
   echo  '  uinstall      - Install U-Boot wrapped compressed kernel'
diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h
index 07437816e098..b36c0289a308 100644
--- a/arch/arm/boot/compressed/libfdt_env.h
+++ b/arch/arm/boot/compressed/libfdt_env.h
@@ -6,6 +6,8 @@
 #include <linux/string.h>
 #include <asm/byteorder.h>
 
+#define INT_MAX			((int)(~0U>>1))
+
 typedef __be16 fdt16_t;
 typedef __be32 fdt32_t;
 typedef __be64 fdt64_t;
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 925d1364727a..ef0c7feea6e2 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -99,6 +99,7 @@ config CRYPTO_GHASH_ARM_CE
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_CRYPTD
+	select CRYPTO_GF128MUL
 	help
 	  Use an implementation of GHASH (used by the GCM AEAD chaining mode)
 	  that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
@@ -121,10 +122,4 @@ config CRYPTO_CHACHA20_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_CHACHA20
 
-config CRYPTO_SPECK_NEON
-	tristate "NEON accelerated Speck cipher algorithms"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_SPECK
-
 endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 8de542c48ade..bd5bceef0605 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -10,7 +10,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
-obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
 
 ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -54,7 +53,6 @@ ghash-arm-ce-y	:= ghash-ce-core.o ghash-ce-glue.o
 crct10dif-arm-ce-y	:= crct10dif-ce-core.o crct10dif-ce-glue.o
 crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
 chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
-speck-neon-y := speck-neon-core.o speck-neon-glue.o
 
 ifdef REGENERATE_ARM_CRYPTO
 quiet_cmd_perl = PERL    $@
diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S
index 451a849ad518..50e7b9896818 100644
--- a/arch/arm/crypto/chacha20-neon-core.S
+++ b/arch/arm/crypto/chacha20-neon-core.S
@@ -18,6 +18,34 @@
  * (at your option) any later version.
  */
 
+ /*
+  * NEON doesn't have a rotate instruction.  The alternatives are, more or less:
+  *
+  * (a)  vshl.u32 + vsri.u32		(needs temporary register)
+  * (b)  vshl.u32 + vshr.u32 + vorr	(needs temporary register)
+  * (c)  vrev32.16			(16-bit rotations only)
+  * (d)  vtbl.8 + vtbl.8		(multiple of 8 bits rotations only,
+  *					 needs index vector)
+  *
+  * ChaCha20 has 16, 12, 8, and 7-bit rotations.  For the 12 and 7-bit
+  * rotations, the only choices are (a) and (b).  We use (a) since it takes
+  * two-thirds the cycles of (b) on both Cortex-A7 and Cortex-A53.
+  *
+  * For the 16-bit rotation, we use vrev32.16 since it's consistently fastest
+  * and doesn't need a temporary register.
+  *
+  * For the 8-bit rotation, we use vtbl.8 + vtbl.8.  On Cortex-A7, this sequence
+  * is twice as fast as (a), even when doing (a) on multiple registers
+  * simultaneously to eliminate the stall between vshl and vsri.  Also, it
+  * parallelizes better when temporary registers are scarce.
+  *
+  * A disadvantage is that on Cortex-A53, the vtbl sequence is the same speed as
+  * (a), so the need to load the rotation table actually makes the vtbl method
+  * slightly slower overall on that CPU (~1.3% slower ChaCha20).  Still, it
+  * seems to be a good compromise to get a more significant speed boost on some
+  * CPUs, e.g. ~4.8% faster ChaCha20 on Cortex-A7.
+  */
+
 #include <linux/linkage.h>
 
 	.text
@@ -46,7 +74,9 @@ ENTRY(chacha20_block_xor_neon)
 	vmov		q10, q2
 	vmov		q11, q3
 
+	adr		ip, .Lrol8_table
 	mov		r3, #10
+	vld1.8		{d10}, [ip, :64]
 
 .Ldoubleround:
 	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
@@ -62,9 +92,9 @@ ENTRY(chacha20_block_xor_neon)
 
 	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
 	vadd.i32	q0, q0, q1
-	veor		q4, q3, q0
-	vshl.u32	q3, q4, #8
-	vsri.u32	q3, q4, #24
+	veor		q3, q3, q0
+	vtbl.8		d6, {d6}, d10
+	vtbl.8		d7, {d7}, d10
 
 	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
 	vadd.i32	q2, q2, q3
@@ -92,9 +122,9 @@ ENTRY(chacha20_block_xor_neon)
 
 	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
 	vadd.i32	q0, q0, q1
-	veor		q4, q3, q0
-	vshl.u32	q3, q4, #8
-	vsri.u32	q3, q4, #24
+	veor		q3, q3, q0
+	vtbl.8		d6, {d6}, d10
+	vtbl.8		d7, {d7}, d10
 
 	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
 	vadd.i32	q2, q2, q3
@@ -139,13 +169,17 @@ ENTRY(chacha20_block_xor_neon)
 	bx		lr
 ENDPROC(chacha20_block_xor_neon)
 
+	.align		4
+.Lctrinc:	.word	0, 1, 2, 3
+.Lrol8_table:	.byte	3, 0, 1, 2, 7, 4, 5, 6
+
 	.align		5
 ENTRY(chacha20_4block_xor_neon)
-	push		{r4-r6, lr}
-	mov		ip, sp			// preserve the stack pointer
-	sub		r3, sp, #0x20		// allocate a 32 byte buffer
-	bic		r3, r3, #0x1f		// aligned to 32 bytes
-	mov		sp, r3
+	push		{r4-r5}
+	mov		r4, sp			// preserve the stack pointer
+	sub		ip, sp, #0x20		// allocate a 32 byte buffer
+	bic		ip, ip, #0x1f		// aligned to 32 bytes
+	mov		sp, ip
 
 	// r0: Input state matrix, s
 	// r1: 4 data blocks output, o
@@ -155,25 +189,24 @@ ENTRY(chacha20_4block_xor_neon)
 	// This function encrypts four consecutive ChaCha20 blocks by loading
 	// the state matrix in NEON registers four times. The algorithm performs
 	// each operation on the corresponding word of each state matrix, hence
-	// requires no word shuffling. For final XORing step we transpose the
-	// matrix by interleaving 32- and then 64-bit words, which allows us to
-	// do XOR in NEON registers.
+	// requires no word shuffling. The words are re-interleaved before the
+	// final addition of the original state and the XORing step.
 	//
 
-	// x0..15[0-3] = s0..3[0..3]
-	add		r3, r0, #0x20
+	// x0..15[0-3] = s0..15[0-3]
+	add		ip, r0, #0x20
 	vld1.32		{q0-q1}, [r0]
-	vld1.32		{q2-q3}, [r3]
+	vld1.32		{q2-q3}, [ip]
 
-	adr		r3, CTRINC
+	adr		r5, .Lctrinc
 	vdup.32		q15, d7[1]
 	vdup.32		q14, d7[0]
-	vld1.32		{q11}, [r3, :128]
+	vld1.32		{q4}, [r5, :128]
 	vdup.32		q13, d6[1]
 	vdup.32		q12, d6[0]
-	vadd.i32	q12, q12, q11		// x12 += counter values 0-3
 	vdup.32		q11, d5[1]
 	vdup.32		q10, d5[0]
+	vadd.u32	q12, q12, q4		// x12 += counter values 0-3
 	vdup.32		q9, d4[1]
 	vdup.32		q8, d4[0]
 	vdup.32		q7, d3[1]
@@ -185,9 +218,13 @@ ENTRY(chacha20_4block_xor_neon)
 	vdup.32		q1, d0[1]
 	vdup.32		q0, d0[0]
 
+	adr		ip, .Lrol8_table
 	mov		r3, #10
+	b		1f
 
 .Ldoubleround4:
+	vld1.32		{q8-q9}, [sp, :256]
+1:
 	// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
 	// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
 	// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
@@ -236,24 +273,25 @@ ENTRY(chacha20_4block_xor_neon)
 	// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
 	// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
 	// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+	vld1.8		{d16}, [ip, :64]
 	vadd.i32	q0, q0, q4
 	vadd.i32	q1, q1, q5
 	vadd.i32	q2, q2, q6
 	vadd.i32	q3, q3, q7
 
-	veor		q8, q12, q0
-	veor		q9, q13, q1
-	vshl.u32	q12, q8, #8
-	vshl.u32	q13, q9, #8
-	vsri.u32	q12, q8, #24
-	vsri.u32	q13, q9, #24
+	veor		q12, q12, q0
+	veor		q13, q13, q1
+	veor		q14, q14, q2
+	veor		q15, q15, q3
 
-	veor		q8, q14, q2
-	veor		q9, q15, q3
-	vshl.u32	q14, q8, #8
-	vshl.u32	q15, q9, #8
-	vsri.u32	q14, q8, #24
-	vsri.u32	q15, q9, #24
+	vtbl.8		d24, {d24}, d16
+	vtbl.8		d25, {d25}, d16
+	vtbl.8		d26, {d26}, d16
+	vtbl.8		d27, {d27}, d16
+	vtbl.8		d28, {d28}, d16
+	vtbl.8		d29, {d29}, d16
+	vtbl.8		d30, {d30}, d16
+	vtbl.8		d31, {d31}, d16
 
 	vld1.32		{q8-q9}, [sp, :256]
 
@@ -332,24 +370,25 @@ ENTRY(chacha20_4block_xor_neon)
 	// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
 	// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
 	// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+	vld1.8		{d16}, [ip, :64]
 	vadd.i32	q0, q0, q5
 	vadd.i32	q1, q1, q6
 	vadd.i32	q2, q2, q7
 	vadd.i32	q3, q3, q4
 
-	veor		q8, q15, q0
-	veor		q9, q12, q1
-	vshl.u32	q15, q8, #8
-	vshl.u32	q12, q9, #8
-	vsri.u32	q15, q8, #24
-	vsri.u32	q12, q9, #24
+	veor		q15, q15, q0
+	veor		q12, q12, q1
+	veor		q13, q13, q2
+	veor		q14, q14, q3
 
-	veor		q8, q13, q2
-	veor		q9, q14, q3
-	vshl.u32	q13, q8, #8
-	vshl.u32	q14, q9, #8
-	vsri.u32	q13, q8, #24
-	vsri.u32	q14, q9, #24
+	vtbl.8		d30, {d30}, d16
+	vtbl.8		d31, {d31}, d16
+	vtbl.8		d24, {d24}, d16
+	vtbl.8		d25, {d25}, d16
+	vtbl.8		d26, {d26}, d16
+	vtbl.8		d27, {d27}, d16
+	vtbl.8		d28, {d28}, d16
+	vtbl.8		d29, {d29}, d16
 
 	vld1.32		{q8-q9}, [sp, :256]
 
@@ -379,104 +418,76 @@ ENTRY(chacha20_4block_xor_neon)
 	vsri.u32	q6, q9, #25
 
 	subs		r3, r3, #1
-	beq		0f
-
-	vld1.32		{q8-q9}, [sp, :256]
-	b		.Ldoubleround4
-
-	// x0[0-3] += s0[0]
-	// x1[0-3] += s0[1]
-	// x2[0-3] += s0[2]
-	// x3[0-3] += s0[3]
-0:	ldmia		r0!, {r3-r6}
-	vdup.32		q8, r3
-	vdup.32		q9, r4
-	vadd.i32	q0, q0, q8
-	vadd.i32	q1, q1, q9
-	vdup.32		q8, r5
-	vdup.32		q9, r6
-	vadd.i32	q2, q2, q8
-	vadd.i32	q3, q3, q9
-
-	// x4[0-3] += s1[0]
-	// x5[0-3] += s1[1]
-	// x6[0-3] += s1[2]
-	// x7[0-3] += s1[3]
-	ldmia		r0!, {r3-r6}
-	vdup.32		q8, r3
-	vdup.32		q9, r4
-	vadd.i32	q4, q4, q8
-	vadd.i32	q5, q5, q9
-	vdup.32		q8, r5
-	vdup.32		q9, r6
-	vadd.i32	q6, q6, q8
-	vadd.i32	q7, q7, q9
-
-	// interleave 32-bit words in state n, n+1
-	vzip.32		q0, q1
-	vzip.32		q2, q3
-	vzip.32		q4, q5
-	vzip.32		q6, q7
-
-	// interleave 64-bit words in state n, n+2
+	bne		.Ldoubleround4
+
+	// x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15.
+	// x8..9[0-3] are on the stack.
+
+	// Re-interleave the words in the first two rows of each block (x0..7).
+	// Also add the counter values 0-3 to x12[0-3].
+	  vld1.32	{q8}, [r5, :128]	// load counter values 0-3
+	vzip.32		q0, q1			// => (0 1 0 1) (0 1 0 1)
+	vzip.32		q2, q3			// => (2 3 2 3) (2 3 2 3)
+	vzip.32		q4, q5			// => (4 5 4 5) (4 5 4 5)
+	vzip.32		q6, q7			// => (6 7 6 7) (6 7 6 7)
+	  vadd.u32	q12, q8			// x12 += counter values 0-3
 	vswp		d1, d4
 	vswp		d3, d6
+	  vld1.32	{q8-q9}, [r0]!		// load s0..7
 	vswp		d9, d12
 	vswp		d11, d14
 
-	// xor with corresponding input, write to output
+	// Swap q1 and q4 so that we'll free up consecutive registers (q0-q1)
+	// after XORing the first 32 bytes.
+	vswp		q1, q4
+
+	// First two rows of each block are (q0 q1) (q2 q6) (q4 q5) (q3 q7)
+
+	// x0..3[0-3] += s0..3[0-3]	(add orig state to 1st row of each block)
+	vadd.u32	q0, q0, q8
+	vadd.u32	q2, q2, q8
+	vadd.u32	q4, q4, q8
+	vadd.u32	q3, q3, q8
+
+	// x4..7[0-3] += s4..7[0-3]	(add orig state to 2nd row of each block)
+	vadd.u32	q1, q1, q9
+	vadd.u32	q6, q6, q9
+	vadd.u32	q5, q5, q9
+	vadd.u32	q7, q7, q9
+
+	// XOR first 32 bytes using keystream from first two rows of first block
 	vld1.8		{q8-q9}, [r2]!
 	veor		q8, q8, q0
-	veor		q9, q9, q4
+	veor		q9, q9, q1
 	vst1.8		{q8-q9}, [r1]!
 
+	// Re-interleave the words in the last two rows of each block (x8..15).
 	vld1.32		{q8-q9}, [sp, :256]
-
-	// x8[0-3] += s2[0]
-	// x9[0-3] += s2[1]
-	// x10[0-3] += s2[2]
-	// x11[0-3] += s2[3]
-	ldmia		r0!, {r3-r6}
-	vdup.32		q0, r3
-	vdup.32		q4, r4
-	vadd.i32	q8, q8, q0
-	vadd.i32	q9, q9, q4
-	vdup.32		q0, r5
-	vdup.32		q4, r6
-	vadd.i32	q10, q10, q0
-	vadd.i32	q11, q11, q4
-
-	// x12[0-3] += s3[0]
-	// x13[0-3] += s3[1]
-	// x14[0-3] += s3[2]
-	// x15[0-3] += s3[3]
-	ldmia		r0!, {r3-r6}
-	vdup.32		q0, r3
-	vdup.32		q4, r4
-	adr		r3, CTRINC
-	vadd.i32	q12, q12, q0
-	vld1.32		{q0}, [r3, :128]
-	vadd.i32	q13, q13, q4
-	vadd.i32	q12, q12, q0		// x12 += counter values 0-3
-
-	vdup.32		q0, r5
-	vdup.32		q4, r6
-	vadd.i32	q14, q14, q0
-	vadd.i32	q15, q15, q4
-
-	// interleave 32-bit words in state n, n+1
-	vzip.32		q8, q9
-	vzip.32		q10, q11
-	vzip.32		q12, q13
-	vzip.32		q14, q15
-
-	// interleave 64-bit words in state n, n+2
-	vswp		d17, d20
-	vswp		d19, d22
+	vzip.32		q12, q13	// => (12 13 12 13) (12 13 12 13)
+	vzip.32		q14, q15	// => (14 15 14 15) (14 15 14 15)
+	vzip.32		q8, q9		// => (8 9 8 9) (8 9 8 9)
+	vzip.32		q10, q11	// => (10 11 10 11) (10 11 10 11)
+	  vld1.32	{q0-q1}, [r0]	// load s8..15
 	vswp		d25, d28
 	vswp		d27, d30
+	vswp		d17, d20
+	vswp		d19, d22
+
+	// Last two rows of each block are (q8 q12) (q10 q14) (q9 q13) (q11 q15)
+
+	// x8..11[0-3] += s8..11[0-3]	(add orig state to 3rd row of each block)
+	vadd.u32	q8,  q8,  q0
+	vadd.u32	q10, q10, q0
+	vadd.u32	q9,  q9,  q0
+	vadd.u32	q11, q11, q0
+
+	// x12..15[0-3] += s12..15[0-3] (add orig state to 4th row of each block)
+	vadd.u32	q12, q12, q1
+	vadd.u32	q14, q14, q1
+	vadd.u32	q13, q13, q1
+	vadd.u32	q15, q15, q1
 
-	vmov		q4, q1
+	// XOR the rest of the data with the keystream
 
 	vld1.8		{q0-q1}, [r2]!
 	veor		q0, q0, q8
@@ -509,13 +520,11 @@ ENTRY(chacha20_4block_xor_neon)
 	vst1.8		{q0-q1}, [r1]!
 
 	vld1.8		{q0-q1}, [r2]
+	  mov		sp, r4		// restore original stack pointer
 	veor		q0, q0, q11
 	veor		q1, q1, q15
 	vst1.8		{q0-q1}, [r1]
 
-	mov		sp, ip
-	pop		{r4-r6, pc}
+	pop		{r4-r5}
+	bx		lr
 ENDPROC(chacha20_4block_xor_neon)
-
-	.align		4
-CTRINC:	.word		0, 1, 2, 3
diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c
index 96e62ec105d0..cd9e93b46c2d 100644
--- a/arch/arm/crypto/crc32-ce-glue.c
+++ b/arch/arm/crypto/crc32-ce-glue.c
@@ -236,7 +236,7 @@ static void __exit crc32_pmull_mod_exit(void)
 				  ARRAY_SIZE(crc32_pmull_algs));
 }
 
-static const struct cpu_feature crc32_cpu_feature[] = {
+static const struct cpu_feature __maybe_unused crc32_cpu_feature[] = {
 	{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
 };
 MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
index 2f78c10b1881..406009afa9cf 100644
--- a/arch/arm/crypto/ghash-ce-core.S
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -63,6 +63,33 @@
 	k48		.req	d31
 	SHASH2_p64	.req	d31
 
+	HH		.req	q10
+	HH3		.req	q11
+	HH4		.req	q12
+	HH34		.req	q13
+
+	HH_L		.req	d20
+	HH_H		.req	d21
+	HH3_L		.req	d22
+	HH3_H		.req	d23
+	HH4_L		.req	d24
+	HH4_H		.req	d25
+	HH34_L		.req	d26
+	HH34_H		.req	d27
+	SHASH2_H	.req	d29
+
+	XL2		.req	q5
+	XM2		.req	q6
+	XH2		.req	q7
+	T3		.req	q8
+
+	XL2_L		.req	d10
+	XL2_H		.req	d11
+	XM2_L		.req	d12
+	XM2_H		.req	d13
+	T3_L		.req	d16
+	T3_H		.req	d17
+
 	.text
 	.fpu		crypto-neon-fp-armv8
 
@@ -175,12 +202,77 @@
 	beq		0f
 	vld1.64		{T1}, [ip]
 	teq		r0, #0
-	b		1f
+	b		3f
+
+0:	.ifc		\pn, p64
+	tst		r0, #3			// skip until #blocks is a
+	bne		2f			// round multiple of 4
+
+	vld1.8		{XL2-XM2}, [r2]!
+1:	vld1.8		{T3-T2}, [r2]!
+	vrev64.8	XL2, XL2
+	vrev64.8	XM2, XM2
+
+	subs		r0, r0, #4
+
+	vext.8		T1, XL2, XL2, #8
+	veor		XL2_H, XL2_H, XL_L
+	veor		XL, XL, T1
+
+	vrev64.8	T3, T3
+	vrev64.8	T1, T2
+
+	vmull.p64	XH, HH4_H, XL_H			// a1 * b1
+	veor		XL2_H, XL2_H, XL_H
+	vmull.p64	XL, HH4_L, XL_L			// a0 * b0
+	vmull.p64	XM, HH34_H, XL2_H		// (a1 + a0)(b1 + b0)
+
+	vmull.p64	XH2, HH3_H, XM2_L		// a1 * b1
+	veor		XM2_L, XM2_L, XM2_H
+	vmull.p64	XL2, HH3_L, XM2_H		// a0 * b0
+	vmull.p64	XM2, HH34_L, XM2_L		// (a1 + a0)(b1 + b0)
+
+	veor		XH, XH, XH2
+	veor		XL, XL, XL2
+	veor		XM, XM, XM2
+
+	vmull.p64	XH2, HH_H, T3_L			// a1 * b1
+	veor		T3_L, T3_L, T3_H
+	vmull.p64	XL2, HH_L, T3_H			// a0 * b0
+	vmull.p64	XM2, SHASH2_H, T3_L		// (a1 + a0)(b1 + b0)
+
+	veor		XH, XH, XH2
+	veor		XL, XL, XL2
+	veor		XM, XM, XM2
+
+	vmull.p64	XH2, SHASH_H, T1_L		// a1 * b1
+	veor		T1_L, T1_L, T1_H
+	vmull.p64	XL2, SHASH_L, T1_H		// a0 * b0
+	vmull.p64	XM2, SHASH2_p64, T1_L		// (a1 + a0)(b1 + b0)
+
+	veor		XH, XH, XH2
+	veor		XL, XL, XL2
+	veor		XM, XM, XM2
 
-0:	vld1.64		{T1}, [r2]!
+	beq		4f
+
+	vld1.8		{XL2-XM2}, [r2]!
+
+	veor		T1, XL, XH
+	veor		XM, XM, T1
+
+	__pmull_reduce_p64
+
+	veor		T1, T1, XH
+	veor		XL, XL, T1
+
+	b		1b
+	.endif
+
+2:	vld1.64		{T1}, [r2]!
 	subs		r0, r0, #1
 
-1:	/* multiply XL by SHASH in GF(2^128) */
+3:	/* multiply XL by SHASH in GF(2^128) */
 #ifndef CONFIG_CPU_BIG_ENDIAN
 	vrev64.8	T1, T1
 #endif
@@ -193,7 +285,7 @@
 	__pmull_\pn	XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l	@ a0 * b0
 	__pmull_\pn	XM, T1_L, SHASH2_\pn			@ (a1+a0)(b1+b0)
 
-	veor		T1, XL, XH
+4:	veor		T1, XL, XH
 	veor		XM, XM, T1
 
 	__pmull_reduce_\pn
@@ -212,8 +304,14 @@
 	 *			   struct ghash_key const *k, const char *head)
 	 */
 ENTRY(pmull_ghash_update_p64)
-	vld1.64		{SHASH}, [r3]
+	vld1.64		{SHASH}, [r3]!
+	vld1.64		{HH}, [r3]!
+	vld1.64		{HH3-HH4}, [r3]
+
 	veor		SHASH2_p64, SHASH_L, SHASH_H
+	veor		SHASH2_H, HH_L, HH_H
+	veor		HH34_L, HH3_L, HH3_H
+	veor		HH34_H, HH4_L, HH4_H
 
 	vmov.i8		MASK, #0xe1
 	vshl.u64	MASK, MASK, #57
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index 8930fc4e7c22..b7d30b6cf49c 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -1,7 +1,7 @@
 /*
  * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
  *
- * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -28,8 +28,10 @@ MODULE_ALIAS_CRYPTO("ghash");
 #define GHASH_DIGEST_SIZE	16
 
 struct ghash_key {
-	u64	a;
-	u64	b;
+	u64	h[2];
+	u64	h2[2];
+	u64	h3[2];
+	u64	h4[2];
 };
 
 struct ghash_desc_ctx {
@@ -117,26 +119,40 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 	return 0;
 }
 
+static void ghash_reflect(u64 h[], const be128 *k)
+{
+	u64 carry = be64_to_cpu(k->a) >> 63;
+
+	h[0] = (be64_to_cpu(k->b) << 1) | carry;
+	h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);
+
+	if (carry)
+		h[1] ^= 0xc200000000000000UL;
+}
+
 static int ghash_setkey(struct crypto_shash *tfm,
 			const u8 *inkey, unsigned int keylen)
 {
 	struct ghash_key *key = crypto_shash_ctx(tfm);
-	u64 a, b;
+	be128 h, k;
 
 	if (keylen != GHASH_BLOCK_SIZE) {
 		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 
-	/* perform multiplication by 'x' in GF(2^128) */
-	b = get_unaligned_be64(inkey);
-	a = get_unaligned_be64(inkey + 8);
+	memcpy(&k, inkey, GHASH_BLOCK_SIZE);
+	ghash_reflect(key->h, &k);
+
+	h = k;
+	gf128mul_lle(&h, &k);
+	ghash_reflect(key->h2, &h);
 
-	key->a = (a << 1) | (b >> 63);
-	key->b = (b << 1) | (a >> 63);
+	gf128mul_lle(&h, &k);
+	ghash_reflect(key->h3, &h);
 
-	if (b >> 63)
-		key->b ^= 0xc200000000000000UL;
+	gf128mul_lle(&h, &k);
+	ghash_reflect(key->h4, &h);
 
 	return 0;
 }
diff --git a/arch/arm/crypto/speck-neon-core.S b/arch/arm/crypto/speck-neon-core.S
deleted file mode 100644
index 57caa742016e..000000000000
--- a/arch/arm/crypto/speck-neon-core.S
+++ /dev/null
@@ -1,434 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Author: Eric Biggers <ebiggers@google.com>
- */
-
-#include <linux/linkage.h>
-
-	.text
-	.fpu		neon
-
-	// arguments
-	ROUND_KEYS	.req	r0	// const {u64,u32} *round_keys
-	NROUNDS		.req	r1	// int nrounds
-	DST		.req	r2	// void *dst
-	SRC		.req	r3	// const void *src
-	NBYTES		.req	r4	// unsigned int nbytes
-	TWEAK		.req	r5	// void *tweak
-
-	// registers which hold the data being encrypted/decrypted
-	X0		.req	q0
-	X0_L		.req	d0
-	X0_H		.req	d1
-	Y0		.req	q1
-	Y0_H		.req	d3
-	X1		.req	q2
-	X1_L		.req	d4
-	X1_H		.req	d5
-	Y1		.req	q3
-	Y1_H		.req	d7
-	X2		.req	q4
-	X2_L		.req	d8
-	X2_H		.req	d9
-	Y2		.req	q5
-	Y2_H		.req	d11
-	X3		.req	q6
-	X3_L		.req	d12
-	X3_H		.req	d13
-	Y3		.req	q7
-	Y3_H		.req	d15
-
-	// the round key, duplicated in all lanes
-	ROUND_KEY	.req	q8
-	ROUND_KEY_L	.req	d16
-	ROUND_KEY_H	.req	d17
-
-	// index vector for vtbl-based 8-bit rotates
-	ROTATE_TABLE	.req	d18
-
-	// multiplication table for updating XTS tweaks
-	GF128MUL_TABLE	.req	d19
-	GF64MUL_TABLE	.req	d19
-
-	// current XTS tweak value(s)
-	TWEAKV		.req	q10
-	TWEAKV_L	.req	d20
-	TWEAKV_H	.req	d21
-
-	TMP0		.req	q12
-	TMP0_L		.req	d24
-	TMP0_H		.req	d25
-	TMP1		.req	q13
-	TMP2		.req	q14
-	TMP3		.req	q15
-
-	.align		4
-.Lror64_8_table:
-	.byte		1, 2, 3, 4, 5, 6, 7, 0
-.Lror32_8_table:
-	.byte		1, 2, 3, 0, 5, 6, 7, 4
-.Lrol64_8_table:
-	.byte		7, 0, 1, 2, 3, 4, 5, 6
-.Lrol32_8_table:
-	.byte		3, 0, 1, 2, 7, 4, 5, 6
-.Lgf128mul_table:
-	.byte		0, 0x87
-	.fill		14
-.Lgf64mul_table:
-	.byte		0, 0x1b, (0x1b << 1), (0x1b << 1) ^ 0x1b
-	.fill		12
-
-/*
- * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
- *
- * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
- * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
- * of ROUND_KEY.  'n' is the lane size: 64 for Speck128, or 32 for Speck64.
- *
- * The 8-bit rotates are implemented using vtbl instead of vshr + vsli because
- * the vtbl approach is faster on some processors and the same speed on others.
- */
-.macro _speck_round_128bytes	n
-
-	// x = ror(x, 8)
-	vtbl.8		X0_L, {X0_L}, ROTATE_TABLE
-	vtbl.8		X0_H, {X0_H}, ROTATE_TABLE
-	vtbl.8		X1_L, {X1_L}, ROTATE_TABLE
-	vtbl.8		X1_H, {X1_H}, ROTATE_TABLE
-	vtbl.8		X2_L, {X2_L}, ROTATE_TABLE
-	vtbl.8		X2_H, {X2_H}, ROTATE_TABLE
-	vtbl.8		X3_L, {X3_L}, ROTATE_TABLE
-	vtbl.8		X3_H, {X3_H}, ROTATE_TABLE
-
-	// x += y
-	vadd.u\n	X0, Y0
-	vadd.u\n	X1, Y1
-	vadd.u\n	X2, Y2
-	vadd.u\n	X3, Y3
-
-	// x ^= k
-	veor		X0, ROUND_KEY
-	veor		X1, ROUND_KEY
-	veor		X2, ROUND_KEY
-	veor		X3, ROUND_KEY
-
-	// y = rol(y, 3)
-	vshl.u\n	TMP0, Y0, #3
-	vshl.u\n	TMP1, Y1, #3
-	vshl.u\n	TMP2, Y2, #3
-	vshl.u\n	TMP3, Y3, #3
-	vsri.u\n	TMP0, Y0, #(\n - 3)
-	vsri.u\n	TMP1, Y1, #(\n - 3)
-	vsri.u\n	TMP2, Y2, #(\n - 3)
-	vsri.u\n	TMP3, Y3, #(\n - 3)
-
-	// y ^= x
-	veor		Y0, TMP0, X0
-	veor		Y1, TMP1, X1
-	veor		Y2, TMP2, X2
-	veor		Y3, TMP3, X3
-.endm
-
-/*
- * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
- *
- * This is the inverse of _speck_round_128bytes().
- */
-.macro _speck_unround_128bytes	n
-
-	// y ^= x
-	veor		TMP0, Y0, X0
-	veor		TMP1, Y1, X1
-	veor		TMP2, Y2, X2
-	veor		TMP3, Y3, X3
-
-	// y = ror(y, 3)
-	vshr.u\n	Y0, TMP0, #3
-	vshr.u\n	Y1, TMP1, #3
-	vshr.u\n	Y2, TMP2, #3
-	vshr.u\n	Y3, TMP3, #3
-	vsli.u\n	Y0, TMP0, #(\n - 3)
-	vsli.u\n	Y1, TMP1, #(\n - 3)
-	vsli.u\n	Y2, TMP2, #(\n - 3)
-	vsli.u\n	Y3, TMP3, #(\n - 3)
-
-	// x ^= k
-	veor		X0, ROUND_KEY
-	veor		X1, ROUND_KEY
-	veor		X2, ROUND_KEY
-	veor		X3, ROUND_KEY
-
-	// x -= y
-	vsub.u\n	X0, Y0
-	vsub.u\n	X1, Y1
-	vsub.u\n	X2, Y2
-	vsub.u\n	X3, Y3
-
-	// x = rol(x, 8);
-	vtbl.8		X0_L, {X0_L}, ROTATE_TABLE
-	vtbl.8		X0_H, {X0_H}, ROTATE_TABLE
-	vtbl.8		X1_L, {X1_L}, ROTATE_TABLE
-	vtbl.8		X1_H, {X1_H}, ROTATE_TABLE
-	vtbl.8		X2_L, {X2_L}, ROTATE_TABLE
-	vtbl.8		X2_H, {X2_H}, ROTATE_TABLE
-	vtbl.8		X3_L, {X3_L}, ROTATE_TABLE
-	vtbl.8		X3_H, {X3_H}, ROTATE_TABLE
-.endm
-
-.macro _xts128_precrypt_one	dst_reg, tweak_buf, tmp
-
-	// Load the next source block
-	vld1.8		{\dst_reg}, [SRC]!
-
-	// Save the current tweak in the tweak buffer
-	vst1.8		{TWEAKV}, [\tweak_buf:128]!
-
-	// XOR the next source block with the current tweak
-	veor		\dst_reg, TWEAKV
-
-	/*
-	 * Calculate the next tweak by multiplying the current one by x,
-	 * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
-	 */
-	vshr.u64	\tmp, TWEAKV, #63
-	vshl.u64	TWEAKV, #1
-	veor		TWEAKV_H, \tmp\()_L
-	vtbl.8		\tmp\()_H, {GF128MUL_TABLE}, \tmp\()_H
-	veor		TWEAKV_L, \tmp\()_H
-.endm
-
-.macro _xts64_precrypt_two	dst_reg, tweak_buf, tmp
-
-	// Load the next two source blocks
-	vld1.8		{\dst_reg}, [SRC]!
-
-	// Save the current two tweaks in the tweak buffer
-	vst1.8		{TWEAKV}, [\tweak_buf:128]!
-
-	// XOR the next two source blocks with the current two tweaks
-	veor		\dst_reg, TWEAKV
-
-	/*
-	 * Calculate the next two tweaks by multiplying the current ones by x^2,
-	 * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
-	 */
-	vshr.u64	\tmp, TWEAKV, #62
-	vshl.u64	TWEAKV, #2
-	vtbl.8		\tmp\()_L, {GF64MUL_TABLE}, \tmp\()_L
-	vtbl.8		\tmp\()_H, {GF64MUL_TABLE}, \tmp\()_H
-	veor		TWEAKV, \tmp
-.endm
-
-/*
- * _speck_xts_crypt() - Speck-XTS encryption/decryption
- *
- * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
- * using Speck-XTS, specifically the variant with a block size of '2n' and round
- * count given by NROUNDS.  The expanded round keys are given in ROUND_KEYS, and
- * the current XTS tweak value is given in TWEAK.  It's assumed that NBYTES is a
- * nonzero multiple of 128.
- */
-.macro _speck_xts_crypt	n, decrypting
-	push		{r4-r7}
-	mov		r7, sp
-
-	/*
-	 * The first four parameters were passed in registers r0-r3.  Load the
-	 * additional parameters, which were passed on the stack.
-	 */
-	ldr		NBYTES, [sp, #16]
-	ldr		TWEAK, [sp, #20]
-
-	/*
-	 * If decrypting, modify the ROUND_KEYS parameter to point to the last
-	 * round key rather than the first, since for decryption the round keys
-	 * are used in reverse order.
-	 */
-.if \decrypting
-.if \n == 64
-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #3
-	sub		ROUND_KEYS, #8
-.else
-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #2
-	sub		ROUND_KEYS, #4
-.endif
-.endif
-
-	// Load the index vector for vtbl-based 8-bit rotates
-.if \decrypting
-	ldr		r12, =.Lrol\n\()_8_table
-.else
-	ldr		r12, =.Lror\n\()_8_table
-.endif
-	vld1.8		{ROTATE_TABLE}, [r12:64]
-
-	// One-time XTS preparation
-
-	/*
-	 * Allocate stack space to store 128 bytes worth of tweaks.  For
-	 * performance, this space is aligned to a 16-byte boundary so that we
-	 * can use the load/store instructions that declare 16-byte alignment.
-	 * For Thumb2 compatibility, don't do the 'bic' directly on 'sp'.
-	 */
-	sub		r12, sp, #128
-	bic		r12, #0xf
-	mov		sp, r12
-
-.if \n == 64
-	// Load first tweak
-	vld1.8		{TWEAKV}, [TWEAK]
-
-	// Load GF(2^128) multiplication table
-	ldr		r12, =.Lgf128mul_table
-	vld1.8		{GF128MUL_TABLE}, [r12:64]
-.else
-	// Load first tweak
-	vld1.8		{TWEAKV_L}, [TWEAK]
-
-	// Load GF(2^64) multiplication table
-	ldr		r12, =.Lgf64mul_table
-	vld1.8		{GF64MUL_TABLE}, [r12:64]
-
-	// Calculate second tweak, packing it together with the first
-	vshr.u64	TMP0_L, TWEAKV_L, #63
-	vtbl.u8		TMP0_L, {GF64MUL_TABLE}, TMP0_L
-	vshl.u64	TWEAKV_H, TWEAKV_L, #1
-	veor		TWEAKV_H, TMP0_L
-.endif
-
-.Lnext_128bytes_\@:
-
-	/*
-	 * Load the source blocks into {X,Y}[0-3], XOR them with their XTS tweak
-	 * values, and save the tweaks on the stack for later.  Then
-	 * de-interleave the 'x' and 'y' elements of each block, i.e. make it so
-	 * that the X[0-3] registers contain only the second halves of blocks,
-	 * and the Y[0-3] registers contain only the first halves of blocks.
-	 * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
-	 */
-	mov		r12, sp
-.if \n == 64
-	_xts128_precrypt_one	X0, r12, TMP0
-	_xts128_precrypt_one	Y0, r12, TMP0
-	_xts128_precrypt_one	X1, r12, TMP0
-	_xts128_precrypt_one	Y1, r12, TMP0
-	_xts128_precrypt_one	X2, r12, TMP0
-	_xts128_precrypt_one	Y2, r12, TMP0
-	_xts128_precrypt_one	X3, r12, TMP0
-	_xts128_precrypt_one	Y3, r12, TMP0
-	vswp		X0_L, Y0_H
-	vswp		X1_L, Y1_H
-	vswp		X2_L, Y2_H
-	vswp		X3_L, Y3_H
-.else
-	_xts64_precrypt_two	X0, r12, TMP0
-	_xts64_precrypt_two	Y0, r12, TMP0
-	_xts64_precrypt_two	X1, r12, TMP0
-	_xts64_precrypt_two	Y1, r12, TMP0
-	_xts64_precrypt_two	X2, r12, TMP0
-	_xts64_precrypt_two	Y2, r12, TMP0
-	_xts64_precrypt_two	X3, r12, TMP0
-	_xts64_precrypt_two	Y3, r12, TMP0
-	vuzp.32		Y0, X0
-	vuzp.32		Y1, X1
-	vuzp.32		Y2, X2
-	vuzp.32		Y3, X3
-.endif
-
-	// Do the cipher rounds
-
-	mov		r12, ROUND_KEYS
-	mov		r6, NROUNDS
-
-.Lnext_round_\@:
-.if \decrypting
-.if \n == 64
-	vld1.64		ROUND_KEY_L, [r12]
-	sub		r12, #8
-	vmov		ROUND_KEY_H, ROUND_KEY_L
-.else
-	vld1.32		{ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]
-	sub		r12, #4
-.endif
-	_speck_unround_128bytes	\n
-.else
-.if \n == 64
-	vld1.64		ROUND_KEY_L, [r12]!
-	vmov		ROUND_KEY_H, ROUND_KEY_L
-.else
-	vld1.32		{ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]!
-.endif
-	_speck_round_128bytes	\n
-.endif
-	subs		r6, r6, #1
-	bne		.Lnext_round_\@
-
-	// Re-interleave the 'x' and 'y' elements of each block
-.if \n == 64
-	vswp		X0_L, Y0_H
-	vswp		X1_L, Y1_H
-	vswp		X2_L, Y2_H
-	vswp		X3_L, Y3_H
-.else
-	vzip.32		Y0, X0
-	vzip.32		Y1, X1
-	vzip.32		Y2, X2
-	vzip.32		Y3, X3
-.endif
-
-	// XOR the encrypted/decrypted blocks with the tweaks we saved earlier
-	mov		r12, sp
-	vld1.8		{TMP0, TMP1}, [r12:128]!
-	vld1.8		{TMP2, TMP3}, [r12:128]!
-	veor		X0, TMP0
-	veor		Y0, TMP1
-	veor		X1, TMP2
-	veor		Y1, TMP3
-	vld1.8		{TMP0, TMP1}, [r12:128]!
-	vld1.8		{TMP2, TMP3}, [r12:128]!
-	veor		X2, TMP0
-	veor		Y2, TMP1
-	veor		X3, TMP2
-	veor		Y3, TMP3
-
-	// Store the ciphertext in the destination buffer
-	vst1.8		{X0, Y0}, [DST]!
-	vst1.8		{X1, Y1}, [DST]!
-	vst1.8		{X2, Y2}, [DST]!
-	vst1.8		{X3, Y3}, [DST]!
-
-	// Continue if there are more 128-byte chunks remaining, else return
-	subs		NBYTES, #128
-	bne		.Lnext_128bytes_\@
-
-	// Store the next tweak
-.if \n == 64
-	vst1.8		{TWEAKV}, [TWEAK]
-.else
-	vst1.8		{TWEAKV_L}, [TWEAK]
-.endif
-
-	mov		sp, r7
-	pop		{r4-r7}
-	bx		lr
-.endm
-
-ENTRY(speck128_xts_encrypt_neon)
-	_speck_xts_crypt	n=64, decrypting=0
-ENDPROC(speck128_xts_encrypt_neon)
-
-ENTRY(speck128_xts_decrypt_neon)
-	_speck_xts_crypt	n=64, decrypting=1
-ENDPROC(speck128_xts_decrypt_neon)
-
-ENTRY(speck64_xts_encrypt_neon)
-	_speck_xts_crypt	n=32, decrypting=0
-ENDPROC(speck64_xts_encrypt_neon)
-
-ENTRY(speck64_xts_decrypt_neon)
-	_speck_xts_crypt	n=32, decrypting=1
-ENDPROC(speck64_xts_decrypt_neon)
diff --git a/arch/arm/crypto/speck-neon-glue.c b/arch/arm/crypto/speck-neon-glue.c
deleted file mode 100644
index f012c3ea998f..000000000000
--- a/arch/arm/crypto/speck-neon-glue.c
+++ /dev/null
@@ -1,288 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Note: the NIST recommendation for XTS only specifies a 128-bit block size,
- * but a 64-bit version (needed for Speck64) is fairly straightforward; the math
- * is just done in GF(2^64) instead of GF(2^128), with the reducing polynomial
- * x^64 + x^4 + x^3 + x + 1 from the original XEX paper (Rogaway, 2004:
- * "Efficient Instantiations of Tweakable Blockciphers and Refinements to Modes
- * OCB and PMAC"), represented as 0x1B.
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/algapi.h>
-#include <crypto/gf128mul.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/speck.h>
-#include <crypto/xts.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-/* The assembly functions only handle multiples of 128 bytes */
-#define SPECK_NEON_CHUNK_SIZE	128
-
-/* Speck128 */
-
-struct speck128_xts_tfm_ctx {
-	struct speck128_tfm_ctx main_key;
-	struct speck128_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
-					  void *dst, const void *src,
-					  unsigned int nbytes, void *tweak);
-
-asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
-					  void *dst, const void *src,
-					  unsigned int nbytes, void *tweak);
-
-typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
-				     u8 *, const u8 *);
-typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
-					  const void *, unsigned int, void *);
-
-static __always_inline int
-__speck128_xts_crypt(struct skcipher_request *req,
-		     speck128_crypt_one_t crypt_one,
-		     speck128_xts_crypt_many_t crypt_many)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	le128 tweak;
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, true);
-
-	crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-		u8 *dst = walk.dst.virt.addr;
-		const u8 *src = walk.src.virt.addr;
-
-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
-			unsigned int count;
-
-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
-			kernel_neon_begin();
-			(*crypt_many)(ctx->main_key.round_keys,
-				      ctx->main_key.nrounds,
-				      dst, src, count, &tweak);
-			kernel_neon_end();
-			dst += count;
-			src += count;
-			nbytes -= count;
-		}
-
-		/* Handle any remainder with generic code */
-		while (nbytes >= sizeof(tweak)) {
-			le128_xor((le128 *)dst, (const le128 *)src, &tweak);
-			(*crypt_one)(&ctx->main_key, dst, dst);
-			le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
-			gf128mul_x_ble(&tweak, &tweak);
-
-			dst += sizeof(tweak);
-			src += sizeof(tweak);
-			nbytes -= sizeof(tweak);
-		}
-		err = skcipher_walk_done(&walk, nbytes);
-	}
-
-	return err;
-}
-
-static int speck128_xts_encrypt(struct skcipher_request *req)
-{
-	return __speck128_xts_crypt(req, crypto_speck128_encrypt,
-				    speck128_xts_encrypt_neon);
-}
-
-static int speck128_xts_decrypt(struct skcipher_request *req)
-{
-	return __speck128_xts_crypt(req, crypto_speck128_decrypt,
-				    speck128_xts_decrypt_neon);
-}
-
-static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			       unsigned int keylen)
-{
-	struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err;
-
-	err = xts_verify_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	keylen /= 2;
-
-	err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
-	if (err)
-		return err;
-
-	return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-/* Speck64 */
-
-struct speck64_xts_tfm_ctx {
-	struct speck64_tfm_ctx main_key;
-	struct speck64_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
-					 void *dst, const void *src,
-					 unsigned int nbytes, void *tweak);
-
-asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
-					 void *dst, const void *src,
-					 unsigned int nbytes, void *tweak);
-
-typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
-				    u8 *, const u8 *);
-typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
-					 const void *, unsigned int, void *);
-
-static __always_inline int
-__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
-		    speck64_xts_crypt_many_t crypt_many)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	__le64 tweak;
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, true);
-
-	crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-		u8 *dst = walk.dst.virt.addr;
-		const u8 *src = walk.src.virt.addr;
-
-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
-			unsigned int count;
-
-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
-			kernel_neon_begin();
-			(*crypt_many)(ctx->main_key.round_keys,
-				      ctx->main_key.nrounds,
-				      dst, src, count, &tweak);
-			kernel_neon_end();
-			dst += count;
-			src += count;
-			nbytes -= count;
-		}
-
-		/* Handle any remainder with generic code */
-		while (nbytes >= sizeof(tweak)) {
-			*(__le64 *)dst = *(__le64 *)src ^ tweak;
-			(*crypt_one)(&ctx->main_key, dst, dst);
-			*(__le64 *)dst ^= tweak;
-			tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
-					    ((tweak & cpu_to_le64(1ULL << 63)) ?
-					     0x1B : 0));
-			dst += sizeof(tweak);
-			src += sizeof(tweak);
-			nbytes -= sizeof(tweak);
-		}
-		err = skcipher_walk_done(&walk, nbytes);
-	}
-
-	return err;
-}
-
-static int speck64_xts_encrypt(struct skcipher_request *req)
-{
-	return __speck64_xts_crypt(req, crypto_speck64_encrypt,
-				   speck64_xts_encrypt_neon);
-}
-
-static int speck64_xts_decrypt(struct skcipher_request *req)
-{
-	return __speck64_xts_crypt(req, crypto_speck64_decrypt,
-				   speck64_xts_decrypt_neon);
-}
-
-static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			      unsigned int keylen)
-{
-	struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err;
-
-	err = xts_verify_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	keylen /= 2;
-
-	err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
-	if (err)
-		return err;
-
-	return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-static struct skcipher_alg speck_algs[] = {
-	{
-		.base.cra_name		= "xts(speck128)",
-		.base.cra_driver_name	= "xts-speck128-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= SPECK128_BLOCK_SIZE,
-		.base.cra_ctxsize	= sizeof(struct speck128_xts_tfm_ctx),
-		.base.cra_alignmask	= 7,
-		.base.cra_module	= THIS_MODULE,
-		.min_keysize		= 2 * SPECK128_128_KEY_SIZE,
-		.max_keysize		= 2 * SPECK128_256_KEY_SIZE,
-		.ivsize			= SPECK128_BLOCK_SIZE,
-		.walksize		= SPECK_NEON_CHUNK_SIZE,
-		.setkey			= speck128_xts_setkey,
-		.encrypt		= speck128_xts_encrypt,
-		.decrypt		= speck128_xts_decrypt,
-	}, {
-		.base.cra_name		= "xts(speck64)",
-		.base.cra_driver_name	= "xts-speck64-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= SPECK64_BLOCK_SIZE,
-		.base.cra_ctxsize	= sizeof(struct speck64_xts_tfm_ctx),
-		.base.cra_alignmask	= 7,
-		.base.cra_module	= THIS_MODULE,
-		.min_keysize		= 2 * SPECK64_96_KEY_SIZE,
-		.max_keysize		= 2 * SPECK64_128_KEY_SIZE,
-		.ivsize			= SPECK64_BLOCK_SIZE,
-		.walksize		= SPECK_NEON_CHUNK_SIZE,
-		.setkey			= speck64_xts_setkey,
-		.encrypt		= speck64_xts_encrypt,
-		.decrypt		= speck64_xts_decrypt,
-	}
-};
-
-static int __init speck_neon_module_init(void)
-{
-	if (!(elf_hwcap & HWCAP_NEON))
-		return -ENODEV;
-	return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-static void __exit speck_neon_module_exit(void)
-{
-	crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-module_init(speck_neon_module_init);
-module_exit(speck_neon_module_exit);
-
-MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("xts(speck128)");
-MODULE_ALIAS_CRYPTO("xts-speck128-neon");
-MODULE_ALIAS_CRYPTO("xts(speck64)");
-MODULE_ALIAS_CRYPTO("xts-speck64-neon");
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 2d43dca29c72..b95f8d0d9f17 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -133,8 +133,7 @@
  * space.
  */
 #define KVM_PHYS_SHIFT	(40)
-#define KVM_PHYS_SIZE	(_AC(1, ULL) << KVM_PHYS_SHIFT)
-#define KVM_PHYS_MASK	(KVM_PHYS_SIZE - _AC(1, ULL))
+
 #define PTRS_PER_S2_PGD	(_AC(1, ULL) << (KVM_PHYS_SHIFT - 30))
 
 /* Virtualization Translation Control Register (VTCR) bits */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 3ad482d2f1eb..5ca5d9af0c26 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -273,7 +273,7 @@ static inline void __cpu_init_stage2(void)
 	kvm_call_hyp(__init_stage2_translation);
 }
 
-static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
+static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	return 0;
 }
@@ -354,4 +354,15 @@ static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
 struct kvm *kvm_arch_alloc_vm(void);
 void kvm_arch_free_vm(struct kvm *kvm);
 
+static inline int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
+{
+	/*
+	 * On 32bit ARM, VMs get a static 40bit IPA stage2 setup,
+	 * so any non-zero value used as type is illegal.
+	 */
+	if (type)
+		return -EINVAL;
+	return 0;
+}
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 847f01fa429d..1098ffc3d54b 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -35,16 +35,12 @@
 		addr;							\
 	})
 
-/*
- * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
- */
-#define KVM_MMU_CACHE_MIN_PAGES	2
-
 #ifndef __ASSEMBLY__
 
 #include <linux/highmem.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
+#include <asm/kvm_arm.h>
 #include <asm/kvm_hyp.h>
 #include <asm/pgalloc.h>
 #include <asm/stage2_pgtable.h>
@@ -52,6 +48,13 @@
 /* Ensure compatibility with arm64 */
 #define VA_BITS			32
 
+#define kvm_phys_shift(kvm)		KVM_PHYS_SHIFT
+#define kvm_phys_size(kvm)		(1ULL << kvm_phys_shift(kvm))
+#define kvm_phys_mask(kvm)		(kvm_phys_size(kvm) - 1ULL)
+#define kvm_vttbr_baddr_mask(kvm)	VTTBR_BADDR_MASK
+
+#define stage2_pgd_size(kvm)		(PTRS_PER_S2_PGD * sizeof(pgd_t))
+
 int create_hyp_mappings(void *from, void *to, pgprot_t prot);
 int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
 			   void __iomem **kaddr,
@@ -355,6 +358,8 @@ static inline int hyp_map_aux_data(void)
 
 #define kvm_phys_to_vttbr(addr)		(addr)
 
+static inline void kvm_set_ipa_limit(void) {}
+
 static inline bool kvm_cpu_has_cnp(void)
 {
 	return false;
diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h
index 460d616bb2d6..f6a7ea805232 100644
--- a/arch/arm/include/asm/stage2_pgtable.h
+++ b/arch/arm/include/asm/stage2_pgtable.h
@@ -19,43 +19,53 @@
 #ifndef __ARM_S2_PGTABLE_H_
 #define __ARM_S2_PGTABLE_H_
 
-#define stage2_pgd_none(pgd)			pgd_none(pgd)
-#define stage2_pgd_clear(pgd)			pgd_clear(pgd)
-#define stage2_pgd_present(pgd)			pgd_present(pgd)
-#define stage2_pgd_populate(pgd, pud)		pgd_populate(NULL, pgd, pud)
-#define stage2_pud_offset(pgd, address)		pud_offset(pgd, address)
-#define stage2_pud_free(pud)			pud_free(NULL, pud)
-
-#define stage2_pud_none(pud)			pud_none(pud)
-#define stage2_pud_clear(pud)			pud_clear(pud)
-#define stage2_pud_present(pud)			pud_present(pud)
-#define stage2_pud_populate(pud, pmd)		pud_populate(NULL, pud, pmd)
-#define stage2_pmd_offset(pud, address)		pmd_offset(pud, address)
-#define stage2_pmd_free(pmd)			pmd_free(NULL, pmd)
-
-#define stage2_pud_huge(pud)			pud_huge(pud)
+/*
+ * kvm_mmu_cache_min_pages() is the number of pages required
+ * to install a stage-2 translation. We pre-allocate the entry
+ * level table at VM creation. Since we have a 3 level page-table,
+ * we need only two pages to add a new mapping.
+ */
+#define kvm_mmu_cache_min_pages(kvm)	2
+
+#define stage2_pgd_none(kvm, pgd)		pgd_none(pgd)
+#define stage2_pgd_clear(kvm, pgd)		pgd_clear(pgd)
+#define stage2_pgd_present(kvm, pgd)		pgd_present(pgd)
+#define stage2_pgd_populate(kvm, pgd, pud)	pgd_populate(NULL, pgd, pud)
+#define stage2_pud_offset(kvm, pgd, address)	pud_offset(pgd, address)
+#define stage2_pud_free(kvm, pud)		pud_free(NULL, pud)
+
+#define stage2_pud_none(kvm, pud)		pud_none(pud)
+#define stage2_pud_clear(kvm, pud)		pud_clear(pud)
+#define stage2_pud_present(kvm, pud)		pud_present(pud)
+#define stage2_pud_populate(kvm, pud, pmd)	pud_populate(NULL, pud, pmd)
+#define stage2_pmd_offset(kvm, pud, address)	pmd_offset(pud, address)
+#define stage2_pmd_free(kvm, pmd)		pmd_free(NULL, pmd)
+
+#define stage2_pud_huge(kvm, pud)		pud_huge(pud)
 
 /* Open coded p*d_addr_end that can deal with 64bit addresses */
-static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
+static inline phys_addr_t
+stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK;
 
 	return (boundary - 1 < end - 1) ? boundary : end;
 }
 
-#define stage2_pud_addr_end(addr, end)		(end)
+#define stage2_pud_addr_end(kvm, addr, end)	(end)
 
-static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
+static inline phys_addr_t
+stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK;
 
 	return (boundary - 1 < end - 1) ? boundary : end;
 }
 
-#define stage2_pgd_index(addr)				pgd_index(addr)
+#define stage2_pgd_index(kvm, addr)		pgd_index(addr)
 
-#define stage2_pte_table_empty(ptep)			kvm_page_empty(ptep)
-#define stage2_pmd_table_empty(pmdp)			kvm_page_empty(pmdp)
-#define stage2_pud_table_empty(pudp)			false
+#define stage2_pte_table_empty(kvm, ptep)	kvm_page_empty(ptep)
+#define stage2_pmd_table_empty(kvm, pmdp)	kvm_page_empty(pmdp)
+#define stage2_pud_table_empty(kvm, pudp)	false
 
 #endif	/* __ARM_S2_PGTABLE_H_ */
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 076090d2dbf5..88ef2ce1f69a 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -16,23 +16,23 @@
 #include <uapi/asm/unistd.h>
 #include <asm/unistd-nr.h>
 
+#define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_GETHOSTNAME
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_OLD_MMAP
 #define __ARCH_WANT_SYS_OLD_SELECT
+#define __ARCH_WANT_SYS_UTIME
 
 #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT)
 #define __ARCH_WANT_SYS_TIME
 #define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_ALARM
-#define __ARCH_WANT_SYS_UTIME
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_SOCKETCALL
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index ecaa68dd1af5..13bcd3b867cb 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -87,14 +87,11 @@ void __init arm_dt_init_cpu_maps(void)
 	if (!cpus)
 		return;
 
-	for_each_child_of_node(cpus, cpu) {
+	for_each_of_cpu_node(cpu) {
 		const __be32 *cell;
 		int prop_bytes;
 		u32 hwid;
 
-		if (of_node_cmp(cpu->type, "cpu"))
-			continue;
-
 		pr_debug(" * %pOF...\n", cpu);
 		/*
 		 * A device tree containing CPU nodes with missing "reg"
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 24ac3cab411d..60e375ce1ab2 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -94,12 +94,6 @@ static void __init parse_dt_topology(void)
 	__cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
 				 GFP_NOWAIT);
 
-	cn = of_find_node_by_path("/cpus");
-	if (!cn) {
-		pr_err("No CPU information found in DT\n");
-		return;
-	}
-
 	for_each_possible_cpu(cpu) {
 		const u32 *rate;
 		int len;
diff --git a/arch/arm/mach-mmp/devices.c b/arch/arm/mach-mmp/devices.c
index 671c7a09ab3d..0fca63c80e1a 100644
--- a/arch/arm/mach-mmp/devices.c
+++ b/arch/arm/mach-mmp/devices.c
@@ -277,21 +277,12 @@ struct platform_device pxa168_device_u2o = {
 
 #if IS_ENABLED(CONFIG_USB_EHCI_MV_U2O)
 struct resource pxa168_u2oehci_resources[] = {
-	/* regbase */
 	[0] = {
-		.start	= PXA168_U2O_REGBASE + U2x_CAPREGS_OFFSET,
+		.start	= PXA168_U2O_REGBASE,
 		.end	= PXA168_U2O_REGBASE + USB_REG_RANGE,
 		.flags	= IORESOURCE_MEM,
-		.name	= "capregs",
 	},
-	/* phybase */
 	[1] = {
-		.start	= PXA168_U2O_PHYBASE,
-		.end	= PXA168_U2O_PHYBASE + USB_PHY_RANGE,
-		.flags	= IORESOURCE_MEM,
-		.name	= "phyregs",
-	},
-	[2] = {
 		.start	= IRQ_PXA168_USB1,
 		.end	= IRQ_PXA168_USB1,
 		.flags	= IORESOURCE_IRQ,
diff --git a/arch/arm/mach-shmobile/pm-rcar-gen2.c b/arch/arm/mach-shmobile/pm-rcar-gen2.c
index 345af3ebcc3a..7efe95bd584f 100644
--- a/arch/arm/mach-shmobile/pm-rcar-gen2.c
+++ b/arch/arm/mach-shmobile/pm-rcar-gen2.c
@@ -50,7 +50,7 @@ void __init rcar_gen2_pm_init(void)
 	void __iomem *p;
 	u32 bar;
 	static int once;
-	struct device_node *np, *cpus;
+	struct device_node *np;
 	bool has_a7 = false;
 	bool has_a15 = false;
 	struct resource res;
@@ -59,11 +59,7 @@ void __init rcar_gen2_pm_init(void)
 	if (once++)
 		return;
 
-	cpus = of_find_node_by_path("/cpus");
-	if (!cpus)
-		return;
-
-	for_each_child_of_node(cpus, np) {
+	for_each_of_cpu_node(np) {
 		if (of_device_is_compatible(np, "arm,cortex-a15"))
 			has_a15 = true;
 		else if (of_device_is_compatible(np, "arm,cortex-a7"))
diff --git a/arch/arm/mach-shmobile/pm-rmobile.c b/arch/arm/mach-shmobile/pm-rmobile.c
index e348bcfe389d..94fdeef11b81 100644
--- a/arch/arm/mach-shmobile/pm-rmobile.c
+++ b/arch/arm/mach-shmobile/pm-rmobile.c
@@ -202,7 +202,7 @@ static void __init get_special_pds(void)
 	const struct of_device_id *id;
 
 	/* PM domains containing CPUs */
-	for_each_node_by_type(np, "cpu")
+	for_each_of_cpu_node(np)
 		add_special_pd(np, PD_CPU);
 
 	/* PM domain containing console */
diff --git a/arch/arm/mach-shmobile/timer.c b/arch/arm/mach-shmobile/timer.c
index 828e8aea037e..e48b0939693f 100644
--- a/arch/arm/mach-shmobile/timer.c
+++ b/arch/arm/mach-shmobile/timer.c
@@ -22,22 +22,16 @@
 
 void __init shmobile_init_delay(void)
 {
-	struct device_node *np, *cpus;
+	struct device_node *np;
 	u32 max_freq = 0;
 
-	cpus = of_find_node_by_path("/cpus");
-	if (!cpus)
-		return;
-
-	for_each_child_of_node(cpus, np) {
+	for_each_of_cpu_node(np) {
 		u32 freq;
 
 		if (!of_property_read_u32(np, "clock-frequency", &freq))
 			max_freq = max(max_freq, freq);
 	}
 
-	of_node_put(cpus);
-
 	if (!max_freq)
 		return;
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c03cd0d765d3..964f682a2b7b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -11,6 +11,8 @@ config ARM64
 	select ARCH_CLOCKSOURCE_DATA
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
+	select ARCH_HAS_DMA_COHERENT_TO_PFN
+	select ARCH_HAS_DMA_MMAP_PGPROT
 	select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
@@ -24,6 +26,8 @@ config ARM64
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_STRICT_MODULE_RWX
+	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYSCALL_WRAPPER
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 393d2b524284..5a89a957641b 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -128,6 +128,7 @@ config ARCH_MVEBU
 	select MVEBU_ICU
 	select MVEBU_ODMI
 	select MVEBU_PIC
+	select MVEBU_SEI
 	select OF_GPIO
 	select PINCTRL
 	select PINCTRL_ARMADA_37XX
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 106039d25e2f..b4e994cd3a42 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -113,9 +113,8 @@ core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
 # Default target when executing plain make
 boot		:= arch/arm64/boot
 KBUILD_IMAGE	:= $(boot)/Image.gz
-KBUILD_DTBS	:= dtbs
 
-all:	Image.gz $(KBUILD_DTBS)
+all:	Image.gz
 
 
 Image: vmlinux
@@ -127,17 +126,6 @@ Image.%: Image
 zinstall install:
 	$(Q)$(MAKE) $(build)=$(boot) $@
 
-%.dtb: scripts
-	$(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
-
-PHONY += dtbs dtbs_install
-
-dtbs: prepare scripts
-	$(Q)$(MAKE) $(build)=$(boot)/dts
-
-dtbs_install:
-	$(Q)$(MAKE) $(dtbinst)=$(boot)/dts
-
 PHONY += vdso_install
 vdso_install:
 	$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@
@@ -145,7 +133,6 @@ vdso_install:
 # We use MRPROPER_FILES and CLEAN_FILES now
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
-	$(Q)$(MAKE) $(clean)=$(boot)/dts
 
 # We need to generate vdso-offsets.h before compiling certain files in kernel/.
 # In order to do that, we should use the archprepare target, but we can't since
@@ -160,8 +147,6 @@ vdso_prepare: prepare0
 define archhelp
   echo  '* Image.gz      - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)'
   echo  '  Image         - Uncompressed kernel image (arch/$(ARCH)/boot/Image)'
-  echo  '* dtbs          - Build device tree blobs for enabled boards'
-  echo  '  dtbs_install  - Install dtbs to $(INSTALL_DTBS_PATH)'
   echo  '  install       - Install uncompressed kernel'
   echo  '  zinstall      - Install compressed kernel'
   echo  '                  Install using (your) ~/bin/installkernel or'
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
index 8cb78dd99672..90c0faf8579f 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
@@ -148,6 +148,7 @@
 		#address-cells = <2>;
 		#size-cells = <2>;
 		ranges;
+		dma-ranges = <0x0 0x0 0x0 0x0 0x10000 0x00000000>;
 
 		clockgen: clocking@1300000 {
 			compatible = "fsl,ls2080a-clockgen";
@@ -321,6 +322,8 @@
 			reg = <0x00000008 0x0c000000 0 0x40>,	 /* MC portal base */
 			      <0x00000000 0x08340000 0 0x40000>; /* MC control reg */
 			msi-parent = <&its>;
+			iommu-map = <0 &smmu 0 0>;	/* This is fixed-up by u-boot */
+			dma-coherent;
 			#address-cells = <3>;
 			#size-cells = <1>;
 
@@ -424,6 +427,9 @@
 			compatible = "arm,mmu-500";
 			reg = <0 0x5000000 0 0x800000>;
 			#global-interrupts = <12>;
+			#iommu-cells = <1>;
+			stream-match-mask = <0x7C00>;
+			dma-coherent;
 			interrupts = <0 13 4>, /* global secure fault */
 				     <0 14 4>, /* combined secure interrupt */
 				     <0 15 4>, /* global non-secure fault */
@@ -466,7 +472,6 @@
 				     <0 204 4>, <0 205 4>,
 				     <0 206 4>, <0 207 4>,
 				     <0 208 4>, <0 209 4>;
-			mmu-masters = <&fsl_mc 0x300 0>;
 		};
 
 		dspi: dspi@2100000 {
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index db8d364f8476..3d165b4cdd2a 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -698,6 +698,7 @@ CONFIG_MEMTEST=y
 CONFIG_SECURITY=y
 CONFIG_CRYPTO_ECHAINIV=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
+CONFIG_CRYPTO_DEV_FSL_DPAA2_CAAM=y
 CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y
 CONFIG_CRYPTO_SHA2_ARM64_CE=y
@@ -706,7 +707,6 @@ CONFIG_CRYPTO_SHA3_ARM64=m
 CONFIG_CRYPTO_SM3_ARM64_CE=m
 CONFIG_CRYPTO_GHASH_ARM64_CE=y
 CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m
-CONFIG_CRYPTO_CRC32_ARM64_CE=m
 CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
 CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
 CONFIG_CRYPTO_CHACHA20_NEON=m
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index e3fdb0fd6f70..a5606823ed4d 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -66,11 +66,6 @@ config CRYPTO_CRCT10DIF_ARM64_CE
 	depends on KERNEL_MODE_NEON && CRC_T10DIF
 	select CRYPTO_HASH
 
-config CRYPTO_CRC32_ARM64_CE
-	tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions"
-	depends on CRC32
-	select CRYPTO_HASH
-
 config CRYPTO_AES_ARM64
 	tristate "AES core cipher using scalar instructions"
 	select CRYPTO_AES
@@ -119,10 +114,4 @@ config CRYPTO_AES_ARM64_BS
 	select CRYPTO_AES_ARM64
 	select CRYPTO_SIMD
 
-config CRYPTO_SPECK_NEON
-	tristate "NEON accelerated Speck cipher algorithms"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_SPECK
-
 endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index bcafd016618e..f476fede09ba 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -32,9 +32,6 @@ ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
 obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o
 crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
 
-obj-$(CONFIG_CRYPTO_CRC32_ARM64_CE) += crc32-ce.o
-crc32-ce-y:= crc32-ce-core.o crc32-ce-glue.o
-
 obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
 aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o
 
@@ -56,9 +53,6 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
 chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
 
-obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
-speck-neon-y := speck-neon-core.o speck-neon-glue.o
-
 obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
 aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
 
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 623e74ed1c67..143070510809 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -17,6 +17,11 @@
 
 	.arch		armv8-a+crypto
 
+	xtsmask		.req	v16
+
+	.macro		xts_reload_mask, tmp
+	.endm
+
 	/* preload all round keys */
 	.macro		load_round_keys, rounds, rk
 	cmp		\rounds, #12
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index adcb83eb683c..1e676625ef33 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -15,6 +15,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/module.h>
 #include <linux/cpufeature.h>
 #include <crypto/xts.h>
@@ -31,6 +32,8 @@
 #define aes_ecb_decrypt		ce_aes_ecb_decrypt
 #define aes_cbc_encrypt		ce_aes_cbc_encrypt
 #define aes_cbc_decrypt		ce_aes_cbc_decrypt
+#define aes_cbc_cts_encrypt	ce_aes_cbc_cts_encrypt
+#define aes_cbc_cts_decrypt	ce_aes_cbc_cts_decrypt
 #define aes_ctr_encrypt		ce_aes_ctr_encrypt
 #define aes_xts_encrypt		ce_aes_xts_encrypt
 #define aes_xts_decrypt		ce_aes_xts_decrypt
@@ -45,6 +48,8 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
 #define aes_ecb_decrypt		neon_aes_ecb_decrypt
 #define aes_cbc_encrypt		neon_aes_cbc_encrypt
 #define aes_cbc_decrypt		neon_aes_cbc_decrypt
+#define aes_cbc_cts_encrypt	neon_aes_cbc_cts_encrypt
+#define aes_cbc_cts_decrypt	neon_aes_cbc_cts_decrypt
 #define aes_ctr_encrypt		neon_aes_ctr_encrypt
 #define aes_xts_encrypt		neon_aes_xts_encrypt
 #define aes_xts_decrypt		neon_aes_xts_decrypt
@@ -63,30 +68,41 @@ MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
 /* defined in aes-modes.S */
-asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int blocks);
-asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int blocks);
 
-asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int blocks, u8 iv[]);
-asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int blocks, u8 iv[]);
 
-asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+				int rounds, int bytes, u8 const iv[]);
+asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+				int rounds, int bytes, u8 const iv[]);
+
+asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int blocks, u8 ctr[]);
 
-asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
-				int rounds, int blocks, u8 const rk2[], u8 iv[],
+asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
+				int rounds, int blocks, u32 const rk2[], u8 iv[],
 				int first);
-asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
-				int rounds, int blocks, u8 const rk2[], u8 iv[],
+asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
+				int rounds, int blocks, u32 const rk2[], u8 iv[],
 				int first);
 
 asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
 			       int blocks, u8 dg[], int enc_before,
 			       int enc_after);
 
+struct cts_cbc_req_ctx {
+	struct scatterlist sg_src[2];
+	struct scatterlist sg_dst[2];
+	struct skcipher_request subreq;
+};
+
 struct crypto_aes_xts_ctx {
 	struct crypto_aes_ctx key1;
 	struct crypto_aes_ctx __aligned(8) key2;
@@ -142,7 +158,7 @@ static int ecb_encrypt(struct skcipher_request *req)
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		kernel_neon_begin();
 		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_enc, rounds, blocks);
+				ctx->key_enc, rounds, blocks);
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
@@ -162,7 +178,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		kernel_neon_begin();
 		aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_dec, rounds, blocks);
+				ctx->key_dec, rounds, blocks);
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
@@ -182,7 +198,7 @@ static int cbc_encrypt(struct skcipher_request *req)
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		kernel_neon_begin();
 		aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
+				ctx->key_enc, rounds, blocks, walk.iv);
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
@@ -202,13 +218,149 @@ static int cbc_decrypt(struct skcipher_request *req)
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		kernel_neon_begin();
 		aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_dec, rounds, blocks, walk.iv);
+				ctx->key_dec, rounds, blocks, walk.iv);
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	return err;
 }
 
+static int cts_cbc_init_tfm(struct crypto_skcipher *tfm)
+{
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct cts_cbc_req_ctx));
+	return 0;
+}
+
+static int cts_cbc_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
+	int err, rounds = 6 + ctx->key_length / 4;
+	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+	struct scatterlist *src = req->src, *dst = req->dst;
+	struct skcipher_walk walk;
+
+	skcipher_request_set_tfm(&rctx->subreq, tfm);
+
+	if (req->cryptlen <= AES_BLOCK_SIZE) {
+		if (req->cryptlen < AES_BLOCK_SIZE)
+			return -EINVAL;
+		cbc_blocks = 1;
+	}
+
+	if (cbc_blocks > 0) {
+		unsigned int blocks;
+
+		skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+					   cbc_blocks * AES_BLOCK_SIZE,
+					   req->iv);
+
+		err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+
+		while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+			kernel_neon_begin();
+			aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_enc, rounds, blocks, walk.iv);
+			kernel_neon_end();
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (err)
+			return err;
+
+		if (req->cryptlen == AES_BLOCK_SIZE)
+			return 0;
+
+		dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
+					     rctx->subreq.cryptlen);
+		if (req->dst != req->src)
+			dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
+					       rctx->subreq.cryptlen);
+	}
+
+	/* handle ciphertext stealing */
+	skcipher_request_set_crypt(&rctx->subreq, src, dst,
+				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+	if (err)
+		return err;
+
+	kernel_neon_begin();
+	aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+			    ctx->key_enc, rounds, walk.nbytes, walk.iv);
+	kernel_neon_end();
+
+	return skcipher_walk_done(&walk, 0);
+}
+
+static int cts_cbc_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
+	int err, rounds = 6 + ctx->key_length / 4;
+	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+	struct scatterlist *src = req->src, *dst = req->dst;
+	struct skcipher_walk walk;
+
+	skcipher_request_set_tfm(&rctx->subreq, tfm);
+
+	if (req->cryptlen <= AES_BLOCK_SIZE) {
+		if (req->cryptlen < AES_BLOCK_SIZE)
+			return -EINVAL;
+		cbc_blocks = 1;
+	}
+
+	if (cbc_blocks > 0) {
+		unsigned int blocks;
+
+		skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+					   cbc_blocks * AES_BLOCK_SIZE,
+					   req->iv);
+
+		err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+
+		while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+			kernel_neon_begin();
+			aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_dec, rounds, blocks, walk.iv);
+			kernel_neon_end();
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (err)
+			return err;
+
+		if (req->cryptlen == AES_BLOCK_SIZE)
+			return 0;
+
+		dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
+					     rctx->subreq.cryptlen);
+		if (req->dst != req->src)
+			dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
+					       rctx->subreq.cryptlen);
+	}
+
+	/* handle ciphertext stealing */
+	skcipher_request_set_crypt(&rctx->subreq, src, dst,
+				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+	if (err)
+		return err;
+
+	kernel_neon_begin();
+	aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+			    ctx->key_dec, rounds, walk.nbytes, walk.iv);
+	kernel_neon_end();
+
+	return skcipher_walk_done(&walk, 0);
+}
+
 static int ctr_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -222,7 +374,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		kernel_neon_begin();
 		aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
+				ctx->key_enc, rounds, blocks, walk.iv);
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
@@ -238,7 +390,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 		blocks = -1;
 
 		kernel_neon_begin();
-		aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
+		aes_ctr_encrypt(tail, NULL, ctx->key_enc, rounds,
 				blocks, walk.iv);
 		kernel_neon_end();
 		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
@@ -272,8 +424,8 @@ static int xts_encrypt(struct skcipher_request *req)
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		kernel_neon_begin();
 		aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key1.key_enc, rounds, blocks,
-				(u8 *)ctx->key2.key_enc, walk.iv, first);
+				ctx->key1.key_enc, rounds, blocks,
+				ctx->key2.key_enc, walk.iv, first);
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
@@ -294,8 +446,8 @@ static int xts_decrypt(struct skcipher_request *req)
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		kernel_neon_begin();
 		aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key1.key_dec, rounds, blocks,
-				(u8 *)ctx->key2.key_enc, walk.iv, first);
+				ctx->key1.key_dec, rounds, blocks,
+				ctx->key2.key_enc, walk.iv, first);
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
@@ -336,6 +488,24 @@ static struct skcipher_alg aes_algs[] = { {
 	.decrypt	= cbc_decrypt,
 }, {
 	.base = {
+		.cra_name		= "__cts(cbc(aes))",
+		.cra_driver_name	= "__cts-cbc-aes-" MODE,
+		.cra_priority		= PRIO,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_module		= THIS_MODULE,
+	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.walksize	= 2 * AES_BLOCK_SIZE,
+	.setkey		= skcipher_aes_setkey,
+	.encrypt	= cts_cbc_encrypt,
+	.decrypt	= cts_cbc_decrypt,
+	.init		= cts_cbc_init_tfm,
+}, {
+	.base = {
 		.cra_name		= "__ctr(aes)",
 		.cra_driver_name	= "__ctr-aes-" MODE,
 		.cra_priority		= PRIO,
@@ -412,7 +582,6 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
 {
 	struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
 	be128 *consts = (be128 *)ctx->consts;
-	u8 *rk = (u8 *)ctx->key.key_enc;
 	int rounds = 6 + key_len / 4;
 	int err;
 
@@ -422,7 +591,8 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
 
 	/* encrypt the zero vector */
 	kernel_neon_begin();
-	aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1);
+	aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, ctx->key.key_enc,
+			rounds, 1);
 	kernel_neon_end();
 
 	cmac_gf128_mul_by_x(consts, consts);
@@ -441,7 +611,6 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
 	};
 
 	struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
-	u8 *rk = (u8 *)ctx->key.key_enc;
 	int rounds = 6 + key_len / 4;
 	u8 key[AES_BLOCK_SIZE];
 	int err;
@@ -451,8 +620,8 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
 		return err;
 
 	kernel_neon_begin();
-	aes_ecb_encrypt(key, ks[0], rk, rounds, 1);
-	aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2);
+	aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
+	aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
 	kernel_neon_end();
 
 	return cbcmac_setkey(tfm, key, sizeof(key));
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 483a7130cf0e..67700045a0e0 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -14,12 +14,12 @@
 	.align		4
 
 aes_encrypt_block4x:
-	encrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
+	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
 	ret
 ENDPROC(aes_encrypt_block4x)
 
 aes_decrypt_block4x:
-	decrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
+	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
 	ret
 ENDPROC(aes_decrypt_block4x)
 
@@ -31,71 +31,57 @@ ENDPROC(aes_decrypt_block4x)
 	 */
 
 AES_ENTRY(aes_ecb_encrypt)
-	frame_push	5
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-
-.Lecbencrestart:
-	enc_prepare	w22, x21, x5
+	enc_prepare	w3, x2, x5
 
 .LecbencloopNx:
-	subs		w23, w23, #4
+	subs		w4, w4, #4
 	bmi		.Lecbenc1x
-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
 	bl		aes_encrypt_block4x
-	st1		{v0.16b-v3.16b}, [x19], #64
-	cond_yield_neon	.Lecbencrestart
+	st1		{v0.16b-v3.16b}, [x0], #64
 	b		.LecbencloopNx
 .Lecbenc1x:
-	adds		w23, w23, #4
+	adds		w4, w4, #4
 	beq		.Lecbencout
 .Lecbencloop:
-	ld1		{v0.16b}, [x20], #16		/* get next pt block */
-	encrypt_block	v0, w22, x21, x5, w6
-	st1		{v0.16b}, [x19], #16
-	subs		w23, w23, #1
+	ld1		{v0.16b}, [x1], #16		/* get next pt block */
+	encrypt_block	v0, w3, x2, x5, w6
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
 	bne		.Lecbencloop
 .Lecbencout:
-	frame_pop
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_ecb_encrypt)
 
 
 AES_ENTRY(aes_ecb_decrypt)
-	frame_push	5
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-
-.Lecbdecrestart:
-	dec_prepare	w22, x21, x5
+	dec_prepare	w3, x2, x5
 
 .LecbdecloopNx:
-	subs		w23, w23, #4
+	subs		w4, w4, #4
 	bmi		.Lecbdec1x
-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
 	bl		aes_decrypt_block4x
-	st1		{v0.16b-v3.16b}, [x19], #64
-	cond_yield_neon	.Lecbdecrestart
+	st1		{v0.16b-v3.16b}, [x0], #64
 	b		.LecbdecloopNx
 .Lecbdec1x:
-	adds		w23, w23, #4
+	adds		w4, w4, #4
 	beq		.Lecbdecout
 .Lecbdecloop:
-	ld1		{v0.16b}, [x20], #16		/* get next ct block */
-	decrypt_block	v0, w22, x21, x5, w6
-	st1		{v0.16b}, [x19], #16
-	subs		w23, w23, #1
+	ld1		{v0.16b}, [x1], #16		/* get next ct block */
+	decrypt_block	v0, w3, x2, x5, w6
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
 	bne		.Lecbdecloop
 .Lecbdecout:
-	frame_pop
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_ecb_decrypt)
 
@@ -108,162 +94,211 @@ AES_ENDPROC(aes_ecb_decrypt)
 	 */
 
 AES_ENTRY(aes_cbc_encrypt)
-	frame_push	6
-
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x5
-
-.Lcbcencrestart:
-	ld1		{v4.16b}, [x24]			/* get iv */
-	enc_prepare	w22, x21, x6
+	ld1		{v4.16b}, [x5]			/* get iv */
+	enc_prepare	w3, x2, x6
 
 .Lcbcencloop4x:
-	subs		w23, w23, #4
+	subs		w4, w4, #4
 	bmi		.Lcbcenc1x
-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
 	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
-	encrypt_block	v0, w22, x21, x6, w7
+	encrypt_block	v0, w3, x2, x6, w7
 	eor		v1.16b, v1.16b, v0.16b
-	encrypt_block	v1, w22, x21, x6, w7
+	encrypt_block	v1, w3, x2, x6, w7
 	eor		v2.16b, v2.16b, v1.16b
-	encrypt_block	v2, w22, x21, x6, w7
+	encrypt_block	v2, w3, x2, x6, w7
 	eor		v3.16b, v3.16b, v2.16b
-	encrypt_block	v3, w22, x21, x6, w7
-	st1		{v0.16b-v3.16b}, [x19], #64
+	encrypt_block	v3, w3, x2, x6, w7
+	st1		{v0.16b-v3.16b}, [x0], #64
 	mov		v4.16b, v3.16b
-	st1		{v4.16b}, [x24]			/* return iv */
-	cond_yield_neon	.Lcbcencrestart
 	b		.Lcbcencloop4x
 .Lcbcenc1x:
-	adds		w23, w23, #4
+	adds		w4, w4, #4
 	beq		.Lcbcencout
 .Lcbcencloop:
-	ld1		{v0.16b}, [x20], #16		/* get next pt block */
+	ld1		{v0.16b}, [x1], #16		/* get next pt block */
 	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
-	encrypt_block	v4, w22, x21, x6, w7
-	st1		{v4.16b}, [x19], #16
-	subs		w23, w23, #1
+	encrypt_block	v4, w3, x2, x6, w7
+	st1		{v4.16b}, [x0], #16
+	subs		w4, w4, #1
 	bne		.Lcbcencloop
 .Lcbcencout:
-	st1		{v4.16b}, [x24]			/* return iv */
-	frame_pop
+	st1		{v4.16b}, [x5]			/* return iv */
 	ret
 AES_ENDPROC(aes_cbc_encrypt)
 
 
 AES_ENTRY(aes_cbc_decrypt)
-	frame_push	6
-
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x5
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
-.Lcbcdecrestart:
-	ld1		{v7.16b}, [x24]			/* get iv */
-	dec_prepare	w22, x21, x6
+	ld1		{v7.16b}, [x5]			/* get iv */
+	dec_prepare	w3, x2, x6
 
 .LcbcdecloopNx:
-	subs		w23, w23, #4
+	subs		w4, w4, #4
 	bmi		.Lcbcdec1x
-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
 	mov		v4.16b, v0.16b
 	mov		v5.16b, v1.16b
 	mov		v6.16b, v2.16b
 	bl		aes_decrypt_block4x
-	sub		x20, x20, #16
+	sub		x1, x1, #16
 	eor		v0.16b, v0.16b, v7.16b
 	eor		v1.16b, v1.16b, v4.16b
-	ld1		{v7.16b}, [x20], #16		/* reload 1 ct block */
+	ld1		{v7.16b}, [x1], #16		/* reload 1 ct block */
 	eor		v2.16b, v2.16b, v5.16b
 	eor		v3.16b, v3.16b, v6.16b
-	st1		{v0.16b-v3.16b}, [x19], #64
-	st1		{v7.16b}, [x24]			/* return iv */
-	cond_yield_neon	.Lcbcdecrestart
+	st1		{v0.16b-v3.16b}, [x0], #64
 	b		.LcbcdecloopNx
 .Lcbcdec1x:
-	adds		w23, w23, #4
+	adds		w4, w4, #4
 	beq		.Lcbcdecout
 .Lcbcdecloop:
-	ld1		{v1.16b}, [x20], #16		/* get next ct block */
+	ld1		{v1.16b}, [x1], #16		/* get next ct block */
 	mov		v0.16b, v1.16b			/* ...and copy to v0 */
-	decrypt_block	v0, w22, x21, x6, w7
+	decrypt_block	v0, w3, x2, x6, w7
 	eor		v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
 	mov		v7.16b, v1.16b			/* ct is next iv */
-	st1		{v0.16b}, [x19], #16
-	subs		w23, w23, #1
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
 	bne		.Lcbcdecloop
 .Lcbcdecout:
-	st1		{v7.16b}, [x24]			/* return iv */
-	frame_pop
+	st1		{v7.16b}, [x5]			/* return iv */
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_cbc_decrypt)
 
 
 	/*
+	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+	 *		       int rounds, int bytes, u8 const iv[])
+	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+	 *		       int rounds, int bytes, u8 const iv[])
+	 */
+
+AES_ENTRY(aes_cbc_cts_encrypt)
+	adr_l		x8, .Lcts_permute_table
+	sub		x4, x4, #16
+	add		x9, x8, #32
+	add		x8, x8, x4
+	sub		x9, x9, x4
+	ld1		{v3.16b}, [x8]
+	ld1		{v4.16b}, [x9]
+
+	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
+	ld1		{v1.16b}, [x1]
+
+	ld1		{v5.16b}, [x5]			/* get iv */
+	enc_prepare	w3, x2, x6
+
+	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
+	tbl		v1.16b, {v1.16b}, v4.16b
+	encrypt_block	v0, w3, x2, x6, w7
+
+	eor		v1.16b, v1.16b, v0.16b
+	tbl		v0.16b, {v0.16b}, v3.16b
+	encrypt_block	v1, w3, x2, x6, w7
+
+	add		x4, x0, x4
+	st1		{v0.16b}, [x4]			/* overlapping stores */
+	st1		{v1.16b}, [x0]
+	ret
+AES_ENDPROC(aes_cbc_cts_encrypt)
+
+AES_ENTRY(aes_cbc_cts_decrypt)
+	adr_l		x8, .Lcts_permute_table
+	sub		x4, x4, #16
+	add		x9, x8, #32
+	add		x8, x8, x4
+	sub		x9, x9, x4
+	ld1		{v3.16b}, [x8]
+	ld1		{v4.16b}, [x9]
+
+	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
+	ld1		{v1.16b}, [x1]
+
+	ld1		{v5.16b}, [x5]			/* get iv */
+	dec_prepare	w3, x2, x6
+
+	tbl		v2.16b, {v1.16b}, v4.16b
+	decrypt_block	v0, w3, x2, x6, w7
+	eor		v2.16b, v2.16b, v0.16b
+
+	tbx		v0.16b, {v1.16b}, v4.16b
+	tbl		v2.16b, {v2.16b}, v3.16b
+	decrypt_block	v0, w3, x2, x6, w7
+	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
+
+	add		x4, x0, x4
+	st1		{v2.16b}, [x4]			/* overlapping stores */
+	st1		{v0.16b}, [x0]
+	ret
+AES_ENDPROC(aes_cbc_cts_decrypt)
+
+	.section	".rodata", "a"
+	.align		6
+.Lcts_permute_table:
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.previous
+
+
+	/*
 	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 	 *		   int blocks, u8 ctr[])
 	 */
 
 AES_ENTRY(aes_ctr_encrypt)
-	frame_push	6
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x5
-
-.Lctrrestart:
-	enc_prepare	w22, x21, x6
-	ld1		{v4.16b}, [x24]
+	enc_prepare	w3, x2, x6
+	ld1		{v4.16b}, [x5]
 
 	umov		x6, v4.d[1]		/* keep swabbed ctr in reg */
 	rev		x6, x6
+	cmn		w6, w4			/* 32 bit overflow? */
+	bcs		.Lctrloop
 .LctrloopNx:
-	subs		w23, w23, #4
+	subs		w4, w4, #4
 	bmi		.Lctr1x
-	cmn		w6, #4			/* 32 bit overflow? */
-	bcs		.Lctr1x
-	ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
-	dup		v7.4s, w6
+	add		w7, w6, #1
 	mov		v0.16b, v4.16b
-	add		v7.4s, v7.4s, v8.4s
+	add		w8, w6, #2
 	mov		v1.16b, v4.16b
-	rev32		v8.16b, v7.16b
+	add		w9, w6, #3
 	mov		v2.16b, v4.16b
+	rev		w7, w7
 	mov		v3.16b, v4.16b
-	mov		v1.s[3], v8.s[0]
-	mov		v2.s[3], v8.s[1]
-	mov		v3.s[3], v8.s[2]
-	ld1		{v5.16b-v7.16b}, [x20], #48	/* get 3 input blocks */
+	rev		w8, w8
+	mov		v1.s[3], w7
+	rev		w9, w9
+	mov		v2.s[3], w8
+	mov		v3.s[3], w9
+	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
 	bl		aes_encrypt_block4x
 	eor		v0.16b, v5.16b, v0.16b
-	ld1		{v5.16b}, [x20], #16		/* get 1 input block  */
+	ld1		{v5.16b}, [x1], #16		/* get 1 input block  */
 	eor		v1.16b, v6.16b, v1.16b
 	eor		v2.16b, v7.16b, v2.16b
 	eor		v3.16b, v5.16b, v3.16b
-	st1		{v0.16b-v3.16b}, [x19], #64
+	st1		{v0.16b-v3.16b}, [x0], #64
 	add		x6, x6, #4
 	rev		x7, x6
 	ins		v4.d[1], x7
-	cbz		w23, .Lctrout
-	st1		{v4.16b}, [x24]		/* return next CTR value */
-	cond_yield_neon	.Lctrrestart
+	cbz		w4, .Lctrout
 	b		.LctrloopNx
 .Lctr1x:
-	adds		w23, w23, #4
+	adds		w4, w4, #4
 	beq		.Lctrout
 .Lctrloop:
 	mov		v0.16b, v4.16b
-	encrypt_block	v0, w22, x21, x8, w7
+	encrypt_block	v0, w3, x2, x8, w7
 
 	adds		x6, x6, #1		/* increment BE ctr */
 	rev		x7, x6
@@ -271,22 +306,22 @@ AES_ENTRY(aes_ctr_encrypt)
 	bcs		.Lctrcarry		/* overflow? */
 
 .Lctrcarrydone:
-	subs		w23, w23, #1
+	subs		w4, w4, #1
 	bmi		.Lctrtailblock		/* blocks <0 means tail block */
-	ld1		{v3.16b}, [x20], #16
+	ld1		{v3.16b}, [x1], #16
 	eor		v3.16b, v0.16b, v3.16b
-	st1		{v3.16b}, [x19], #16
+	st1		{v3.16b}, [x0], #16
 	bne		.Lctrloop
 
 .Lctrout:
-	st1		{v4.16b}, [x24]		/* return next CTR value */
-.Lctrret:
-	frame_pop
+	st1		{v4.16b}, [x5]		/* return next CTR value */
+	ldp		x29, x30, [sp], #16
 	ret
 
 .Lctrtailblock:
-	st1		{v0.16b}, [x19]
-	b		.Lctrret
+	st1		{v0.16b}, [x0]
+	ldp		x29, x30, [sp], #16
+	ret
 
 .Lctrcarry:
 	umov		x7, v4.d[0]		/* load upper word of ctr  */
@@ -296,7 +331,6 @@ AES_ENTRY(aes_ctr_encrypt)
 	ins		v4.d[0], x7
 	b		.Lctrcarrydone
 AES_ENDPROC(aes_ctr_encrypt)
-	.ltorg
 
 
 	/*
@@ -306,150 +340,132 @@ AES_ENDPROC(aes_ctr_encrypt)
 	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
 	 */
 
-	.macro		next_tweak, out, in, const, tmp
+	.macro		next_tweak, out, in, tmp
 	sshr		\tmp\().2d,  \in\().2d,   #63
-	and		\tmp\().16b, \tmp\().16b, \const\().16b
+	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
 	add		\out\().2d,  \in\().2d,   \in\().2d
 	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
 	eor		\out\().16b, \out\().16b, \tmp\().16b
 	.endm
 
-.Lxts_mul_x:
-CPU_LE(	.quad		1, 0x87		)
-CPU_BE(	.quad		0x87, 1		)
+	.macro		xts_load_mask, tmp
+	movi		xtsmask.2s, #0x1
+	movi		\tmp\().2s, #0x87
+	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
+	.endm
 
 AES_ENTRY(aes_xts_encrypt)
-	frame_push	6
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x6
-
-	ld1		{v4.16b}, [x24]
+	ld1		{v4.16b}, [x6]
+	xts_load_mask	v8
 	cbz		w7, .Lxtsencnotfirst
 
 	enc_prepare	w3, x5, x8
 	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
 	enc_switch_key	w3, x2, x8
-	ldr		q7, .Lxts_mul_x
 	b		.LxtsencNx
 
-.Lxtsencrestart:
-	ld1		{v4.16b}, [x24]
 .Lxtsencnotfirst:
-	enc_prepare	w22, x21, x8
+	enc_prepare	w3, x2, x8
 .LxtsencloopNx:
-	ldr		q7, .Lxts_mul_x
-	next_tweak	v4, v4, v7, v8
+	next_tweak	v4, v4, v8
 .LxtsencNx:
-	subs		w23, w23, #4
+	subs		w4, w4, #4
 	bmi		.Lxtsenc1x
-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
-	next_tweak	v5, v4, v7, v8
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
+	next_tweak	v5, v4, v8
 	eor		v0.16b, v0.16b, v4.16b
-	next_tweak	v6, v5, v7, v8
+	next_tweak	v6, v5, v8
 	eor		v1.16b, v1.16b, v5.16b
 	eor		v2.16b, v2.16b, v6.16b
-	next_tweak	v7, v6, v7, v8
+	next_tweak	v7, v6, v8
 	eor		v3.16b, v3.16b, v7.16b
 	bl		aes_encrypt_block4x
 	eor		v3.16b, v3.16b, v7.16b
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v1.16b, v1.16b, v5.16b
 	eor		v2.16b, v2.16b, v6.16b
-	st1		{v0.16b-v3.16b}, [x19], #64
+	st1		{v0.16b-v3.16b}, [x0], #64
 	mov		v4.16b, v7.16b
-	cbz		w23, .Lxtsencout
-	st1		{v4.16b}, [x24]
-	cond_yield_neon	.Lxtsencrestart
+	cbz		w4, .Lxtsencout
+	xts_reload_mask	v8
 	b		.LxtsencloopNx
 .Lxtsenc1x:
-	adds		w23, w23, #4
+	adds		w4, w4, #4
 	beq		.Lxtsencout
 .Lxtsencloop:
-	ld1		{v1.16b}, [x20], #16
+	ld1		{v1.16b}, [x1], #16
 	eor		v0.16b, v1.16b, v4.16b
-	encrypt_block	v0, w22, x21, x8, w7
+	encrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
-	st1		{v0.16b}, [x19], #16
-	subs		w23, w23, #1
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
 	beq		.Lxtsencout
-	next_tweak	v4, v4, v7, v8
+	next_tweak	v4, v4, v8
 	b		.Lxtsencloop
 .Lxtsencout:
-	st1		{v4.16b}, [x24]
-	frame_pop
+	st1		{v4.16b}, [x6]
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_xts_encrypt)
 
 
 AES_ENTRY(aes_xts_decrypt)
-	frame_push	6
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x6
-
-	ld1		{v4.16b}, [x24]
+	ld1		{v4.16b}, [x6]
+	xts_load_mask	v8
 	cbz		w7, .Lxtsdecnotfirst
 
 	enc_prepare	w3, x5, x8
 	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
 	dec_prepare	w3, x2, x8
-	ldr		q7, .Lxts_mul_x
 	b		.LxtsdecNx
 
-.Lxtsdecrestart:
-	ld1		{v4.16b}, [x24]
 .Lxtsdecnotfirst:
-	dec_prepare	w22, x21, x8
+	dec_prepare	w3, x2, x8
 .LxtsdecloopNx:
-	ldr		q7, .Lxts_mul_x
-	next_tweak	v4, v4, v7, v8
+	next_tweak	v4, v4, v8
 .LxtsdecNx:
-	subs		w23, w23, #4
+	subs		w4, w4, #4
 	bmi		.Lxtsdec1x
-	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
-	next_tweak	v5, v4, v7, v8
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
+	next_tweak	v5, v4, v8
 	eor		v0.16b, v0.16b, v4.16b
-	next_tweak	v6, v5, v7, v8
+	next_tweak	v6, v5, v8
 	eor		v1.16b, v1.16b, v5.16b
 	eor		v2.16b, v2.16b, v6.16b
-	next_tweak	v7, v6, v7, v8
+	next_tweak	v7, v6, v8
 	eor		v3.16b, v3.16b, v7.16b
 	bl		aes_decrypt_block4x
 	eor		v3.16b, v3.16b, v7.16b
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v1.16b, v1.16b, v5.16b
 	eor		v2.16b, v2.16b, v6.16b
-	st1		{v0.16b-v3.16b}, [x19], #64
+	st1		{v0.16b-v3.16b}, [x0], #64
 	mov		v4.16b, v7.16b
-	cbz		w23, .Lxtsdecout
-	st1		{v4.16b}, [x24]
-	cond_yield_neon	.Lxtsdecrestart
+	cbz		w4, .Lxtsdecout
+	xts_reload_mask	v8
 	b		.LxtsdecloopNx
 .Lxtsdec1x:
-	adds		w23, w23, #4
+	adds		w4, w4, #4
 	beq		.Lxtsdecout
 .Lxtsdecloop:
-	ld1		{v1.16b}, [x20], #16
+	ld1		{v1.16b}, [x1], #16
 	eor		v0.16b, v1.16b, v4.16b
-	decrypt_block	v0, w22, x21, x8, w7
+	decrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
-	st1		{v0.16b}, [x19], #16
-	subs		w23, w23, #1
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
 	beq		.Lxtsdecout
-	next_tweak	v4, v4, v7, v8
+	next_tweak	v4, v4, v8
 	b		.Lxtsdecloop
 .Lxtsdecout:
-	st1		{v4.16b}, [x24]
-	frame_pop
+	st1		{v4.16b}, [x6]
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_xts_decrypt)
 
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index 1c7b45b7268e..29100f692e8a 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -14,6 +14,12 @@
 #define AES_ENTRY(func)		ENTRY(neon_ ## func)
 #define AES_ENDPROC(func)	ENDPROC(neon_ ## func)
 
+	xtsmask		.req	v7
+
+	.macro		xts_reload_mask, tmp
+	xts_load_mask	\tmp
+	.endm
+
 	/* multiply by polynomial 'x' in GF(2^8) */
 	.macro		mul_by_x, out, in, temp, const
 	sshr		\temp, \in, #7
diff --git a/arch/arm64/crypto/crc32-ce-core.S b/arch/arm64/crypto/crc32-ce-core.S
deleted file mode 100644
index 8061bf0f9c66..000000000000
--- a/arch/arm64/crypto/crc32-ce-core.S
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * Accelerated CRC32(C) using arm64 CRC, NEON and Crypto Extensions instructions
- *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please  visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
- * calculation.
- * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
- * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
- * at:
- * http://www.intel.com/products/processor/manuals/
- * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
- * Volume 2B: Instruction Set Reference, N-Z
- *
- * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
- *	      Alexander Boyko <Alexander_Boyko@xyratex.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-	.section	".rodata", "a"
-	.align		6
-	.cpu		generic+crypto+crc
-
-.Lcrc32_constants:
-	/*
-	 * [x4*128+32 mod P(x) << 32)]'  << 1   = 0x154442bd4
-	 * #define CONSTANT_R1  0x154442bd4LL
-	 *
-	 * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
-	 * #define CONSTANT_R2  0x1c6e41596LL
-	 */
-	.octa		0x00000001c6e415960000000154442bd4
-
-	/*
-	 * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
-	 * #define CONSTANT_R3  0x1751997d0LL
-	 *
-	 * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
-	 * #define CONSTANT_R4  0x0ccaa009eLL
-	 */
-	.octa		0x00000000ccaa009e00000001751997d0
-
-	/*
-	 * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
-	 * #define CONSTANT_R5  0x163cd6124LL
-	 */
-	.quad		0x0000000163cd6124
-	.quad		0x00000000FFFFFFFF
-
-	/*
-	 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
-	 *
-	 * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))`
-	 *                                                      = 0x1F7011641LL
-	 * #define CONSTANT_RU  0x1F7011641LL
-	 */
-	.octa		0x00000001F701164100000001DB710641
-
-.Lcrc32c_constants:
-	.octa		0x000000009e4addf800000000740eef02
-	.octa		0x000000014cd00bd600000000f20c0dfe
-	.quad		0x00000000dd45aab8
-	.quad		0x00000000FFFFFFFF
-	.octa		0x00000000dea713f10000000105ec76f0
-
-	vCONSTANT	.req	v0
-	dCONSTANT	.req	d0
-	qCONSTANT	.req	q0
-
-	BUF		.req	x19
-	LEN		.req	x20
-	CRC		.req	x21
-	CONST		.req	x22
-
-	vzr		.req	v9
-
-	/**
-	 * Calculate crc32
-	 * BUF - buffer
-	 * LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63
-	 * CRC - initial crc32
-	 * return %eax crc32
-	 * uint crc32_pmull_le(unsigned char const *buffer,
-	 *                     size_t len, uint crc32)
-	 */
-	.text
-ENTRY(crc32_pmull_le)
-	adr_l		x3, .Lcrc32_constants
-	b		0f
-
-ENTRY(crc32c_pmull_le)
-	adr_l		x3, .Lcrc32c_constants
-
-0:	frame_push	4, 64
-
-	mov		BUF, x0
-	mov		LEN, x1
-	mov		CRC, x2
-	mov		CONST, x3
-
-	bic		LEN, LEN, #15
-	ld1		{v1.16b-v4.16b}, [BUF], #0x40
-	movi		vzr.16b, #0
-	fmov		dCONSTANT, CRC
-	eor		v1.16b, v1.16b, vCONSTANT.16b
-	sub		LEN, LEN, #0x40
-	cmp		LEN, #0x40
-	b.lt		less_64
-
-	ldr		qCONSTANT, [CONST]
-
-loop_64:		/* 64 bytes Full cache line folding */
-	sub		LEN, LEN, #0x40
-
-	pmull2		v5.1q, v1.2d, vCONSTANT.2d
-	pmull2		v6.1q, v2.2d, vCONSTANT.2d
-	pmull2		v7.1q, v3.2d, vCONSTANT.2d
-	pmull2		v8.1q, v4.2d, vCONSTANT.2d
-
-	pmull		v1.1q, v1.1d, vCONSTANT.1d
-	pmull		v2.1q, v2.1d, vCONSTANT.1d
-	pmull		v3.1q, v3.1d, vCONSTANT.1d
-	pmull		v4.1q, v4.1d, vCONSTANT.1d
-
-	eor		v1.16b, v1.16b, v5.16b
-	ld1		{v5.16b}, [BUF], #0x10
-	eor		v2.16b, v2.16b, v6.16b
-	ld1		{v6.16b}, [BUF], #0x10
-	eor		v3.16b, v3.16b, v7.16b
-	ld1		{v7.16b}, [BUF], #0x10
-	eor		v4.16b, v4.16b, v8.16b
-	ld1		{v8.16b}, [BUF], #0x10
-
-	eor		v1.16b, v1.16b, v5.16b
-	eor		v2.16b, v2.16b, v6.16b
-	eor		v3.16b, v3.16b, v7.16b
-	eor		v4.16b, v4.16b, v8.16b
-
-	cmp		LEN, #0x40
-	b.lt		less_64
-
-	if_will_cond_yield_neon
-	stp		q1, q2, [sp, #.Lframe_local_offset]
-	stp		q3, q4, [sp, #.Lframe_local_offset + 32]
-	do_cond_yield_neon
-	ldp		q1, q2, [sp, #.Lframe_local_offset]
-	ldp		q3, q4, [sp, #.Lframe_local_offset + 32]
-	ldr		qCONSTANT, [CONST]
-	movi		vzr.16b, #0
-	endif_yield_neon
-	b		loop_64
-
-less_64:		/* Folding cache line into 128bit */
-	ldr		qCONSTANT, [CONST, #16]
-
-	pmull2		v5.1q, v1.2d, vCONSTANT.2d
-	pmull		v1.1q, v1.1d, vCONSTANT.1d
-	eor		v1.16b, v1.16b, v5.16b
-	eor		v1.16b, v1.16b, v2.16b
-
-	pmull2		v5.1q, v1.2d, vCONSTANT.2d
-	pmull		v1.1q, v1.1d, vCONSTANT.1d
-	eor		v1.16b, v1.16b, v5.16b
-	eor		v1.16b, v1.16b, v3.16b
-
-	pmull2		v5.1q, v1.2d, vCONSTANT.2d
-	pmull		v1.1q, v1.1d, vCONSTANT.1d
-	eor		v1.16b, v1.16b, v5.16b
-	eor		v1.16b, v1.16b, v4.16b
-
-	cbz		LEN, fold_64
-
-loop_16:		/* Folding rest buffer into 128bit */
-	subs		LEN, LEN, #0x10
-
-	ld1		{v2.16b}, [BUF], #0x10
-	pmull2		v5.1q, v1.2d, vCONSTANT.2d
-	pmull		v1.1q, v1.1d, vCONSTANT.1d
-	eor		v1.16b, v1.16b, v5.16b
-	eor		v1.16b, v1.16b, v2.16b
-
-	b.ne		loop_16
-
-fold_64:
-	/* perform the last 64 bit fold, also adds 32 zeroes
-	 * to the input stream */
-	ext		v2.16b, v1.16b, v1.16b, #8
-	pmull2		v2.1q, v2.2d, vCONSTANT.2d
-	ext		v1.16b, v1.16b, vzr.16b, #8
-	eor		v1.16b, v1.16b, v2.16b
-
-	/* final 32-bit fold */
-	ldr		dCONSTANT, [CONST, #32]
-	ldr		d3, [CONST, #40]
-
-	ext		v2.16b, v1.16b, vzr.16b, #4
-	and		v1.16b, v1.16b, v3.16b
-	pmull		v1.1q, v1.1d, vCONSTANT.1d
-	eor		v1.16b, v1.16b, v2.16b
-
-	/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
-	ldr		qCONSTANT, [CONST, #48]
-
-	and		v2.16b, v1.16b, v3.16b
-	ext		v2.16b, vzr.16b, v2.16b, #8
-	pmull2		v2.1q, v2.2d, vCONSTANT.2d
-	and		v2.16b, v2.16b, v3.16b
-	pmull		v2.1q, v2.1d, vCONSTANT.1d
-	eor		v1.16b, v1.16b, v2.16b
-	mov		w0, v1.s[1]
-
-	frame_pop
-	ret
-ENDPROC(crc32_pmull_le)
-ENDPROC(crc32c_pmull_le)
-
-	.macro		__crc32, c
-0:	subs		x2, x2, #16
-	b.mi		8f
-	ldp		x3, x4, [x1], #16
-CPU_BE(	rev		x3, x3		)
-CPU_BE(	rev		x4, x4		)
-	crc32\c\()x	w0, w0, x3
-	crc32\c\()x	w0, w0, x4
-	b.ne		0b
-	ret
-
-8:	tbz		x2, #3, 4f
-	ldr		x3, [x1], #8
-CPU_BE(	rev		x3, x3		)
-	crc32\c\()x	w0, w0, x3
-4:	tbz		x2, #2, 2f
-	ldr		w3, [x1], #4
-CPU_BE(	rev		w3, w3		)
-	crc32\c\()w	w0, w0, w3
-2:	tbz		x2, #1, 1f
-	ldrh		w3, [x1], #2
-CPU_BE(	rev16		w3, w3		)
-	crc32\c\()h	w0, w0, w3
-1:	tbz		x2, #0, 0f
-	ldrb		w3, [x1]
-	crc32\c\()b	w0, w0, w3
-0:	ret
-	.endm
-
-	.align		5
-ENTRY(crc32_armv8_le)
-	__crc32
-ENDPROC(crc32_armv8_le)
-
-	.align		5
-ENTRY(crc32c_armv8_le)
-	__crc32		c
-ENDPROC(crc32c_armv8_le)
diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c
deleted file mode 100644
index 34b4e3d46aab..000000000000
--- a/arch/arm64/crypto/crc32-ce-glue.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
- *
- * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/cpufeature.h>
-#include <linux/crc32.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#include <crypto/internal/hash.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <asm/unaligned.h>
-
-#define PMULL_MIN_LEN		64L	/* minimum size of buffer
-					 * for crc32_pmull_le_16 */
-#define SCALE_F			16L	/* size of NEON register */
-
-asmlinkage u32 crc32_pmull_le(const u8 buf[], u64 len, u32 init_crc);
-asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], size_t len);
-
-asmlinkage u32 crc32c_pmull_le(const u8 buf[], u64 len, u32 init_crc);
-asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], size_t len);
-
-static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], size_t len);
-static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], size_t len);
-
-static int crc32_pmull_cra_init(struct crypto_tfm *tfm)
-{
-	u32 *key = crypto_tfm_ctx(tfm);
-
-	*key = 0;
-	return 0;
-}
-
-static int crc32c_pmull_cra_init(struct crypto_tfm *tfm)
-{
-	u32 *key = crypto_tfm_ctx(tfm);
-
-	*key = ~0;
-	return 0;
-}
-
-static int crc32_pmull_setkey(struct crypto_shash *hash, const u8 *key,
-			      unsigned int keylen)
-{
-	u32 *mctx = crypto_shash_ctx(hash);
-
-	if (keylen != sizeof(u32)) {
-		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-	*mctx = le32_to_cpup((__le32 *)key);
-	return 0;
-}
-
-static int crc32_pmull_init(struct shash_desc *desc)
-{
-	u32 *mctx = crypto_shash_ctx(desc->tfm);
-	u32 *crc = shash_desc_ctx(desc);
-
-	*crc = *mctx;
-	return 0;
-}
-
-static int crc32_update(struct shash_desc *desc, const u8 *data,
-			unsigned int length)
-{
-	u32 *crc = shash_desc_ctx(desc);
-
-	*crc = crc32_armv8_le(*crc, data, length);
-	return 0;
-}
-
-static int crc32c_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	u32 *crc = shash_desc_ctx(desc);
-
-	*crc = crc32c_armv8_le(*crc, data, length);
-	return 0;
-}
-
-static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	u32 *crc = shash_desc_ctx(desc);
-	unsigned int l;
-
-	if ((u64)data % SCALE_F) {
-		l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
-
-		*crc = fallback_crc32(*crc, data, l);
-
-		data += l;
-		length -= l;
-	}
-
-	if (length >= PMULL_MIN_LEN && may_use_simd()) {
-		l = round_down(length, SCALE_F);
-
-		kernel_neon_begin();
-		*crc = crc32_pmull_le(data, l, *crc);
-		kernel_neon_end();
-
-		data += l;
-		length -= l;
-	}
-
-	if (length > 0)
-		*crc = fallback_crc32(*crc, data, length);
-
-	return 0;
-}
-
-static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	u32 *crc = shash_desc_ctx(desc);
-	unsigned int l;
-
-	if ((u64)data % SCALE_F) {
-		l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
-
-		*crc = fallback_crc32c(*crc, data, l);
-
-		data += l;
-		length -= l;
-	}
-
-	if (length >= PMULL_MIN_LEN && may_use_simd()) {
-		l = round_down(length, SCALE_F);
-
-		kernel_neon_begin();
-		*crc = crc32c_pmull_le(data, l, *crc);
-		kernel_neon_end();
-
-		data += l;
-		length -= l;
-	}
-
-	if (length > 0) {
-		*crc = fallback_crc32c(*crc, data, length);
-	}
-
-	return 0;
-}
-
-static int crc32_pmull_final(struct shash_desc *desc, u8 *out)
-{
-	u32 *crc = shash_desc_ctx(desc);
-
-	put_unaligned_le32(*crc, out);
-	return 0;
-}
-
-static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
-{
-	u32 *crc = shash_desc_ctx(desc);
-
-	put_unaligned_le32(~*crc, out);
-	return 0;
-}
-
-static struct shash_alg crc32_pmull_algs[] = { {
-	.setkey			= crc32_pmull_setkey,
-	.init			= crc32_pmull_init,
-	.update			= crc32_update,
-	.final			= crc32_pmull_final,
-	.descsize		= sizeof(u32),
-	.digestsize		= sizeof(u32),
-
-	.base.cra_ctxsize	= sizeof(u32),
-	.base.cra_init		= crc32_pmull_cra_init,
-	.base.cra_name		= "crc32",
-	.base.cra_driver_name	= "crc32-arm64-ce",
-	.base.cra_priority	= 200,
-	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
-	.base.cra_blocksize	= 1,
-	.base.cra_module	= THIS_MODULE,
-}, {
-	.setkey			= crc32_pmull_setkey,
-	.init			= crc32_pmull_init,
-	.update			= crc32c_update,
-	.final			= crc32c_pmull_final,
-	.descsize		= sizeof(u32),
-	.digestsize		= sizeof(u32),
-
-	.base.cra_ctxsize	= sizeof(u32),
-	.base.cra_init		= crc32c_pmull_cra_init,
-	.base.cra_name		= "crc32c",
-	.base.cra_driver_name	= "crc32c-arm64-ce",
-	.base.cra_priority	= 200,
-	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
-	.base.cra_blocksize	= 1,
-	.base.cra_module	= THIS_MODULE,
-} };
-
-static int __init crc32_pmull_mod_init(void)
-{
-	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
-		crc32_pmull_algs[0].update = crc32_pmull_update;
-		crc32_pmull_algs[1].update = crc32c_pmull_update;
-
-		if (elf_hwcap & HWCAP_CRC32) {
-			fallback_crc32 = crc32_armv8_le;
-			fallback_crc32c = crc32c_armv8_le;
-		} else {
-			fallback_crc32 = crc32_le;
-			fallback_crc32c = __crc32c_le;
-		}
-	} else if (!(elf_hwcap & HWCAP_CRC32)) {
-		return -ENODEV;
-	}
-	return crypto_register_shashes(crc32_pmull_algs,
-				       ARRAY_SIZE(crc32_pmull_algs));
-}
-
-static void __exit crc32_pmull_mod_exit(void)
-{
-	crypto_unregister_shashes(crc32_pmull_algs,
-				  ARRAY_SIZE(crc32_pmull_algs));
-}
-
-static const struct cpu_feature crc32_cpu_feature[] = {
-	{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
-};
-MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
-
-module_init(crc32_pmull_mod_init);
-module_exit(crc32_pmull_mod_exit);
-
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S
index 663ea71cdb38..9e82e8e8ed05 100644
--- a/arch/arm64/crypto/crct10dif-ce-core.S
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -80,7 +80,186 @@
 
 	vzr		.req	v13
 
-ENTRY(crc_t10dif_pmull)
+	ad		.req	v14
+	bd		.req	v10
+
+	k00_16		.req	v15
+	k32_48		.req	v16
+
+	t3		.req	v17
+	t4		.req	v18
+	t5		.req	v19
+	t6		.req	v20
+	t7		.req	v21
+	t8		.req	v22
+	t9		.req	v23
+
+	perm1		.req	v24
+	perm2		.req	v25
+	perm3		.req	v26
+	perm4		.req	v27
+
+	bd1		.req	v28
+	bd2		.req	v29
+	bd3		.req	v30
+	bd4		.req	v31
+
+	.macro		__pmull_init_p64
+	.endm
+
+	.macro		__pmull_pre_p64, bd
+	.endm
+
+	.macro		__pmull_init_p8
+	// k00_16 := 0x0000000000000000_000000000000ffff
+	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
+	movi		k32_48.2d, #0xffffffff
+	mov		k32_48.h[2], k32_48.h[0]
+	ushr		k00_16.2d, k32_48.2d, #32
+
+	// prepare the permutation vectors
+	mov_q		x5, 0x080f0e0d0c0b0a09
+	movi		perm4.8b, #8
+	dup		perm1.2d, x5
+	eor		perm1.16b, perm1.16b, perm4.16b
+	ushr		perm2.2d, perm1.2d, #8
+	ushr		perm3.2d, perm1.2d, #16
+	ushr		perm4.2d, perm1.2d, #24
+	sli		perm2.2d, perm1.2d, #56
+	sli		perm3.2d, perm1.2d, #48
+	sli		perm4.2d, perm1.2d, #40
+	.endm
+
+	.macro		__pmull_pre_p8, bd
+	tbl		bd1.16b, {\bd\().16b}, perm1.16b
+	tbl		bd2.16b, {\bd\().16b}, perm2.16b
+	tbl		bd3.16b, {\bd\().16b}, perm3.16b
+	tbl		bd4.16b, {\bd\().16b}, perm4.16b
+	.endm
+
+__pmull_p8_core:
+.L__pmull_p8_core:
+	ext		t4.8b, ad.8b, ad.8b, #1			// A1
+	ext		t5.8b, ad.8b, ad.8b, #2			// A2
+	ext		t6.8b, ad.8b, ad.8b, #3			// A3
+
+	pmull		t4.8h, t4.8b, bd.8b			// F = A1*B
+	pmull		t8.8h, ad.8b, bd1.8b			// E = A*B1
+	pmull		t5.8h, t5.8b, bd.8b			// H = A2*B
+	pmull		t7.8h, ad.8b, bd2.8b			// G = A*B2
+	pmull		t6.8h, t6.8b, bd.8b			// J = A3*B
+	pmull		t9.8h, ad.8b, bd3.8b			// I = A*B3
+	pmull		t3.8h, ad.8b, bd4.8b			// K = A*B4
+	b		0f
+
+.L__pmull_p8_core2:
+	tbl		t4.16b, {ad.16b}, perm1.16b		// A1
+	tbl		t5.16b, {ad.16b}, perm2.16b		// A2
+	tbl		t6.16b, {ad.16b}, perm3.16b		// A3
+
+	pmull2		t4.8h, t4.16b, bd.16b			// F = A1*B
+	pmull2		t8.8h, ad.16b, bd1.16b			// E = A*B1
+	pmull2		t5.8h, t5.16b, bd.16b			// H = A2*B
+	pmull2		t7.8h, ad.16b, bd2.16b			// G = A*B2
+	pmull2		t6.8h, t6.16b, bd.16b			// J = A3*B
+	pmull2		t9.8h, ad.16b, bd3.16b			// I = A*B3
+	pmull2		t3.8h, ad.16b, bd4.16b			// K = A*B4
+
+0:	eor		t4.16b, t4.16b, t8.16b			// L = E + F
+	eor		t5.16b, t5.16b, t7.16b			// M = G + H
+	eor		t6.16b, t6.16b, t9.16b			// N = I + J
+
+	uzp1		t8.2d, t4.2d, t5.2d
+	uzp2		t4.2d, t4.2d, t5.2d
+	uzp1		t7.2d, t6.2d, t3.2d
+	uzp2		t6.2d, t6.2d, t3.2d
+
+	// t4 = (L) (P0 + P1) << 8
+	// t5 = (M) (P2 + P3) << 16
+	eor		t8.16b, t8.16b, t4.16b
+	and		t4.16b, t4.16b, k32_48.16b
+
+	// t6 = (N) (P4 + P5) << 24
+	// t7 = (K) (P6 + P7) << 32
+	eor		t7.16b, t7.16b, t6.16b
+	and		t6.16b, t6.16b, k00_16.16b
+
+	eor		t8.16b, t8.16b, t4.16b
+	eor		t7.16b, t7.16b, t6.16b
+
+	zip2		t5.2d, t8.2d, t4.2d
+	zip1		t4.2d, t8.2d, t4.2d
+	zip2		t3.2d, t7.2d, t6.2d
+	zip1		t6.2d, t7.2d, t6.2d
+
+	ext		t4.16b, t4.16b, t4.16b, #15
+	ext		t5.16b, t5.16b, t5.16b, #14
+	ext		t6.16b, t6.16b, t6.16b, #13
+	ext		t3.16b, t3.16b, t3.16b, #12
+
+	eor		t4.16b, t4.16b, t5.16b
+	eor		t6.16b, t6.16b, t3.16b
+	ret
+ENDPROC(__pmull_p8_core)
+
+	.macro		__pmull_p8, rq, ad, bd, i
+	.ifnc		\bd, v10
+	.err
+	.endif
+	mov		ad.16b, \ad\().16b
+	.ifb		\i
+	pmull		\rq\().8h, \ad\().8b, bd.8b		// D = A*B
+	.else
+	pmull2		\rq\().8h, \ad\().16b, bd.16b		// D = A*B
+	.endif
+
+	bl		.L__pmull_p8_core\i
+
+	eor		\rq\().16b, \rq\().16b, t4.16b
+	eor		\rq\().16b, \rq\().16b, t6.16b
+	.endm
+
+	.macro		fold64, p, reg1, reg2
+	ldp		q11, q12, [arg2], #0x20
+
+	__pmull_\p	v8, \reg1, v10, 2
+	__pmull_\p	\reg1, \reg1, v10
+
+CPU_LE(	rev64		v11.16b, v11.16b		)
+CPU_LE(	rev64		v12.16b, v12.16b		)
+
+	__pmull_\p	v9, \reg2, v10, 2
+	__pmull_\p	\reg2, \reg2, v10
+
+CPU_LE(	ext		v11.16b, v11.16b, v11.16b, #8	)
+CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
+
+	eor		\reg1\().16b, \reg1\().16b, v8.16b
+	eor		\reg2\().16b, \reg2\().16b, v9.16b
+	eor		\reg1\().16b, \reg1\().16b, v11.16b
+	eor		\reg2\().16b, \reg2\().16b, v12.16b
+	.endm
+
+	.macro		fold16, p, reg, rk
+	__pmull_\p	v8, \reg, v10
+	__pmull_\p	\reg, \reg, v10, 2
+	.ifnb		\rk
+	ldr_l		q10, \rk, x8
+	__pmull_pre_\p	v10
+	.endif
+	eor		v7.16b, v7.16b, v8.16b
+	eor		v7.16b, v7.16b, \reg\().16b
+	.endm
+
+	.macro		__pmull_p64, rd, rn, rm, n
+	.ifb		\n
+	pmull		\rd\().1q, \rn\().1d, \rm\().1d
+	.else
+	pmull2		\rd\().1q, \rn\().2d, \rm\().2d
+	.endif
+	.endm
+
+	.macro		crc_t10dif_pmull, p
 	frame_push	3, 128
 
 	mov		arg1_low32, w0
@@ -89,6 +268,8 @@ ENTRY(crc_t10dif_pmull)
 
 	movi		vzr.16b, #0		// init zero register
 
+	__pmull_init_\p
+
 	// adjust the 16-bit initial_crc value, scale it to 32 bits
 	lsl		arg1_low32, arg1_low32, #16
 
@@ -96,7 +277,7 @@ ENTRY(crc_t10dif_pmull)
 	cmp		arg3, #256
 
 	// for sizes less than 128, we can't fold 64B at a time...
-	b.lt		_less_than_128
+	b.lt		.L_less_than_128_\@
 
 	// load the initial crc value
 	// crc value does not need to be byte-reflected, but it needs
@@ -137,6 +318,7 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 	ldr_l		q10, rk3, x8	// xmm10 has rk3 and rk4
 					// type of pmull instruction
 					// will determine which constant to use
+	__pmull_pre_\p	v10
 
 	//
 	// we subtract 256 instead of 128 to save one instruction from the loop
@@ -147,41 +329,19 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 	// buffer. The _fold_64_B_loop will fold 64B at a time
 	// until we have 64+y Bytes of buffer
 
-
 	// fold 64B at a time. This section of the code folds 4 vector
 	// registers in parallel
-_fold_64_B_loop:
+.L_fold_64_B_loop_\@:
 
-	.macro		fold64, reg1, reg2
-	ldp		q11, q12, [arg2], #0x20
-
-	pmull2		v8.1q, \reg1\().2d, v10.2d
-	pmull		\reg1\().1q, \reg1\().1d, v10.1d
-
-CPU_LE(	rev64		v11.16b, v11.16b		)
-CPU_LE(	rev64		v12.16b, v12.16b		)
-
-	pmull2		v9.1q, \reg2\().2d, v10.2d
-	pmull		\reg2\().1q, \reg2\().1d, v10.1d
-
-CPU_LE(	ext		v11.16b, v11.16b, v11.16b, #8	)
-CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
-
-	eor		\reg1\().16b, \reg1\().16b, v8.16b
-	eor		\reg2\().16b, \reg2\().16b, v9.16b
-	eor		\reg1\().16b, \reg1\().16b, v11.16b
-	eor		\reg2\().16b, \reg2\().16b, v12.16b
-	.endm
-
-	fold64		v0, v1
-	fold64		v2, v3
-	fold64		v4, v5
-	fold64		v6, v7
+	fold64		\p, v0, v1
+	fold64		\p, v2, v3
+	fold64		\p, v4, v5
+	fold64		\p, v6, v7
 
 	subs		arg3, arg3, #128
 
 	// check if there is another 64B in the buffer to be able to fold
-	b.lt		_fold_64_B_end
+	b.lt		.L_fold_64_B_end_\@
 
 	if_will_cond_yield_neon
 	stp		q0, q1, [sp, #.Lframe_local_offset]
@@ -195,11 +355,13 @@ CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
 	ldp		q6, q7, [sp, #.Lframe_local_offset + 96]
 	ldr_l		q10, rk3, x8
 	movi		vzr.16b, #0		// init zero register
+	__pmull_init_\p
+	__pmull_pre_\p	v10
 	endif_yield_neon
 
-	b		_fold_64_B_loop
+	b		.L_fold_64_B_loop_\@
 
-_fold_64_B_end:
+.L_fold_64_B_end_\@:
 	// at this point, the buffer pointer is pointing at the last y Bytes
 	// of the buffer the 64B of folded data is in 4 of the vector
 	// registers: v0, v1, v2, v3
@@ -208,38 +370,29 @@ _fold_64_B_end:
 	// constants
 
 	ldr_l		q10, rk9, x8
+	__pmull_pre_\p	v10
 
-	.macro		fold16, reg, rk
-	pmull		v8.1q, \reg\().1d, v10.1d
-	pmull2		\reg\().1q, \reg\().2d, v10.2d
-	.ifnb		\rk
-	ldr_l		q10, \rk, x8
-	.endif
-	eor		v7.16b, v7.16b, v8.16b
-	eor		v7.16b, v7.16b, \reg\().16b
-	.endm
-
-	fold16		v0, rk11
-	fold16		v1, rk13
-	fold16		v2, rk15
-	fold16		v3, rk17
-	fold16		v4, rk19
-	fold16		v5, rk1
-	fold16		v6
+	fold16		\p, v0, rk11
+	fold16		\p, v1, rk13
+	fold16		\p, v2, rk15
+	fold16		\p, v3, rk17
+	fold16		\p, v4, rk19
+	fold16		\p, v5, rk1
+	fold16		\p, v6
 
 	// instead of 64, we add 48 to the loop counter to save 1 instruction
 	// from the loop instead of a cmp instruction, we use the negative
 	// flag with the jl instruction
 	adds		arg3, arg3, #(128-16)
-	b.lt		_final_reduction_for_128
+	b.lt		.L_final_reduction_for_128_\@
 
 	// now we have 16+y bytes left to reduce. 16 Bytes is in register v7
 	// and the rest is in memory. We can fold 16 bytes at a time if y>=16
 	// continue folding 16B at a time
 
-_16B_reduction_loop:
-	pmull		v8.1q, v7.1d, v10.1d
-	pmull2		v7.1q, v7.2d, v10.2d
+.L_16B_reduction_loop_\@:
+	__pmull_\p	v8, v7, v10
+	__pmull_\p	v7, v7, v10, 2
 	eor		v7.16b, v7.16b, v8.16b
 
 	ldr		q0, [arg2], #16
@@ -251,22 +404,22 @@ CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
 	// instead of a cmp instruction, we utilize the flags with the
 	// jge instruction equivalent of: cmp arg3, 16-16
 	// check if there is any more 16B in the buffer to be able to fold
-	b.ge		_16B_reduction_loop
+	b.ge		.L_16B_reduction_loop_\@
 
 	// now we have 16+z bytes left to reduce, where 0<= z < 16.
 	// first, we reduce the data in the xmm7 register
 
-_final_reduction_for_128:
+.L_final_reduction_for_128_\@:
 	// check if any more data to fold. If not, compute the CRC of
 	// the final 128 bits
 	adds		arg3, arg3, #16
-	b.eq		_128_done
+	b.eq		.L_128_done_\@
 
 	// here we are getting data that is less than 16 bytes.
 	// since we know that there was data before the pointer, we can
 	// offset the input pointer before the actual point, to receive
 	// exactly 16 bytes. after that the registers need to be adjusted.
-_get_last_two_regs:
+.L_get_last_two_regs_\@:
 	add		arg2, arg2, arg3
 	ldr		q1, [arg2, #-16]
 CPU_LE(	rev64		v1.16b, v1.16b			)
@@ -291,47 +444,48 @@ CPU_LE(	ext		v1.16b, v1.16b, v1.16b, #8	)
 	bsl		v0.16b, v2.16b, v1.16b
 
 	// fold 16 Bytes
-	pmull		v8.1q, v7.1d, v10.1d
-	pmull2		v7.1q, v7.2d, v10.2d
+	__pmull_\p	v8, v7, v10
+	__pmull_\p	v7, v7, v10, 2
 	eor		v7.16b, v7.16b, v8.16b
 	eor		v7.16b, v7.16b, v0.16b
 
-_128_done:
+.L_128_done_\@:
 	// compute crc of a 128-bit value
 	ldr_l		q10, rk5, x8		// rk5 and rk6 in xmm10
+	__pmull_pre_\p	v10
 
 	// 64b fold
 	ext		v0.16b, vzr.16b, v7.16b, #8
 	mov		v7.d[0], v7.d[1]
-	pmull		v7.1q, v7.1d, v10.1d
+	__pmull_\p	v7, v7, v10
 	eor		v7.16b, v7.16b, v0.16b
 
 	// 32b fold
 	ext		v0.16b, v7.16b, vzr.16b, #4
 	mov		v7.s[3], vzr.s[0]
-	pmull2		v0.1q, v0.2d, v10.2d
+	__pmull_\p	v0, v0, v10, 2
 	eor		v7.16b, v7.16b, v0.16b
 
 	// barrett reduction
-_barrett:
 	ldr_l		q10, rk7, x8
+	__pmull_pre_\p	v10
 	mov		v0.d[0], v7.d[1]
 
-	pmull		v0.1q, v0.1d, v10.1d
+	__pmull_\p	v0, v0, v10
 	ext		v0.16b, vzr.16b, v0.16b, #12
-	pmull2		v0.1q, v0.2d, v10.2d
+	__pmull_\p	v0, v0, v10, 2
 	ext		v0.16b, vzr.16b, v0.16b, #12
 	eor		v7.16b, v7.16b, v0.16b
 	mov		w0, v7.s[1]
 
-_cleanup:
+.L_cleanup_\@:
 	// scale the result back to 16 bits
 	lsr		x0, x0, #16
 	frame_pop
 	ret
 
-_less_than_128:
-	cbz		arg3, _cleanup
+.L_less_than_128_\@:
+	cbz		arg3, .L_cleanup_\@
 
 	movi		v0.16b, #0
 	mov		v0.s[3], arg1_low32	// get the initial crc value
@@ -342,20 +496,21 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 	eor		v7.16b, v7.16b, v0.16b	// xor the initial crc value
 
 	cmp		arg3, #16
-	b.eq		_128_done		// exactly 16 left
-	b.lt		_less_than_16_left
+	b.eq		.L_128_done_\@		// exactly 16 left
+	b.lt		.L_less_than_16_left_\@
 
 	ldr_l		q10, rk1, x8		// rk1 and rk2 in xmm10
+	__pmull_pre_\p	v10
 
 	// update the counter. subtract 32 instead of 16 to save one
 	// instruction from the loop
 	subs		arg3, arg3, #32
-	b.ge		_16B_reduction_loop
+	b.ge		.L_16B_reduction_loop_\@
 
 	add		arg3, arg3, #16
-	b		_get_last_two_regs
+	b		.L_get_last_two_regs_\@
 
-_less_than_16_left:
+.L_less_than_16_left_\@:
 	// shl r9, 4
 	adr_l		x0, tbl_shf_table + 16
 	sub		x0, x0, arg3
@@ -363,8 +518,17 @@ _less_than_16_left:
 	movi		v9.16b, #0x80
 	eor		v0.16b, v0.16b, v9.16b
 	tbl		v7.16b, {v7.16b}, v0.16b
-	b		_128_done
-ENDPROC(crc_t10dif_pmull)
+	b		.L_128_done_\@
+	.endm
+
+ENTRY(crc_t10dif_pmull_p8)
+	crc_t10dif_pmull	p8
+ENDPROC(crc_t10dif_pmull_p8)
+
+	.align		5
+ENTRY(crc_t10dif_pmull_p64)
+	crc_t10dif_pmull	p64
+ENDPROC(crc_t10dif_pmull_p64)
 
 // precomputed constants
 // these constants are precomputed from the poly:
diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c
index 96f0cae4a022..b461d62023f2 100644
--- a/arch/arm64/crypto/crct10dif-ce-glue.c
+++ b/arch/arm64/crypto/crct10dif-ce-glue.c
@@ -22,7 +22,10 @@
 
 #define CRC_T10DIF_PMULL_CHUNK_SIZE	16U
 
-asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u64 len);
+asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 buf[], u64 len);
+asmlinkage u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 buf[], u64 len);
+
+static u16 (*crc_t10dif_pmull)(u16 init_crc, const u8 buf[], u64 len);
 
 static int crct10dif_init(struct shash_desc *desc)
 {
@@ -85,6 +88,11 @@ static struct shash_alg crc_t10dif_alg = {
 
 static int __init crc_t10dif_mod_init(void)
 {
+	if (elf_hwcap & HWCAP_PMULL)
+		crc_t10dif_pmull = crc_t10dif_pmull_p64;
+	else
+		crc_t10dif_pmull = crc_t10dif_pmull_p8;
+
 	return crypto_register_shash(&crc_t10dif_alg);
 }
 
@@ -93,8 +101,10 @@ static void __exit crc_t10dif_mod_exit(void)
 	crypto_unregister_shash(&crc_t10dif_alg);
 }
 
-module_cpu_feature_match(PMULL, crc_t10dif_mod_init);
+module_cpu_feature_match(ASIMD, crc_t10dif_mod_init);
 module_exit(crc_t10dif_mod_exit);
 
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("crct10dif");
+MODULE_ALIAS_CRYPTO("crct10dif-arm64-ce");
diff --git a/arch/arm64/crypto/speck-neon-core.S b/arch/arm64/crypto/speck-neon-core.S
deleted file mode 100644
index b14463438b09..000000000000
--- a/arch/arm64/crypto/speck-neon-core.S
+++ /dev/null
@@ -1,352 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM64 NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Author: Eric Biggers <ebiggers@google.com>
- */
-
-#include <linux/linkage.h>
-
-	.text
-
-	// arguments
-	ROUND_KEYS	.req	x0	// const {u64,u32} *round_keys
-	NROUNDS		.req	w1	// int nrounds
-	NROUNDS_X	.req	x1
-	DST		.req	x2	// void *dst
-	SRC		.req	x3	// const void *src
-	NBYTES		.req	w4	// unsigned int nbytes
-	TWEAK		.req	x5	// void *tweak
-
-	// registers which hold the data being encrypted/decrypted
-	// (underscores avoid a naming collision with ARM64 registers x0-x3)
-	X_0		.req	v0
-	Y_0		.req	v1
-	X_1		.req	v2
-	Y_1		.req	v3
-	X_2		.req	v4
-	Y_2		.req	v5
-	X_3		.req	v6
-	Y_3		.req	v7
-
-	// the round key, duplicated in all lanes
-	ROUND_KEY	.req	v8
-
-	// index vector for tbl-based 8-bit rotates
-	ROTATE_TABLE	.req	v9
-	ROTATE_TABLE_Q	.req	q9
-
-	// temporary registers
-	TMP0		.req	v10
-	TMP1		.req	v11
-	TMP2		.req	v12
-	TMP3		.req	v13
-
-	// multiplication table for updating XTS tweaks
-	GFMUL_TABLE	.req	v14
-	GFMUL_TABLE_Q	.req	q14
-
-	// next XTS tweak value(s)
-	TWEAKV_NEXT	.req	v15
-
-	// XTS tweaks for the blocks currently being encrypted/decrypted
-	TWEAKV0		.req	v16
-	TWEAKV1		.req	v17
-	TWEAKV2		.req	v18
-	TWEAKV3		.req	v19
-	TWEAKV4		.req	v20
-	TWEAKV5		.req	v21
-	TWEAKV6		.req	v22
-	TWEAKV7		.req	v23
-
-	.align		4
-.Lror64_8_table:
-	.octa		0x080f0e0d0c0b0a090007060504030201
-.Lror32_8_table:
-	.octa		0x0c0f0e0d080b0a090407060500030201
-.Lrol64_8_table:
-	.octa		0x0e0d0c0b0a09080f0605040302010007
-.Lrol32_8_table:
-	.octa		0x0e0d0c0f0a09080b0605040702010003
-.Lgf128mul_table:
-	.octa		0x00000000000000870000000000000001
-.Lgf64mul_table:
-	.octa		0x0000000000000000000000002d361b00
-
-/*
- * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
- *
- * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
- * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
- * of ROUND_KEY.  'n' is the lane size: 64 for Speck128, or 32 for Speck64.
- * 'lanes' is the lane specifier: "2d" for Speck128 or "4s" for Speck64.
- */
-.macro _speck_round_128bytes	n, lanes
-
-	// x = ror(x, 8)
-	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
-	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
-	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
-	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
-
-	// x += y
-	add		X_0.\lanes, X_0.\lanes, Y_0.\lanes
-	add		X_1.\lanes, X_1.\lanes, Y_1.\lanes
-	add		X_2.\lanes, X_2.\lanes, Y_2.\lanes
-	add		X_3.\lanes, X_3.\lanes, Y_3.\lanes
-
-	// x ^= k
-	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
-	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
-	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
-	eor		X_3.16b, X_3.16b, ROUND_KEY.16b
-
-	// y = rol(y, 3)
-	shl		TMP0.\lanes, Y_0.\lanes, #3
-	shl		TMP1.\lanes, Y_1.\lanes, #3
-	shl		TMP2.\lanes, Y_2.\lanes, #3
-	shl		TMP3.\lanes, Y_3.\lanes, #3
-	sri		TMP0.\lanes, Y_0.\lanes, #(\n - 3)
-	sri		TMP1.\lanes, Y_1.\lanes, #(\n - 3)
-	sri		TMP2.\lanes, Y_2.\lanes, #(\n - 3)
-	sri		TMP3.\lanes, Y_3.\lanes, #(\n - 3)
-
-	// y ^= x
-	eor		Y_0.16b, TMP0.16b, X_0.16b
-	eor		Y_1.16b, TMP1.16b, X_1.16b
-	eor		Y_2.16b, TMP2.16b, X_2.16b
-	eor		Y_3.16b, TMP3.16b, X_3.16b
-.endm
-
-/*
- * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
- *
- * This is the inverse of _speck_round_128bytes().
- */
-.macro _speck_unround_128bytes	n, lanes
-
-	// y ^= x
-	eor		TMP0.16b, Y_0.16b, X_0.16b
-	eor		TMP1.16b, Y_1.16b, X_1.16b
-	eor		TMP2.16b, Y_2.16b, X_2.16b
-	eor		TMP3.16b, Y_3.16b, X_3.16b
-
-	// y = ror(y, 3)
-	ushr		Y_0.\lanes, TMP0.\lanes, #3
-	ushr		Y_1.\lanes, TMP1.\lanes, #3
-	ushr		Y_2.\lanes, TMP2.\lanes, #3
-	ushr		Y_3.\lanes, TMP3.\lanes, #3
-	sli		Y_0.\lanes, TMP0.\lanes, #(\n - 3)
-	sli		Y_1.\lanes, TMP1.\lanes, #(\n - 3)
-	sli		Y_2.\lanes, TMP2.\lanes, #(\n - 3)
-	sli		Y_3.\lanes, TMP3.\lanes, #(\n - 3)
-
-	// x ^= k
-	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
-	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
-	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
-	eor		X_3.16b, X_3.16b, ROUND_KEY.16b
-
-	// x -= y
-	sub		X_0.\lanes, X_0.\lanes, Y_0.\lanes
-	sub		X_1.\lanes, X_1.\lanes, Y_1.\lanes
-	sub		X_2.\lanes, X_2.\lanes, Y_2.\lanes
-	sub		X_3.\lanes, X_3.\lanes, Y_3.\lanes
-
-	// x = rol(x, 8)
-	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
-	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
-	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
-	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
-.endm
-
-.macro _next_xts_tweak	next, cur, tmp, n
-.if \n == 64
-	/*
-	 * Calculate the next tweak by multiplying the current one by x,
-	 * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
-	 */
-	sshr		\tmp\().2d, \cur\().2d, #63
-	and		\tmp\().16b, \tmp\().16b, GFMUL_TABLE.16b
-	shl		\next\().2d, \cur\().2d, #1
-	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
-	eor		\next\().16b, \next\().16b, \tmp\().16b
-.else
-	/*
-	 * Calculate the next two tweaks by multiplying the current ones by x^2,
-	 * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
-	 */
-	ushr		\tmp\().2d, \cur\().2d, #62
-	shl		\next\().2d, \cur\().2d, #2
-	tbl		\tmp\().16b, {GFMUL_TABLE.16b}, \tmp\().16b
-	eor		\next\().16b, \next\().16b, \tmp\().16b
-.endif
-.endm
-
-/*
- * _speck_xts_crypt() - Speck-XTS encryption/decryption
- *
- * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
- * using Speck-XTS, specifically the variant with a block size of '2n' and round
- * count given by NROUNDS.  The expanded round keys are given in ROUND_KEYS, and
- * the current XTS tweak value is given in TWEAK.  It's assumed that NBYTES is a
- * nonzero multiple of 128.
- */
-.macro _speck_xts_crypt	n, lanes, decrypting
-
-	/*
-	 * If decrypting, modify the ROUND_KEYS parameter to point to the last
-	 * round key rather than the first, since for decryption the round keys
-	 * are used in reverse order.
-	 */
-.if \decrypting
-	mov		NROUNDS, NROUNDS	/* zero the high 32 bits */
-.if \n == 64
-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #3
-	sub		ROUND_KEYS, ROUND_KEYS, #8
-.else
-	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #2
-	sub		ROUND_KEYS, ROUND_KEYS, #4
-.endif
-.endif
-
-	// Load the index vector for tbl-based 8-bit rotates
-.if \decrypting
-	ldr		ROTATE_TABLE_Q, .Lrol\n\()_8_table
-.else
-	ldr		ROTATE_TABLE_Q, .Lror\n\()_8_table
-.endif
-
-	// One-time XTS preparation
-.if \n == 64
-	// Load first tweak
-	ld1		{TWEAKV0.16b}, [TWEAK]
-
-	// Load GF(2^128) multiplication table
-	ldr		GFMUL_TABLE_Q, .Lgf128mul_table
-.else
-	// Load first tweak
-	ld1		{TWEAKV0.8b}, [TWEAK]
-
-	// Load GF(2^64) multiplication table
-	ldr		GFMUL_TABLE_Q, .Lgf64mul_table
-
-	// Calculate second tweak, packing it together with the first
-	ushr		TMP0.2d, TWEAKV0.2d, #63
-	shl		TMP1.2d, TWEAKV0.2d, #1
-	tbl		TMP0.8b, {GFMUL_TABLE.16b}, TMP0.8b
-	eor		TMP0.8b, TMP0.8b, TMP1.8b
-	mov		TWEAKV0.d[1], TMP0.d[0]
-.endif
-
-.Lnext_128bytes_\@:
-
-	// Calculate XTS tweaks for next 128 bytes
-	_next_xts_tweak	TWEAKV1, TWEAKV0, TMP0, \n
-	_next_xts_tweak	TWEAKV2, TWEAKV1, TMP0, \n
-	_next_xts_tweak	TWEAKV3, TWEAKV2, TMP0, \n
-	_next_xts_tweak	TWEAKV4, TWEAKV3, TMP0, \n
-	_next_xts_tweak	TWEAKV5, TWEAKV4, TMP0, \n
-	_next_xts_tweak	TWEAKV6, TWEAKV5, TMP0, \n
-	_next_xts_tweak	TWEAKV7, TWEAKV6, TMP0, \n
-	_next_xts_tweak	TWEAKV_NEXT, TWEAKV7, TMP0, \n
-
-	// Load the next source blocks into {X,Y}[0-3]
-	ld1		{X_0.16b-Y_1.16b}, [SRC], #64
-	ld1		{X_2.16b-Y_3.16b}, [SRC], #64
-
-	// XOR the source blocks with their XTS tweaks
-	eor		TMP0.16b, X_0.16b, TWEAKV0.16b
-	eor		Y_0.16b,  Y_0.16b, TWEAKV1.16b
-	eor		TMP1.16b, X_1.16b, TWEAKV2.16b
-	eor		Y_1.16b,  Y_1.16b, TWEAKV3.16b
-	eor		TMP2.16b, X_2.16b, TWEAKV4.16b
-	eor		Y_2.16b,  Y_2.16b, TWEAKV5.16b
-	eor		TMP3.16b, X_3.16b, TWEAKV6.16b
-	eor		Y_3.16b,  Y_3.16b, TWEAKV7.16b
-
-	/*
-	 * De-interleave the 'x' and 'y' elements of each block, i.e. make it so
-	 * that the X[0-3] registers contain only the second halves of blocks,
-	 * and the Y[0-3] registers contain only the first halves of blocks.
-	 * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
-	 */
-	uzp2		X_0.\lanes, TMP0.\lanes, Y_0.\lanes
-	uzp1		Y_0.\lanes, TMP0.\lanes, Y_0.\lanes
-	uzp2		X_1.\lanes, TMP1.\lanes, Y_1.\lanes
-	uzp1		Y_1.\lanes, TMP1.\lanes, Y_1.\lanes
-	uzp2		X_2.\lanes, TMP2.\lanes, Y_2.\lanes
-	uzp1		Y_2.\lanes, TMP2.\lanes, Y_2.\lanes
-	uzp2		X_3.\lanes, TMP3.\lanes, Y_3.\lanes
-	uzp1		Y_3.\lanes, TMP3.\lanes, Y_3.\lanes
-
-	// Do the cipher rounds
-	mov		x6, ROUND_KEYS
-	mov		w7, NROUNDS
-.Lnext_round_\@:
-.if \decrypting
-	ld1r		{ROUND_KEY.\lanes}, [x6]
-	sub		x6, x6, #( \n / 8 )
-	_speck_unround_128bytes	\n, \lanes
-.else
-	ld1r		{ROUND_KEY.\lanes}, [x6], #( \n / 8 )
-	_speck_round_128bytes	\n, \lanes
-.endif
-	subs		w7, w7, #1
-	bne		.Lnext_round_\@
-
-	// Re-interleave the 'x' and 'y' elements of each block
-	zip1		TMP0.\lanes, Y_0.\lanes, X_0.\lanes
-	zip2		Y_0.\lanes,  Y_0.\lanes, X_0.\lanes
-	zip1		TMP1.\lanes, Y_1.\lanes, X_1.\lanes
-	zip2		Y_1.\lanes,  Y_1.\lanes, X_1.\lanes
-	zip1		TMP2.\lanes, Y_2.\lanes, X_2.\lanes
-	zip2		Y_2.\lanes,  Y_2.\lanes, X_2.\lanes
-	zip1		TMP3.\lanes, Y_3.\lanes, X_3.\lanes
-	zip2		Y_3.\lanes,  Y_3.\lanes, X_3.\lanes
-
-	// XOR the encrypted/decrypted blocks with the tweaks calculated earlier
-	eor		X_0.16b, TMP0.16b, TWEAKV0.16b
-	eor		Y_0.16b, Y_0.16b,  TWEAKV1.16b
-	eor		X_1.16b, TMP1.16b, TWEAKV2.16b
-	eor		Y_1.16b, Y_1.16b,  TWEAKV3.16b
-	eor		X_2.16b, TMP2.16b, TWEAKV4.16b
-	eor		Y_2.16b, Y_2.16b,  TWEAKV5.16b
-	eor		X_3.16b, TMP3.16b, TWEAKV6.16b
-	eor		Y_3.16b, Y_3.16b,  TWEAKV7.16b
-	mov		TWEAKV0.16b, TWEAKV_NEXT.16b
-
-	// Store the ciphertext in the destination buffer
-	st1		{X_0.16b-Y_1.16b}, [DST], #64
-	st1		{X_2.16b-Y_3.16b}, [DST], #64
-
-	// Continue if there are more 128-byte chunks remaining
-	subs		NBYTES, NBYTES, #128
-	bne		.Lnext_128bytes_\@
-
-	// Store the next tweak and return
-.if \n == 64
-	st1		{TWEAKV_NEXT.16b}, [TWEAK]
-.else
-	st1		{TWEAKV_NEXT.8b}, [TWEAK]
-.endif
-	ret
-.endm
-
-ENTRY(speck128_xts_encrypt_neon)
-	_speck_xts_crypt	n=64, lanes=2d, decrypting=0
-ENDPROC(speck128_xts_encrypt_neon)
-
-ENTRY(speck128_xts_decrypt_neon)
-	_speck_xts_crypt	n=64, lanes=2d, decrypting=1
-ENDPROC(speck128_xts_decrypt_neon)
-
-ENTRY(speck64_xts_encrypt_neon)
-	_speck_xts_crypt	n=32, lanes=4s, decrypting=0
-ENDPROC(speck64_xts_encrypt_neon)
-
-ENTRY(speck64_xts_decrypt_neon)
-	_speck_xts_crypt	n=32, lanes=4s, decrypting=1
-ENDPROC(speck64_xts_decrypt_neon)
diff --git a/arch/arm64/crypto/speck-neon-glue.c b/arch/arm64/crypto/speck-neon-glue.c
deleted file mode 100644
index 6e233aeb4ff4..000000000000
--- a/arch/arm64/crypto/speck-neon-glue.c
+++ /dev/null
@@ -1,282 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- * (64-bit version; based on the 32-bit version)
- *
- * Copyright (c) 2018 Google, Inc
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/algapi.h>
-#include <crypto/gf128mul.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/speck.h>
-#include <crypto/xts.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-/* The assembly functions only handle multiples of 128 bytes */
-#define SPECK_NEON_CHUNK_SIZE	128
-
-/* Speck128 */
-
-struct speck128_xts_tfm_ctx {
-	struct speck128_tfm_ctx main_key;
-	struct speck128_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
-					  void *dst, const void *src,
-					  unsigned int nbytes, void *tweak);
-
-asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
-					  void *dst, const void *src,
-					  unsigned int nbytes, void *tweak);
-
-typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
-				     u8 *, const u8 *);
-typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
-					  const void *, unsigned int, void *);
-
-static __always_inline int
-__speck128_xts_crypt(struct skcipher_request *req,
-		     speck128_crypt_one_t crypt_one,
-		     speck128_xts_crypt_many_t crypt_many)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	le128 tweak;
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, true);
-
-	crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-		u8 *dst = walk.dst.virt.addr;
-		const u8 *src = walk.src.virt.addr;
-
-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
-			unsigned int count;
-
-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
-			kernel_neon_begin();
-			(*crypt_many)(ctx->main_key.round_keys,
-				      ctx->main_key.nrounds,
-				      dst, src, count, &tweak);
-			kernel_neon_end();
-			dst += count;
-			src += count;
-			nbytes -= count;
-		}
-
-		/* Handle any remainder with generic code */
-		while (nbytes >= sizeof(tweak)) {
-			le128_xor((le128 *)dst, (const le128 *)src, &tweak);
-			(*crypt_one)(&ctx->main_key, dst, dst);
-			le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
-			gf128mul_x_ble(&tweak, &tweak);
-
-			dst += sizeof(tweak);
-			src += sizeof(tweak);
-			nbytes -= sizeof(tweak);
-		}
-		err = skcipher_walk_done(&walk, nbytes);
-	}
-
-	return err;
-}
-
-static int speck128_xts_encrypt(struct skcipher_request *req)
-{
-	return __speck128_xts_crypt(req, crypto_speck128_encrypt,
-				    speck128_xts_encrypt_neon);
-}
-
-static int speck128_xts_decrypt(struct skcipher_request *req)
-{
-	return __speck128_xts_crypt(req, crypto_speck128_decrypt,
-				    speck128_xts_decrypt_neon);
-}
-
-static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			       unsigned int keylen)
-{
-	struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err;
-
-	err = xts_verify_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	keylen /= 2;
-
-	err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
-	if (err)
-		return err;
-
-	return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-/* Speck64 */
-
-struct speck64_xts_tfm_ctx {
-	struct speck64_tfm_ctx main_key;
-	struct speck64_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
-					 void *dst, const void *src,
-					 unsigned int nbytes, void *tweak);
-
-asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
-					 void *dst, const void *src,
-					 unsigned int nbytes, void *tweak);
-
-typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
-				    u8 *, const u8 *);
-typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
-					 const void *, unsigned int, void *);
-
-static __always_inline int
-__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
-		    speck64_xts_crypt_many_t crypt_many)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	__le64 tweak;
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, true);
-
-	crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-		u8 *dst = walk.dst.virt.addr;
-		const u8 *src = walk.src.virt.addr;
-
-		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
-			unsigned int count;
-
-			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
-			kernel_neon_begin();
-			(*crypt_many)(ctx->main_key.round_keys,
-				      ctx->main_key.nrounds,
-				      dst, src, count, &tweak);
-			kernel_neon_end();
-			dst += count;
-			src += count;
-			nbytes -= count;
-		}
-
-		/* Handle any remainder with generic code */
-		while (nbytes >= sizeof(tweak)) {
-			*(__le64 *)dst = *(__le64 *)src ^ tweak;
-			(*crypt_one)(&ctx->main_key, dst, dst);
-			*(__le64 *)dst ^= tweak;
-			tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
-					    ((tweak & cpu_to_le64(1ULL << 63)) ?
-					     0x1B : 0));
-			dst += sizeof(tweak);
-			src += sizeof(tweak);
-			nbytes -= sizeof(tweak);
-		}
-		err = skcipher_walk_done(&walk, nbytes);
-	}
-
-	return err;
-}
-
-static int speck64_xts_encrypt(struct skcipher_request *req)
-{
-	return __speck64_xts_crypt(req, crypto_speck64_encrypt,
-				   speck64_xts_encrypt_neon);
-}
-
-static int speck64_xts_decrypt(struct skcipher_request *req)
-{
-	return __speck64_xts_crypt(req, crypto_speck64_decrypt,
-				   speck64_xts_decrypt_neon);
-}
-
-static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			      unsigned int keylen)
-{
-	struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err;
-
-	err = xts_verify_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	keylen /= 2;
-
-	err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
-	if (err)
-		return err;
-
-	return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-static struct skcipher_alg speck_algs[] = {
-	{
-		.base.cra_name		= "xts(speck128)",
-		.base.cra_driver_name	= "xts-speck128-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= SPECK128_BLOCK_SIZE,
-		.base.cra_ctxsize	= sizeof(struct speck128_xts_tfm_ctx),
-		.base.cra_alignmask	= 7,
-		.base.cra_module	= THIS_MODULE,
-		.min_keysize		= 2 * SPECK128_128_KEY_SIZE,
-		.max_keysize		= 2 * SPECK128_256_KEY_SIZE,
-		.ivsize			= SPECK128_BLOCK_SIZE,
-		.walksize		= SPECK_NEON_CHUNK_SIZE,
-		.setkey			= speck128_xts_setkey,
-		.encrypt		= speck128_xts_encrypt,
-		.decrypt		= speck128_xts_decrypt,
-	}, {
-		.base.cra_name		= "xts(speck64)",
-		.base.cra_driver_name	= "xts-speck64-neon",
-		.base.cra_priority	= 300,
-		.base.cra_blocksize	= SPECK64_BLOCK_SIZE,
-		.base.cra_ctxsize	= sizeof(struct speck64_xts_tfm_ctx),
-		.base.cra_alignmask	= 7,
-		.base.cra_module	= THIS_MODULE,
-		.min_keysize		= 2 * SPECK64_96_KEY_SIZE,
-		.max_keysize		= 2 * SPECK64_128_KEY_SIZE,
-		.ivsize			= SPECK64_BLOCK_SIZE,
-		.walksize		= SPECK_NEON_CHUNK_SIZE,
-		.setkey			= speck64_xts_setkey,
-		.encrypt		= speck64_xts_encrypt,
-		.decrypt		= speck64_xts_decrypt,
-	}
-};
-
-static int __init speck_neon_module_init(void)
-{
-	if (!(elf_hwcap & HWCAP_ASIMD))
-		return -ENODEV;
-	return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-static void __exit speck_neon_module_exit(void)
-{
-	crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-module_init(speck_neon_module_init);
-module_exit(speck_neon_module_exit);
-
-MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("xts(speck128)");
-MODULE_ALIAS_CRYPTO("xts-speck128-neon");
-MODULE_ALIAS_CRYPTO("xts(speck64)");
-MODULE_ALIAS_CRYPTO("xts-speck64-neon");
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index cee28a05ee98..93ce86d5dae1 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -25,6 +25,8 @@
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
 
+#include <asm-generic/compat.h>
+
 #define COMPAT_USER_HZ		100
 #ifdef __AARCH64EB__
 #define COMPAT_UTS_MACHINE	"armv8b\0\0"
@@ -32,10 +34,6 @@
 #define COMPAT_UTS_MACHINE	"armv8l\0\0"
 #endif
 
-typedef u32		compat_size_t;
-typedef s32		compat_ssize_t;
-typedef s32		compat_clock_t;
-typedef s32		compat_pid_t;
 typedef u16		__compat_uid_t;
 typedef u16		__compat_gid_t;
 typedef u16		__compat_uid16_t;
@@ -43,27 +41,13 @@ typedef u16		__compat_gid16_t;
 typedef u32		__compat_uid32_t;
 typedef u32		__compat_gid32_t;
 typedef u16		compat_mode_t;
-typedef u32		compat_ino_t;
 typedef u32		compat_dev_t;
-typedef s32		compat_off_t;
-typedef s64		compat_loff_t;
 typedef s32		compat_nlink_t;
 typedef u16		compat_ipc_pid_t;
-typedef s32		compat_daddr_t;
 typedef u32		compat_caddr_t;
 typedef __kernel_fsid_t	compat_fsid_t;
-typedef s32		compat_key_t;
-typedef s32		compat_timer_t;
-
-typedef s16		compat_short_t;
-typedef s32		compat_int_t;
-typedef s32		compat_long_t;
 typedef s64		compat_s64;
-typedef u16		compat_ushort_t;
-typedef u32		compat_uint_t;
-typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
-typedef u32		compat_uptr_t;
 
 struct compat_stat {
 #ifdef __AARCH64EB__
@@ -86,11 +70,11 @@ struct compat_stat {
 	compat_off_t	st_size;
 	compat_off_t	st_blksize;
 	compat_off_t	st_blocks;
-	compat_time_t	st_atime;
+	old_time32_t	st_atime;
 	compat_ulong_t	st_atime_nsec;
-	compat_time_t	st_mtime;
+	old_time32_t	st_mtime;
 	compat_ulong_t	st_mtime_nsec;
-	compat_time_t	st_ctime;
+	old_time32_t	st_ctime;
 	compat_ulong_t	st_ctime_nsec;
 	compat_ulong_t	__unused4[2];
 };
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 6db48d90ad63..7e2ec64aa414 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -537,6 +537,27 @@ static inline void arm64_set_ssbd_mitigation(bool state) {}
 #endif
 
 extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
+
+static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
+{
+	switch (parange) {
+	case 0: return 32;
+	case 1: return 36;
+	case 2: return 40;
+	case 3: return 42;
+	case 4: return 44;
+	case 5: return 48;
+	case 6: return 52;
+	/*
+	 * A future PE could use a value unknown to the kernel.
+	 * However, by the "D10.1.4 Principles of the ID scheme
+	 * for fields in ID registers", ARM DDI 0487C.a, any new
+	 * value is guaranteed to be higher than what we know already.
+	 * As a safe limit, we return the limit supported by the kernel.
+	 */
+	default: return CONFIG_ARM64_PA_BITS;
+	}
+}
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h
index 5a5fa47a6b18..3dd3d664c5c5 100644
--- a/arch/arm64/include/asm/device.h
+++ b/arch/arm64/include/asm/device.h
@@ -23,7 +23,6 @@ struct dev_archdata {
 #ifdef CONFIG_XEN
 	const struct dma_map_ops *dev_dma_ops;
 #endif
-	bool dma_coherent;
 };
 
 struct pdev_archdata {
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index b7847eb8a7bb..c41f3fb1446c 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -44,10 +44,13 @@ void arch_teardown_dma_ops(struct device *dev);
 #define arch_teardown_dma_ops	arch_teardown_dma_ops
 #endif
 
-/* do not use this function in a driver */
+/*
+ * Do not use this function in a driver, it is only provided for
+ * arch/arm/mm/xen.c, which is used by arm64 as well.
+ */
 static inline bool is_device_dma_coherent(struct device *dev)
 {
-	return dev->archdata.dma_coherent;
+	return dev->dma_coherent;
 }
 
 #endif	/* __KERNEL__ */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index b476bc46f0ab..6f602af5263c 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -107,6 +107,7 @@
 #define VTCR_EL2_RES1		(1 << 31)
 #define VTCR_EL2_HD		(1 << 22)
 #define VTCR_EL2_HA		(1 << 21)
+#define VTCR_EL2_PS_SHIFT	TCR_EL2_PS_SHIFT
 #define VTCR_EL2_PS_MASK	TCR_EL2_PS_MASK
 #define VTCR_EL2_TG0_MASK	TCR_TG0_MASK
 #define VTCR_EL2_TG0_4K		TCR_TG0_4K
@@ -120,63 +121,150 @@
 #define VTCR_EL2_IRGN0_WBWA	TCR_IRGN0_WBWA
 #define VTCR_EL2_SL0_SHIFT	6
 #define VTCR_EL2_SL0_MASK	(3 << VTCR_EL2_SL0_SHIFT)
-#define VTCR_EL2_SL0_LVL1	(1 << VTCR_EL2_SL0_SHIFT)
 #define VTCR_EL2_T0SZ_MASK	0x3f
-#define VTCR_EL2_T0SZ_40B	24
 #define VTCR_EL2_VS_SHIFT	19
 #define VTCR_EL2_VS_8BIT	(0 << VTCR_EL2_VS_SHIFT)
 #define VTCR_EL2_VS_16BIT	(1 << VTCR_EL2_VS_SHIFT)
 
+#define VTCR_EL2_T0SZ(x)	TCR_T0SZ(x)
+
 /*
  * We configure the Stage-2 page tables to always restrict the IPA space to be
  * 40 bits wide (T0SZ = 24).  Systems with a PARange smaller than 40 bits are
  * not known to exist and will break with this configuration.
  *
- * VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
- * (see hyp-init.S).
+ * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2().
  *
  * Note that when using 4K pages, we concatenate two first level page tables
  * together. With 16K pages, we concatenate 16 first level page tables.
  *
- * The magic numbers used for VTTBR_X in this patch can be found in Tables
- * D4-23 and D4-25 in ARM DDI 0487A.b.
  */
 
-#define VTCR_EL2_T0SZ_IPA	VTCR_EL2_T0SZ_40B
 #define VTCR_EL2_COMMON_BITS	(VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
 				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1)
 
-#ifdef CONFIG_ARM64_64K_PAGES
 /*
- * Stage2 translation configuration:
- * 64kB pages (TG0 = 1)
- * 2 level page tables (SL = 1)
+ * VTCR_EL2:SL0 indicates the entry level for Stage2 translation.
+ * Interestingly, it depends on the page size.
+ * See D.10.2.121, VTCR_EL2, in ARM DDI 0487C.a
+ *
+ *	-----------------------------------------
+ *	| Entry level		|  4K  | 16K/64K |
+ *	------------------------------------------
+ *	| Level: 0		|  2   |   -     |
+ *	------------------------------------------
+ *	| Level: 1		|  1   |   2     |
+ *	------------------------------------------
+ *	| Level: 2		|  0   |   1     |
+ *	------------------------------------------
+ *	| Level: 3		|  -   |   0     |
+ *	------------------------------------------
+ *
+ * The table roughly translates to :
+ *
+ *	SL0(PAGE_SIZE, Entry_level) = TGRAN_SL0_BASE - Entry_Level
+ *
+ * Where TGRAN_SL0_BASE is a magic number depending on the page size:
+ * 	TGRAN_SL0_BASE(4K) = 2
+ *	TGRAN_SL0_BASE(16K) = 3
+ *	TGRAN_SL0_BASE(64K) = 3
+ * provided we take care of ruling out the unsupported cases and
+ * Entry_Level = 4 - Number_of_levels.
+ *
  */
-#define VTCR_EL2_TGRAN_FLAGS		(VTCR_EL2_TG0_64K | VTCR_EL2_SL0_LVL1)
-#define VTTBR_X_TGRAN_MAGIC		38
+#ifdef CONFIG_ARM64_64K_PAGES
+
+#define VTCR_EL2_TGRAN			VTCR_EL2_TG0_64K
+#define VTCR_EL2_TGRAN_SL0_BASE		3UL
+
 #elif defined(CONFIG_ARM64_16K_PAGES)
-/*
- * Stage2 translation configuration:
- * 16kB pages (TG0 = 2)
- * 2 level page tables (SL = 1)
- */
-#define VTCR_EL2_TGRAN_FLAGS		(VTCR_EL2_TG0_16K | VTCR_EL2_SL0_LVL1)
-#define VTTBR_X_TGRAN_MAGIC		42
+
+#define VTCR_EL2_TGRAN			VTCR_EL2_TG0_16K
+#define VTCR_EL2_TGRAN_SL0_BASE		3UL
+
 #else	/* 4K */
-/*
- * Stage2 translation configuration:
- * 4kB pages (TG0 = 0)
- * 3 level page tables (SL = 1)
- */
-#define VTCR_EL2_TGRAN_FLAGS		(VTCR_EL2_TG0_4K | VTCR_EL2_SL0_LVL1)
-#define VTTBR_X_TGRAN_MAGIC		37
+
+#define VTCR_EL2_TGRAN			VTCR_EL2_TG0_4K
+#define VTCR_EL2_TGRAN_SL0_BASE		2UL
+
 #endif
 
-#define VTCR_EL2_FLAGS			(VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
-#define VTTBR_X				(VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
+#define VTCR_EL2_LVLS_TO_SL0(levels)	\
+	((VTCR_EL2_TGRAN_SL0_BASE - (4 - (levels))) << VTCR_EL2_SL0_SHIFT)
+#define VTCR_EL2_SL0_TO_LVLS(sl0)	\
+	((sl0) + 4 - VTCR_EL2_TGRAN_SL0_BASE)
+#define VTCR_EL2_LVLS(vtcr)		\
+	VTCR_EL2_SL0_TO_LVLS(((vtcr) & VTCR_EL2_SL0_MASK) >> VTCR_EL2_SL0_SHIFT)
+
+#define VTCR_EL2_FLAGS			(VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN)
+#define VTCR_EL2_IPA(vtcr)		(64 - ((vtcr) & VTCR_EL2_T0SZ_MASK))
+
+/*
+ * ARM VMSAv8-64 defines an algorithm for finding the translation table
+ * descriptors in section D4.2.8 in ARM DDI 0487C.a.
+ *
+ * The algorithm defines the expectations on the translation table
+ * addresses for each level, based on PAGE_SIZE, entry level
+ * and the translation table size (T0SZ). The variable "x" in the
+ * algorithm determines the alignment of a table base address at a given
+ * level and thus determines the alignment of VTTBR:BADDR for stage2
+ * page table entry level.
+ * Since the number of bits resolved at the entry level could vary
+ * depending on the T0SZ, the value of "x" is defined based on a
+ * Magic constant for a given PAGE_SIZE and Entry Level. The
+ * intermediate levels must be always aligned to the PAGE_SIZE (i.e,
+ * x = PAGE_SHIFT).
+ *
+ * The value of "x" for entry level is calculated as :
+ *    x = Magic_N - T0SZ
+ *
+ * where Magic_N is an integer depending on the page size and the entry
+ * level of the page table as below:
+ *
+ *	--------------------------------------------
+ *	| Entry level		|  4K    16K   64K |
+ *	--------------------------------------------
+ *	| Level: 0 (4 levels)	| 28   |  -  |  -  |
+ *	--------------------------------------------
+ *	| Level: 1 (3 levels)	| 37   | 31  | 25  |
+ *	--------------------------------------------
+ *	| Level: 2 (2 levels)	| 46   | 42  | 38  |
+ *	--------------------------------------------
+ *	| Level: 3 (1 level)	| -    | 53  | 51  |
+ *	--------------------------------------------
+ *
+ * We have a magic formula for the Magic_N below:
+ *
+ *  Magic_N(PAGE_SIZE, Level) = 64 - ((PAGE_SHIFT - 3) * Number_of_levels)
+ *
+ * where Number_of_levels = (4 - Level). We are only interested in the
+ * value for Entry_Level for the stage2 page table.
+ *
+ * So, given that T0SZ = (64 - IPA_SHIFT), we can compute 'x' as follows:
+ *
+ *	x = (64 - ((PAGE_SHIFT - 3) * Number_of_levels)) - (64 - IPA_SHIFT)
+ *	  = IPA_SHIFT - ((PAGE_SHIFT - 3) * Number of levels)
+ *
+ * Here is one way to explain the Magic Formula:
+ *
+ *  x = log2(Size_of_Entry_Level_Table)
+ *
+ * Since, we can resolve (PAGE_SHIFT - 3) bits at each level, and another
+ * PAGE_SHIFT bits in the PTE, we have :
+ *
+ *  Bits_Entry_level = IPA_SHIFT - ((PAGE_SHIFT - 3) * (n - 1) + PAGE_SHIFT)
+ *		     = IPA_SHIFT - (PAGE_SHIFT - 3) * n - 3
+ *  where n = number of levels, and since each pointer is 8bytes, we have:
+ *
+ *  x = Bits_Entry_Level + 3
+ *    = IPA_SHIFT - (PAGE_SHIFT - 3) * n
+ *
+ * The only constraint here is that, we have to find the number of page table
+ * levels for a given IPA size (which we do, see stage2_pt_levels())
+ */
+#define ARM64_VTTBR_X(ipa, levels)	((ipa) - ((levels) * (PAGE_SHIFT - 3)))
 
 #define VTTBR_CNP_BIT     (UL(1))
-#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
 #define VTTBR_VMID_SHIFT  (UL(48))
 #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
 
@@ -224,6 +312,13 @@
 
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
 #define HPFAR_MASK	(~UL(0xf))
+/*
+ * We have
+ *	PAR	[PA_Shift - 1	: 12] = PA	[PA_Shift - 1 : 12]
+ *	HPFAR	[PA_Shift - 9	: 4]  = FIPA	[PA_Shift - 1 : 12]
+ */
+#define PAR_TO_HPFAR(par)		\
+	(((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
 
 #define kvm_arm_exception_type	\
 	{0, "IRQ" }, 		\
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 102b5a5c47b6..aea01a09eb94 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -30,6 +30,7 @@
 #define ARM_EXCEPTION_IRQ	  0
 #define ARM_EXCEPTION_EL1_SERROR  1
 #define ARM_EXCEPTION_TRAP	  2
+#define ARM_EXCEPTION_IL	  3
 /* The hyp-stub will return this for any kvm_call_hyp() call */
 #define ARM_EXCEPTION_HYP_GONE	  HVC_STUB_ERR
 
@@ -72,8 +73,6 @@ extern void __vgic_v3_init_lrs(void);
 
 extern u32 __kvm_get_mdcr_el2(void);
 
-extern u32 __init_stage2_translation(void);
-
 /* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */
 #define __hyp_this_cpu_ptr(sym)						\
 	({								\
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2842bf149029..52fbc823ff8c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -53,7 +53,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
-int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
+int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
 void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
 
 struct kvm_arch {
@@ -61,11 +61,13 @@ struct kvm_arch {
 	u64    vmid_gen;
 	u32    vmid;
 
-	/* 1-level 2nd stage table, protected by kvm->mmu_lock */
+	/* stage2 entry level table */
 	pgd_t *pgd;
 
 	/* VTTBR value associated with above pgd and vmid */
 	u64    vttbr;
+	/* VTCR_EL2 value for this VM */
+	u64    vtcr;
 
 	/* The last vcpu id that ran on each physical CPU */
 	int __percpu *last_vcpu_ran;
@@ -451,13 +453,7 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
 int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
 
-static inline void __cpu_init_stage2(void)
-{
-	u32 parange = kvm_call_hyp(__init_stage2_translation);
-
-	WARN_ONCE(parange < 40,
-		  "PARange is %d bits, unsupported configuration!", parange);
-}
+static inline void __cpu_init_stage2(void) {}
 
 /* Guest/host FPSIMD coordination helpers */
 int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
@@ -520,8 +516,12 @@ static inline int kvm_arm_have_ssbd(void)
 void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
 void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
 
+void kvm_set_ipa_limit(void);
+
 #define __KVM_HAVE_ARCH_VM_ALLOC
 struct kvm *kvm_arch_alloc_vm(void);
 void kvm_arch_free_vm(struct kvm *kvm);
 
+int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 384c34397619..23aca66767f9 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -155,5 +155,15 @@ void deactivate_traps_vhe_put(void);
 u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
 void __noreturn __hyp_do_panic(unsigned long, ...);
 
+/*
+ * Must be called from hyp code running at EL2 with an updated VTTBR
+ * and interrupts disabled.
+ */
+static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm)
+{
+	write_sysreg(kvm->arch.vtcr, vtcr_el2);
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+}
+
 #endif /* __ARM64_KVM_HYP_H__ */
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 64337afbf124..658657367f2f 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -141,8 +141,16 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
  * We currently only support a 40bit IPA.
  */
 #define KVM_PHYS_SHIFT	(40)
-#define KVM_PHYS_SIZE	(1UL << KVM_PHYS_SHIFT)
-#define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1UL)
+
+#define kvm_phys_shift(kvm)		VTCR_EL2_IPA(kvm->arch.vtcr)
+#define kvm_phys_size(kvm)		(_AC(1, ULL) << kvm_phys_shift(kvm))
+#define kvm_phys_mask(kvm)		(kvm_phys_size(kvm) - _AC(1, ULL))
+
+static inline bool kvm_page_empty(void *ptr)
+{
+	struct page *ptr_page = virt_to_page(ptr);
+	return page_count(ptr_page) == 1;
+}
 
 #include <asm/stage2_pgtable.h>
 
@@ -238,12 +246,6 @@ static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
 	return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
 }
 
-static inline bool kvm_page_empty(void *ptr)
-{
-	struct page *ptr_page = virt_to_page(ptr);
-	return page_count(ptr_page) == 1;
-}
-
 #define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
 
 #ifdef __PAGETABLE_PMD_FOLDED
@@ -517,6 +519,30 @@ static inline int hyp_map_aux_data(void)
 
 #define kvm_phys_to_vttbr(addr)		phys_to_ttbr(addr)
 
+/*
+ * Get the magic number 'x' for VTTBR:BADDR of this KVM instance.
+ * With v8.2 LVA extensions, 'x' should be a minimum of 6 with
+ * 52bit IPS.
+ */
+static inline int arm64_vttbr_x(u32 ipa_shift, u32 levels)
+{
+	int x = ARM64_VTTBR_X(ipa_shift, levels);
+
+	return (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && x < 6) ? 6 : x;
+}
+
+static inline u64 vttbr_baddr_mask(u32 ipa_shift, u32 levels)
+{
+	unsigned int x = arm64_vttbr_x(ipa_shift, levels);
+
+	return GENMASK_ULL(PHYS_MASK_SHIFT - 1, x);
+}
+
+static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
+{
+	return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm));
+}
+
 static inline bool kvm_cpu_has_cnp(void)
 {
 	return system_supports_cnp();
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 6bc43889d11e..fce22c4b2f73 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -25,6 +25,9 @@
 #define CurrentEL_EL1		(1 << 2)
 #define CurrentEL_EL2		(2 << 2)
 
+/* Additional SPSR bits not exposed in the UABI */
+#define PSR_IL_BIT		(1 << 20)
+
 /* AArch32-specific ptrace requests */
 #define COMPAT_PTRACE_GETREGS		12
 #define COMPAT_PTRACE_SETREGS		13
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopmd.h b/arch/arm64/include/asm/stage2_pgtable-nopmd.h
deleted file mode 100644
index 2656a0fd05a6..000000000000
--- a/arch/arm64/include/asm/stage2_pgtable-nopmd.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2016 - ARM Ltd
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ARM64_S2_PGTABLE_NOPMD_H_
-#define __ARM64_S2_PGTABLE_NOPMD_H_
-
-#include <asm/stage2_pgtable-nopud.h>
-
-#define __S2_PGTABLE_PMD_FOLDED
-
-#define S2_PMD_SHIFT		S2_PUD_SHIFT
-#define S2_PTRS_PER_PMD		1
-#define S2_PMD_SIZE		(1UL << S2_PMD_SHIFT)
-#define S2_PMD_MASK		(~(S2_PMD_SIZE-1))
-
-#define stage2_pud_none(pud)			(0)
-#define stage2_pud_present(pud)			(1)
-#define stage2_pud_clear(pud)			do { } while (0)
-#define stage2_pud_populate(pud, pmd)		do { } while (0)
-#define stage2_pmd_offset(pud, address)		((pmd_t *)(pud))
-
-#define stage2_pmd_free(pmd)			do { } while (0)
-
-#define stage2_pmd_addr_end(addr, end)		(end)
-
-#define stage2_pud_huge(pud)			(0)
-#define stage2_pmd_table_empty(pmdp)		(0)
-
-#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopud.h b/arch/arm64/include/asm/stage2_pgtable-nopud.h
deleted file mode 100644
index 5ee87b54ebf3..000000000000
--- a/arch/arm64/include/asm/stage2_pgtable-nopud.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2016 - ARM Ltd
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ARM64_S2_PGTABLE_NOPUD_H_
-#define __ARM64_S2_PGTABLE_NOPUD_H_
-
-#define __S2_PGTABLE_PUD_FOLDED
-
-#define S2_PUD_SHIFT		S2_PGDIR_SHIFT
-#define S2_PTRS_PER_PUD		1
-#define S2_PUD_SIZE		(_AC(1, UL) << S2_PUD_SHIFT)
-#define S2_PUD_MASK		(~(S2_PUD_SIZE-1))
-
-#define stage2_pgd_none(pgd)			(0)
-#define stage2_pgd_present(pgd)			(1)
-#define stage2_pgd_clear(pgd)			do { } while (0)
-#define stage2_pgd_populate(pgd, pud)	do { } while (0)
-
-#define stage2_pud_offset(pgd, address)		((pud_t *)(pgd))
-
-#define stage2_pud_free(x)			do { } while (0)
-
-#define stage2_pud_addr_end(addr, end)		(end)
-#define stage2_pud_table_empty(pmdp)		(0)
-
-#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
index 8b68099348e5..d352f6df8d2c 100644
--- a/arch/arm64/include/asm/stage2_pgtable.h
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -19,9 +19,17 @@
 #ifndef __ARM64_S2_PGTABLE_H_
 #define __ARM64_S2_PGTABLE_H_
 
+#include <linux/hugetlb.h>
 #include <asm/pgtable.h>
 
 /*
+ * PGDIR_SHIFT determines the size a top-level page table entry can map
+ * and depends on the number of levels in the page table. Compute the
+ * PGDIR_SHIFT for a given number of levels.
+ */
+#define pt_levels_pgdir_shift(lvls)	ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (lvls))
+
+/*
  * The hardware supports concatenation of up to 16 tables at stage2 entry level
  * and we use the feature whenever possible.
  *
@@ -29,112 +37,208 @@
  * On arm64, the smallest PAGE_SIZE supported is 4k, which means
  *             (PAGE_SHIFT - 3) > 4 holds for all page sizes.
  * This implies, the total number of page table levels at stage2 expected
- * by the hardware is actually the number of levels required for (KVM_PHYS_SHIFT - 4)
+ * by the hardware is actually the number of levels required for (IPA_SHIFT - 4)
  * in normal translations(e.g, stage1), since we cannot have another level in
- * the range (KVM_PHYS_SHIFT, KVM_PHYS_SHIFT - 4).
+ * the range (IPA_SHIFT, IPA_SHIFT - 4).
  */
-#define STAGE2_PGTABLE_LEVELS		ARM64_HW_PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4)
+#define stage2_pgtable_levels(ipa)	ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
+#define kvm_stage2_levels(kvm)		VTCR_EL2_LVLS(kvm->arch.vtcr)
 
-/*
- * With all the supported VA_BITs and 40bit guest IPA, the following condition
- * is always true:
- *
- *       STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS
- *
- * We base our stage-2 page table walker helpers on this assumption and
- * fall back to using the host version of the helper wherever possible.
- * i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall back
- * to using the host version, since it is guaranteed it is not folded at host.
- *
- * If the condition breaks in the future, we can rearrange the host level
- * definitions and reuse them for stage2. Till then...
- */
-#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS
-#error "Unsupported combination of guest IPA and host VA_BITS."
-#endif
-
-/* S2_PGDIR_SHIFT is the size mapped by top-level stage2 entry */
-#define S2_PGDIR_SHIFT			ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS)
-#define S2_PGDIR_SIZE			(_AC(1, UL) << S2_PGDIR_SHIFT)
-#define S2_PGDIR_MASK			(~(S2_PGDIR_SIZE - 1))
+/* stage2_pgdir_shift() is the size mapped by top-level stage2 entry for the VM */
+#define stage2_pgdir_shift(kvm)		pt_levels_pgdir_shift(kvm_stage2_levels(kvm))
+#define stage2_pgdir_size(kvm)		(1ULL << stage2_pgdir_shift(kvm))
+#define stage2_pgdir_mask(kvm)		~(stage2_pgdir_size(kvm) - 1)
 
 /*
  * The number of PTRS across all concatenated stage2 tables given by the
  * number of bits resolved at the initial level.
+ * If we force more levels than necessary, we may have (stage2_pgdir_shift > IPA),
+ * in which case, stage2_pgd_ptrs will have one entry.
  */
-#define PTRS_PER_S2_PGD			(1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT))
+#define pgd_ptrs_shift(ipa, pgdir_shift)	\
+	((ipa) > (pgdir_shift) ? ((ipa) - (pgdir_shift)) : 0)
+#define __s2_pgd_ptrs(ipa, lvls)		\
+	(1 << (pgd_ptrs_shift((ipa), pt_levels_pgdir_shift(lvls))))
+#define __s2_pgd_size(ipa, lvls)	(__s2_pgd_ptrs((ipa), (lvls)) * sizeof(pgd_t))
+
+#define stage2_pgd_ptrs(kvm)		__s2_pgd_ptrs(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
+#define stage2_pgd_size(kvm)		__s2_pgd_size(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
 
 /*
- * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
- * levels in addition to the PGD.
+ * kvm_mmmu_cache_min_pages() is the number of pages required to install
+ * a stage-2 translation. We pre-allocate the entry level page table at
+ * the VM creation.
  */
-#define KVM_MMU_CACHE_MIN_PAGES		(STAGE2_PGTABLE_LEVELS - 1)
+#define kvm_mmu_cache_min_pages(kvm)	(kvm_stage2_levels(kvm) - 1)
 
-
-#if STAGE2_PGTABLE_LEVELS > 3
+/* Stage2 PUD definitions when the level is present */
+static inline bool kvm_stage2_has_pud(struct kvm *kvm)
+{
+	return (CONFIG_PGTABLE_LEVELS > 3) && (kvm_stage2_levels(kvm) > 3);
+}
 
 #define S2_PUD_SHIFT			ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
-#define S2_PUD_SIZE			(_AC(1, UL) << S2_PUD_SHIFT)
+#define S2_PUD_SIZE			(1UL << S2_PUD_SHIFT)
 #define S2_PUD_MASK			(~(S2_PUD_SIZE - 1))
 
-#define stage2_pgd_none(pgd)				pgd_none(pgd)
-#define stage2_pgd_clear(pgd)				pgd_clear(pgd)
-#define stage2_pgd_present(pgd)				pgd_present(pgd)
-#define stage2_pgd_populate(pgd, pud)			pgd_populate(NULL, pgd, pud)
-#define stage2_pud_offset(pgd, address)			pud_offset(pgd, address)
-#define stage2_pud_free(pud)				pud_free(NULL, pud)
+static inline bool stage2_pgd_none(struct kvm *kvm, pgd_t pgd)
+{
+	if (kvm_stage2_has_pud(kvm))
+		return pgd_none(pgd);
+	else
+		return 0;
+}
 
-#define stage2_pud_table_empty(pudp)			kvm_page_empty(pudp)
+static inline void stage2_pgd_clear(struct kvm *kvm, pgd_t *pgdp)
+{
+	if (kvm_stage2_has_pud(kvm))
+		pgd_clear(pgdp);
+}
 
-static inline phys_addr_t stage2_pud_addr_end(phys_addr_t addr, phys_addr_t end)
+static inline bool stage2_pgd_present(struct kvm *kvm, pgd_t pgd)
 {
-	phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
+	if (kvm_stage2_has_pud(kvm))
+		return pgd_present(pgd);
+	else
+		return 1;
+}
 
-	return (boundary - 1 < end - 1) ? boundary : end;
+static inline void stage2_pgd_populate(struct kvm *kvm, pgd_t *pgd, pud_t *pud)
+{
+	if (kvm_stage2_has_pud(kvm))
+		pgd_populate(NULL, pgd, pud);
+}
+
+static inline pud_t *stage2_pud_offset(struct kvm *kvm,
+				       pgd_t *pgd, unsigned long address)
+{
+	if (kvm_stage2_has_pud(kvm))
+		return pud_offset(pgd, address);
+	else
+		return (pud_t *)pgd;
 }
 
-#endif		/* STAGE2_PGTABLE_LEVELS > 3 */
+static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud)
+{
+	if (kvm_stage2_has_pud(kvm))
+		pud_free(NULL, pud);
+}
 
+static inline bool stage2_pud_table_empty(struct kvm *kvm, pud_t *pudp)
+{
+	if (kvm_stage2_has_pud(kvm))
+		return kvm_page_empty(pudp);
+	else
+		return false;
+}
 
-#if STAGE2_PGTABLE_LEVELS > 2
+static inline phys_addr_t
+stage2_pud_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+{
+	if (kvm_stage2_has_pud(kvm)) {
+		phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
+
+		return (boundary - 1 < end - 1) ? boundary : end;
+	} else {
+		return end;
+	}
+}
+
+/* Stage2 PMD definitions when the level is present */
+static inline bool kvm_stage2_has_pmd(struct kvm *kvm)
+{
+	return (CONFIG_PGTABLE_LEVELS > 2) && (kvm_stage2_levels(kvm) > 2);
+}
 
 #define S2_PMD_SHIFT			ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
-#define S2_PMD_SIZE			(_AC(1, UL) << S2_PMD_SHIFT)
+#define S2_PMD_SIZE			(1UL << S2_PMD_SHIFT)
 #define S2_PMD_MASK			(~(S2_PMD_SIZE - 1))
 
-#define stage2_pud_none(pud)				pud_none(pud)
-#define stage2_pud_clear(pud)				pud_clear(pud)
-#define stage2_pud_present(pud)				pud_present(pud)
-#define stage2_pud_populate(pud, pmd)			pud_populate(NULL, pud, pmd)
-#define stage2_pmd_offset(pud, address)			pmd_offset(pud, address)
-#define stage2_pmd_free(pmd)				pmd_free(NULL, pmd)
+static inline bool stage2_pud_none(struct kvm *kvm, pud_t pud)
+{
+	if (kvm_stage2_has_pmd(kvm))
+		return pud_none(pud);
+	else
+		return 0;
+}
+
+static inline void stage2_pud_clear(struct kvm *kvm, pud_t *pud)
+{
+	if (kvm_stage2_has_pmd(kvm))
+		pud_clear(pud);
+}
 
-#define stage2_pud_huge(pud)				pud_huge(pud)
-#define stage2_pmd_table_empty(pmdp)			kvm_page_empty(pmdp)
+static inline bool stage2_pud_present(struct kvm *kvm, pud_t pud)
+{
+	if (kvm_stage2_has_pmd(kvm))
+		return pud_present(pud);
+	else
+		return 1;
+}
 
-static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
+static inline void stage2_pud_populate(struct kvm *kvm, pud_t *pud, pmd_t *pmd)
 {
-	phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
+	if (kvm_stage2_has_pmd(kvm))
+		pud_populate(NULL, pud, pmd);
+}
 
-	return (boundary - 1 < end - 1) ? boundary : end;
+static inline pmd_t *stage2_pmd_offset(struct kvm *kvm,
+				       pud_t *pud, unsigned long address)
+{
+	if (kvm_stage2_has_pmd(kvm))
+		return pmd_offset(pud, address);
+	else
+		return (pmd_t *)pud;
 }
 
-#endif		/* STAGE2_PGTABLE_LEVELS > 2 */
+static inline void stage2_pmd_free(struct kvm *kvm, pmd_t *pmd)
+{
+	if (kvm_stage2_has_pmd(kvm))
+		pmd_free(NULL, pmd);
+}
+
+static inline bool stage2_pud_huge(struct kvm *kvm, pud_t pud)
+{
+	if (kvm_stage2_has_pmd(kvm))
+		return pud_huge(pud);
+	else
+		return 0;
+}
+
+static inline bool stage2_pmd_table_empty(struct kvm *kvm, pmd_t *pmdp)
+{
+	if (kvm_stage2_has_pmd(kvm))
+		return kvm_page_empty(pmdp);
+	else
+		return 0;
+}
 
-#define stage2_pte_table_empty(ptep)			kvm_page_empty(ptep)
+static inline phys_addr_t
+stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+{
+	if (kvm_stage2_has_pmd(kvm)) {
+		phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
 
-#if STAGE2_PGTABLE_LEVELS == 2
-#include <asm/stage2_pgtable-nopmd.h>
-#elif STAGE2_PGTABLE_LEVELS == 3
-#include <asm/stage2_pgtable-nopud.h>
-#endif
+		return (boundary - 1 < end - 1) ? boundary : end;
+	} else {
+		return end;
+	}
+}
 
+static inline bool stage2_pte_table_empty(struct kvm *kvm, pte_t *ptep)
+{
+	return kvm_page_empty(ptep);
+}
 
-#define stage2_pgd_index(addr)				(((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+static inline unsigned long stage2_pgd_index(struct kvm *kvm, phys_addr_t addr)
+{
+	return (((addr) >> stage2_pgdir_shift(kvm)) & (stage2_pgd_ptrs(kvm) - 1));
+}
 
-static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
+static inline phys_addr_t
+stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 {
-	phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK;
+	phys_addr_t boundary = (addr + stage2_pgdir_size(kvm)) & stage2_pgdir_mask(kvm);
 
 	return (boundary - 1 < end - 1) ? boundary : end;
 }
diff --git a/arch/arm64/include/asm/stat.h b/arch/arm64/include/asm/stat.h
index eab738019707..397c6ccd04e7 100644
--- a/arch/arm64/include/asm/stat.h
+++ b/arch/arm64/include/asm/stat.h
@@ -20,7 +20,7 @@
 
 #ifdef CONFIG_COMPAT
 
-#include <linux/compat_time.h>
+#include <linux/time.h>
 #include <asm/compat.h>
 
 /*
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index e0d0f5b856e7..b13ca091f833 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -18,11 +18,11 @@
 #define __ARCH_WANT_SYS_GETHOSTNAME
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
+#define __ARCH_WANT_SYS_UTIME32
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 
diff --git a/arch/arm64/include/uapi/asm/unistd.h b/arch/arm64/include/uapi/asm/unistd.h
index 5072cbd15c82..dae1584cf017 100644
--- a/arch/arm64/include/uapi/asm/unistd.h
+++ b/arch/arm64/include/uapi/asm/unistd.h
@@ -16,5 +16,6 @@
  */
 
 #define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_NEW_STAT
 
 #include <asm-generic/unistd.h>
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 25fcd22a4bb2..96b8f2f51ab2 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -602,7 +602,7 @@ static void __init of_parse_and_init_cpus(void)
 {
 	struct device_node *dn;
 
-	for_each_node_by_type(dn, "cpu") {
+	for_each_of_cpu_node(dn) {
 		u64 hwid = of_get_cpu_mpidr(dn);
 
 		if (hwid == INVALID_HWID)
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index a6c9fbaeaefc..dd436a50fce7 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -391,15 +391,15 @@ int __attribute_const__ kvm_target_cpu(void)
 			return KVM_ARM_TARGET_CORTEX_A53;
 		case ARM_CPU_PART_CORTEX_A57:
 			return KVM_ARM_TARGET_CORTEX_A57;
-		};
+		}
 		break;
 	case ARM_CPU_IMP_APM:
 		switch (part_number) {
 		case APM_CPU_PART_POTENZA:
 			return KVM_ARM_TARGET_XGENE_POTENZA;
-		};
+		}
 		break;
-	};
+	}
 
 	/* Return a default generic target */
 	return KVM_ARM_TARGET_GENERIC_V8;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index e5e741bfffe1..35a81bebd02b 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -284,6 +284,13 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		 */
 		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		return 0;
+	case ARM_EXCEPTION_IL:
+		/*
+		 * We attempted an illegal exception return.  Guest state must
+		 * have been corrupted somehow.  Give up.
+		 */
+		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+		return -EINVAL;
 	default:
 		kvm_pr_unimpl("Unsupported exception type: %d",
 			      exception_index);
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 2fabc2dc1966..82d1904328ad 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -19,7 +19,6 @@ obj-$(CONFIG_KVM_ARM_HOST) += switch.o
 obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
 obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
 obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
-obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
 
 # KVM code is run at a different exception code with a different map, so
 # compiler instrumentation that inserts callbacks or checks into the code may
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 24b4fbafe3e4..b1f14f736962 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -162,6 +162,20 @@ el1_error:
 	mov	x0, #ARM_EXCEPTION_EL1_SERROR
 	b	__guest_exit
 
+el2_sync:
+	/* Check for illegal exception return, otherwise panic */
+	mrs	x0, spsr_el2
+
+	/* if this was something else, then panic! */
+	tst	x0, #PSR_IL_BIT
+	b.eq	__hyp_panic
+
+	/* Let's attempt a recovery from the illegal exception return */
+	get_vcpu_ptr	x1, x0
+	mov	x0, #ARM_EXCEPTION_IL
+	b	__guest_exit
+
+
 el2_error:
 	ldp	x0, x1, [sp], #16
 
@@ -240,7 +254,7 @@ ENTRY(__kvm_hyp_vector)
 	invalid_vect	el2t_fiq_invalid	// FIQ EL2t
 	invalid_vect	el2t_error_invalid	// Error EL2t
 
-	invalid_vect	el2h_sync_invalid	// Synchronous EL2h
+	valid_vect	el2_sync		// Synchronous EL2h
 	invalid_vect	el2h_irq_invalid	// IRQ EL2h
 	invalid_vect	el2h_fiq_invalid	// FIQ EL2h
 	valid_vect	el2_error		// Error EL2h
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
deleted file mode 100644
index 603e1ee83e89..000000000000
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (C) 2016 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/types.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_hyp.h>
-
-u32 __hyp_text __init_stage2_translation(void)
-{
-	u64 val = VTCR_EL2_FLAGS;
-	u64 parange;
-	u64 tmp;
-
-	/*
-	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS
-	 * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while
-	 * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
-	 */
-	parange = read_sysreg(id_aa64mmfr0_el1) & 7;
-	if (parange > ID_AA64MMFR0_PARANGE_MAX)
-		parange = ID_AA64MMFR0_PARANGE_MAX;
-	val |= parange << 16;
-
-	/* Compute the actual PARange... */
-	switch (parange) {
-	case 0:
-		parange = 32;
-		break;
-	case 1:
-		parange = 36;
-		break;
-	case 2:
-		parange = 40;
-		break;
-	case 3:
-		parange = 42;
-		break;
-	case 4:
-		parange = 44;
-		break;
-	case 5:
-	default:
-		parange = 48;
-		break;
-	}
-
-	/*
-	 * ... and clamp it to 40 bits, unless we have some braindead
-	 * HW that implements less than that. In all cases, we'll
-	 * return that value for the rest of the kernel to decide what
-	 * to do.
-	 */
-	val |= 64 - (parange > 40 ? 40 : parange);
-
-	/*
-	 * Check the availability of Hardware Access Flag / Dirty Bit
-	 * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2.
-	 */
-	tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
-	if (tmp)
-		val |= VTCR_EL2_HA;
-
-	/*
-	 * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
-	 * bit in VTCR_EL2.
-	 */
-	tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_VMIDBITS_SHIFT) & 0xf;
-	val |= (tmp == ID_AA64MMFR1_VMIDBITS_16) ?
-			VTCR_EL2_VS_16BIT :
-			VTCR_EL2_VS_8BIT;
-
-	write_sysreg(val, vtcr_el2);
-
-	return parange;
-}
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index ca46153d7915..7cc175c88a37 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -198,7 +198,7 @@ void deactivate_traps_vhe_put(void)
 
 static void __hyp_text __activate_vm(struct kvm *kvm)
 {
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	__load_guest_stage2(kvm);
 }
 
 static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
@@ -263,7 +263,7 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
 		return false; /* Translation failed, back to guest */
 
 	/* Convert PAR to HPFAR format */
-	*hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4;
+	*hpfar = PAR_TO_HPFAR(tmp);
 	return true;
 }
 
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 76d016b446b2..68d6f7c3b237 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -152,8 +152,25 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
 static void __hyp_text
 __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
 {
+	u64 pstate = ctxt->gp_regs.regs.pstate;
+	u64 mode = pstate & PSR_AA32_MODE_MASK;
+
+	/*
+	 * Safety check to ensure we're setting the CPU up to enter the guest
+	 * in a less privileged mode.
+	 *
+	 * If we are attempting a return to EL2 or higher in AArch64 state,
+	 * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that
+	 * we'll take an illegal exception state exception immediately after
+	 * the ERET to the guest.  Attempts to return to AArch32 Hyp will
+	 * result in an illegal exception return because EL2's execution state
+	 * is determined by SCR_EL3.RW.
+	 */
+	if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
+		pstate = PSR_MODE_EL2h | PSR_IL_BIT;
+
 	write_sysreg_el2(ctxt->gp_regs.regs.pc,		elr);
-	write_sysreg_el2(ctxt->gp_regs.regs.pstate,	spsr);
+	write_sysreg_el2(pstate,			spsr);
 
 	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
 		write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 131c7772703c..4dbd9c69a96d 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -30,7 +30,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
 	 * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
 	 * let's flip TGE before executing the TLB operation.
 	 */
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	__load_guest_stage2(kvm);
 	val = read_sysreg(hcr_el2);
 	val &= ~HCR_TGE;
 	write_sysreg(val, hcr_el2);
@@ -39,7 +39,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
 
 static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
 {
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	__load_guest_stage2(kvm);
 	isb();
 }
 
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index e37c78bbe1ca..b72a3dd56204 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -26,6 +26,7 @@
 
 #include <kvm/arm_arch_timer.h>
 
+#include <asm/cpufeature.h>
 #include <asm/cputype.h>
 #include <asm/ptrace.h>
 #include <asm/kvm_arm.h>
@@ -33,6 +34,9 @@
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_mmu.h>
 
+/* Maximum phys_shift supported for any VM on this host */
+static u32 kvm_ipa_limit;
+
 /*
  * ARMv8 Reset Values
  */
@@ -55,12 +59,12 @@ static bool cpu_has_32bit_el1(void)
 }
 
 /**
- * kvm_arch_dev_ioctl_check_extension
+ * kvm_arch_vm_ioctl_check_extension
  *
  * We currently assume that the number of HW registers is uniform
  * across all CPUs (see cpuinfo_sanity_check).
  */
-int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
+int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 
@@ -82,9 +86,11 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
 		break;
 	case KVM_CAP_SET_GUEST_DEBUG:
 	case KVM_CAP_VCPU_ATTRIBUTES:
-	case KVM_CAP_VCPU_EVENTS:
 		r = 1;
 		break;
+	case KVM_CAP_ARM_VM_IPA_SIZE:
+		r = kvm_ipa_limit;
+		break;
 	default:
 		r = 0;
 	}
@@ -133,3 +139,99 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	/* Reset timer */
 	return kvm_timer_vcpu_reset(vcpu);
 }
+
+void kvm_set_ipa_limit(void)
+{
+	unsigned int ipa_max, pa_max, va_max, parange;
+
+	parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7;
+	pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
+
+	/* Clamp the IPA limit to the PA size supported by the kernel */
+	ipa_max = (pa_max > PHYS_MASK_SHIFT) ? PHYS_MASK_SHIFT : pa_max;
+	/*
+	 * Since our stage2 table is dependent on the stage1 page table code,
+	 * we must always honor the following condition:
+	 *
+	 *  Number of levels in Stage1 >= Number of levels in Stage2.
+	 *
+	 * So clamp the ipa limit further down to limit the number of levels.
+	 * Since we can concatenate upto 16 tables at entry level, we could
+	 * go upto 4bits above the maximum VA addressible with the current
+	 * number of levels.
+	 */
+	va_max = PGDIR_SHIFT + PAGE_SHIFT - 3;
+	va_max += 4;
+
+	if (va_max < ipa_max)
+		ipa_max = va_max;
+
+	/*
+	 * If the final limit is lower than the real physical address
+	 * limit of the CPUs, report the reason.
+	 */
+	if (ipa_max < pa_max)
+		pr_info("kvm: Limiting the IPA size due to kernel %s Address limit\n",
+			(va_max < pa_max) ? "Virtual" : "Physical");
+
+	WARN(ipa_max < KVM_PHYS_SHIFT,
+	     "KVM IPA limit (%d bit) is smaller than default size\n", ipa_max);
+	kvm_ipa_limit = ipa_max;
+	kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit);
+}
+
+/*
+ * Configure the VTCR_EL2 for this VM. The VTCR value is common
+ * across all the physical CPUs on the system. We use system wide
+ * sanitised values to fill in different fields, except for Hardware
+ * Management of Access Flags. HA Flag is set unconditionally on
+ * all CPUs, as it is safe to run with or without the feature and
+ * the bit is RES0 on CPUs that don't support it.
+ */
+int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
+{
+	u64 vtcr = VTCR_EL2_FLAGS;
+	u32 parange, phys_shift;
+	u8 lvls;
+
+	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
+		return -EINVAL;
+
+	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
+	if (phys_shift) {
+		if (phys_shift > kvm_ipa_limit ||
+		    phys_shift < 32)
+			return -EINVAL;
+	} else {
+		phys_shift = KVM_PHYS_SHIFT;
+	}
+
+	parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
+	if (parange > ID_AA64MMFR0_PARANGE_MAX)
+		parange = ID_AA64MMFR0_PARANGE_MAX;
+	vtcr |= parange << VTCR_EL2_PS_SHIFT;
+
+	vtcr |= VTCR_EL2_T0SZ(phys_shift);
+	/*
+	 * Use a minimum 2 level page table to prevent splitting
+	 * host PMD huge pages at stage2.
+	 */
+	lvls = stage2_pgtable_levels(phys_shift);
+	if (lvls < 2)
+		lvls = 2;
+	vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
+
+	/*
+	 * Enable the Hardware Access Flag management, unconditionally
+	 * on all CPUs. The features is RES0 on CPUs without the support
+	 * and must be ignored by the CPUs.
+	 */
+	vtcr |= VTCR_EL2_HA;
+
+	/* Set the vmid bits */
+	vtcr |= (kvm_get_vmid_bits() == 16) ?
+		VTCR_EL2_VS_16BIT :
+		VTCR_EL2_VS_8BIT;
+	kvm->arch.vtcr = vtcr;
+	return 0;
+}
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 072c51fb07d7..d190612b8f33 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -25,6 +25,7 @@
 #include <linux/slab.h>
 #include <linux/genalloc.h>
 #include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/dma-contiguous.h>
 #include <linux/vmalloc.h>
 #include <linux/swiotlb.h>
@@ -32,16 +33,6 @@
 
 #include <asm/cacheflush.h>
 
-static int swiotlb __ro_after_init;
-
-static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
-				 bool coherent)
-{
-	if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
-		return pgprot_writecombine(prot);
-	return prot;
-}
-
 static struct gen_pool *atomic_pool __ro_after_init;
 
 #define DEFAULT_DMA_COHERENT_POOL_SIZE  SZ_256K
@@ -91,18 +82,16 @@ static int __free_from_pool(void *start, size_t size)
 	return 1;
 }
 
-static void *__dma_alloc(struct device *dev, size_t size,
-			 dma_addr_t *dma_handle, gfp_t flags,
-			 unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		gfp_t flags, unsigned long attrs)
 {
 	struct page *page;
 	void *ptr, *coherent_ptr;
-	bool coherent = is_device_dma_coherent(dev);
-	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);
+	pgprot_t prot = pgprot_writecombine(PAGE_KERNEL);
 
 	size = PAGE_ALIGN(size);
 
-	if (!coherent && !gfpflags_allow_blocking(flags)) {
+	if (!gfpflags_allow_blocking(flags)) {
 		struct page *page = NULL;
 		void *addr = __alloc_from_pool(size, &page, flags);
 
@@ -112,14 +101,10 @@ static void *__dma_alloc(struct device *dev, size_t size,
 		return addr;
 	}
 
-	ptr = swiotlb_alloc(dev, size, dma_handle, flags, attrs);
+	ptr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
 	if (!ptr)
 		goto no_mem;
 
-	/* no need for non-cacheable mapping if coherent */
-	if (coherent)
-		return ptr;
-
 	/* remove any dirty cache lines on the kernel alias */
 	__dma_flush_area(ptr, size);
 
@@ -133,130 +118,57 @@ static void *__dma_alloc(struct device *dev, size_t size,
 	return coherent_ptr;
 
 no_map:
-	swiotlb_free(dev, size, ptr, *dma_handle, attrs);
+	dma_direct_free_pages(dev, size, ptr, *dma_handle, attrs);
 no_mem:
 	return NULL;
 }
 
-static void __dma_free(struct device *dev, size_t size,
-		       void *vaddr, dma_addr_t dma_handle,
-		       unsigned long attrs)
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
+		dma_addr_t dma_handle, unsigned long attrs)
 {
-	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
+	if (!__free_from_pool(vaddr, PAGE_ALIGN(size))) {
+		void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
 
-	size = PAGE_ALIGN(size);
-
-	if (!is_device_dma_coherent(dev)) {
-		if (__free_from_pool(vaddr, size))
-			return;
 		vunmap(vaddr);
+		dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
 	}
-	swiotlb_free(dev, size, swiotlb_addr, dma_handle, attrs);
 }
 
-static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
-				     unsigned long offset, size_t size,
-				     enum dma_data_direction dir,
-				     unsigned long attrs)
+long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
+		dma_addr_t dma_addr)
 {
-	dma_addr_t dev_addr;
-
-	dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
-	if (!is_device_dma_coherent(dev) &&
-	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
-		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
-
-	return dev_addr;
-}
-
-
-static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
-				 size_t size, enum dma_data_direction dir,
-				 unsigned long attrs)
-{
-	if (!is_device_dma_coherent(dev) &&
-	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
-		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
-	swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
+	return __phys_to_pfn(dma_to_phys(dev, dma_addr));
 }
 
-static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
-				  int nelems, enum dma_data_direction dir,
-				  unsigned long attrs)
+pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
+		unsigned long attrs)
 {
-	struct scatterlist *sg;
-	int i, ret;
-
-	ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
-	if (!is_device_dma_coherent(dev) &&
-	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
-		for_each_sg(sgl, sg, ret, i)
-			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
-				       sg->length, dir);
-
-	return ret;
-}
-
-static void __swiotlb_unmap_sg_attrs(struct device *dev,
-				     struct scatterlist *sgl, int nelems,
-				     enum dma_data_direction dir,
-				     unsigned long attrs)
-{
-	struct scatterlist *sg;
-	int i;
-
-	if (!is_device_dma_coherent(dev) &&
-	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
-		for_each_sg(sgl, sg, nelems, i)
-			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
-					 sg->length, dir);
-	swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
+	if (!dev_is_dma_coherent(dev) || (attrs & DMA_ATTR_WRITE_COMBINE))
+		return pgprot_writecombine(prot);
+	return prot;
 }
 
-static void __swiotlb_sync_single_for_cpu(struct device *dev,
-					  dma_addr_t dev_addr, size_t size,
-					  enum dma_data_direction dir)
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
 {
-	if (!is_device_dma_coherent(dev))
-		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
-	swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
+	__dma_map_area(phys_to_virt(paddr), size, dir);
 }
 
-static void __swiotlb_sync_single_for_device(struct device *dev,
-					     dma_addr_t dev_addr, size_t size,
-					     enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
 {
-	swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
-	if (!is_device_dma_coherent(dev))
-		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+	__dma_unmap_area(phys_to_virt(paddr), size, dir);
 }
 
-static void __swiotlb_sync_sg_for_cpu(struct device *dev,
-				      struct scatterlist *sgl, int nelems,
-				      enum dma_data_direction dir)
+static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
+				      struct page *page, size_t size)
 {
-	struct scatterlist *sg;
-	int i;
-
-	if (!is_device_dma_coherent(dev))
-		for_each_sg(sgl, sg, nelems, i)
-			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
-					 sg->length, dir);
-	swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
-}
+	int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
 
-static void __swiotlb_sync_sg_for_device(struct device *dev,
-					 struct scatterlist *sgl, int nelems,
-					 enum dma_data_direction dir)
-{
-	struct scatterlist *sg;
-	int i;
+	if (!ret)
+		sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
 
-	swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
-	if (!is_device_dma_coherent(dev))
-		for_each_sg(sgl, sg, nelems, i)
-			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
-				       sg->length, dir);
+	return ret;
 }
 
 static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
@@ -277,74 +189,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
 	return ret;
 }
 
-static int __swiotlb_mmap(struct device *dev,
-			  struct vm_area_struct *vma,
-			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
-			  unsigned long attrs)
-{
-	int ret;
-	unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
-
-	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
-					     is_device_dma_coherent(dev));
-
-	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
-		return ret;
-
-	return __swiotlb_mmap_pfn(vma, pfn, size);
-}
-
-static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
-				      struct page *page, size_t size)
-{
-	int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
-
-	if (!ret)
-		sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
-
-	return ret;
-}
-
-static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
-				 void *cpu_addr, dma_addr_t handle, size_t size,
-				 unsigned long attrs)
-{
-	struct page *page = phys_to_page(dma_to_phys(dev, handle));
-
-	return __swiotlb_get_sgtable_page(sgt, page, size);
-}
-
-static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
-{
-	if (swiotlb)
-		return swiotlb_dma_supported(hwdev, mask);
-	return 1;
-}
-
-static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr)
-{
-	if (swiotlb)
-		return swiotlb_dma_mapping_error(hwdev, addr);
-	return 0;
-}
-
-static const struct dma_map_ops arm64_swiotlb_dma_ops = {
-	.alloc = __dma_alloc,
-	.free = __dma_free,
-	.mmap = __swiotlb_mmap,
-	.get_sgtable = __swiotlb_get_sgtable,
-	.map_page = __swiotlb_map_page,
-	.unmap_page = __swiotlb_unmap_page,
-	.map_sg = __swiotlb_map_sg_attrs,
-	.unmap_sg = __swiotlb_unmap_sg_attrs,
-	.sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
-	.sync_single_for_device = __swiotlb_sync_single_for_device,
-	.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
-	.sync_sg_for_device = __swiotlb_sync_sg_for_device,
-	.dma_supported = __swiotlb_dma_supported,
-	.mapping_error = __swiotlb_dma_mapping_error,
-};
-
 static int __init atomic_pool_init(void)
 {
 	pgprot_t prot = __pgprot(PROT_NORMAL_NC);
@@ -500,10 +344,6 @@ EXPORT_SYMBOL(dummy_dma_ops);
 
 static int __init arm64_dma_init(void)
 {
-	if (swiotlb_force == SWIOTLB_FORCE ||
-	    max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
-		swiotlb = 1;
-
 	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
 		   TAINT_CPU_OUT_OF_SPEC,
 		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
@@ -528,7 +368,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 				 dma_addr_t *handle, gfp_t gfp,
 				 unsigned long attrs)
 {
-	bool coherent = is_device_dma_coherent(dev);
+	bool coherent = dev_is_dma_coherent(dev);
 	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
 	size_t iosize = size;
 	void *addr;
@@ -569,7 +409,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 			addr = NULL;
 		}
 	} else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
-		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
+		pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
 		struct page *page;
 
 		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
@@ -596,7 +436,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 						    size >> PAGE_SHIFT);
 		}
 	} else {
-		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
+		pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
 		struct page **pages;
 
 		pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
@@ -658,8 +498,7 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 	struct vm_struct *area;
 	int ret;
 
-	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
-					     is_device_dma_coherent(dev));
+	vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs);
 
 	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
@@ -709,11 +548,11 @@ static void __iommu_sync_single_for_cpu(struct device *dev,
 {
 	phys_addr_t phys;
 
-	if (is_device_dma_coherent(dev))
+	if (dev_is_dma_coherent(dev))
 		return;
 
-	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
-	__dma_unmap_area(phys_to_virt(phys), size, dir);
+	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
+	arch_sync_dma_for_cpu(dev, phys, size, dir);
 }
 
 static void __iommu_sync_single_for_device(struct device *dev,
@@ -722,11 +561,11 @@ static void __iommu_sync_single_for_device(struct device *dev,
 {
 	phys_addr_t phys;
 
-	if (is_device_dma_coherent(dev))
+	if (dev_is_dma_coherent(dev))
 		return;
 
-	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
-	__dma_map_area(phys_to_virt(phys), size, dir);
+	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
+	arch_sync_dma_for_device(dev, phys, size, dir);
 }
 
 static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
@@ -734,13 +573,13 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
 				   enum dma_data_direction dir,
 				   unsigned long attrs)
 {
-	bool coherent = is_device_dma_coherent(dev);
+	bool coherent = dev_is_dma_coherent(dev);
 	int prot = dma_info_to_prot(dir, coherent, attrs);
 	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
 
-	if (!iommu_dma_mapping_error(dev, dev_addr) &&
-	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
-		__iommu_sync_single_for_device(dev, dev_addr, size, dir);
+	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+	    !iommu_dma_mapping_error(dev, dev_addr))
+		__dma_map_area(page_address(page) + offset, size, dir);
 
 	return dev_addr;
 }
@@ -762,11 +601,11 @@ static void __iommu_sync_sg_for_cpu(struct device *dev,
 	struct scatterlist *sg;
 	int i;
 
-	if (is_device_dma_coherent(dev))
+	if (dev_is_dma_coherent(dev))
 		return;
 
 	for_each_sg(sgl, sg, nelems, i)
-		__dma_unmap_area(sg_virt(sg), sg->length, dir);
+		arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
 }
 
 static void __iommu_sync_sg_for_device(struct device *dev,
@@ -776,18 +615,18 @@ static void __iommu_sync_sg_for_device(struct device *dev,
 	struct scatterlist *sg;
 	int i;
 
-	if (is_device_dma_coherent(dev))
+	if (dev_is_dma_coherent(dev))
 		return;
 
 	for_each_sg(sgl, sg, nelems, i)
-		__dma_map_area(sg_virt(sg), sg->length, dir);
+		arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
 }
 
 static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
 				int nelems, enum dma_data_direction dir,
 				unsigned long attrs)
 {
-	bool coherent = is_device_dma_coherent(dev);
+	bool coherent = dev_is_dma_coherent(dev);
 
 	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);
@@ -879,9 +718,9 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			const struct iommu_ops *iommu, bool coherent)
 {
 	if (!dev->dma_ops)
-		dev->dma_ops = &arm64_swiotlb_dma_ops;
+		dev->dma_ops = &swiotlb_dma_ops;
 
-	dev->archdata.dma_coherent = coherent;
+	dev->dma_coherent = coherent;
 	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
 
 #ifdef CONFIG_XEN
diff --git a/arch/c6x/Makefile b/arch/c6x/Makefile
index 3fe8a948e94c..b7aa854f7008 100644
--- a/arch/c6x/Makefile
+++ b/arch/c6x/Makefile
@@ -40,9 +40,7 @@ boot := arch/$(ARCH)/boot
 DTB:=$(subst dtbImage.,,$(filter dtbImage.%, $(MAKECMDGOALS)))
 export DTB
 
-ifneq ($(DTB),)
 core-y	+= $(boot)/dts/
-endif
 
 # With make 3.82 we cannot mix normal and wildcard targets
 
diff --git a/arch/c6x/boot/dts/Makefile b/arch/c6x/boot/dts/Makefile
index b212d278ebc4..f438285c3640 100644
--- a/arch/c6x/boot/dts/Makefile
+++ b/arch/c6x/boot/dts/Makefile
@@ -5,15 +5,12 @@
 
 DTC_FLAGS ?= -p 1024
 
+dtb-$(CONFIG_SOC_TMS320C6455) += dsk6455.dtb
+dtb-$(CONFIG_SOC_TMS320C6457) += evmc6457.dtb
+dtb-$(CONFIG_SOC_TMS320C6472) += evmc6472.dtb
+dtb-$(CONFIG_SOC_TMS320C6474) += evmc6474.dtb
+dtb-$(CONFIG_SOC_TMS320C6678) += evmc6678.dtb
+
 ifneq ($(DTB),)
-obj-y += linked_dtb.o
+obj-y += $(DTB).dtb.o
 endif
-
-quiet_cmd_cp = CP      $< $@$2
-	cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false)
-
-# Generate builtin.dtb from $(DTB).dtb
-$(obj)/builtin.dtb: $(obj)/$(DTB).dtb
-	$(call if_changed,cp)
-
-$(obj)/linked_dtb.o: $(obj)/builtin.dtb
diff --git a/arch/c6x/boot/dts/linked_dtb.S b/arch/c6x/boot/dts/linked_dtb.S
deleted file mode 100644
index cf347f1d16ce..000000000000
--- a/arch/c6x/boot/dts/linked_dtb.S
+++ /dev/null
@@ -1,2 +0,0 @@
-.section __fdt_blob,"a"
-.incbin "arch/c6x/boot/dts/builtin.dtb"
diff --git a/arch/c6x/include/asm/sections.h b/arch/c6x/include/asm/sections.h
index d6c591ab5b7e..dc2f15eb3bde 100644
--- a/arch/c6x/include/asm/sections.h
+++ b/arch/c6x/include/asm/sections.h
@@ -8,6 +8,5 @@ extern char _vectors_start[];
 extern char _vectors_end[];
 
 extern char _data_lma[];
-extern char _fdt_start[], _fdt_end[];
 
 #endif /* _ASM_C6X_SECTIONS_H */
diff --git a/arch/c6x/include/uapi/asm/unistd.h b/arch/c6x/include/uapi/asm/unistd.h
index 0d2daf7f9809..6b2fe792de9d 100644
--- a/arch/c6x/include/uapi/asm/unistd.h
+++ b/arch/c6x/include/uapi/asm/unistd.h
@@ -16,6 +16,7 @@
  */
 
 #define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_CLONE
 
 /* Use the standard ABI for syscalls. */
diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c
index 786e36e2f61d..05d96a9541b5 100644
--- a/arch/c6x/kernel/setup.c
+++ b/arch/c6x/kernel/setup.c
@@ -96,7 +96,7 @@ static void __init get_cpuinfo(void)
 	unsigned long core_khz;
 	u64 tmp;
 	struct cpuinfo_c6x *p;
-	struct device_node *node, *np;
+	struct device_node *node;
 
 	p = &per_cpu(cpu_data, smp_processor_id());
 
@@ -190,13 +190,8 @@ static void __init get_cpuinfo(void)
 
 	p->core_id = get_coreid();
 
-	node = of_find_node_by_name(NULL, "cpus");
-	if (node) {
-		for_each_child_of_node(node, np)
-			if (!strcmp("cpu", np->name))
-				++c6x_num_cores;
-		of_node_put(node);
-	}
+	for_each_of_cpu_node(node)
+		++c6x_num_cores;
 
 	node = of_find_node_by_name(NULL, "soc");
 	if (node) {
@@ -270,7 +265,7 @@ int __init c6x_add_memory(phys_addr_t start, unsigned long size)
 notrace void __init machine_init(unsigned long dt_ptr)
 {
 	void *dtb = __va(dt_ptr);
-	void *fdt = _fdt_start;
+	void *fdt = __dtb_start;
 
 	/* interrupts must be masked */
 	set_creg(IER, 2);
@@ -363,7 +358,7 @@ void __init setup_arch(char **cmdline_p)
 					 memory_end >> PAGE_SHIFT);
 	memblock_reserve(memory_start, bootmap_size);
 
-	unflatten_device_tree();
+	unflatten_and_copy_device_tree();
 
 	c6x_cache_init();
 
diff --git a/arch/c6x/kernel/vmlinux.lds.S b/arch/c6x/kernel/vmlinux.lds.S
index 1fba5b421eee..584bab2bace6 100644
--- a/arch/c6x/kernel/vmlinux.lds.S
+++ b/arch/c6x/kernel/vmlinux.lds.S
@@ -90,16 +90,6 @@ SECTIONS
 		*(.switch)
 	}
 
-	. = ALIGN (8) ;
-	__fdt_blob : AT(ADDR(__fdt_blob) - LOAD_OFFSET)
-	{
-		_fdt_start = . ;	/* place for fdt blob */
-		*(__fdt_blob) ;		/* Any link-placed DTB */
-		BYTE(0);		/* section always has contents */
-	        . = _fdt_start + 0x4000;	/* Pad up to 16kbyte */
-		_fdt_end = . ;
-	}
-
 	_etext = .;
 
 	/*
diff --git a/arch/h8300/Makefile b/arch/h8300/Makefile
index 58634e6bae92..4003ddc616e1 100644
--- a/arch/h8300/Makefile
+++ b/arch/h8300/Makefile
@@ -31,21 +31,12 @@ CROSS_COMPILE := h8300-unknown-linux-
 endif
 
 core-y	+= arch/$(ARCH)/kernel/ arch/$(ARCH)/mm/
-ifneq '$(CONFIG_H8300_BUILTIN_DTB)' '""'
-core-y += arch/h8300/boot/dts/
-endif
+core-y	+= arch/$(ARCH)/boot/dts/
 
 libs-y	+= arch/$(ARCH)/lib/
 
 boot := arch/h8300/boot
 
-%.dtb %.dtb.S %.dtb.o: | scripts
-	$(Q)$(MAKE) $(build)=arch/h8300/boot/dts arch/h8300/boot/dts/$@
-
-PHONY += dtbs
-dtbs: scripts
-	$(Q)$(MAKE) $(build)=arch/h8300/boot/dts
-
 archmrproper:
 
 archclean:
diff --git a/arch/h8300/include/uapi/asm/unistd.h b/arch/h8300/include/uapi/asm/unistd.h
index 7dd20ef7625a..628195823816 100644
--- a/arch/h8300/include/uapi/asm/unistd.h
+++ b/arch/h8300/include/uapi/asm/unistd.h
@@ -1,5 +1,6 @@
 #define __ARCH_NOMMU
 
 #define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_STAT64
 
 #include <asm-generic/unistd.h>
diff --git a/arch/hexagon/include/uapi/asm/unistd.h b/arch/hexagon/include/uapi/asm/unistd.h
index ea181e79162e..c91ca7d02461 100644
--- a/arch/hexagon/include/uapi/asm/unistd.h
+++ b/arch/hexagon/include/uapi/asm/unistd.h
@@ -29,6 +29,7 @@
 
 #define sys_mmap2 sys_mmap_pgoff
 #define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_VFORK
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index ffb705dc9c13..49e34db2529c 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -28,6 +28,9 @@
 #define __IGNORE_vfork		/* clone() */
 #define __IGNORE_umount2	/* umount() */
 
+#define __ARCH_WANT_NEW_STAT
+#define __ARCH_WANT_SYS_UTIME
+
 #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
 
 #include <linux/types.h>
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 1d5483f6e457..85904b73e261 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -621,7 +621,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -657,7 +656,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 52a0af127951..9b3818bbb68b 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -578,7 +578,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -614,7 +613,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index b3103e51268a..769677809945 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -599,7 +599,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -635,7 +634,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index fb7d651a4cab..7dd264ddf2ea 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 6b37f5537c39..515f7439c755 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -580,7 +580,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -616,7 +615,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index c717bf879449..8e1038ceb407 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -602,7 +602,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -638,7 +637,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 226c994ce794..62c8aaa15cc7 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -684,7 +684,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -720,7 +719,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index b383327fd77a..733973f91297 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 9783d3deb9e9..fee30cc9ac16 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index a35d10ee10cb..eebf9c9088e7 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -593,7 +593,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -629,7 +628,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 573bf922d448..dabc54318c09 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -571,7 +571,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -607,7 +606,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index efb27a7fcc55..0d9a5c2a311a 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -572,7 +572,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
@@ -608,7 +607,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 30d0d3fbd4ef..e680031bda7b 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -7,6 +7,7 @@
 
 #define NR_syscalls		380
 
+#define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_OLD_STAT
 #define __ARCH_WANT_STAT64
@@ -21,7 +22,6 @@
 #define __ARCH_WANT_SYS_SOCKETCALL
 #define __ARCH_WANT_SYS_FADVISE64
 #define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
 #define __ARCH_WANT_SYS_OLD_MMAP
diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index 4f3ab5707265..0823d291fbeb 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -65,9 +65,7 @@ boot := arch/microblaze/boot
 # Are we making a simpleImage.<boardname> target? If so, crack out the boardname
 DTB:=$(subst simpleImage.,,$(filter simpleImage.%, $(MAKECMDGOALS)))
 
-ifneq ($(DTB),)
-	core-y	+= $(boot)/dts/
-endif
+core-y	+= $(boot)/dts/
 
 # defines filename extension depending memory management type
 ifeq ($(CONFIG_MMU),)
diff --git a/arch/microblaze/boot/dts/Makefile b/arch/microblaze/boot/dts/Makefile
index 1f77913d404d..c7324e74f9ef 100644
--- a/arch/microblaze/boot/dts/Makefile
+++ b/arch/microblaze/boot/dts/Makefile
@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 #
 
+dtb-y := system.dtb
+
+ifneq ($(DTB),)
 obj-y += linked_dtb.o
 
 # Ensure system.dtb exists
@@ -11,6 +14,7 @@ ifneq ($(DTB),system)
 $(obj)/system.dtb: $(obj)/$(DTB).dtb
 	$(call if_changed,cp)
 endif
+endif
 
 quiet_cmd_cp = CP      $< $@$2
 	cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false)
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index a62d09420a47..f42c40f5001b 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -15,6 +15,7 @@
 
 /* #define __ARCH_WANT_OLD_READDIR */
 /* #define __ARCH_WANT_OLD_STAT */
+#define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_GETHOSTNAME
@@ -26,7 +27,6 @@
 #define __ARCH_WANT_SYS_SOCKETCALL
 #define __ARCH_WANT_SYS_FADVISE64
 #define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 /* #define __ARCH_WANT_SYS_OLD_GETRLIMIT */
 #define __ARCH_WANT_SYS_OLDUMOUNT
diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c
index 96b3f26d16be..ef2f49471a2a 100644
--- a/arch/microblaze/kernel/cpu/cpuinfo.c
+++ b/arch/microblaze/kernel/cpu/cpuinfo.c
@@ -89,9 +89,9 @@ static struct device_node *cpu;
 
 void __init setup_cpuinfo(void)
 {
-	cpu = (struct device_node *) of_find_node_by_type(NULL, "cpu");
+	cpu = of_get_cpu_node(0, NULL);
 	if (!cpu)
-		pr_err("You don't have cpu!!!\n");
+		pr_err("You don't have cpu or are missing cpu reg property!!!\n");
 
 	pr_info("%s: initialising\n", __func__);
 
@@ -117,6 +117,8 @@ void __init setup_cpuinfo(void)
 	if (cpuinfo.mmu_privins)
 		pr_warn("%s: Stream instructions enabled"
 			" - USERSPACE CAN LOCK THIS KERNEL!\n", __func__);
+
+	of_node_put(cpu);
 }
 
 void __init setup_cpuinfo_clk(void)
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index d74b3742fa5d..d43eeaa6d75b 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -407,18 +407,7 @@ endif
 CLEAN_FILES += vmlinux.32 vmlinux.64
 
 # device-trees
-core-$(CONFIG_BUILTIN_DTB) += arch/mips/boot/dts/
-
-%.dtb %.dtb.S %.dtb.o: | scripts
-	$(Q)$(MAKE) $(build)=arch/mips/boot/dts arch/mips/boot/dts/$@
-
-PHONY += dtbs
-dtbs: scripts
-	$(Q)$(MAKE) $(build)=arch/mips/boot/dts
-
-PHONY += dtbs_install
-dtbs_install:
-	$(Q)$(MAKE) $(dtbinst)=arch/mips/boot/dts
+core-y += arch/mips/boot/dts/
 
 archprepare:
 ifdef CONFIG_MIPS32_N32
@@ -461,8 +450,6 @@ define archhelp
 	echo '  uImage.lzma          - U-Boot image (lzma)'
 	echo '  uImage.lzo           - U-Boot image (lzo)'
 	echo '  uzImage.bin          - U-Boot image (self-extracting)'
-	echo '  dtbs                 - Device-tree blobs for enabled boards'
-	echo '  dtbs_install         - Install dtbs to $(INSTALL_DTBS_PATH)'
 	echo
 	echo '  These will be default as appropriate for a configured platform.'
 	echo
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index 78675f19440f..c99166eadbde 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -9,43 +9,25 @@
 #include <asm/page.h>
 #include <asm/ptrace.h>
 
+#include <asm-generic/compat.h>
+
 #define COMPAT_USER_HZ		100
 #define COMPAT_UTS_MACHINE	"mips\0\0\0"
 
-typedef u32		compat_size_t;
-typedef s32		compat_ssize_t;
-typedef s32		compat_clock_t;
-typedef s32		compat_suseconds_t;
-
-typedef s32		compat_pid_t;
 typedef s32		__compat_uid_t;
 typedef s32		__compat_gid_t;
 typedef __compat_uid_t	__compat_uid32_t;
 typedef __compat_gid_t	__compat_gid32_t;
 typedef u32		compat_mode_t;
-typedef u32		compat_ino_t;
 typedef u32		compat_dev_t;
-typedef s32		compat_off_t;
-typedef s64		compat_loff_t;
 typedef u32		compat_nlink_t;
 typedef s32		compat_ipc_pid_t;
-typedef s32		compat_daddr_t;
 typedef s32		compat_caddr_t;
 typedef struct {
 	s32	val[2];
 } compat_fsid_t;
-typedef s32		compat_timer_t;
-typedef s32		compat_key_t;
-
-typedef s16		compat_short_t;
-typedef s32		compat_int_t;
-typedef s32		compat_long_t;
 typedef s64		compat_s64;
-typedef u16		compat_ushort_t;
-typedef u32		compat_uint_t;
-typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
-typedef u32		compat_uptr_t;
 
 struct compat_stat {
 	compat_dev_t	st_dev;
@@ -59,11 +41,11 @@ struct compat_stat {
 	s32		st_pad2[2];
 	compat_off_t	st_size;
 	s32		st_pad3;
-	compat_time_t	st_atime;
+	old_time32_t	st_atime;
 	s32		st_atime_nsec;
-	compat_time_t	st_mtime;
+	old_time32_t	st_mtime;
 	s32		st_mtime_nsec;
-	compat_time_t	st_ctime;
+	old_time32_t	st_ctime;
 	s32		st_ctime_nsec;
 	s32		st_blksize;
 	s32		st_blocks;
diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index 3c09450908aa..c68b8ae3efcb 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -24,16 +24,17 @@
 
 #ifndef __ASSEMBLY__
 
+#define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_GETHOSTNAME
 #define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_UTIME32
 #define __ARCH_WANT_SYS_WAITPID
 #define __ARCH_WANT_SYS_SOCKETCALL
 #define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_UNAME
 #define __ARCH_WANT_SYS_OLDUMOUNT
diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c
index 89b234844534..7a12763d553a 100644
--- a/arch/mips/kernel/binfmt_elfn32.c
+++ b/arch/mips/kernel/binfmt_elfn32.c
@@ -54,10 +54,10 @@ struct elf_prstatus32
 	pid_t	pr_ppid;
 	pid_t	pr_pgrp;
 	pid_t	pr_sid;
-	struct compat_timeval pr_utime; /* User time */
-	struct compat_timeval pr_stime; /* System time */
-	struct compat_timeval pr_cutime;/* Cumulative user time */
-	struct compat_timeval pr_cstime;/* Cumulative system time */
+	struct old_timeval32 pr_utime; /* User time */
+	struct old_timeval32 pr_stime; /* System time */
+	struct old_timeval32 pr_cutime;/* Cumulative user time */
+	struct old_timeval32 pr_cstime;/* Cumulative system time */
 	elf_gregset_t pr_reg;	/* GP registers */
 	int pr_fpvalid;		/* True if math co-processor being used.  */
 };
@@ -81,9 +81,9 @@ struct elf_prpsinfo32
 #define elf_caddr_t	u32
 #define init_elf_binfmt init_elfn32_binfmt
 
-#define jiffies_to_timeval jiffies_to_compat_timeval
+#define jiffies_to_timeval jiffies_to_old_timeval32
 static __inline__ void
-jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
+jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value)
 {
 	/*
 	 * Convert jiffies to nanoseconds and separate with
@@ -101,6 +101,6 @@ jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
 #define TASK_SIZE TASK_SIZE32
 
 #undef ns_to_timeval
-#define ns_to_timeval ns_to_compat_timeval
+#define ns_to_timeval ns_to_old_timeval32
 
 #include "../../../fs/binfmt_elf.c"
diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c
index a88c59db3d48..e6db06a1d31a 100644
--- a/arch/mips/kernel/binfmt_elfo32.c
+++ b/arch/mips/kernel/binfmt_elfo32.c
@@ -59,10 +59,10 @@ struct elf_prstatus32
 	pid_t	pr_ppid;
 	pid_t	pr_pgrp;
 	pid_t	pr_sid;
-	struct compat_timeval pr_utime; /* User time */
-	struct compat_timeval pr_stime; /* System time */
-	struct compat_timeval pr_cutime;/* Cumulative user time */
-	struct compat_timeval pr_cstime;/* Cumulative system time */
+	struct old_timeval32 pr_utime; /* User time */
+	struct old_timeval32 pr_stime; /* System time */
+	struct old_timeval32 pr_cutime;/* Cumulative user time */
+	struct old_timeval32 pr_cstime;/* Cumulative system time */
 	elf_gregset_t pr_reg;	/* GP registers */
 	int pr_fpvalid;		/* True if math co-processor being used.  */
 };
@@ -86,9 +86,9 @@ struct elf_prpsinfo32
 #define elf_caddr_t	u32
 #define init_elf_binfmt init_elf32_binfmt
 
-#define jiffies_to_timeval jiffies_to_compat_timeval
+#define jiffies_to_timeval jiffies_to_old_timeval32
 static inline void
-jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
+jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value)
 {
 	/*
 	 * Convert jiffies to nanoseconds and separate with
@@ -104,6 +104,6 @@ jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
 #define TASK_SIZE TASK_SIZE32
 
 #undef ns_to_timeval
-#define ns_to_timeval ns_to_compat_timeval
+#define ns_to_timeval ns_to_old_timeval32
 
 #include "../../../fs/binfmt_elf.c"
diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile
index 3509fac10491..9f525ed70049 100644
--- a/arch/nds32/Makefile
+++ b/arch/nds32/Makefile
@@ -47,7 +47,7 @@ CHECKFLAGS      += -D__NDS32_EB__
 endif
 
 boot := arch/nds32/boot
-core-$(BUILTIN_DTB) += $(boot)/dts/
+core-y += $(boot)/dts/
 
 .PHONY: FORCE
 
diff --git a/arch/nds32/include/uapi/asm/unistd.h b/arch/nds32/include/uapi/asm/unistd.h
index 6e95901cabe3..603e826e0449 100644
--- a/arch/nds32/include/uapi/asm/unistd.h
+++ b/arch/nds32/include/uapi/asm/unistd.h
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
+#define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYNC_FILE_RANGE2
 
 /* Use the standard ABI for syscalls */
diff --git a/arch/nios2/Makefile b/arch/nios2/Makefile
index 8673a79dca9c..52c03e60b114 100644
--- a/arch/nios2/Makefile
+++ b/arch/nios2/Makefile
@@ -49,21 +49,13 @@ BOOT_TARGETS = vmImage zImage
 PHONY += $(BOOT_TARGETS) install
 KBUILD_IMAGE := $(nios2-boot)/vmImage
 
-ifneq ($(CONFIG_NIOS2_DTB_SOURCE),"")
-	core-y	+= $(nios2-boot)/
-endif
+core-y	+= $(nios2-boot)/dts/
 
 all: vmImage
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(nios2-boot)
 
-%.dtb: | scripts
-	$(Q)$(MAKE) $(build)=$(nios2-boot) $(nios2-boot)/$@
-
-dtbs:
-	$(Q)$(MAKE) $(build)=$(nios2-boot) $(nios2-boot)/$@
-
 $(BOOT_TARGETS): vmlinux
 	$(Q)$(MAKE) $(build)=$(nios2-boot) $(nios2-boot)/$@
 
@@ -76,5 +68,4 @@ define archhelp
   echo  '                     (your) ~/bin/$(INSTALLKERNEL) or'
   echo  '                     (distribution) /sbin/$(INSTALLKERNEL) or'
   echo  '                     install to $$(INSTALL_PATH)'
-  echo  '  dtbs            - Build device tree blobs for enabled boards'
 endef
diff --git a/arch/nios2/boot/Makefile b/arch/nios2/boot/Makefile
index 2ba23a679732..37dfc7e584bc 100644
--- a/arch/nios2/boot/Makefile
+++ b/arch/nios2/boot/Makefile
@@ -31,27 +31,5 @@ $(obj)/zImage: $(obj)/compressed/vmlinux FORCE
 $(obj)/compressed/vmlinux: $(obj)/vmlinux.gz FORCE
 	$(Q)$(MAKE) $(build)=$(obj)/compressed $@
 
-# Rule to build device tree blobs
-DTB_SRC := $(patsubst "%",%,$(CONFIG_NIOS2_DTB_SOURCE))
-
-# Make sure the generated dtb gets removed during clean
-extra-$(CONFIG_NIOS2_DTB_SOURCE_BOOL) += system.dtb
-
-$(obj)/system.dtb: $(DTB_SRC) FORCE
-	$(call cmd,dtc)
-
-# Ensure system.dtb exists
-$(obj)/linked_dtb.o: $(obj)/system.dtb
-
-obj-$(CONFIG_NIOS2_DTB_SOURCE_BOOL) += linked_dtb.o
-
-targets += $(dtb-y)
-
-# Rule to build device tree blobs with make command
-$(obj)/%.dtb: $(src)/dts/%.dts FORCE
-	$(call if_changed_dep,dtc)
-
-$(obj)/dtbs: $(addprefix $(obj)/, $(dtb-y))
-
 install:
 	sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)"
diff --git a/arch/nios2/boot/dts/Makefile b/arch/nios2/boot/dts/Makefile
new file mode 100644
index 000000000000..a91a0b09be63
--- /dev/null
+++ b/arch/nios2/boot/dts/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y := $(patsubst "%.dts",%.dtb.o,$(CONFIG_NIOS2_DTB_SOURCE))
+
+dtstree		:= $(srctree)/$(src)
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
diff --git a/arch/nios2/include/uapi/asm/unistd.h b/arch/nios2/include/uapi/asm/unistd.h
index b6bdae04bc84..d9948d88790b 100644
--- a/arch/nios2/include/uapi/asm/unistd.h
+++ b/arch/nios2/include/uapi/asm/unistd.h
@@ -19,6 +19,7 @@
  #define sys_mmap2 sys_mmap_pgoff
 
 #define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_STAT64
 
 /* Use the standard ABI for syscalls */
 #include <asm-generic/unistd.h>
diff --git a/arch/nios2/kernel/cpuinfo.c b/arch/nios2/kernel/cpuinfo.c
index 93207718bb22..ccc1d2a15a0a 100644
--- a/arch/nios2/kernel/cpuinfo.c
+++ b/arch/nios2/kernel/cpuinfo.c
@@ -47,7 +47,7 @@ void __init setup_cpuinfo(void)
 	const char *str;
 	int len;
 
-	cpu = of_find_node_by_type(NULL, "cpu");
+	cpu = of_get_cpu_node(0, NULL);
 	if (!cpu)
 		panic("%s: No CPU found in devicetree!\n", __func__);
 
@@ -120,6 +120,8 @@ void __init setup_cpuinfo(void)
 	cpuinfo.reset_addr = fcpu(cpu, "altr,reset-addr");
 	cpuinfo.exception_addr = fcpu(cpu, "altr,exception-addr");
 	cpuinfo.fast_tlb_miss_exc_addr = fcpu(cpu, "altr,fast-tlb-miss-addr");
+
+	of_node_put(cpu);
 }
 
 #ifdef CONFIG_PROC_FS
diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c
index ab88b6dd4679..54467d0085a1 100644
--- a/arch/nios2/kernel/time.c
+++ b/arch/nios2/kernel/time.c
@@ -214,12 +214,12 @@ static int __init nios2_timer_get_base_and_freq(struct device_node *np,
 {
 	*base = of_iomap(np, 0);
 	if (!*base) {
-		pr_crit("Unable to map reg for %s\n", np->name);
+		pr_crit("Unable to map reg for %pOFn\n", np);
 		return -ENXIO;
 	}
 
 	if (of_property_read_u32(np, "clock-frequency", freq)) {
-		pr_crit("Unable to get %s clock frequency\n", np->name);
+		pr_crit("Unable to get %pOFn clock frequency\n", np);
 		return -EINVAL;
 	}
 
diff --git a/arch/openrisc/include/uapi/asm/unistd.h b/arch/openrisc/include/uapi/asm/unistd.h
index 11c5a58ab333..ec37df18d8ed 100644
--- a/arch/openrisc/include/uapi/asm/unistd.h
+++ b/arch/openrisc/include/uapi/asm/unistd.h
@@ -20,6 +20,7 @@
 #define sys_mmap2 sys_mmap_pgoff
 
 #define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_CLONE
 
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index 9d28ab14d139..e17fcd83120f 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -158,9 +158,8 @@ static struct device_node *setup_find_cpu_node(int cpu)
 {
 	u32 hwid;
 	struct device_node *cpun;
-	struct device_node *cpus = of_find_node_by_path("/cpus");
 
-	for_each_available_child_of_node(cpus, cpun) {
+	for_each_of_cpu_node(cpun) {
 		if (of_property_read_u32(cpun, "reg", &hwid))
 			continue;
 		if (hwid == cpu)
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index ab8a54771507..e03e3c849f40 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -8,36 +8,22 @@
 #include <linux/sched.h>
 #include <linux/thread_info.h>
 
+#include <asm-generic/compat.h>
+
 #define COMPAT_USER_HZ 		100
 #define COMPAT_UTS_MACHINE	"parisc\0\0"
 
-typedef u32	compat_size_t;
-typedef s32	compat_ssize_t;
-typedef s32	compat_clock_t;
-typedef s32	compat_pid_t;
 typedef u32	__compat_uid_t;
 typedef u32	__compat_gid_t;
 typedef u32	__compat_uid32_t;
 typedef u32	__compat_gid32_t;
 typedef u16	compat_mode_t;
-typedef u32	compat_ino_t;
 typedef u32	compat_dev_t;
-typedef s32	compat_off_t;
-typedef s64	compat_loff_t;
 typedef u16	compat_nlink_t;
 typedef u16	compat_ipc_pid_t;
-typedef s32	compat_daddr_t;
 typedef u32	compat_caddr_t;
-typedef s32	compat_key_t;
-typedef s32	compat_timer_t;
-
-typedef s32	compat_int_t;
-typedef s32	compat_long_t;
 typedef s64	compat_s64;
-typedef u32	compat_uint_t;
-typedef u32	compat_ulong_t;
 typedef u64	compat_u64;
-typedef u32	compat_uptr_t;
 
 struct compat_stat {
 	compat_dev_t		st_dev;	/* dev_t is 32 bits on parisc */
@@ -48,11 +34,11 @@ struct compat_stat {
 	u16			st_reserved2;	/* old st_gid */
 	compat_dev_t		st_rdev;
 	compat_off_t		st_size;
-	compat_time_t		st_atime;
+	old_time32_t		st_atime;
 	u32			st_atime_nsec;
-	compat_time_t		st_mtime;
+	old_time32_t		st_mtime;
 	u32			st_mtime_nsec;
-	compat_time_t		st_ctime;
+	old_time32_t		st_ctime;
 	u32			st_ctime_nsec;
 	s32			st_blksize;
 	s32			st_blocks;
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 3d507d04eb4c..bc37a4953eaa 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -141,6 +141,7 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5)	\
     return K_INLINE_SYSCALL(name, 5, arg1, arg2, arg3, arg4, arg5);	\
 }
 
+#define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
@@ -151,11 +152,11 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5)	\
 #define __ARCH_WANT_COMPAT_SYS_TIME
 #define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_UTIME32
 #define __ARCH_WANT_SYS_WAITPID
 #define __ARCH_WANT_SYS_SOCKETCALL
 #define __ARCH_WANT_SYS_FADVISE64
 #define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 11a1acba164a..42f225f6ec93 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -293,9 +293,6 @@ $(BOOT_TARGETS2): vmlinux
 bootwrapper_install:
 	$(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
 
-%.dtb: scripts
-	$(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
-
 # Used to create 'merged defconfigs'
 # To use it $(call) it with the first argument as the base defconfig
 # and the second argument as a space separated list of .config files to merge,
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 0fb96c26136f..bca5c23767df 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -304,9 +304,9 @@ image-$(CONFIG_PPC_ADDER875)		+= cuImage.adder875-uboot \
 					   dtbImage.adder875-redboot
 
 # Board ports in arch/powerpc/platform/52xx/Kconfig
-image-$(CONFIG_PPC_LITE5200)		+= cuImage.lite5200 lite5200.dtb
-image-$(CONFIG_PPC_LITE5200)		+= cuImage.lite5200b lite5200b.dtb
-image-$(CONFIG_PPC_MEDIA5200)		+= cuImage.media5200 media5200.dtb
+image-$(CONFIG_PPC_LITE5200)		+= cuImage.lite5200
+image-$(CONFIG_PPC_LITE5200)		+= cuImage.lite5200b
+image-$(CONFIG_PPC_MEDIA5200)		+= cuImage.media5200
 
 # Board ports in arch/powerpc/platform/82xx/Kconfig
 image-$(CONFIG_MPC8272_ADS)		+= cuImage.mpc8272ads
@@ -381,11 +381,11 @@ $(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperb
 	$(call if_changed,wrap,$(subst $(obj)/zImage.,,$@))
 
 # dtbImage% - a dtbImage is a zImage with an embedded device tree blob
-$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE
-	$(call if_changed,wrap,$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE
+	$(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
 
-$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE
-	$(call if_changed,wrap,$*,,$(obj)/$*.dtb)
+$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE
+	$(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb)
 
 # This cannot be in the root of $(src) as the zImage rule always adds a $(obj)
 # prefix
@@ -395,36 +395,33 @@ $(obj)/vmlinux.strip: vmlinux
 $(obj)/uImage: vmlinux $(wrapperbits) FORCE
 	$(call if_changed,wrap,uboot)
 
-$(obj)/uImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/uImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
 
-$(obj)/uImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb)
+$(obj)/uImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb)
 
-$(obj)/cuImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/cuImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
 
-$(obj)/cuImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb)
+$(obj)/cuImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb)
 
-$(obj)/simpleImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/simpleImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
 
-$(obj)/simpleImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb)
+$(obj)/simpleImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb)
 
-$(obj)/treeImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/treeImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
 
-$(obj)/treeImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb)
+$(obj)/treeImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb)
 
-# Rule to build device tree blobs
-$(obj)/%.dtb: $(src)/dts/%.dts FORCE
-	$(call if_changed_dep,dtc)
-
-$(obj)/%.dtb: $(src)/dts/fsl/%.dts FORCE
-	$(call if_changed_dep,dtc)
+# Needed for the above targets to work with dts/fsl/ files
+$(obj)/dts/%.dtb: $(obj)/dts/fsl/%.dtb
+	@cp $< $@
 
 # If there isn't a platform selected then just strip the vmlinux.
 ifeq (,$(image-y))
diff --git a/arch/powerpc/boot/dts/Makefile b/arch/powerpc/boot/dts/Makefile
new file mode 100644
index 000000000000..fb335d05aae8
--- /dev/null
+++ b/arch/powerpc/boot/dts/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+subdir-y += fsl
+
+dtstree		:= $(srctree)/$(src)
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
diff --git a/arch/powerpc/boot/dts/fsl/Makefile b/arch/powerpc/boot/dts/fsl/Makefile
new file mode 100644
index 000000000000..3bae982641e9
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+dtstree		:= $(srctree)/$(src)
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h
index 2a0c8b1bf147..2abc8e83b95e 100644
--- a/arch/powerpc/boot/libfdt_env.h
+++ b/arch/powerpc/boot/libfdt_env.h
@@ -5,6 +5,8 @@
 #include <types.h>
 #include <string.h>
 
+#define INT_MAX			((int)(~0U>>1))
+
 #include "of.h"
 
 typedef unsigned long uintptr_t;
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 1f4691ce4126..c55ba3b4873b 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -150,4 +150,25 @@ extern s32 patch__memset_nocache, patch__memcpy_nocache;
 
 extern long flush_count_cache;
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
+void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
+#else
+static inline void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
+				     bool preserve_nv) { }
+static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
+					bool preserve_nv) { }
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+void kvmhv_save_host_pmu(void);
+void kvmhv_load_host_pmu(void);
+void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
+void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
+
+int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+
+long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
+long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
+			unsigned long dabrx);
+
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index b3520b549cba..66db23e2f4dc 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -203,6 +203,18 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
 	BUG();
 }
 
+static inline unsigned int ap_to_shift(unsigned long ap)
+{
+	int psize;
+
+	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+		if (mmu_psize_defs[psize].ap == ap)
+			return mmu_psize_defs[psize].shift;
+	}
+
+	return -1;
+}
+
 static inline unsigned long get_sllp_encoding(int psize)
 {
 	unsigned long sllp;
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 1154a6dc6d26..671316f9e95d 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -53,6 +53,7 @@ extern void radix__flush_tlb_lpid_page(unsigned int lpid,
 					unsigned long addr,
 					unsigned long page_size);
 extern void radix__flush_pwc_lpid(unsigned int lpid);
+extern void radix__flush_tlb_lpid(unsigned int lpid);
 extern void radix__local_flush_tlb_lpid(unsigned int lpid);
 extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
 
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 85c8af2bb272..74d0db511099 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -8,6 +8,8 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 
+#include <asm-generic/compat.h>
+
 #define COMPAT_USER_HZ		100
 #ifdef __BIG_ENDIAN__
 #define COMPAT_UTS_MACHINE	"ppc\0\0"
@@ -15,34 +17,18 @@
 #define COMPAT_UTS_MACHINE	"ppcle\0\0"
 #endif
 
-typedef u32		compat_size_t;
-typedef s32		compat_ssize_t;
-typedef s32		compat_clock_t;
-typedef s32		compat_pid_t;
 typedef u32		__compat_uid_t;
 typedef u32		__compat_gid_t;
 typedef u32		__compat_uid32_t;
 typedef u32		__compat_gid32_t;
 typedef u32		compat_mode_t;
-typedef u32		compat_ino_t;
 typedef u32		compat_dev_t;
-typedef s32		compat_off_t;
-typedef s64		compat_loff_t;
 typedef s16		compat_nlink_t;
 typedef u16		compat_ipc_pid_t;
-typedef s32		compat_daddr_t;
 typedef u32		compat_caddr_t;
 typedef __kernel_fsid_t	compat_fsid_t;
-typedef s32		compat_key_t;
-typedef s32		compat_timer_t;
-
-typedef s32		compat_int_t;
-typedef s32		compat_long_t;
 typedef s64		compat_s64;
-typedef u32		compat_uint_t;
-typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
-typedef u32		compat_uptr_t;
 
 struct compat_stat {
 	compat_dev_t	st_dev;
@@ -55,11 +41,11 @@ struct compat_stat {
 	compat_off_t	st_size;
 	compat_off_t	st_blksize;
 	compat_off_t	st_blocks;
-	compat_time_t	st_atime;
+	old_time32_t	st_atime;
 	u32		st_atime_nsec;
-	compat_time_t	st_mtime;
+	old_time32_t	st_mtime;
 	u32		st_mtime_nsec;
-	compat_time_t	st_ctime;
+	old_time32_t	st_ctime;
 	u32		st_ctime_nsec;
 	u32		__unused4[2];
 };
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index a0b17f9f1ea4..45e8789bb770 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -322,6 +322,11 @@
 #define H_GET_24X7_DATA		0xF07C
 #define H_GET_PERF_COUNTER_INFO	0xF080
 
+/* Platform-specific hcalls used for nested HV KVM */
+#define H_SET_PARTITION_TABLE	0xF800
+#define H_ENTER_NESTED		0xF804
+#define H_TLB_INVALIDATE	0xF808
+
 /* Values for 2nd argument to H_SET_MODE */
 #define H_SET_MODE_RESOURCE_SET_CIABR		1
 #define H_SET_MODE_RESOURCE_SET_DAWR		2
@@ -461,6 +466,42 @@ struct h_cpu_char_result {
 	u64 behaviour;
 };
 
+/* Register state for entering a nested guest with H_ENTER_NESTED */
+struct hv_guest_state {
+	u64 version;		/* version of this structure layout */
+	u32 lpid;
+	u32 vcpu_token;
+	/* These registers are hypervisor privileged (at least for writing) */
+	u64 lpcr;
+	u64 pcr;
+	u64 amor;
+	u64 dpdes;
+	u64 hfscr;
+	s64 tb_offset;
+	u64 dawr0;
+	u64 dawrx0;
+	u64 ciabr;
+	u64 hdec_expiry;
+	u64 purr;
+	u64 spurr;
+	u64 ic;
+	u64 vtb;
+	u64 hdar;
+	u64 hdsisr;
+	u64 heir;
+	u64 asdr;
+	/* These are OS privileged but need to be set late in guest entry */
+	u64 srr0;
+	u64 srr1;
+	u64 sprg[4];
+	u64 pidr;
+	u64 cfar;
+	u64 ppr;
+};
+
+/* Latest version of hv_guest_state structure */
+#define HV_GUEST_STATE_VERSION	1
+
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 3d4b88cb8599..35db0cbc9222 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -126,7 +126,7 @@ struct iommu_table {
 	int it_nid;
 };
 
-#define IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry) \
+#define IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry) \
 		((tbl)->it_ops->useraddrptr((tbl), (entry), false))
 #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
 		((tbl)->it_ops->useraddrptr((tbl), (entry), true))
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index a790d5cf6ea3..1f321914676d 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -84,7 +84,6 @@
 #define BOOK3S_INTERRUPT_INST_STORAGE	0x400
 #define BOOK3S_INTERRUPT_INST_SEGMENT	0x480
 #define BOOK3S_INTERRUPT_EXTERNAL	0x500
-#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL	0x501
 #define BOOK3S_INTERRUPT_EXTERNAL_HV	0x502
 #define BOOK3S_INTERRUPT_ALIGNMENT	0x600
 #define BOOK3S_INTERRUPT_PROGRAM	0x700
@@ -134,8 +133,7 @@
 #define BOOK3S_IRQPRIO_EXTERNAL			14
 #define BOOK3S_IRQPRIO_DECREMENTER		15
 #define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR	16
-#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL		17
-#define BOOK3S_IRQPRIO_MAX			18
+#define BOOK3S_IRQPRIO_MAX			17
 
 #define BOOK3S_HFLAG_DCBZ32			0x1
 #define BOOK3S_HFLAG_SLB			0x2
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 83a9aa3cf689..09f8e9ba69bc 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -188,14 +188,37 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
 extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
 			struct kvm_vcpu *vcpu,
 			unsigned long ea, unsigned long dsisr);
+extern int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
+				      struct kvmppc_pte *gpte, u64 root,
+				      u64 *pte_ret_p);
+extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
+			struct kvmppc_pte *gpte, u64 table,
+			int table_index, u64 *pte_ret_p);
 extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 			struct kvmppc_pte *gpte, bool data, bool iswrite);
+extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
+			unsigned int shift, struct kvm_memory_slot *memslot,
+			unsigned int lpid);
+extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable,
+				    bool writing, unsigned long gpa,
+				    unsigned int lpid);
+extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
+				unsigned long gpa,
+				struct kvm_memory_slot *memslot,
+				bool writing, bool kvm_ro,
+				pte_t *inserted_pte, unsigned int *levelp);
 extern int kvmppc_init_vm_radix(struct kvm *kvm);
 extern void kvmppc_free_radix(struct kvm *kvm);
+extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd,
+				      unsigned int lpid);
 extern int kvmppc_radix_init(void);
 extern void kvmppc_radix_exit(void);
 extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			unsigned long gfn);
+extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
+			     unsigned long gpa, unsigned int shift,
+			     struct kvm_memory_slot *memslot,
+			     unsigned int lpid);
 extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			unsigned long gfn);
 extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
@@ -271,6 +294,21 @@ static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {}
 static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {}
 #endif
 
+long kvmhv_nested_init(void);
+void kvmhv_nested_exit(void);
+void kvmhv_vm_nested_init(struct kvm *kvm);
+long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
+void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
+void kvmhv_release_all_nested(struct kvm *kvm);
+long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
+long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
+int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu,
+			  u64 time_limit, unsigned long lpcr);
+void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
+void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
+				   struct hv_guest_state *hr);
+long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu);
+
 void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
 
 extern int kvm_irq_bypass;
@@ -301,12 +339,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
 
 static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
 {
-	vcpu->arch.cr = val;
+	vcpu->arch.regs.ccr = val;
 }
 
 static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.cr;
+	return vcpu->arch.regs.ccr;
 }
 
 static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
@@ -384,9 +422,6 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
 /* TO = 31 for unconditional trap */
 #define INS_TW				0x7fe00008
 
-/* LPIDs we support with this build -- runtime limit may be lower */
-#define KVMPPC_NR_LPIDS			(LPID_RSVD + 1)
-
 #define SPLIT_HACK_MASK			0xff000000
 #define SPLIT_HACK_OFFS			0xfb000000
 
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index dc435a5af7d6..6d298145d564 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -23,6 +23,108 @@
 #include <linux/string.h>
 #include <asm/bitops.h>
 #include <asm/book3s/64/mmu-hash.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/ppc-opcode.h>
+
+#ifdef CONFIG_PPC_PSERIES
+static inline bool kvmhv_on_pseries(void)
+{
+	return !cpu_has_feature(CPU_FTR_HVMODE);
+}
+#else
+static inline bool kvmhv_on_pseries(void)
+{
+	return false;
+}
+#endif
+
+/*
+ * Structure for a nested guest, that is, for a guest that is managed by
+ * one of our guests.
+ */
+struct kvm_nested_guest {
+	struct kvm *l1_host;		/* L1 VM that owns this nested guest */
+	int l1_lpid;			/* lpid L1 guest thinks this guest is */
+	int shadow_lpid;		/* real lpid of this nested guest */
+	pgd_t *shadow_pgtable;		/* our page table for this guest */
+	u64 l1_gr_to_hr;		/* L1's addr of part'n-scoped table */
+	u64 process_table;		/* process table entry for this guest */
+	long refcnt;			/* number of pointers to this struct */
+	struct mutex tlb_lock;		/* serialize page faults and tlbies */
+	struct kvm_nested_guest *next;
+	cpumask_t need_tlb_flush;
+	cpumask_t cpu_in_guest;
+	short prev_cpu[NR_CPUS];
+};
+
+/*
+ * We define a nested rmap entry as a single 64-bit quantity
+ * 0xFFF0000000000000	12-bit lpid field
+ * 0x000FFFFFFFFFF000	40-bit guest 4k page frame number
+ * 0x0000000000000001	1-bit  single entry flag
+ */
+#define RMAP_NESTED_LPID_MASK		0xFFF0000000000000UL
+#define RMAP_NESTED_LPID_SHIFT		(52)
+#define RMAP_NESTED_GPA_MASK		0x000FFFFFFFFFF000UL
+#define RMAP_NESTED_IS_SINGLE_ENTRY	0x0000000000000001UL
+
+/* Structure for a nested guest rmap entry */
+struct rmap_nested {
+	struct llist_node list;
+	u64 rmap;
+};
+
+/*
+ * for_each_nest_rmap_safe - iterate over the list of nested rmap entries
+ *			     safe against removal of the list entry or NULL list
+ * @pos:	a (struct rmap_nested *) to use as a loop cursor
+ * @node:	pointer to the first entry
+ *		NOTE: this can be NULL
+ * @rmapp:	an (unsigned long *) in which to return the rmap entries on each
+ *		iteration
+ *		NOTE: this must point to already allocated memory
+ *
+ * The nested_rmap is a llist of (struct rmap_nested) entries pointed to by the
+ * rmap entry in the memslot. The list is always terminated by a "single entry"
+ * stored in the list element of the final entry of the llist. If there is ONLY
+ * a single entry then this is itself in the rmap entry of the memslot, not a
+ * llist head pointer.
+ *
+ * Note that the iterator below assumes that a nested rmap entry is always
+ * non-zero.  This is true for our usage because the LPID field is always
+ * non-zero (zero is reserved for the host).
+ *
+ * This should be used to iterate over the list of rmap_nested entries with
+ * processing done on the u64 rmap value given by each iteration. This is safe
+ * against removal of list entries and it is always safe to call free on (pos).
+ *
+ * e.g.
+ * struct rmap_nested *cursor;
+ * struct llist_node *first;
+ * unsigned long rmap;
+ * for_each_nest_rmap_safe(cursor, first, &rmap) {
+ *	do_something(rmap);
+ *	free(cursor);
+ * }
+ */
+#define for_each_nest_rmap_safe(pos, node, rmapp)			       \
+	for ((pos) = llist_entry((node), typeof(*(pos)), list);		       \
+	     (node) &&							       \
+	     (*(rmapp) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ?     \
+			  ((u64) (node)) : ((pos)->rmap))) &&		       \
+	     (((node) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ?      \
+			 ((struct llist_node *) ((pos) = NULL)) :	       \
+			 (pos)->list.next)), true);			       \
+	     (pos) = llist_entry((node), typeof(*(pos)), list))
+
+struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
+					  bool create);
+void kvmhv_put_nested(struct kvm_nested_guest *gp);
+int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid);
+
+/* Encoding of first parameter for H_TLB_INVALIDATE */
+#define H_TLBIE_P1_ENC(ric, prs, r)	(___PPC_RIC(ric) | ___PPC_PRS(prs) | \
+					 ___PPC_R(r))
 
 /* Power architecture requires HPT is at least 256kiB, at most 64TiB */
 #define PPC_MIN_HPT_ORDER	18
@@ -435,6 +537,7 @@ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
 }
 
 extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
+extern void kvmhv_radix_debugfs_init(struct kvm *kvm);
 
 extern void kvmhv_rm_send_ipi(int cpu);
 
@@ -482,7 +585,7 @@ static inline u64 sanitize_msr(u64 msr)
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.cr  = vcpu->arch.cr_tm;
+	vcpu->arch.regs.ccr  = vcpu->arch.cr_tm;
 	vcpu->arch.regs.xer = vcpu->arch.xer_tm;
 	vcpu->arch.regs.link  = vcpu->arch.lr_tm;
 	vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
@@ -499,7 +602,7 @@ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
 
 static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.cr_tm  = vcpu->arch.cr;
+	vcpu->arch.cr_tm  = vcpu->arch.regs.ccr;
 	vcpu->arch.xer_tm = vcpu->arch.regs.xer;
 	vcpu->arch.lr_tm  = vcpu->arch.regs.link;
 	vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
@@ -515,6 +618,17 @@ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
 }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
+extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
+			     unsigned long gpa, unsigned int level,
+			     unsigned long mmu_seq, unsigned int lpid,
+			     unsigned long *rmapp, struct rmap_nested **n_rmap);
+extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
+				   struct rmap_nested **n_rmap);
+extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
+				struct kvm_memory_slot *memslot,
+				unsigned long gpa, unsigned long hpa,
+				unsigned long nbytes);
+
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index d978fdf698af..eb3ba6390108 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -25,6 +25,9 @@
 #define XICS_MFRR		0xc
 #define XICS_IPI		2	/* interrupt source # for IPIs */
 
+/* LPIDs we support with this build -- runtime limit may be lower */
+#define KVMPPC_NR_LPIDS			(LPID_RSVD + 1)
+
 /* Maximum number of threads per physical core */
 #define MAX_SMT_THREADS		8
 
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index d513e3ed1c65..f0cef625f17c 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -46,12 +46,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
 
 static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
 {
-	vcpu->arch.cr = val;
+	vcpu->arch.regs.ccr = val;
 }
 
 static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.cr;
+	return vcpu->arch.regs.ccr;
 }
 
 static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 906bcbdfd2a1..fac6f631ed29 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -46,6 +46,7 @@
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 #include <asm/kvm_book3s_asm.h>		/* for MAX_SMT_THREADS */
 #define KVM_MAX_VCPU_ID		(MAX_SMT_THREADS * KVM_MAX_VCORES)
+#define KVM_MAX_NESTED_GUESTS	KVMPPC_NR_LPIDS
 
 #else
 #define KVM_MAX_VCPU_ID		KVM_MAX_VCPUS
@@ -94,6 +95,7 @@ struct dtl_entry;
 
 struct kvmppc_vcpu_book3s;
 struct kvmppc_book3s_shadow_vcpu;
+struct kvm_nested_guest;
 
 struct kvm_vm_stat {
 	ulong remote_tlb_flush;
@@ -287,10 +289,12 @@ struct kvm_arch {
 	u8 radix;
 	u8 fwnmi_enabled;
 	bool threads_indep;
+	bool nested_enable;
 	pgd_t *pgtable;
 	u64 process_table;
 	struct dentry *debugfs_dir;
 	struct dentry *htab_dentry;
+	struct dentry *radix_dentry;
 	struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -311,6 +315,9 @@ struct kvm_arch {
 #endif
 	struct kvmppc_ops *kvm_ops;
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	u64 l1_ptcr;
+	int max_nested_lpid;
+	struct kvm_nested_guest *nested_guests[KVM_MAX_NESTED_GUESTS];
 	/* This array can grow quite large, keep it at the end */
 	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
 #endif
@@ -360,7 +367,9 @@ struct kvmppc_pte {
 	bool may_write		: 1;
 	bool may_execute	: 1;
 	unsigned long wimg;
+	unsigned long rc;
 	u8 page_size;		/* MMU_PAGE_xxx */
+	u8 page_shift;
 };
 
 struct kvmppc_mmu {
@@ -537,8 +546,6 @@ struct kvm_vcpu_arch {
 	ulong tar;
 #endif
 
-	u32 cr;
-
 #ifdef CONFIG_PPC_BOOK3S
 	ulong hflags;
 	ulong guest_owned_ext;
@@ -707,6 +714,7 @@ struct kvm_vcpu_arch {
 	u8 hcall_needed;
 	u8 epr_flags; /* KVMPPC_EPR_xxx */
 	u8 epr_needed;
+	u8 external_oneshot;	/* clear external irq after delivery */
 
 	u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
 
@@ -781,6 +789,10 @@ struct kvm_vcpu_arch {
 	u32 emul_inst;
 
 	u32 online;
+
+	/* For support of nested guests */
+	struct kvm_nested_guest *nested;
+	u32 nested_vcpu_id;
 #endif
 
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index e991821dd7fa..9b89b1918dfc 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -194,9 +194,7 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
 		(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
 				(stt)->size, (ioba), (npages)) ?        \
 				H_PARAMETER : H_SUCCESS)
-extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt,
-		unsigned long tce);
-extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
+extern long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
 		unsigned long *ua, unsigned long **prmap);
 extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
 		unsigned long idx, unsigned long tce);
@@ -327,6 +325,7 @@ struct kvmppc_ops {
 	int (*set_smt_mode)(struct kvm *kvm, unsigned long mode,
 			    unsigned long flags);
 	void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr);
+	int (*enable_nested)(struct kvm *kvm);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
@@ -585,6 +584,7 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 
 extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
 			       int level, bool line_status);
+extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
 #else
 static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
 				       u32 priority) { return -1; }
@@ -607,6 +607,7 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
 
 static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
 				      int level, bool line_status) { return -ENODEV; }
+static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
 #endif /* CONFIG_KVM_XIVE */
 
 /*
@@ -652,6 +653,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
                     unsigned long mfrr);
 int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
 int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
+void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu);
 
 /*
  * Host-side operations we want to set up while running in real
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 665af14850e4..6093bc8f74e5 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -104,6 +104,7 @@
 #define OP_31_XOP_LHZUX     311
 #define OP_31_XOP_MSGSNDP   142
 #define OP_31_XOP_MSGCLRP   174
+#define OP_31_XOP_TLBIE     306
 #define OP_31_XOP_MFSPR     339
 #define OP_31_XOP_LWAX      341
 #define OP_31_XOP_LHAX      343
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e5b314ed054e..c90698972f42 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -415,6 +415,7 @@
 #define   HFSCR_DSCR	__MASK(FSCR_DSCR_LG)
 #define   HFSCR_VECVSX	__MASK(FSCR_VECVSX_LG)
 #define   HFSCR_FP	__MASK(FSCR_FP_LG)
+#define   HFSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56)	/* interrupt cause */
 #define SPRN_TAR	0x32f	/* Target Address Register */
 #define SPRN_LPCR	0x13E	/* LPAR Control Register */
 #define   LPCR_VPM0		ASM_CONST(0x8000000000000000)
@@ -766,6 +767,7 @@
 #define SPRN_HSRR0	0x13A	/* Save/Restore Register 0 */
 #define SPRN_HSRR1	0x13B	/* Save/Restore Register 1 */
 #define   HSRR1_DENORM		0x00100000 /* Denorm exception */
+#define   HSRR1_HISI_WRITE	0x00010000 /* HISI bcs couldn't update mem */
 
 #define SPRN_TBCTL	0x35f	/* PA6T Timebase control register */
 #define   TBCTL_FREEZE		0x0000000000000000ull /* Freeze all tbs */
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index c19379f0a32e..b0de85b477e1 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -22,6 +22,7 @@
 #include <linux/compiler.h>
 #include <linux/linkage.h>
 
+#define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
@@ -35,7 +36,6 @@
 #define __ARCH_WANT_SYS_SOCKETCALL
 #define __ARCH_WANT_SYS_FADVISE64
 #define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
 #define __ARCH_WANT_SYS_OLD_UNAME
@@ -47,6 +47,7 @@
 #endif
 #ifdef CONFIG_PPC64
 #define __ARCH_WANT_COMPAT_SYS_TIME
+#define __ARCH_WANT_SYS_UTIME32
 #define __ARCH_WANT_SYS_NEWFSTATAT
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
 #endif
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 1b32b56a03d3..8c876c166ef2 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char {
 
 #define KVM_REG_PPC_DEC_EXPIRY	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
 #define KVM_REG_PPC_ONLINE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
+#define KVM_REG_PPC_PTCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 89cf15566c4e..d68b9ef38328 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -387,12 +387,12 @@ int main(void)
 	OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
 	OFFSET(TVAL64_TV_SEC, timeval, tv_sec);
 	OFFSET(TVAL64_TV_USEC, timeval, tv_usec);
-	OFFSET(TVAL32_TV_SEC, compat_timeval, tv_sec);
-	OFFSET(TVAL32_TV_USEC, compat_timeval, tv_usec);
+	OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec);
+	OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec);
 	OFFSET(TSPC64_TV_SEC, timespec, tv_sec);
 	OFFSET(TSPC64_TV_NSEC, timespec, tv_nsec);
-	OFFSET(TSPC32_TV_SEC, compat_timespec, tv_sec);
-	OFFSET(TSPC32_TV_NSEC, compat_timespec, tv_nsec);
+	OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec);
+	OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec);
 #else
 	OFFSET(TVAL32_TV_SEC, timeval, tv_sec);
 	OFFSET(TVAL32_TV_USEC, timeval, tv_usec);
@@ -438,7 +438,7 @@ int main(void)
 #ifdef CONFIG_PPC_BOOK3S
 	OFFSET(VCPU_TAR, kvm_vcpu, arch.tar);
 #endif
-	OFFSET(VCPU_CR, kvm_vcpu, arch.cr);
+	OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
 	OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr);
@@ -503,6 +503,7 @@ int main(void)
 	OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
 	OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty);
 	OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst);
+	OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested);
 	OFFSET(VCPU_CPU, kvm_vcpu, cpu);
 	OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu);
 #endif
@@ -695,7 +696,7 @@ int main(void)
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 #else /* CONFIG_PPC_BOOK3S */
-	OFFSET(VCPU_CR, kvm_vcpu, arch.cr);
+	OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
 	OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
 	OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
 	OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 458b928dbd84..c317080db771 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -147,8 +147,8 @@ __init_hvmode_206:
 	rldicl.	r0,r3,4,63
 	bnelr
 	ld	r5,CPU_SPEC_FEATURES(r4)
-	LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE)
-	xor	r5,r5,r6
+	LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST)
+	andc	r5,r5,r6
 	std	r5,CPU_SPEC_FEATURES(r4)
 	blr
 
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 88f3963ca30f..5fc335f4d9cd 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -11,7 +11,7 @@
  *
  */
 
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
 #include <linux/memblock.h>
 #include <linux/pfn.h>
 #include <linux/of_platform.h>
@@ -59,7 +59,7 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = {
 	.sync_single_for_device = swiotlb_sync_single_for_device,
 	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
 	.sync_sg_for_device = swiotlb_sync_sg_for_device,
-	.mapping_error = swiotlb_dma_mapping_error,
+	.mapping_error = dma_direct_mapping_error,
 	.get_required_mask = swiotlb_powerpc_get_required,
 };
 
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index f872c04bb5b1..e814f40ab836 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -75,7 +75,8 @@ kvm-hv-y += \
 	book3s_hv.o \
 	book3s_hv_interrupts.o \
 	book3s_64_mmu_hv.o \
-	book3s_64_mmu_radix.o
+	book3s_64_mmu_radix.o \
+	book3s_hv_nested.o
 
 kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
 	book3s_hv_tm.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 87348e498c89..fd9893bc7aa1 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -78,8 +78,11 @@ void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
 		ulong pc = kvmppc_get_pc(vcpu);
+		ulong lr = kvmppc_get_lr(vcpu);
 		if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
 			kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
+		if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+			kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
 		vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
 	}
 }
@@ -150,7 +153,6 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
 	case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE;		break;
 	case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT;		break;
 	case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL;		break;
-	case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL;	break;
 	case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT;		break;
 	case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM;		break;
 	case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL;		break;
@@ -236,18 +238,35 @@ EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                 struct kvm_interrupt *irq)
 {
-	unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL;
-
-	if (irq->irq == KVM_INTERRUPT_SET_LEVEL)
-		vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL;
+	/*
+	 * This case (KVM_INTERRUPT_SET) should never actually arise for
+	 * a pseries guest (because pseries guests expect their interrupt
+	 * controllers to continue asserting an external interrupt request
+	 * until it is acknowledged at the interrupt controller), but is
+	 * included to avoid ABI breakage and potentially for other
+	 * sorts of guest.
+	 *
+	 * There is a subtlety here: HV KVM does not test the
+	 * external_oneshot flag in the code that synthesizes
+	 * external interrupts for the guest just before entering
+	 * the guest.  That is OK even if userspace did do a
+	 * KVM_INTERRUPT_SET on a pseries guest vcpu, because the
+	 * caller (kvm_vcpu_ioctl_interrupt) does a kvm_vcpu_kick()
+	 * which ends up doing a smp_send_reschedule(), which will
+	 * pull the guest all the way out to the host, meaning that
+	 * we will call kvmppc_core_prepare_to_enter() before entering
+	 * the guest again, and that will handle the external_oneshot
+	 * flag correctly.
+	 */
+	if (irq->irq == KVM_INTERRUPT_SET)
+		vcpu->arch.external_oneshot = 1;
 
-	kvmppc_book3s_queue_irqprio(vcpu, vec);
+	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
 }
 
 void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
 {
 	kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
-	kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
 }
 
 void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
@@ -278,7 +297,6 @@ static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
 		vec = BOOK3S_INTERRUPT_DECREMENTER;
 		break;
 	case BOOK3S_IRQPRIO_EXTERNAL:
-	case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
 		deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
 		vec = BOOK3S_INTERRUPT_EXTERNAL;
 		break;
@@ -352,8 +370,16 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
 		case BOOK3S_IRQPRIO_DECREMENTER:
 			/* DEC interrupts get cleared by mtdec */
 			return false;
-		case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
-			/* External interrupts get cleared by userspace */
+		case BOOK3S_IRQPRIO_EXTERNAL:
+			/*
+			 * External interrupts get cleared by userspace
+			 * except when set by the KVM_INTERRUPT ioctl with
+			 * KVM_INTERRUPT_SET (not KVM_INTERRUPT_SET_LEVEL).
+			 */
+			if (vcpu->arch.external_oneshot) {
+				vcpu->arch.external_oneshot = 0;
+				return true;
+			}
 			return false;
 	}
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 68e14afecac8..c615617e78ac 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -268,14 +268,13 @@ int kvmppc_mmu_hv_init(void)
 {
 	unsigned long host_lpid, rsvd_lpid;
 
-	if (!cpu_has_feature(CPU_FTR_HVMODE))
-		return -EINVAL;
-
 	if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
 		return -EINVAL;
 
 	/* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
-	host_lpid = mfspr(SPRN_LPID);
+	host_lpid = 0;
+	if (cpu_has_feature(CPU_FTR_HVMODE))
+		host_lpid = mfspr(SPRN_LPID);
 	rsvd_lpid = LPID_RSVD;
 
 	kvmppc_init_lpid(rsvd_lpid + 1);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 998f8d089ac7..d68162ee159b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -10,6 +10,9 @@
 #include <linux/string.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/debugfs.h>
 
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
@@ -26,87 +29,74 @@
  */
 static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
 
-int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
-			   struct kvmppc_pte *gpte, bool data, bool iswrite)
+int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
+			       struct kvmppc_pte *gpte, u64 root,
+			       u64 *pte_ret_p)
 {
 	struct kvm *kvm = vcpu->kvm;
-	u32 pid;
 	int ret, level, ps;
-	__be64 prte, rpte;
-	unsigned long ptbl;
-	unsigned long root, pte, index;
-	unsigned long rts, bits, offset;
-	unsigned long gpa;
-	unsigned long proc_tbl_size;
-
-	/* Work out effective PID */
-	switch (eaddr >> 62) {
-	case 0:
-		pid = vcpu->arch.pid;
-		break;
-	case 3:
-		pid = 0;
-		break;
-	default:
-		return -EINVAL;
-	}
-	proc_tbl_size = 1 << ((kvm->arch.process_table & PRTS_MASK) + 12);
-	if (pid * 16 >= proc_tbl_size)
-		return -EINVAL;
-
-	/* Read partition table to find root of tree for effective PID */
-	ptbl = (kvm->arch.process_table & PRTB_MASK) + (pid * 16);
-	ret = kvm_read_guest(kvm, ptbl, &prte, sizeof(prte));
-	if (ret)
-		return ret;
+	unsigned long rts, bits, offset, index;
+	u64 pte, base, gpa;
+	__be64 rpte;
 
-	root = be64_to_cpu(prte);
 	rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) |
 		((root & RTS2_MASK) >> RTS2_SHIFT);
 	bits = root & RPDS_MASK;
-	root = root & RPDB_MASK;
+	base = root & RPDB_MASK;
 
 	offset = rts + 31;
 
-	/* current implementations only support 52-bit space */
+	/* Current implementations only support 52-bit space */
 	if (offset != 52)
 		return -EINVAL;
 
+	/* Walk each level of the radix tree */
 	for (level = 3; level >= 0; --level) {
+		u64 addr;
+		/* Check a valid size */
 		if (level && bits != p9_supported_radix_bits[level])
 			return -EINVAL;
 		if (level == 0 && !(bits == 5 || bits == 9))
 			return -EINVAL;
 		offset -= bits;
 		index = (eaddr >> offset) & ((1UL << bits) - 1);
-		/* check that low bits of page table base are zero */
-		if (root & ((1UL << (bits + 3)) - 1))
+		/* Check that low bits of page table base are zero */
+		if (base & ((1UL << (bits + 3)) - 1))
 			return -EINVAL;
-		ret = kvm_read_guest(kvm, root + index * 8,
-				     &rpte, sizeof(rpte));
-		if (ret)
+		/* Read the entry from guest memory */
+		addr = base + (index * sizeof(rpte));
+		ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
+		if (ret) {
+			if (pte_ret_p)
+				*pte_ret_p = addr;
 			return ret;
+		}
 		pte = __be64_to_cpu(rpte);
 		if (!(pte & _PAGE_PRESENT))
 			return -ENOENT;
+		/* Check if a leaf entry */
 		if (pte & _PAGE_PTE)
 			break;
-		bits = pte & 0x1f;
-		root = pte & 0x0fffffffffffff00ul;
+		/* Get ready to walk the next level */
+		base = pte & RPDB_MASK;
+		bits = pte & RPDS_MASK;
 	}
-	/* need a leaf at lowest level; 512GB pages not supported */
+
+	/* Need a leaf at lowest level; 512GB pages not supported */
 	if (level < 0 || level == 3)
 		return -EINVAL;
 
-	/* offset is now log base 2 of the page size */
+	/* We found a valid leaf PTE */
+	/* Offset is now log base 2 of the page size */
 	gpa = pte & 0x01fffffffffff000ul;
 	if (gpa & ((1ul << offset) - 1))
 		return -EINVAL;
-	gpa += eaddr & ((1ul << offset) - 1);
+	gpa |= eaddr & ((1ul << offset) - 1);
 	for (ps = MMU_PAGE_4K; ps < MMU_PAGE_COUNT; ++ps)
 		if (offset == mmu_psize_defs[ps].shift)
 			break;
 	gpte->page_size = ps;
+	gpte->page_shift = offset;
 
 	gpte->eaddr = eaddr;
 	gpte->raddr = gpa;
@@ -115,6 +105,77 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	gpte->may_read = !!(pte & _PAGE_READ);
 	gpte->may_write = !!(pte & _PAGE_WRITE);
 	gpte->may_execute = !!(pte & _PAGE_EXEC);
+
+	gpte->rc = pte & (_PAGE_ACCESSED | _PAGE_DIRTY);
+
+	if (pte_ret_p)
+		*pte_ret_p = pte;
+
+	return 0;
+}
+
+/*
+ * Used to walk a partition or process table radix tree in guest memory
+ * Note: We exploit the fact that a partition table and a process
+ * table have the same layout, a partition-scoped page table and a
+ * process-scoped page table have the same layout, and the 2nd
+ * doubleword of a partition table entry has the same layout as
+ * the PTCR register.
+ */
+int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
+				     struct kvmppc_pte *gpte, u64 table,
+				     int table_index, u64 *pte_ret_p)
+{
+	struct kvm *kvm = vcpu->kvm;
+	int ret;
+	unsigned long size, ptbl, root;
+	struct prtb_entry entry;
+
+	if ((table & PRTS_MASK) > 24)
+		return -EINVAL;
+	size = 1ul << ((table & PRTS_MASK) + 12);
+
+	/* Is the table big enough to contain this entry? */
+	if ((table_index * sizeof(entry)) >= size)
+		return -EINVAL;
+
+	/* Read the table to find the root of the radix tree */
+	ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
+	ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
+	if (ret)
+		return ret;
+
+	/* Root is stored in the first double word */
+	root = be64_to_cpu(entry.prtb0);
+
+	return kvmppc_mmu_walk_radix_tree(vcpu, eaddr, gpte, root, pte_ret_p);
+}
+
+int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+			   struct kvmppc_pte *gpte, bool data, bool iswrite)
+{
+	u32 pid;
+	u64 pte;
+	int ret;
+
+	/* Work out effective PID */
+	switch (eaddr >> 62) {
+	case 0:
+		pid = vcpu->arch.pid;
+		break;
+	case 3:
+		pid = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = kvmppc_mmu_radix_translate_table(vcpu, eaddr, gpte,
+				vcpu->kvm->arch.process_table, pid, &pte);
+	if (ret)
+		return ret;
+
+	/* Check privilege (applies only to process scoped translations) */
 	if (kvmppc_get_msr(vcpu) & MSR_PR) {
 		if (pte & _PAGE_PRIVILEGED) {
 			gpte->may_read = 0;
@@ -137,20 +198,46 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 }
 
 static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
-				    unsigned int pshift)
+				    unsigned int pshift, unsigned int lpid)
 {
 	unsigned long psize = PAGE_SIZE;
+	int psi;
+	long rc;
+	unsigned long rb;
 
 	if (pshift)
 		psize = 1UL << pshift;
+	else
+		pshift = PAGE_SHIFT;
 
 	addr &= ~(psize - 1);
-	radix__flush_tlb_lpid_page(kvm->arch.lpid, addr, psize);
+
+	if (!kvmhv_on_pseries()) {
+		radix__flush_tlb_lpid_page(lpid, addr, psize);
+		return;
+	}
+
+	psi = shift_to_mmu_psize(pshift);
+	rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
+	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
+				lpid, rb);
+	if (rc)
+		pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
 }
 
-static void kvmppc_radix_flush_pwc(struct kvm *kvm)
+static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid)
 {
-	radix__flush_pwc_lpid(kvm->arch.lpid);
+	long rc;
+
+	if (!kvmhv_on_pseries()) {
+		radix__flush_pwc_lpid(lpid);
+		return;
+	}
+
+	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
+				lpid, TLBIEL_INVAL_SET_LPID);
+	if (rc)
+		pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
 }
 
 static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
@@ -195,23 +282,38 @@ static void kvmppc_pmd_free(pmd_t *pmdp)
 	kmem_cache_free(kvm_pmd_cache, pmdp);
 }
 
-static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
-			     unsigned long gpa, unsigned int shift)
+/* Called with kvm->mmu_lock held */
+void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
+		      unsigned int shift, struct kvm_memory_slot *memslot,
+		      unsigned int lpid)
 
 {
-	unsigned long page_size = 1ul << shift;
 	unsigned long old;
+	unsigned long gfn = gpa >> PAGE_SHIFT;
+	unsigned long page_size = PAGE_SIZE;
+	unsigned long hpa;
 
 	old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift);
-	kvmppc_radix_tlbie_page(kvm, gpa, shift);
-	if (old & _PAGE_DIRTY) {
-		unsigned long gfn = gpa >> PAGE_SHIFT;
-		struct kvm_memory_slot *memslot;
+	kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
+
+	/* The following only applies to L1 entries */
+	if (lpid != kvm->arch.lpid)
+		return;
 
+	if (!memslot) {
 		memslot = gfn_to_memslot(kvm, gfn);
-		if (memslot && memslot->dirty_bitmap)
-			kvmppc_update_dirty_map(memslot, gfn, page_size);
+		if (!memslot)
+			return;
 	}
+	if (shift)
+		page_size = 1ul << shift;
+
+	gpa &= ~(page_size - 1);
+	hpa = old & PTE_RPN_MASK;
+	kvmhv_remove_nest_rmap_range(kvm, memslot, gpa, hpa, page_size);
+
+	if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap)
+		kvmppc_update_dirty_map(memslot, gfn, page_size);
 }
 
 /*
@@ -224,7 +326,8 @@ static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
  * and emit a warning if encountered, but there may already be data
  * corruption due to the unexpected mappings.
  */
-static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full)
+static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full,
+				  unsigned int lpid)
 {
 	if (full) {
 		memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
@@ -238,14 +341,15 @@ static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full)
 			WARN_ON_ONCE(1);
 			kvmppc_unmap_pte(kvm, p,
 					 pte_pfn(*p) << PAGE_SHIFT,
-					 PAGE_SHIFT);
+					 PAGE_SHIFT, NULL, lpid);
 		}
 	}
 
 	kvmppc_pte_free(pte);
 }
 
-static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full)
+static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full,
+				  unsigned int lpid)
 {
 	unsigned long im;
 	pmd_t *p = pmd;
@@ -260,20 +364,21 @@ static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full)
 				WARN_ON_ONCE(1);
 				kvmppc_unmap_pte(kvm, (pte_t *)p,
 					 pte_pfn(*(pte_t *)p) << PAGE_SHIFT,
-					 PMD_SHIFT);
+					 PMD_SHIFT, NULL, lpid);
 			}
 		} else {
 			pte_t *pte;
 
 			pte = pte_offset_map(p, 0);
-			kvmppc_unmap_free_pte(kvm, pte, full);
+			kvmppc_unmap_free_pte(kvm, pte, full, lpid);
 			pmd_clear(p);
 		}
 	}
 	kvmppc_pmd_free(pmd);
 }
 
-static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud)
+static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud,
+				  unsigned int lpid)
 {
 	unsigned long iu;
 	pud_t *p = pud;
@@ -287,36 +392,40 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud)
 			pmd_t *pmd;
 
 			pmd = pmd_offset(p, 0);
-			kvmppc_unmap_free_pmd(kvm, pmd, true);
+			kvmppc_unmap_free_pmd(kvm, pmd, true, lpid);
 			pud_clear(p);
 		}
 	}
 	pud_free(kvm->mm, pud);
 }
 
-void kvmppc_free_radix(struct kvm *kvm)
+void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid)
 {
 	unsigned long ig;
-	pgd_t *pgd;
 
-	if (!kvm->arch.pgtable)
-		return;
-	pgd = kvm->arch.pgtable;
 	for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
 		pud_t *pud;
 
 		if (!pgd_present(*pgd))
 			continue;
 		pud = pud_offset(pgd, 0);
-		kvmppc_unmap_free_pud(kvm, pud);
+		kvmppc_unmap_free_pud(kvm, pud, lpid);
 		pgd_clear(pgd);
 	}
-	pgd_free(kvm->mm, kvm->arch.pgtable);
-	kvm->arch.pgtable = NULL;
+}
+
+void kvmppc_free_radix(struct kvm *kvm)
+{
+	if (kvm->arch.pgtable) {
+		kvmppc_free_pgtable_radix(kvm, kvm->arch.pgtable,
+					  kvm->arch.lpid);
+		pgd_free(kvm->mm, kvm->arch.pgtable);
+		kvm->arch.pgtable = NULL;
+	}
 }
 
 static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
-					      unsigned long gpa)
+					unsigned long gpa, unsigned int lpid)
 {
 	pte_t *pte = pte_offset_kernel(pmd, 0);
 
@@ -326,13 +435,13 @@ static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
 	 * flushing the PWC again.
 	 */
 	pmd_clear(pmd);
-	kvmppc_radix_flush_pwc(kvm);
+	kvmppc_radix_flush_pwc(kvm, lpid);
 
-	kvmppc_unmap_free_pte(kvm, pte, false);
+	kvmppc_unmap_free_pte(kvm, pte, false, lpid);
 }
 
 static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
-					unsigned long gpa)
+					unsigned long gpa, unsigned int lpid)
 {
 	pmd_t *pmd = pmd_offset(pud, 0);
 
@@ -342,9 +451,9 @@ static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
 	 * so can be freed without flushing the PWC again.
 	 */
 	pud_clear(pud);
-	kvmppc_radix_flush_pwc(kvm);
+	kvmppc_radix_flush_pwc(kvm, lpid);
 
-	kvmppc_unmap_free_pmd(kvm, pmd, false);
+	kvmppc_unmap_free_pmd(kvm, pmd, false, lpid);
 }
 
 /*
@@ -356,8 +465,10 @@ static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
  */
 #define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED))
 
-static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
-			     unsigned int level, unsigned long mmu_seq)
+int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
+		      unsigned long gpa, unsigned int level,
+		      unsigned long mmu_seq, unsigned int lpid,
+		      unsigned long *rmapp, struct rmap_nested **n_rmap)
 {
 	pgd_t *pgd;
 	pud_t *pud, *new_pud = NULL;
@@ -366,7 +477,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 	int ret;
 
 	/* Traverse the guest's 2nd-level tree, allocate new levels needed */
-	pgd = kvm->arch.pgtable + pgd_index(gpa);
+	pgd = pgtable + pgd_index(gpa);
 	pud = NULL;
 	if (pgd_present(*pgd))
 		pud = pud_offset(pgd, gpa);
@@ -423,7 +534,8 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			goto out_unlock;
 		}
 		/* Valid 1GB page here already, remove it */
-		kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT);
+		kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT, NULL,
+				 lpid);
 	}
 	if (level == 2) {
 		if (!pud_none(*pud)) {
@@ -432,9 +544,11 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			 * install a large page, so remove and free the page
 			 * table page.
 			 */
-			kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa);
+			kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa, lpid);
 		}
 		kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
+		if (rmapp && n_rmap)
+			kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
 		ret = 0;
 		goto out_unlock;
 	}
@@ -458,7 +572,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			WARN_ON_ONCE((pmd_val(*pmd) ^ pte_val(pte)) &
 							PTE_BITS_MUST_MATCH);
 			kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
-					      0, pte_val(pte), lgpa, PMD_SHIFT);
+					0, pte_val(pte), lgpa, PMD_SHIFT);
 			ret = 0;
 			goto out_unlock;
 		}
@@ -472,7 +586,8 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			goto out_unlock;
 		}
 		/* Valid 2MB page here already, remove it */
-		kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT);
+		kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT, NULL,
+				 lpid);
 	}
 	if (level == 1) {
 		if (!pmd_none(*pmd)) {
@@ -481,9 +596,11 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			 * install a large page, so remove and free the page
 			 * table page.
 			 */
-			kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa);
+			kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa, lpid);
 		}
 		kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
+		if (rmapp && n_rmap)
+			kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
 		ret = 0;
 		goto out_unlock;
 	}
@@ -508,6 +625,8 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 		goto out_unlock;
 	}
 	kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
+	if (rmapp && n_rmap)
+		kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
 	ret = 0;
 
  out_unlock:
@@ -521,95 +640,49 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 	return ret;
 }
 
-int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
-				   unsigned long ea, unsigned long dsisr)
+bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing,
+			     unsigned long gpa, unsigned int lpid)
+{
+	unsigned long pgflags;
+	unsigned int shift;
+	pte_t *ptep;
+
+	/*
+	 * Need to set an R or C bit in the 2nd-level tables;
+	 * since we are just helping out the hardware here,
+	 * it is sufficient to do what the hardware does.
+	 */
+	pgflags = _PAGE_ACCESSED;
+	if (writing)
+		pgflags |= _PAGE_DIRTY;
+	/*
+	 * We are walking the secondary (partition-scoped) page table here.
+	 * We can do this without disabling irq because the Linux MM
+	 * subsystem doesn't do THP splits and collapses on this tree.
+	 */
+	ptep = __find_linux_pte(pgtable, gpa, NULL, &shift);
+	if (ptep && pte_present(*ptep) && (!writing || pte_write(*ptep))) {
+		kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift);
+		return true;
+	}
+	return false;
+}
+
+int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
+				   unsigned long gpa,
+				   struct kvm_memory_slot *memslot,
+				   bool writing, bool kvm_ro,
+				   pte_t *inserted_pte, unsigned int *levelp)
 {
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long mmu_seq;
-	unsigned long gpa, gfn, hva;
-	struct kvm_memory_slot *memslot;
 	struct page *page = NULL;
-	long ret;
-	bool writing;
+	unsigned long mmu_seq;
+	unsigned long hva, gfn = gpa >> PAGE_SHIFT;
 	bool upgrade_write = false;
 	bool *upgrade_p = &upgrade_write;
 	pte_t pte, *ptep;
-	unsigned long pgflags;
 	unsigned int shift, level;
-
-	/* Check for unusual errors */
-	if (dsisr & DSISR_UNSUPP_MMU) {
-		pr_err("KVM: Got unsupported MMU fault\n");
-		return -EFAULT;
-	}
-	if (dsisr & DSISR_BADACCESS) {
-		/* Reflect to the guest as DSI */
-		pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr);
-		kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
-		return RESUME_GUEST;
-	}
-
-	/* Translate the logical address and get the page */
-	gpa = vcpu->arch.fault_gpa & ~0xfffUL;
-	gpa &= ~0xF000000000000000ul;
-	gfn = gpa >> PAGE_SHIFT;
-	if (!(dsisr & DSISR_PRTABLE_FAULT))
-		gpa |= ea & 0xfff;
-	memslot = gfn_to_memslot(kvm, gfn);
-
-	/* No memslot means it's an emulated MMIO region */
-	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
-		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS |
-			     DSISR_SET_RC)) {
-			/*
-			 * Bad address in guest page table tree, or other
-			 * unusual error - reflect it to the guest as DSI.
-			 */
-			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
-			return RESUME_GUEST;
-		}
-		return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
-					      dsisr & DSISR_ISSTORE);
-	}
-
-	writing = (dsisr & DSISR_ISSTORE) != 0;
-	if (memslot->flags & KVM_MEM_READONLY) {
-		if (writing) {
-			/* give the guest a DSI */
-			dsisr = DSISR_ISSTORE | DSISR_PROTFAULT;
-			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
-			return RESUME_GUEST;
-		}
-		upgrade_p = NULL;
-	}
-
-	if (dsisr & DSISR_SET_RC) {
-		/*
-		 * Need to set an R or C bit in the 2nd-level tables;
-		 * since we are just helping out the hardware here,
-		 * it is sufficient to do what the hardware does.
-		 */
-		pgflags = _PAGE_ACCESSED;
-		if (writing)
-			pgflags |= _PAGE_DIRTY;
-		/*
-		 * We are walking the secondary page table here. We can do this
-		 * without disabling irq.
-		 */
-		spin_lock(&kvm->mmu_lock);
-		ptep = __find_linux_pte(kvm->arch.pgtable,
-					gpa, NULL, &shift);
-		if (ptep && pte_present(*ptep) &&
-		    (!writing || pte_write(*ptep))) {
-			kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
-						gpa, shift);
-			dsisr &= ~DSISR_SET_RC;
-		}
-		spin_unlock(&kvm->mmu_lock);
-		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
-			       DSISR_PROTFAULT | DSISR_SET_RC)))
-			return RESUME_GUEST;
-	}
+	int ret;
 
 	/* used to check for invalidations in progress */
 	mmu_seq = kvm->mmu_notifier_seq;
@@ -622,7 +695,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 * is that the page is writable.
 	 */
 	hva = gfn_to_hva_memslot(memslot, gfn);
-	if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
+	if (!kvm_ro && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
 		upgrade_write = true;
 	} else {
 		unsigned long pfn;
@@ -690,7 +763,12 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	}
 
 	/* Allocate space in the tree and write the PTE */
-	ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+	ret = kvmppc_create_pte(kvm, kvm->arch.pgtable, pte, gpa, level,
+				mmu_seq, kvm->arch.lpid, NULL, NULL);
+	if (inserted_pte)
+		*inserted_pte = pte;
+	if (levelp)
+		*levelp = level;
 
 	if (page) {
 		if (!ret && (pte_val(pte) & _PAGE_WRITE))
@@ -698,6 +776,82 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		put_page(page);
 	}
 
+	return ret;
+}
+
+int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+				   unsigned long ea, unsigned long dsisr)
+{
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long gpa, gfn;
+	struct kvm_memory_slot *memslot;
+	long ret;
+	bool writing = !!(dsisr & DSISR_ISSTORE);
+	bool kvm_ro = false;
+
+	/* Check for unusual errors */
+	if (dsisr & DSISR_UNSUPP_MMU) {
+		pr_err("KVM: Got unsupported MMU fault\n");
+		return -EFAULT;
+	}
+	if (dsisr & DSISR_BADACCESS) {
+		/* Reflect to the guest as DSI */
+		pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr);
+		kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
+		return RESUME_GUEST;
+	}
+
+	/* Translate the logical address */
+	gpa = vcpu->arch.fault_gpa & ~0xfffUL;
+	gpa &= ~0xF000000000000000ul;
+	gfn = gpa >> PAGE_SHIFT;
+	if (!(dsisr & DSISR_PRTABLE_FAULT))
+		gpa |= ea & 0xfff;
+
+	/* Get the corresponding memslot */
+	memslot = gfn_to_memslot(kvm, gfn);
+
+	/* No memslot means it's an emulated MMIO region */
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
+		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS |
+			     DSISR_SET_RC)) {
+			/*
+			 * Bad address in guest page table tree, or other
+			 * unusual error - reflect it to the guest as DSI.
+			 */
+			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
+			return RESUME_GUEST;
+		}
+		return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing);
+	}
+
+	if (memslot->flags & KVM_MEM_READONLY) {
+		if (writing) {
+			/* give the guest a DSI */
+			kvmppc_core_queue_data_storage(vcpu, ea, DSISR_ISSTORE |
+						       DSISR_PROTFAULT);
+			return RESUME_GUEST;
+		}
+		kvm_ro = true;
+	}
+
+	/* Failed to set the reference/change bits */
+	if (dsisr & DSISR_SET_RC) {
+		spin_lock(&kvm->mmu_lock);
+		if (kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable,
+					    writing, gpa, kvm->arch.lpid))
+			dsisr &= ~DSISR_SET_RC;
+		spin_unlock(&kvm->mmu_lock);
+
+		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
+			       DSISR_PROTFAULT | DSISR_SET_RC)))
+			return RESUME_GUEST;
+	}
+
+	/* Try to insert a pte */
+	ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot, writing,
+					     kvm_ro, NULL, NULL);
+
 	if (ret == 0 || ret == -EAGAIN)
 		ret = RESUME_GUEST;
 	return ret;
@@ -710,20 +864,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	pte_t *ptep;
 	unsigned long gpa = gfn << PAGE_SHIFT;
 	unsigned int shift;
-	unsigned long old;
 
 	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
-	if (ptep && pte_present(*ptep)) {
-		old = kvmppc_radix_update_pte(kvm, ptep, ~0UL, 0,
-					      gpa, shift);
-		kvmppc_radix_tlbie_page(kvm, gpa, shift);
-		if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
-			unsigned long psize = PAGE_SIZE;
-			if (shift)
-				psize = 1ul << shift;
-			kvmppc_update_dirty_map(memslot, gfn, psize);
-		}
-	}
+	if (ptep && pte_present(*ptep))
+		kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
+				 kvm->arch.lpid);
 	return 0;				
 }
 
@@ -778,7 +923,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
 			ret = 1 << (shift - PAGE_SHIFT);
 		kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
 					gpa, shift);
-		kvmppc_radix_tlbie_page(kvm, gpa, shift);
+		kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid);
 	}
 	return ret;
 }
@@ -863,6 +1008,215 @@ static void pmd_ctor(void *addr)
 	memset(addr, 0, RADIX_PMD_TABLE_SIZE);
 }
 
+struct debugfs_radix_state {
+	struct kvm	*kvm;
+	struct mutex	mutex;
+	unsigned long	gpa;
+	int		lpid;
+	int		chars_left;
+	int		buf_index;
+	char		buf[128];
+	u8		hdr;
+};
+
+static int debugfs_radix_open(struct inode *inode, struct file *file)
+{
+	struct kvm *kvm = inode->i_private;
+	struct debugfs_radix_state *p;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	kvm_get_kvm(kvm);
+	p->kvm = kvm;
+	mutex_init(&p->mutex);
+	file->private_data = p;
+
+	return nonseekable_open(inode, file);
+}
+
+static int debugfs_radix_release(struct inode *inode, struct file *file)
+{
+	struct debugfs_radix_state *p = file->private_data;
+
+	kvm_put_kvm(p->kvm);
+	kfree(p);
+	return 0;
+}
+
+static ssize_t debugfs_radix_read(struct file *file, char __user *buf,
+				 size_t len, loff_t *ppos)
+{
+	struct debugfs_radix_state *p = file->private_data;
+	ssize_t ret, r;
+	unsigned long n;
+	struct kvm *kvm;
+	unsigned long gpa;
+	pgd_t *pgt;
+	struct kvm_nested_guest *nested;
+	pgd_t pgd, *pgdp;
+	pud_t pud, *pudp;
+	pmd_t pmd, *pmdp;
+	pte_t *ptep;
+	int shift;
+	unsigned long pte;
+
+	kvm = p->kvm;
+	if (!kvm_is_radix(kvm))
+		return 0;
+
+	ret = mutex_lock_interruptible(&p->mutex);
+	if (ret)
+		return ret;
+
+	if (p->chars_left) {
+		n = p->chars_left;
+		if (n > len)
+			n = len;
+		r = copy_to_user(buf, p->buf + p->buf_index, n);
+		n -= r;
+		p->chars_left -= n;
+		p->buf_index += n;
+		buf += n;
+		len -= n;
+		ret = n;
+		if (r) {
+			if (!n)
+				ret = -EFAULT;
+			goto out;
+		}
+	}
+
+	gpa = p->gpa;
+	nested = NULL;
+	pgt = NULL;
+	while (len != 0 && p->lpid >= 0) {
+		if (gpa >= RADIX_PGTABLE_RANGE) {
+			gpa = 0;
+			pgt = NULL;
+			if (nested) {
+				kvmhv_put_nested(nested);
+				nested = NULL;
+			}
+			p->lpid = kvmhv_nested_next_lpid(kvm, p->lpid);
+			p->hdr = 0;
+			if (p->lpid < 0)
+				break;
+		}
+		if (!pgt) {
+			if (p->lpid == 0) {
+				pgt = kvm->arch.pgtable;
+			} else {
+				nested = kvmhv_get_nested(kvm, p->lpid, false);
+				if (!nested) {
+					gpa = RADIX_PGTABLE_RANGE;
+					continue;
+				}
+				pgt = nested->shadow_pgtable;
+			}
+		}
+		n = 0;
+		if (!p->hdr) {
+			if (p->lpid > 0)
+				n = scnprintf(p->buf, sizeof(p->buf),
+					      "\nNested LPID %d: ", p->lpid);
+			n += scnprintf(p->buf + n, sizeof(p->buf) - n,
+				      "pgdir: %lx\n", (unsigned long)pgt);
+			p->hdr = 1;
+			goto copy;
+		}
+
+		pgdp = pgt + pgd_index(gpa);
+		pgd = READ_ONCE(*pgdp);
+		if (!(pgd_val(pgd) & _PAGE_PRESENT)) {
+			gpa = (gpa & PGDIR_MASK) + PGDIR_SIZE;
+			continue;
+		}
+
+		pudp = pud_offset(&pgd, gpa);
+		pud = READ_ONCE(*pudp);
+		if (!(pud_val(pud) & _PAGE_PRESENT)) {
+			gpa = (gpa & PUD_MASK) + PUD_SIZE;
+			continue;
+		}
+		if (pud_val(pud) & _PAGE_PTE) {
+			pte = pud_val(pud);
+			shift = PUD_SHIFT;
+			goto leaf;
+		}
+
+		pmdp = pmd_offset(&pud, gpa);
+		pmd = READ_ONCE(*pmdp);
+		if (!(pmd_val(pmd) & _PAGE_PRESENT)) {
+			gpa = (gpa & PMD_MASK) + PMD_SIZE;
+			continue;
+		}
+		if (pmd_val(pmd) & _PAGE_PTE) {
+			pte = pmd_val(pmd);
+			shift = PMD_SHIFT;
+			goto leaf;
+		}
+
+		ptep = pte_offset_kernel(&pmd, gpa);
+		pte = pte_val(READ_ONCE(*ptep));
+		if (!(pte & _PAGE_PRESENT)) {
+			gpa += PAGE_SIZE;
+			continue;
+		}
+		shift = PAGE_SHIFT;
+	leaf:
+		n = scnprintf(p->buf, sizeof(p->buf),
+			      " %lx: %lx %d\n", gpa, pte, shift);
+		gpa += 1ul << shift;
+	copy:
+		p->chars_left = n;
+		if (n > len)
+			n = len;
+		r = copy_to_user(buf, p->buf, n);
+		n -= r;
+		p->chars_left -= n;
+		p->buf_index = n;
+		buf += n;
+		len -= n;
+		ret += n;
+		if (r) {
+			if (!ret)
+				ret = -EFAULT;
+			break;
+		}
+	}
+	p->gpa = gpa;
+	if (nested)
+		kvmhv_put_nested(nested);
+
+ out:
+	mutex_unlock(&p->mutex);
+	return ret;
+}
+
+static ssize_t debugfs_radix_write(struct file *file, const char __user *buf,
+			   size_t len, loff_t *ppos)
+{
+	return -EACCES;
+}
+
+static const struct file_operations debugfs_radix_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = debugfs_radix_open,
+	.release = debugfs_radix_release,
+	.read	 = debugfs_radix_read,
+	.write	 = debugfs_radix_write,
+	.llseek	 = generic_file_llseek,
+};
+
+void kvmhv_radix_debugfs_init(struct kvm *kvm)
+{
+	kvm->arch.radix_dentry = debugfs_create_file("radix", 0400,
+						     kvm->arch.debugfs_dir, kvm,
+						     &debugfs_radix_fops);
+}
+
 int kvmppc_radix_init(void)
 {
 	unsigned long size = sizeof(void *) << RADIX_PTE_INDEX_SIZE;
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 9a3f2646ecc7..62a8d03ba7e9 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -363,6 +363,40 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 	return ret;
 }
 
+static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
+		unsigned long tce)
+{
+	unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+	enum dma_data_direction dir = iommu_tce_direction(tce);
+	struct kvmppc_spapr_tce_iommu_table *stit;
+	unsigned long ua = 0;
+
+	/* Allow userspace to poison TCE table */
+	if (dir == DMA_NONE)
+		return H_SUCCESS;
+
+	if (iommu_tce_check_gpa(stt->page_shift, gpa))
+		return H_TOO_HARD;
+
+	if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
+		return H_TOO_HARD;
+
+	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
+		unsigned long hpa = 0;
+		struct mm_iommu_table_group_mem_t *mem;
+		long shift = stit->tbl->it_page_shift;
+
+		mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift);
+		if (!mem)
+			return H_TOO_HARD;
+
+		if (mm_iommu_ua_to_hpa(mem, ua, shift, &hpa))
+			return H_TOO_HARD;
+	}
+
+	return H_SUCCESS;
+}
+
 static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
 {
 	unsigned long hpa = 0;
@@ -376,11 +410,10 @@ static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
 {
 	struct mm_iommu_table_group_mem_t *mem = NULL;
 	const unsigned long pgsize = 1ULL << tbl->it_page_shift;
-	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
 
 	if (!pua)
-		/* it_userspace allocation might be delayed */
-		return H_TOO_HARD;
+		return H_SUCCESS;
 
 	mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua), pgsize);
 	if (!mem)
@@ -401,7 +434,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
 	long ret;
 
 	if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
-		return H_HARDWARE;
+		return H_TOO_HARD;
 
 	if (dir == DMA_NONE)
 		return H_SUCCESS;
@@ -449,15 +482,15 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 		return H_TOO_HARD;
 
 	if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
-		return H_HARDWARE;
+		return H_TOO_HARD;
 
 	if (mm_iommu_mapped_inc(mem))
-		return H_CLOSED;
+		return H_TOO_HARD;
 
 	ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
 	if (WARN_ON_ONCE(ret)) {
 		mm_iommu_mapped_dec(mem);
-		return H_HARDWARE;
+		return H_TOO_HARD;
 	}
 
 	if (dir != DMA_NONE)
@@ -517,8 +550,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
-	if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
-			tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) {
+	if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
 		ret = H_PARAMETER;
 		goto unlock_exit;
 	}
@@ -533,14 +565,10 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
 					entry, ua, dir);
 
-		if (ret == H_SUCCESS)
-			continue;
-
-		if (ret == H_TOO_HARD)
+		if (ret != H_SUCCESS) {
+			kvmppc_clear_tce(stit->tbl, entry);
 			goto unlock_exit;
-
-		WARN_ON_ONCE(1);
-		kvmppc_clear_tce(stit->tbl, entry);
+		}
 	}
 
 	kvmppc_tce_put(stt, entry, tce);
@@ -583,7 +611,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 		return ret;
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
-	if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
+	if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
 		ret = H_TOO_HARD;
 		goto unlock_exit;
 	}
@@ -599,10 +627,26 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 		ret = kvmppc_tce_validate(stt, tce);
 		if (ret != H_SUCCESS)
 			goto unlock_exit;
+	}
+
+	for (i = 0; i < npages; ++i) {
+		/*
+		 * This looks unsafe, because we validate, then regrab
+		 * the TCE from userspace which could have been changed by
+		 * another thread.
+		 *
+		 * But it actually is safe, because the relevant checks will be
+		 * re-executed in the following code.  If userspace tries to
+		 * change this dodgily it will result in a messier failure mode
+		 * but won't threaten the host.
+		 */
+		if (get_user(tce, tces + i)) {
+			ret = H_TOO_HARD;
+			goto unlock_exit;
+		}
+		tce = be64_to_cpu(tce);
 
-		if (kvmppc_gpa_to_ua(vcpu->kvm,
-				tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
-				&ua, NULL))
+		if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
 			return H_PARAMETER;
 
 		list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -610,14 +654,10 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 					stit->tbl, entry + i, ua,
 					iommu_tce_direction(tce));
 
-			if (ret == H_SUCCESS)
-				continue;
-
-			if (ret == H_TOO_HARD)
+			if (ret != H_SUCCESS) {
+				kvmppc_clear_tce(stit->tbl, entry);
 				goto unlock_exit;
-
-			WARN_ON_ONCE(1);
-			kvmppc_clear_tce(stit->tbl, entry);
+			}
 		}
 
 		kvmppc_tce_put(stt, entry + i, tce);
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 6821ead4b4eb..2206bc729b9a 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -87,6 +87,7 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
 }
 EXPORT_SYMBOL_GPL(kvmppc_find_table);
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
  * Validates TCE address.
  * At the moment flags and page mask are validated.
@@ -94,14 +95,14 @@ EXPORT_SYMBOL_GPL(kvmppc_find_table);
  * to the table and user space is supposed to process them), we can skip
  * checking other things (such as TCE is a guest RAM address or the page
  * was actually allocated).
- *
- * WARNING: This will be called in real-mode on HV KVM and virtual
- *          mode on PR KVM
  */
-long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
+static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
+		unsigned long tce)
 {
 	unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
 	enum dma_data_direction dir = iommu_tce_direction(tce);
+	struct kvmppc_spapr_tce_iommu_table *stit;
+	unsigned long ua = 0;
 
 	/* Allow userspace to poison TCE table */
 	if (dir == DMA_NONE)
@@ -110,9 +111,25 @@ long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
 	if (iommu_tce_check_gpa(stt->page_shift, gpa))
 		return H_PARAMETER;
 
+	if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
+		return H_TOO_HARD;
+
+	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+		unsigned long hpa = 0;
+		struct mm_iommu_table_group_mem_t *mem;
+		long shift = stit->tbl->it_page_shift;
+
+		mem = mm_iommu_lookup_rm(stt->kvm->mm, ua, 1ULL << shift);
+		if (!mem)
+			return H_TOO_HARD;
+
+		if (mm_iommu_ua_to_hpa_rm(mem, ua, shift, &hpa))
+			return H_TOO_HARD;
+	}
+
 	return H_SUCCESS;
 }
-EXPORT_SYMBOL_GPL(kvmppc_tce_validate);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 /* Note on the use of page_address() in real mode,
  *
@@ -164,10 +181,10 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
 }
 EXPORT_SYMBOL_GPL(kvmppc_tce_put);
 
-long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
+long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
 		unsigned long *ua, unsigned long **prmap)
 {
-	unsigned long gfn = gpa >> PAGE_SHIFT;
+	unsigned long gfn = tce >> PAGE_SHIFT;
 	struct kvm_memory_slot *memslot;
 
 	memslot = search_memslots(kvm_memslots(kvm), gfn);
@@ -175,7 +192,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
 		return -EINVAL;
 
 	*ua = __gfn_to_hva_memslot(memslot, gfn) |
-		(gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+		(tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	if (prmap)
@@ -184,7 +201,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
+EXPORT_SYMBOL_GPL(kvmppc_tce_to_ua);
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
@@ -197,7 +214,7 @@ static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
 
 	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
 				(*direction == DMA_BIDIRECTIONAL))) {
-		__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
+		__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
 		/*
 		 * kvmppc_rm_tce_iommu_do_map() updates the UA cache after
 		 * calling this so we still get here a valid UA.
@@ -223,7 +240,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
 {
 	struct mm_iommu_table_group_mem_t *mem = NULL;
 	const unsigned long pgsize = 1ULL << tbl->it_page_shift;
-	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
+	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
 
 	if (!pua)
 		/* it_userspace allocation might be delayed */
@@ -287,7 +304,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 {
 	long ret;
 	unsigned long hpa = 0;
-	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
+	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
 	struct mm_iommu_table_group_mem_t *mem;
 
 	if (!pua)
@@ -300,10 +317,10 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 
 	if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift,
 			&hpa)))
-		return H_HARDWARE;
+		return H_TOO_HARD;
 
 	if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
-		return H_CLOSED;
+		return H_TOO_HARD;
 
 	ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
 	if (ret) {
@@ -368,13 +385,12 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 	if (ret != H_SUCCESS)
 		return ret;
 
-	ret = kvmppc_tce_validate(stt, tce);
+	ret = kvmppc_rm_tce_validate(stt, tce);
 	if (ret != H_SUCCESS)
 		return ret;
 
 	dir = iommu_tce_direction(tce);
-	if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
-			tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
+	if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
 		return H_PARAMETER;
 
 	entry = ioba >> stt->page_shift;
@@ -387,14 +403,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
 					stit->tbl, entry, ua, dir);
 
-		if (ret == H_SUCCESS)
-			continue;
-
-		if (ret == H_TOO_HARD)
+		if (ret != H_SUCCESS) {
+			kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
 			return ret;
-
-		WARN_ON_ONCE_RM(1);
-		kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+		}
 	}
 
 	kvmppc_tce_put(stt, entry, tce);
@@ -480,7 +492,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 		 */
 		struct mm_iommu_table_group_mem_t *mem;
 
-		if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL))
+		if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL))
 			return H_TOO_HARD;
 
 		mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
@@ -496,12 +508,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 		 * We do not require memory to be preregistered in this case
 		 * so lock rmap and do __find_linux_pte_or_hugepte().
 		 */
-		if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
+		if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
 			return H_TOO_HARD;
 
 		rmap = (void *) vmalloc_to_phys(rmap);
 		if (WARN_ON_ONCE_RM(!rmap))
-			return H_HARDWARE;
+			return H_TOO_HARD;
 
 		/*
 		 * Synchronize with the MMU notifier callbacks in
@@ -521,14 +533,16 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 	for (i = 0; i < npages; ++i) {
 		unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
 
-		ret = kvmppc_tce_validate(stt, tce);
+		ret = kvmppc_rm_tce_validate(stt, tce);
 		if (ret != H_SUCCESS)
 			goto unlock_exit;
+	}
+
+	for (i = 0; i < npages; ++i) {
+		unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
 
 		ua = 0;
-		if (kvmppc_gpa_to_ua(vcpu->kvm,
-				tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
-				&ua, NULL))
+		if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
 			return H_PARAMETER;
 
 		list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -536,14 +550,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 					stit->tbl, entry + i, ua,
 					iommu_tce_direction(tce));
 
-			if (ret == H_SUCCESS)
-				continue;
-
-			if (ret == H_TOO_HARD)
+			if (ret != H_SUCCESS) {
+				kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl,
+						entry);
 				goto unlock_exit;
-
-			WARN_ON_ONCE_RM(1);
-			kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+			}
 		}
 
 		kvmppc_tce_put(stt, entry + i, tce);
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 36b11c5a0dbb..8c7e933e942e 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -36,7 +36,6 @@
 #define OP_31_XOP_MTSR		210
 #define OP_31_XOP_MTSRIN	242
 #define OP_31_XOP_TLBIEL	274
-#define OP_31_XOP_TLBIE		306
 /* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */
 #define OP_31_XOP_FAKE_SC1	308
 #define OP_31_XOP_SLBMTE	402
@@ -110,7 +109,7 @@ static inline void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu)
 	vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
 	vcpu->arch.tar_tm = vcpu->arch.tar;
 	vcpu->arch.lr_tm = vcpu->arch.regs.link;
-	vcpu->arch.cr_tm = vcpu->arch.cr;
+	vcpu->arch.cr_tm = vcpu->arch.regs.ccr;
 	vcpu->arch.xer_tm = vcpu->arch.regs.xer;
 	vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
 }
@@ -129,7 +128,7 @@ static inline void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu)
 	vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
 	vcpu->arch.tar = vcpu->arch.tar_tm;
 	vcpu->arch.regs.link = vcpu->arch.lr_tm;
-	vcpu->arch.cr = vcpu->arch.cr_tm;
+	vcpu->arch.regs.ccr = vcpu->arch.cr_tm;
 	vcpu->arch.regs.xer = vcpu->arch.xer_tm;
 	vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
 }
@@ -141,7 +140,7 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val)
 	uint64_t texasr;
 
 	/* CR0 = 0 | MSR[TS] | 0 */
-	vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) |
+	vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
 		(((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
 		 << CR0_SHIFT);
 
@@ -220,7 +219,7 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
 	tm_abort(ra_val);
 
 	/* CR0 = 0 | MSR[TS] | 0 */
-	vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) |
+	vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
 		(((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
 		 << CR0_SHIFT);
 
@@ -494,8 +493,8 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 			if (!(kvmppc_get_msr(vcpu) & MSR_PR)) {
 				preempt_disable();
-				vcpu->arch.cr = (CR0_TBEGIN_FAILURE |
-				  (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)));
+				vcpu->arch.regs.ccr = (CR0_TBEGIN_FAILURE |
+				  (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)));
 
 				vcpu->arch.texasr = (TEXASR_FS | TEXASR_EXACT |
 					(((u64)(TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3e3a71594e63..bf8def2159c3 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -50,6 +50,7 @@
 #include <asm/reg.h>
 #include <asm/ppc-opcode.h>
 #include <asm/asm-prototypes.h>
+#include <asm/archrandom.h>
 #include <asm/debug.h>
 #include <asm/disassemble.h>
 #include <asm/cputable.h>
@@ -104,6 +105,10 @@ static bool indep_threads_mode = true;
 module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
 
+static bool one_vm_per_core;
+module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires indep_threads_mode=N)");
+
 #ifdef CONFIG_KVM_XICS
 static struct kernel_param_ops module_param_ops = {
 	.set = param_set_int,
@@ -117,6 +122,16 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644);
 MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
 #endif
 
+/* If set, guests are allowed to create and control nested guests */
+static bool nested = true;
+module_param(nested, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(nested, "Enable nested virtualization (only on POWER9)");
+
+static inline bool nesting_enabled(struct kvm *kvm)
+{
+	return kvm->arch.nested_enable && kvm_is_radix(kvm);
+}
+
 /* If set, the threads on each CPU core have to be in the same MMU mode */
 static bool no_mixing_hpt_and_radix;
 
@@ -173,6 +188,10 @@ static bool kvmppc_ipi_thread(int cpu)
 {
 	unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
 
+	/* If we're a nested hypervisor, fall back to ordinary IPIs for now */
+	if (kvmhv_on_pseries())
+		return false;
+
 	/* On POWER9 we can use msgsnd to IPI any cpu */
 	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 		msg |= get_hard_smp_processor_id(cpu);
@@ -410,8 +429,8 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
 	       vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
 	pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
 	       vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
-	pr_err("cr = %.8x  xer = %.16lx  dsisr = %.8x\n",
-	       vcpu->arch.cr, vcpu->arch.regs.xer, vcpu->arch.shregs.dsisr);
+	pr_err("cr = %.8lx  xer = %.16lx  dsisr = %.8x\n",
+	       vcpu->arch.regs.ccr, vcpu->arch.regs.xer, vcpu->arch.shregs.dsisr);
 	pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
 	pr_err("fault dar = %.16lx dsisr = %.8x\n",
 	       vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
@@ -730,8 +749,7 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
 	/*
 	 * Ensure that the read of vcore->dpdes comes after the read
 	 * of vcpu->doorbell_request.  This barrier matches the
-	 * lwsync in book3s_hv_rmhandlers.S just before the
-	 * fast_guest_return label.
+	 * smb_wmb() in kvmppc_guest_entry_inject().
 	 */
 	smp_rmb();
 	vc = vcpu->arch.vcore;
@@ -912,6 +930,19 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 			break;
 		}
 		return RESUME_HOST;
+	case H_SET_DABR:
+		ret = kvmppc_h_set_dabr(vcpu, kvmppc_get_gpr(vcpu, 4));
+		break;
+	case H_SET_XDABR:
+		ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4),
+						kvmppc_get_gpr(vcpu, 5));
+		break;
+	case H_GET_TCE:
+		ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
+						kvmppc_get_gpr(vcpu, 5));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
 	case H_PUT_TCE:
 		ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
 						kvmppc_get_gpr(vcpu, 5),
@@ -935,6 +966,32 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 		if (ret == H_TOO_HARD)
 			return RESUME_HOST;
 		break;
+	case H_RANDOM:
+		if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4]))
+			ret = H_HARDWARE;
+		break;
+
+	case H_SET_PARTITION_TABLE:
+		ret = H_FUNCTION;
+		if (nesting_enabled(vcpu->kvm))
+			ret = kvmhv_set_partition_table(vcpu);
+		break;
+	case H_ENTER_NESTED:
+		ret = H_FUNCTION;
+		if (!nesting_enabled(vcpu->kvm))
+			break;
+		ret = kvmhv_enter_nested_guest(vcpu);
+		if (ret == H_INTERRUPT) {
+			kvmppc_set_gpr(vcpu, 3, 0);
+			return -EINTR;
+		}
+		break;
+	case H_TLB_INVALIDATE:
+		ret = H_FUNCTION;
+		if (nesting_enabled(vcpu->kvm))
+			ret = kvmhv_do_nested_tlbie(vcpu);
+		break;
+
 	default:
 		return RESUME_HOST;
 	}
@@ -943,6 +1000,24 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	return RESUME_GUEST;
 }
 
+/*
+ * Handle H_CEDE in the nested virtualization case where we haven't
+ * called the real-mode hcall handlers in book3s_hv_rmhandlers.S.
+ * This has to be done early, not in kvmppc_pseries_do_hcall(), so
+ * that the cede logic in kvmppc_run_single_vcpu() works properly.
+ */
+static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.shregs.msr |= MSR_EE;
+	vcpu->arch.ceded = 1;
+	smp_mb();
+	if (vcpu->arch.prodded) {
+		vcpu->arch.prodded = 0;
+		smp_mb();
+		vcpu->arch.ceded = 0;
+	}
+}
+
 static int kvmppc_hcall_impl_hv(unsigned long cmd)
 {
 	switch (cmd) {
@@ -1085,7 +1160,6 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
 	return RESUME_GUEST;
 }
 
-/* Called with vcpu->arch.vcore->lock held */
 static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				 struct task_struct *tsk)
 {
@@ -1190,7 +1264,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 	case BOOK3S_INTERRUPT_H_INST_STORAGE:
 		vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
-		vcpu->arch.fault_dsisr = 0;
+		vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr &
+			DSISR_SRR1_MATCH_64S;
+		if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
+			vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
 		r = RESUME_PAGE_FAULT;
 		break;
 	/*
@@ -1206,10 +1283,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				swab32(vcpu->arch.emul_inst) :
 				vcpu->arch.emul_inst;
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
-			/* Need vcore unlocked to call kvmppc_get_last_inst */
-			spin_unlock(&vcpu->arch.vcore->lock);
 			r = kvmppc_emulate_debug_inst(run, vcpu);
-			spin_lock(&vcpu->arch.vcore->lock);
 		} else {
 			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
 			r = RESUME_GUEST;
@@ -1225,12 +1299,8 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
 		r = EMULATE_FAIL;
 		if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
-		    cpu_has_feature(CPU_FTR_ARCH_300)) {
-			/* Need vcore unlocked to call kvmppc_get_last_inst */
-			spin_unlock(&vcpu->arch.vcore->lock);
+		    cpu_has_feature(CPU_FTR_ARCH_300))
 			r = kvmppc_emulate_doorbell_instr(vcpu);
-			spin_lock(&vcpu->arch.vcore->lock);
-		}
 		if (r == EMULATE_FAIL) {
 			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
 			r = RESUME_GUEST;
@@ -1265,6 +1335,104 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	return r;
 }
 
+static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
+{
+	int r;
+	int srcu_idx;
+
+	vcpu->stat.sum_exits++;
+
+	/*
+	 * This can happen if an interrupt occurs in the last stages
+	 * of guest entry or the first stages of guest exit (i.e. after
+	 * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+	 * and before setting it to KVM_GUEST_MODE_HOST_HV).
+	 * That can happen due to a bug, or due to a machine check
+	 * occurring at just the wrong time.
+	 */
+	if (vcpu->arch.shregs.msr & MSR_HV) {
+		pr_emerg("KVM trap in HV mode while nested!\n");
+		pr_emerg("trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+			 vcpu->arch.trap, kvmppc_get_pc(vcpu),
+			 vcpu->arch.shregs.msr);
+		kvmppc_dump_regs(vcpu);
+		return RESUME_HOST;
+	}
+	switch (vcpu->arch.trap) {
+	/* We're good on these - the host merely wanted to get our attention */
+	case BOOK3S_INTERRUPT_HV_DECREMENTER:
+		vcpu->stat.dec_exits++;
+		r = RESUME_GUEST;
+		break;
+	case BOOK3S_INTERRUPT_EXTERNAL:
+		vcpu->stat.ext_intr_exits++;
+		r = RESUME_HOST;
+		break;
+	case BOOK3S_INTERRUPT_H_DOORBELL:
+	case BOOK3S_INTERRUPT_H_VIRT:
+		vcpu->stat.ext_intr_exits++;
+		r = RESUME_GUEST;
+		break;
+	/* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
+	case BOOK3S_INTERRUPT_HMI:
+	case BOOK3S_INTERRUPT_PERFMON:
+	case BOOK3S_INTERRUPT_SYSTEM_RESET:
+		r = RESUME_GUEST;
+		break;
+	case BOOK3S_INTERRUPT_MACHINE_CHECK:
+		/* Pass the machine check to the L1 guest */
+		r = RESUME_HOST;
+		/* Print the MCE event to host console. */
+		machine_check_print_event_info(&vcpu->arch.mce_evt, false);
+		break;
+	/*
+	 * We get these next two if the guest accesses a page which it thinks
+	 * it has mapped but which is not actually present, either because
+	 * it is for an emulated I/O device or because the corresonding
+	 * host page has been paged out.
+	 */
+	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
+		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+		r = kvmhv_nested_page_fault(vcpu);
+		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+		break;
+	case BOOK3S_INTERRUPT_H_INST_STORAGE:
+		vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
+		vcpu->arch.fault_dsisr = kvmppc_get_msr(vcpu) &
+					 DSISR_SRR1_MATCH_64S;
+		if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
+			vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
+		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+		r = kvmhv_nested_page_fault(vcpu);
+		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+		break;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case BOOK3S_INTERRUPT_HV_SOFTPATCH:
+		/*
+		 * This occurs for various TM-related instructions that
+		 * we need to emulate on POWER9 DD2.2.  We have already
+		 * handled the cases where the guest was in real-suspend
+		 * mode and was transitioning to transactional state.
+		 */
+		r = kvmhv_p9_tm_emulation(vcpu);
+		break;
+#endif
+
+	case BOOK3S_INTERRUPT_HV_RM_HARD:
+		vcpu->arch.trap = 0;
+		r = RESUME_GUEST;
+		if (!xive_enabled())
+			kvmppc_xics_rm_complete(vcpu, 0);
+		break;
+	default:
+		r = RESUME_HOST;
+		break;
+	}
+
+	return r;
+}
+
 static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu,
 					    struct kvm_sregs *sregs)
 {
@@ -1555,6 +1723,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_ONLINE:
 		*val = get_reg_val(id, vcpu->arch.online);
 		break;
+	case KVM_REG_PPC_PTCR:
+		*val = get_reg_val(id, vcpu->kvm->arch.l1_ptcr);
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -1786,6 +1957,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 			atomic_dec(&vcpu->arch.vcore->online_count);
 		vcpu->arch.online = i;
 		break;
+	case KVM_REG_PPC_PTCR:
+		vcpu->kvm->arch.l1_ptcr = set_reg_val(id, *val);
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -2019,15 +2193,18 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	 * Set the default HFSCR for the guest from the host value.
 	 * This value is only used on POWER9.
 	 * On POWER9, we want to virtualize the doorbell facility, so we
-	 * turn off the HFSCR bit, which causes those instructions to trap.
+	 * don't set the HFSCR_MSGP bit, and that causes those instructions
+	 * to trap and then we emulate them.
 	 */
-	vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
-	if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+	vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
+		HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP;
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
+		if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+			vcpu->arch.hfscr |= HFSCR_TM;
+	}
+	if (cpu_has_feature(CPU_FTR_TM_COMP))
 		vcpu->arch.hfscr |= HFSCR_TM;
-	else if (!cpu_has_feature(CPU_FTR_TM_COMP))
-		vcpu->arch.hfscr &= ~HFSCR_TM;
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
-		vcpu->arch.hfscr &= ~HFSCR_MSGP;
 
 	kvmppc_mmu_book3s_hv_init(vcpu);
 
@@ -2242,10 +2419,18 @@ static void kvmppc_release_hwthread(int cpu)
 
 static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
 {
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
+	cpumask_t *cpu_in_guest;
 	int i;
 
 	cpu = cpu_first_thread_sibling(cpu);
-	cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
+	if (nested) {
+		cpumask_set_cpu(cpu, &nested->need_tlb_flush);
+		cpu_in_guest = &nested->cpu_in_guest;
+	} else {
+		cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
+		cpu_in_guest = &kvm->arch.cpu_in_guest;
+	}
 	/*
 	 * Make sure setting of bit in need_tlb_flush precedes
 	 * testing of cpu_in_guest bits.  The matching barrier on
@@ -2253,13 +2438,23 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
 	 */
 	smp_mb();
 	for (i = 0; i < threads_per_core; ++i)
-		if (cpumask_test_cpu(cpu + i, &kvm->arch.cpu_in_guest))
+		if (cpumask_test_cpu(cpu + i, cpu_in_guest))
 			smp_call_function_single(cpu + i, do_nothing, NULL, 1);
 }
 
 static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
 {
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
 	struct kvm *kvm = vcpu->kvm;
+	int prev_cpu;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return;
+
+	if (nested)
+		prev_cpu = nested->prev_cpu[vcpu->arch.nested_vcpu_id];
+	else
+		prev_cpu = vcpu->arch.prev_cpu;
 
 	/*
 	 * With radix, the guest can do TLB invalidations itself,
@@ -2273,12 +2468,46 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
 	 * ran to flush the TLB.  The TLB is shared between threads,
 	 * so we use a single bit in .need_tlb_flush for all 4 threads.
 	 */
-	if (vcpu->arch.prev_cpu != pcpu) {
-		if (vcpu->arch.prev_cpu >= 0 &&
-		    cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
+	if (prev_cpu != pcpu) {
+		if (prev_cpu >= 0 &&
+		    cpu_first_thread_sibling(prev_cpu) !=
 		    cpu_first_thread_sibling(pcpu))
-			radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
-		vcpu->arch.prev_cpu = pcpu;
+			radix_flush_cpu(kvm, prev_cpu, vcpu);
+		if (nested)
+			nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
+		else
+			vcpu->arch.prev_cpu = pcpu;
+	}
+}
+
+static void kvmppc_radix_check_need_tlb_flush(struct kvm *kvm, int pcpu,
+					      struct kvm_nested_guest *nested)
+{
+	cpumask_t *need_tlb_flush;
+	int lpid;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		pcpu &= ~0x3UL;
+
+	if (nested) {
+		lpid = nested->shadow_lpid;
+		need_tlb_flush = &nested->need_tlb_flush;
+	} else {
+		lpid = kvm->arch.lpid;
+		need_tlb_flush = &kvm->arch.need_tlb_flush;
+	}
+
+	mtspr(SPRN_LPID, lpid);
+	isync();
+	smp_mb();
+
+	if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
+		radix__local_flush_tlb_lpid_guest(lpid);
+		/* Clear the bit after the TLB flush */
+		cpumask_clear_cpu(pcpu, need_tlb_flush);
 	}
 }
 
@@ -2493,6 +2722,10 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
 	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
 		return false;
 
+	/* In one_vm_per_core mode, require all vcores to be from the same vm */
+	if (one_vm_per_core && vc->kvm != cip->vc[0]->kvm)
+		return false;
+
 	/* Some POWER9 chips require all threads to be in the same MMU mode */
 	if (no_mixing_hpt_and_radix &&
 	    kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
@@ -2600,6 +2833,14 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 	spin_lock(&vc->lock);
 	now = get_tb();
 	for_each_runnable_thread(i, vcpu, vc) {
+		/*
+		 * It's safe to unlock the vcore in the loop here, because
+		 * for_each_runnable_thread() is safe against removal of
+		 * the vcpu, and the vcore state is VCORE_EXITING here,
+		 * so any vcpus becoming runnable will have their arch.trap
+		 * set to zero and can't actually run in the guest.
+		 */
+		spin_unlock(&vc->lock);
 		/* cancel pending dec exception if dec is positive */
 		if (now < vcpu->arch.dec_expires &&
 		    kvmppc_core_pending_dec(vcpu))
@@ -2615,6 +2856,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 		vcpu->arch.ret = ret;
 		vcpu->arch.trap = 0;
 
+		spin_lock(&vc->lock);
 		if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
 			if (vcpu->arch.pending_exceptions)
 				kvmppc_core_prepare_to_enter(vcpu);
@@ -2963,8 +3205,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		spin_unlock(&core_info.vc[sub]->lock);
 
 	if (kvm_is_radix(vc->kvm)) {
-		int tmp = pcpu;
-
 		/*
 		 * Do we need to flush the process scoped TLB for the LPAR?
 		 *
@@ -2975,17 +3215,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		 *
 		 * Hash must be flushed in realmode in order to use tlbiel.
 		 */
-		mtspr(SPRN_LPID, vc->kvm->arch.lpid);
-		isync();
-
-		if (cpu_has_feature(CPU_FTR_ARCH_300))
-			tmp &= ~0x3UL;
-
-		if (cpumask_test_cpu(tmp, &vc->kvm->arch.need_tlb_flush)) {
-			radix__local_flush_tlb_lpid_guest(vc->kvm->arch.lpid);
-			/* Clear the bit after the TLB flush */
-			cpumask_clear_cpu(tmp, &vc->kvm->arch.need_tlb_flush);
-		}
+		kvmppc_radix_check_need_tlb_flush(vc->kvm, pcpu, NULL);
 	}
 
 	/*
@@ -3080,6 +3310,300 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 }
 
 /*
+ * Load up hypervisor-mode registers on P9.
+ */
+static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
+				     unsigned long lpcr)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	s64 hdec;
+	u64 tb, purr, spurr;
+	int trap;
+	unsigned long host_hfscr = mfspr(SPRN_HFSCR);
+	unsigned long host_ciabr = mfspr(SPRN_CIABR);
+	unsigned long host_dawr = mfspr(SPRN_DAWR);
+	unsigned long host_dawrx = mfspr(SPRN_DAWRX);
+	unsigned long host_psscr = mfspr(SPRN_PSSCR);
+	unsigned long host_pidr = mfspr(SPRN_PID);
+
+	hdec = time_limit - mftb();
+	if (hdec < 0)
+		return BOOK3S_INTERRUPT_HV_DECREMENTER;
+	mtspr(SPRN_HDEC, hdec);
+
+	if (vc->tb_offset) {
+		u64 new_tb = mftb() + vc->tb_offset;
+		mtspr(SPRN_TBU40, new_tb);
+		tb = mftb();
+		if ((tb & 0xffffff) < (new_tb & 0xffffff))
+			mtspr(SPRN_TBU40, new_tb + 0x1000000);
+		vc->tb_offset_applied = vc->tb_offset;
+	}
+
+	if (vc->pcr)
+		mtspr(SPRN_PCR, vc->pcr);
+	mtspr(SPRN_DPDES, vc->dpdes);
+	mtspr(SPRN_VTB, vc->vtb);
+
+	local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
+	local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
+	mtspr(SPRN_PURR, vcpu->arch.purr);
+	mtspr(SPRN_SPURR, vcpu->arch.spurr);
+
+	if (cpu_has_feature(CPU_FTR_DAWR)) {
+		mtspr(SPRN_DAWR, vcpu->arch.dawr);
+		mtspr(SPRN_DAWRX, vcpu->arch.dawrx);
+	}
+	mtspr(SPRN_CIABR, vcpu->arch.ciabr);
+	mtspr(SPRN_IC, vcpu->arch.ic);
+	mtspr(SPRN_PID, vcpu->arch.pid);
+
+	mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
+	      (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+
+	mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
+
+	mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
+	mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
+	mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
+	mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
+
+	mtspr(SPRN_AMOR, ~0UL);
+
+	mtspr(SPRN_LPCR, lpcr);
+	isync();
+
+	kvmppc_xive_push_vcpu(vcpu);
+
+	mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
+	mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
+
+	trap = __kvmhv_vcpu_entry_p9(vcpu);
+
+	/* Advance host PURR/SPURR by the amount used by guest */
+	purr = mfspr(SPRN_PURR);
+	spurr = mfspr(SPRN_SPURR);
+	mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
+	      purr - vcpu->arch.purr);
+	mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
+	      spurr - vcpu->arch.spurr);
+	vcpu->arch.purr = purr;
+	vcpu->arch.spurr = spurr;
+
+	vcpu->arch.ic = mfspr(SPRN_IC);
+	vcpu->arch.pid = mfspr(SPRN_PID);
+	vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
+
+	vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
+	vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
+	vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
+	vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
+
+	mtspr(SPRN_PSSCR, host_psscr);
+	mtspr(SPRN_HFSCR, host_hfscr);
+	mtspr(SPRN_CIABR, host_ciabr);
+	mtspr(SPRN_DAWR, host_dawr);
+	mtspr(SPRN_DAWRX, host_dawrx);
+	mtspr(SPRN_PID, host_pidr);
+
+	/*
+	 * Since this is radix, do a eieio; tlbsync; ptesync sequence in
+	 * case we interrupted the guest between a tlbie and a ptesync.
+	 */
+	asm volatile("eieio; tlbsync; ptesync");
+
+	mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid);	/* restore host LPID */
+	isync();
+
+	vc->dpdes = mfspr(SPRN_DPDES);
+	vc->vtb = mfspr(SPRN_VTB);
+	mtspr(SPRN_DPDES, 0);
+	if (vc->pcr)
+		mtspr(SPRN_PCR, 0);
+
+	if (vc->tb_offset_applied) {
+		u64 new_tb = mftb() - vc->tb_offset_applied;
+		mtspr(SPRN_TBU40, new_tb);
+		tb = mftb();
+		if ((tb & 0xffffff) < (new_tb & 0xffffff))
+			mtspr(SPRN_TBU40, new_tb + 0x1000000);
+		vc->tb_offset_applied = 0;
+	}
+
+	mtspr(SPRN_HDEC, 0x7fffffff);
+	mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
+
+	return trap;
+}
+
+/*
+ * Virtual-mode guest entry for POWER9 and later when the host and
+ * guest are both using the radix MMU.  The LPIDR has already been set.
+ */
+int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
+			 unsigned long lpcr)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	unsigned long host_dscr = mfspr(SPRN_DSCR);
+	unsigned long host_tidr = mfspr(SPRN_TIDR);
+	unsigned long host_iamr = mfspr(SPRN_IAMR);
+	s64 dec;
+	u64 tb;
+	int trap, save_pmu;
+
+	dec = mfspr(SPRN_DEC);
+	tb = mftb();
+	if (dec < 512)
+		return BOOK3S_INTERRUPT_HV_DECREMENTER;
+	local_paca->kvm_hstate.dec_expires = dec + tb;
+	if (local_paca->kvm_hstate.dec_expires < time_limit)
+		time_limit = local_paca->kvm_hstate.dec_expires;
+
+	vcpu->arch.ceded = 0;
+
+	kvmhv_save_host_pmu();		/* saves it to PACA kvm_hstate */
+
+	kvmppc_subcore_enter_guest();
+
+	vc->entry_exit_map = 1;
+	vc->in_guest = 1;
+
+	if (vcpu->arch.vpa.pinned_addr) {
+		struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+		u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
+		lp->yield_count = cpu_to_be32(yield_count);
+		vcpu->arch.vpa.dirty = 1;
+	}
+
+	if (cpu_has_feature(CPU_FTR_TM) ||
+	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+		kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
+
+	kvmhv_load_guest_pmu(vcpu);
+
+	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
+	load_fp_state(&vcpu->arch.fp);
+#ifdef CONFIG_ALTIVEC
+	load_vr_state(&vcpu->arch.vr);
+#endif
+
+	mtspr(SPRN_DSCR, vcpu->arch.dscr);
+	mtspr(SPRN_IAMR, vcpu->arch.iamr);
+	mtspr(SPRN_PSPB, vcpu->arch.pspb);
+	mtspr(SPRN_FSCR, vcpu->arch.fscr);
+	mtspr(SPRN_TAR, vcpu->arch.tar);
+	mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
+	mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
+	mtspr(SPRN_BESCR, vcpu->arch.bescr);
+	mtspr(SPRN_WORT, vcpu->arch.wort);
+	mtspr(SPRN_TIDR, vcpu->arch.tid);
+	mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+	mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+	mtspr(SPRN_AMR, vcpu->arch.amr);
+	mtspr(SPRN_UAMOR, vcpu->arch.uamor);
+
+	if (!(vcpu->arch.ctrl & 1))
+		mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+
+	mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
+
+	if (kvmhv_on_pseries()) {
+		/* call our hypervisor to load up HV regs and go */
+		struct hv_guest_state hvregs;
+
+		kvmhv_save_hv_regs(vcpu, &hvregs);
+		hvregs.lpcr = lpcr;
+		vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
+		hvregs.version = HV_GUEST_STATE_VERSION;
+		if (vcpu->arch.nested) {
+			hvregs.lpid = vcpu->arch.nested->shadow_lpid;
+			hvregs.vcpu_token = vcpu->arch.nested_vcpu_id;
+		} else {
+			hvregs.lpid = vcpu->kvm->arch.lpid;
+			hvregs.vcpu_token = vcpu->vcpu_id;
+		}
+		hvregs.hdec_expiry = time_limit;
+		trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
+					  __pa(&vcpu->arch.regs));
+		kvmhv_restore_hv_return_state(vcpu, &hvregs);
+		vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
+		vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
+		vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+
+		/* H_CEDE has to be handled now, not later */
+		if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
+		    kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
+			kvmppc_nested_cede(vcpu);
+			trap = 0;
+		}
+	} else {
+		trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
+	}
+
+	vcpu->arch.slb_max = 0;
+	dec = mfspr(SPRN_DEC);
+	tb = mftb();
+	vcpu->arch.dec_expires = dec + tb;
+	vcpu->cpu = -1;
+	vcpu->arch.thread_cpu = -1;
+	vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
+
+	vcpu->arch.iamr = mfspr(SPRN_IAMR);
+	vcpu->arch.pspb = mfspr(SPRN_PSPB);
+	vcpu->arch.fscr = mfspr(SPRN_FSCR);
+	vcpu->arch.tar = mfspr(SPRN_TAR);
+	vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
+	vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
+	vcpu->arch.bescr = mfspr(SPRN_BESCR);
+	vcpu->arch.wort = mfspr(SPRN_WORT);
+	vcpu->arch.tid = mfspr(SPRN_TIDR);
+	vcpu->arch.amr = mfspr(SPRN_AMR);
+	vcpu->arch.uamor = mfspr(SPRN_UAMOR);
+	vcpu->arch.dscr = mfspr(SPRN_DSCR);
+
+	mtspr(SPRN_PSPB, 0);
+	mtspr(SPRN_WORT, 0);
+	mtspr(SPRN_AMR, 0);
+	mtspr(SPRN_UAMOR, 0);
+	mtspr(SPRN_DSCR, host_dscr);
+	mtspr(SPRN_TIDR, host_tidr);
+	mtspr(SPRN_IAMR, host_iamr);
+	mtspr(SPRN_PSPB, 0);
+
+	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
+	store_fp_state(&vcpu->arch.fp);
+#ifdef CONFIG_ALTIVEC
+	store_vr_state(&vcpu->arch.vr);
+#endif
+
+	if (cpu_has_feature(CPU_FTR_TM) ||
+	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+		kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
+
+	save_pmu = 1;
+	if (vcpu->arch.vpa.pinned_addr) {
+		struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+		u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
+		lp->yield_count = cpu_to_be32(yield_count);
+		vcpu->arch.vpa.dirty = 1;
+		save_pmu = lp->pmcregs_in_use;
+	}
+
+	kvmhv_save_guest_pmu(vcpu, save_pmu);
+
+	vc->entry_exit_map = 0x101;
+	vc->in_guest = 0;
+
+	mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
+
+	kvmhv_load_host_pmu();
+
+	kvmppc_subcore_exit_guest();
+
+	return trap;
+}
+
+/*
  * Wait for some other vcpu thread to execute us, and
  * wake us up when we need to handle something in the host.
  */
@@ -3256,6 +3780,11 @@ out:
 	trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
 }
 
+/*
+ * This never fails for a radix guest, as none of the operations it does
+ * for a radix guest can fail or have a way to report failure.
+ * kvmhv_run_single_vcpu() relies on this fact.
+ */
 static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
 {
 	int r = 0;
@@ -3405,6 +3934,171 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	return vcpu->arch.ret;
 }
 
+int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
+			  struct kvm_vcpu *vcpu, u64 time_limit,
+			  unsigned long lpcr)
+{
+	int trap, r, pcpu;
+	int srcu_idx;
+	struct kvmppc_vcore *vc;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
+
+	trace_kvmppc_run_vcpu_enter(vcpu);
+
+	kvm_run->exit_reason = 0;
+	vcpu->arch.ret = RESUME_GUEST;
+	vcpu->arch.trap = 0;
+
+	vc = vcpu->arch.vcore;
+	vcpu->arch.ceded = 0;
+	vcpu->arch.run_task = current;
+	vcpu->arch.kvm_run = kvm_run;
+	vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
+	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+	vcpu->arch.busy_preempt = TB_NIL;
+	vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
+	vc->runnable_threads[0] = vcpu;
+	vc->n_runnable = 1;
+	vc->runner = vcpu;
+
+	/* See if the MMU is ready to go */
+	if (!kvm->arch.mmu_ready)
+		kvmhv_setup_mmu(vcpu);
+
+	if (need_resched())
+		cond_resched();
+
+	kvmppc_update_vpas(vcpu);
+
+	init_vcore_to_run(vc);
+	vc->preempt_tb = TB_NIL;
+
+	preempt_disable();
+	pcpu = smp_processor_id();
+	vc->pcpu = pcpu;
+	kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+
+	local_irq_disable();
+	hard_irq_disable();
+	if (signal_pending(current))
+		goto sigpend;
+	if (lazy_irq_pending() || need_resched() || !kvm->arch.mmu_ready)
+		goto out;
+
+	if (!nested) {
+		kvmppc_core_prepare_to_enter(vcpu);
+		if (vcpu->arch.doorbell_request) {
+			vc->dpdes = 1;
+			smp_wmb();
+			vcpu->arch.doorbell_request = 0;
+		}
+		if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
+			     &vcpu->arch.pending_exceptions))
+			lpcr |= LPCR_MER;
+	} else if (vcpu->arch.pending_exceptions ||
+		   vcpu->arch.doorbell_request ||
+		   xive_interrupt_pending(vcpu)) {
+		vcpu->arch.ret = RESUME_HOST;
+		goto out;
+	}
+
+	kvmppc_clear_host_core(pcpu);
+
+	local_paca->kvm_hstate.tid = 0;
+	local_paca->kvm_hstate.napping = 0;
+	local_paca->kvm_hstate.kvm_split_mode = NULL;
+	kvmppc_start_thread(vcpu, vc);
+	kvmppc_create_dtl_entry(vcpu, vc);
+	trace_kvm_guest_enter(vcpu);
+
+	vc->vcore_state = VCORE_RUNNING;
+	trace_kvmppc_run_core(vc, 0);
+
+	if (cpu_has_feature(CPU_FTR_HVMODE))
+		kvmppc_radix_check_need_tlb_flush(kvm, pcpu, nested);
+
+	trace_hardirqs_on();
+	guest_enter_irqoff();
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+
+	this_cpu_disable_ftrace();
+
+	trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr);
+	vcpu->arch.trap = trap;
+
+	this_cpu_enable_ftrace();
+
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		mtspr(SPRN_LPID, kvm->arch.host_lpid);
+		isync();
+	}
+
+	trace_hardirqs_off();
+	set_irq_happened(trap);
+
+	kvmppc_set_host_core(pcpu);
+
+	local_irq_enable();
+	guest_exit();
+
+	cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
+
+	preempt_enable();
+
+	/* cancel pending decrementer exception if DEC is now positive */
+	if (get_tb() < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu))
+		kvmppc_core_dequeue_dec(vcpu);
+
+	trace_kvm_guest_exit(vcpu);
+	r = RESUME_GUEST;
+	if (trap) {
+		if (!nested)
+			r = kvmppc_handle_exit_hv(kvm_run, vcpu, current);
+		else
+			r = kvmppc_handle_nested_exit(vcpu);
+	}
+	vcpu->arch.ret = r;
+
+	if (is_kvmppc_resume_guest(r) && vcpu->arch.ceded &&
+	    !kvmppc_vcpu_woken(vcpu)) {
+		kvmppc_set_timer(vcpu);
+		while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) {
+			if (signal_pending(current)) {
+				vcpu->stat.signal_exits++;
+				kvm_run->exit_reason = KVM_EXIT_INTR;
+				vcpu->arch.ret = -EINTR;
+				break;
+			}
+			spin_lock(&vc->lock);
+			kvmppc_vcore_blocked(vc);
+			spin_unlock(&vc->lock);
+		}
+	}
+	vcpu->arch.ceded = 0;
+
+	vc->vcore_state = VCORE_INACTIVE;
+	trace_kvmppc_run_core(vc, 1);
+
+ done:
+	kvmppc_remove_runnable(vc, vcpu);
+	trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
+
+	return vcpu->arch.ret;
+
+ sigpend:
+	vcpu->stat.signal_exits++;
+	kvm_run->exit_reason = KVM_EXIT_INTR;
+	vcpu->arch.ret = -EINTR;
+ out:
+	local_irq_enable();
+	preempt_enable();
+	goto done;
+}
+
 static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -3480,7 +4174,20 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 
 	do {
-		r = kvmppc_run_vcpu(run, vcpu);
+		/*
+		 * The early POWER9 chips that can't mix radix and HPT threads
+		 * on the same core also need the workaround for the problem
+		 * where the TLB would prefetch entries in the guest exit path
+		 * for radix guests using the guest PIDR value and LPID 0.
+		 * The workaround is in the old path (kvmppc_run_vcpu())
+		 * but not the new path (kvmhv_run_single_vcpu()).
+		 */
+		if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
+		    !no_mixing_hpt_and_radix)
+			r = kvmhv_run_single_vcpu(run, vcpu, ~(u64)0,
+						  vcpu->arch.vcore->lpcr);
+		else
+			r = kvmppc_run_vcpu(run, vcpu);
 
 		if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
 		    !(vcpu->arch.shregs.msr & MSR_PR)) {
@@ -3559,6 +4266,10 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
 	kvmppc_add_seg_page_size(&sps, 16, SLB_VSID_L | SLB_VSID_LP_01);
 	kvmppc_add_seg_page_size(&sps, 24, SLB_VSID_L);
 
+	/* If running as a nested hypervisor, we don't support HPT guests */
+	if (kvmhv_on_pseries())
+		info->flags |= KVM_PPC_NO_HASH;
+
 	return 0;
 }
 
@@ -3723,8 +4434,7 @@ void kvmppc_setup_partition_table(struct kvm *kvm)
 			__pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE;
 		dw1 = PATB_GR | kvm->arch.process_table;
 	}
-
-	mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1);
+	kvmhv_set_ptbl_entry(kvm->arch.lpid, dw0, dw1);
 }
 
 /*
@@ -3820,6 +4530,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 /* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */
 int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
 {
+	if (nesting_enabled(kvm))
+		kvmhv_release_all_nested(kvm);
 	kvmppc_free_radix(kvm);
 	kvmppc_update_lpcr(kvm, LPCR_VPM1,
 			   LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
@@ -3841,6 +4553,7 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
 	kvmppc_free_hpt(&kvm->arch.hpt);
 	kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
 			   LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+	kvmppc_rmap_reset(kvm);
 	kvm->arch.radix = 1;
 	return 0;
 }
@@ -3940,6 +4653,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 
 	kvmppc_alloc_host_rm_ops();
 
+	kvmhv_vm_nested_init(kvm);
+
 	/*
 	 * Since we don't flush the TLB when tearing down a VM,
 	 * and this lpid might have previously been used,
@@ -3958,9 +4673,13 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 		kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
 
 	/* Init LPCR for virtual RMA mode */
-	kvm->arch.host_lpid = mfspr(SPRN_LPID);
-	kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
-	lpcr &= LPCR_PECE | LPCR_LPES;
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		kvm->arch.host_lpid = mfspr(SPRN_LPID);
+		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
+		lpcr &= LPCR_PECE | LPCR_LPES;
+	} else {
+		lpcr = 0;
+	}
 	lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
 		LPCR_VPM0 | LPCR_VPM1;
 	kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
@@ -4027,8 +4746,14 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	 * On POWER9, we only need to do this if the "indep_threads_mode"
 	 * module parameter has been set to N.
 	 */
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
-		kvm->arch.threads_indep = indep_threads_mode;
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
+			pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
+			kvm->arch.threads_indep = true;
+		} else {
+			kvm->arch.threads_indep = indep_threads_mode;
+		}
+	}
 	if (!kvm->arch.threads_indep)
 		kvm_hv_vm_activated();
 
@@ -4051,6 +4776,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	snprintf(buf, sizeof(buf), "vm%d", current->pid);
 	kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
 	kvmppc_mmu_debugfs_init(kvm);
+	if (radix_enabled())
+		kvmhv_radix_debugfs_init(kvm);
 
 	return 0;
 }
@@ -4073,13 +4800,21 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 
 	kvmppc_free_vcores(kvm);
 
-	kvmppc_free_lpid(kvm->arch.lpid);
 
 	if (kvm_is_radix(kvm))
 		kvmppc_free_radix(kvm);
 	else
 		kvmppc_free_hpt(&kvm->arch.hpt);
 
+	/* Perform global invalidation and return lpid to the pool */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		if (nesting_enabled(kvm))
+			kvmhv_release_all_nested(kvm);
+		kvm->arch.process_table = 0;
+		kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
+	}
+	kvmppc_free_lpid(kvm->arch.lpid);
+
 	kvmppc_free_pimap(kvm);
 }
 
@@ -4104,11 +4839,15 @@ static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
 
 static int kvmppc_core_check_processor_compat_hv(void)
 {
-	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
-	    !cpu_has_feature(CPU_FTR_ARCH_206))
-		return -EIO;
+	if (cpu_has_feature(CPU_FTR_HVMODE) &&
+	    cpu_has_feature(CPU_FTR_ARCH_206))
+		return 0;
 
-	return 0;
+	/* POWER9 in radix mode is capable of being a nested hypervisor. */
+	if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
+		return 0;
+
+	return -EIO;
 }
 
 #ifdef CONFIG_KVM_XICS
@@ -4426,6 +5165,10 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
 	if (radix && !radix_enabled())
 		return -EINVAL;
 
+	/* If we're a nested hypervisor, we currently only support radix */
+	if (kvmhv_on_pseries() && !radix)
+		return -EINVAL;
+
 	mutex_lock(&kvm->lock);
 	if (radix != kvm_is_radix(kvm)) {
 		if (kvm->arch.mmu_ready) {
@@ -4458,6 +5201,19 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
 	return err;
 }
 
+static int kvmhv_enable_nested(struct kvm *kvm)
+{
+	if (!nested)
+		return -EPERM;
+	if (!cpu_has_feature(CPU_FTR_ARCH_300) || no_mixing_hpt_and_radix)
+		return -ENODEV;
+
+	/* kvm == NULL means the caller is testing if the capability exists */
+	if (kvm)
+		kvm->arch.nested_enable = true;
+	return 0;
+}
+
 static struct kvmppc_ops kvm_ops_hv = {
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -4497,6 +5253,7 @@ static struct kvmppc_ops kvm_ops_hv = {
 	.configure_mmu = kvmhv_configure_mmu,
 	.get_rmmu_info = kvmhv_get_rmmu_info,
 	.set_smt_mode = kvmhv_set_smt_mode,
+	.enable_nested = kvmhv_enable_nested,
 };
 
 static int kvm_init_subcore_bitmap(void)
@@ -4547,6 +5304,10 @@ static int kvmppc_book3s_init_hv(void)
 	if (r < 0)
 		return -ENODEV;
 
+	r = kvmhv_nested_init();
+	if (r)
+		return r;
+
 	r = kvm_init_subcore_bitmap();
 	if (r)
 		return r;
@@ -4557,7 +5318,8 @@ static int kvmppc_book3s_init_hv(void)
 	 * indirectly, via OPAL.
 	 */
 #ifdef CONFIG_SMP
-	if (!xive_enabled() && !local_paca->kvm_hstate.xics_phys) {
+	if (!xive_enabled() && !kvmhv_on_pseries() &&
+	    !local_paca->kvm_hstate.xics_phys) {
 		struct device_node *np;
 
 		np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
@@ -4605,6 +5367,7 @@ static void kvmppc_book3s_exit_hv(void)
 	if (kvmppc_radix_possible())
 		kvmppc_radix_exit();
 	kvmppc_hv_ops = NULL;
+	kvmhv_nested_exit();
 }
 
 module_init(kvmppc_book3s_init_hv);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index fc6bb9630a9c..a71e2fc00a4e 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -231,6 +231,15 @@ void kvmhv_rm_send_ipi(int cpu)
 	void __iomem *xics_phys;
 	unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
 
+	/* For a nested hypervisor, use the XICS via hcall */
+	if (kvmhv_on_pseries()) {
+		unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+		plpar_hcall_raw(H_IPI, retbuf, get_hard_smp_processor_id(cpu),
+				IPI_PRIORITY);
+		return;
+	}
+
 	/* On POWER9 we can use msgsnd for any destination cpu. */
 	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 		msg |= get_hard_smp_processor_id(cpu);
@@ -460,12 +469,19 @@ static long kvmppc_read_one_intr(bool *again)
 		return 1;
 
 	/* Now read the interrupt from the ICP */
-	xics_phys = local_paca->kvm_hstate.xics_phys;
-	rc = 0;
-	if (!xics_phys)
-		rc = opal_int_get_xirr(&xirr, false);
-	else
-		xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
+	if (kvmhv_on_pseries()) {
+		unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+		rc = plpar_hcall_raw(H_XIRR, retbuf, 0xFF);
+		xirr = cpu_to_be32(retbuf[0]);
+	} else {
+		xics_phys = local_paca->kvm_hstate.xics_phys;
+		rc = 0;
+		if (!xics_phys)
+			rc = opal_int_get_xirr(&xirr, false);
+		else
+			xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
+	}
 	if (rc < 0)
 		return 1;
 
@@ -494,7 +510,13 @@ static long kvmppc_read_one_intr(bool *again)
 	 */
 	if (xisr == XICS_IPI) {
 		rc = 0;
-		if (xics_phys) {
+		if (kvmhv_on_pseries()) {
+			unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+			plpar_hcall_raw(H_IPI, retbuf,
+					hard_smp_processor_id(), 0xff);
+			plpar_hcall_raw(H_EOI, retbuf, h_xirr);
+		} else if (xics_phys) {
 			__raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
 			__raw_rm_writel(xirr, xics_phys + XICS_XIRR);
 		} else {
@@ -520,7 +542,13 @@ static long kvmppc_read_one_intr(bool *again)
 			/* We raced with the host,
 			 * we need to resend that IPI, bummer
 			 */
-			if (xics_phys)
+			if (kvmhv_on_pseries()) {
+				unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+				plpar_hcall_raw(H_IPI, retbuf,
+						hard_smp_processor_id(),
+						IPI_PRIORITY);
+			} else if (xics_phys)
 				__raw_rm_writeb(IPI_PRIORITY,
 						xics_phys + XICS_MFRR);
 			else
@@ -729,3 +757,51 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
 	smp_mb();
 	local_paca->kvm_hstate.kvm_split_mode = NULL;
 }
+
+/*
+ * Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
+ * Can we inject a Decrementer or a External interrupt?
+ */
+void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
+{
+	int ext;
+	unsigned long vec = 0;
+	unsigned long lpcr;
+
+	/* Insert EXTERNAL bit into LPCR at the MER bit position */
+	ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr |= ext << LPCR_MER_SH;
+	mtspr(SPRN_LPCR, lpcr);
+	isync();
+
+	if (vcpu->arch.shregs.msr & MSR_EE) {
+		if (ext) {
+			vec = BOOK3S_INTERRUPT_EXTERNAL;
+		} else {
+			long int dec = mfspr(SPRN_DEC);
+			if (!(lpcr & LPCR_LD))
+				dec = (int) dec;
+			if (dec < 0)
+				vec = BOOK3S_INTERRUPT_DECREMENTER;
+		}
+	}
+	if (vec) {
+		unsigned long msr, old_msr = vcpu->arch.shregs.msr;
+
+		kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
+		kvmppc_set_srr1(vcpu, old_msr);
+		kvmppc_set_pc(vcpu, vec);
+		msr = vcpu->arch.intr_msr;
+		if (MSR_TM_ACTIVE(old_msr))
+			msr |= MSR_TS_S;
+		vcpu->arch.shregs.msr = msr;
+	}
+
+	if (vcpu->arch.doorbell_request) {
+		mtspr(SPRN_DPDES, 1);
+		vcpu->arch.vcore->dpdes = 1;
+		smp_wmb();
+		vcpu->arch.doorbell_request = 0;
+	}
+}
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 666b91c79eb4..a6d10010d9e8 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -64,52 +64,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
 	/* Save host PMU registers */
-BEGIN_FTR_SECTION
-	/* Work around P8 PMAE bug */
-	li	r3, -1
-	clrrdi	r3, r3, 10
-	mfspr	r8, SPRN_MMCR2
-	mtspr	SPRN_MMCR2, r3		/* freeze all counters using MMCR2 */
-	isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-	li	r3, 1
-	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
-	mfspr	r7, SPRN_MMCR0		/* save MMCR0 */
-	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable interrupts */
-	mfspr	r6, SPRN_MMCRA
-	/* Clear MMCRA in order to disable SDAR updates */
-	li	r5, 0
-	mtspr	SPRN_MMCRA, r5
-	isync
-	lbz	r5, PACA_PMCINUSE(r13)	/* is the host using the PMU? */
-	cmpwi	r5, 0
-	beq	31f			/* skip if not */
-	mfspr	r5, SPRN_MMCR1
-	mfspr	r9, SPRN_SIAR
-	mfspr	r10, SPRN_SDAR
-	std	r7, HSTATE_MMCR0(r13)
-	std	r5, HSTATE_MMCR1(r13)
-	std	r6, HSTATE_MMCRA(r13)
-	std	r9, HSTATE_SIAR(r13)
-	std	r10, HSTATE_SDAR(r13)
-BEGIN_FTR_SECTION
-	mfspr	r9, SPRN_SIER
-	std	r8, HSTATE_MMCR2(r13)
-	std	r9, HSTATE_SIER(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-	mfspr	r3, SPRN_PMC1
-	mfspr	r5, SPRN_PMC2
-	mfspr	r6, SPRN_PMC3
-	mfspr	r7, SPRN_PMC4
-	mfspr	r8, SPRN_PMC5
-	mfspr	r9, SPRN_PMC6
-	stw	r3, HSTATE_PMC1(r13)
-	stw	r5, HSTATE_PMC2(r13)
-	stw	r6, HSTATE_PMC3(r13)
-	stw	r7, HSTATE_PMC4(r13)
-	stw	r8, HSTATE_PMC5(r13)
-	stw	r9, HSTATE_PMC6(r13)
-31:
+	bl	kvmhv_save_host_pmu
 
 	/*
 	 * Put whatever is in the decrementer into the
@@ -161,3 +116,51 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	ld	r0, PPC_LR_STKOFF(r1)
 	mtlr	r0
 	blr
+
+_GLOBAL(kvmhv_save_host_pmu)
+BEGIN_FTR_SECTION
+	/* Work around P8 PMAE bug */
+	li	r3, -1
+	clrrdi	r3, r3, 10
+	mfspr	r8, SPRN_MMCR2
+	mtspr	SPRN_MMCR2, r3		/* freeze all counters using MMCR2 */
+	isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	li	r3, 1
+	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
+	mfspr	r7, SPRN_MMCR0		/* save MMCR0 */
+	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable interrupts */
+	mfspr	r6, SPRN_MMCRA
+	/* Clear MMCRA in order to disable SDAR updates */
+	li	r5, 0
+	mtspr	SPRN_MMCRA, r5
+	isync
+	lbz	r5, PACA_PMCINUSE(r13)	/* is the host using the PMU? */
+	cmpwi	r5, 0
+	beq	31f			/* skip if not */
+	mfspr	r5, SPRN_MMCR1
+	mfspr	r9, SPRN_SIAR
+	mfspr	r10, SPRN_SDAR
+	std	r7, HSTATE_MMCR0(r13)
+	std	r5, HSTATE_MMCR1(r13)
+	std	r6, HSTATE_MMCRA(r13)
+	std	r9, HSTATE_SIAR(r13)
+	std	r10, HSTATE_SDAR(r13)
+BEGIN_FTR_SECTION
+	mfspr	r9, SPRN_SIER
+	std	r8, HSTATE_MMCR2(r13)
+	std	r9, HSTATE_SIER(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	mfspr	r3, SPRN_PMC1
+	mfspr	r5, SPRN_PMC2
+	mfspr	r6, SPRN_PMC3
+	mfspr	r7, SPRN_PMC4
+	mfspr	r8, SPRN_PMC5
+	mfspr	r9, SPRN_PMC6
+	stw	r3, HSTATE_PMC1(r13)
+	stw	r5, HSTATE_PMC2(r13)
+	stw	r6, HSTATE_PMC3(r13)
+	stw	r7, HSTATE_PMC4(r13)
+	stw	r8, HSTATE_PMC5(r13)
+	stw	r9, HSTATE_PMC6(r13)
+31:	blr
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
new file mode 100644
index 000000000000..401d2ecbebc5
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -0,0 +1,1291 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corporation, 2018
+ * Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
+ *	   Paul Mackerras <paulus@ozlabs.org>
+ *
+ * Description: KVM functions specific to running nested KVM-HV guests
+ * on Book3S processors (specifically POWER9 and later).
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/llist.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/pte-walk.h>
+#include <asm/reg.h>
+
+static struct patb_entry *pseries_partition_tb;
+
+static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
+static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free);
+
+void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+	hr->pcr = vc->pcr;
+	hr->dpdes = vc->dpdes;
+	hr->hfscr = vcpu->arch.hfscr;
+	hr->tb_offset = vc->tb_offset;
+	hr->dawr0 = vcpu->arch.dawr;
+	hr->dawrx0 = vcpu->arch.dawrx;
+	hr->ciabr = vcpu->arch.ciabr;
+	hr->purr = vcpu->arch.purr;
+	hr->spurr = vcpu->arch.spurr;
+	hr->ic = vcpu->arch.ic;
+	hr->vtb = vc->vtb;
+	hr->srr0 = vcpu->arch.shregs.srr0;
+	hr->srr1 = vcpu->arch.shregs.srr1;
+	hr->sprg[0] = vcpu->arch.shregs.sprg0;
+	hr->sprg[1] = vcpu->arch.shregs.sprg1;
+	hr->sprg[2] = vcpu->arch.shregs.sprg2;
+	hr->sprg[3] = vcpu->arch.shregs.sprg3;
+	hr->pidr = vcpu->arch.pid;
+	hr->cfar = vcpu->arch.cfar;
+	hr->ppr = vcpu->arch.ppr;
+}
+
+static void byteswap_pt_regs(struct pt_regs *regs)
+{
+	unsigned long *addr = (unsigned long *) regs;
+
+	for (; addr < ((unsigned long *) (regs + 1)); addr++)
+		*addr = swab64(*addr);
+}
+
+static void byteswap_hv_regs(struct hv_guest_state *hr)
+{
+	hr->version = swab64(hr->version);
+	hr->lpid = swab32(hr->lpid);
+	hr->vcpu_token = swab32(hr->vcpu_token);
+	hr->lpcr = swab64(hr->lpcr);
+	hr->pcr = swab64(hr->pcr);
+	hr->amor = swab64(hr->amor);
+	hr->dpdes = swab64(hr->dpdes);
+	hr->hfscr = swab64(hr->hfscr);
+	hr->tb_offset = swab64(hr->tb_offset);
+	hr->dawr0 = swab64(hr->dawr0);
+	hr->dawrx0 = swab64(hr->dawrx0);
+	hr->ciabr = swab64(hr->ciabr);
+	hr->hdec_expiry = swab64(hr->hdec_expiry);
+	hr->purr = swab64(hr->purr);
+	hr->spurr = swab64(hr->spurr);
+	hr->ic = swab64(hr->ic);
+	hr->vtb = swab64(hr->vtb);
+	hr->hdar = swab64(hr->hdar);
+	hr->hdsisr = swab64(hr->hdsisr);
+	hr->heir = swab64(hr->heir);
+	hr->asdr = swab64(hr->asdr);
+	hr->srr0 = swab64(hr->srr0);
+	hr->srr1 = swab64(hr->srr1);
+	hr->sprg[0] = swab64(hr->sprg[0]);
+	hr->sprg[1] = swab64(hr->sprg[1]);
+	hr->sprg[2] = swab64(hr->sprg[2]);
+	hr->sprg[3] = swab64(hr->sprg[3]);
+	hr->pidr = swab64(hr->pidr);
+	hr->cfar = swab64(hr->cfar);
+	hr->ppr = swab64(hr->ppr);
+}
+
+static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
+				 struct hv_guest_state *hr)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+	hr->dpdes = vc->dpdes;
+	hr->hfscr = vcpu->arch.hfscr;
+	hr->purr = vcpu->arch.purr;
+	hr->spurr = vcpu->arch.spurr;
+	hr->ic = vcpu->arch.ic;
+	hr->vtb = vc->vtb;
+	hr->srr0 = vcpu->arch.shregs.srr0;
+	hr->srr1 = vcpu->arch.shregs.srr1;
+	hr->sprg[0] = vcpu->arch.shregs.sprg0;
+	hr->sprg[1] = vcpu->arch.shregs.sprg1;
+	hr->sprg[2] = vcpu->arch.shregs.sprg2;
+	hr->sprg[3] = vcpu->arch.shregs.sprg3;
+	hr->pidr = vcpu->arch.pid;
+	hr->cfar = vcpu->arch.cfar;
+	hr->ppr = vcpu->arch.ppr;
+	switch (trap) {
+	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
+		hr->hdar = vcpu->arch.fault_dar;
+		hr->hdsisr = vcpu->arch.fault_dsisr;
+		hr->asdr = vcpu->arch.fault_gpa;
+		break;
+	case BOOK3S_INTERRUPT_H_INST_STORAGE:
+		hr->asdr = vcpu->arch.fault_gpa;
+		break;
+	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+		hr->heir = vcpu->arch.emul_inst;
+		break;
+	}
+}
+
+static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
+{
+	/*
+	 * Don't let L1 enable features for L2 which we've disabled for L1,
+	 * but preserve the interrupt cause field.
+	 */
+	hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);
+
+	/* Don't let data address watchpoint match in hypervisor state */
+	hr->dawrx0 &= ~DAWRX_HYP;
+
+	/* Don't let completed instruction address breakpt match in HV state */
+	if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
+		hr->ciabr &= ~CIABR_PRIV;
+}
+
+static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+	vc->pcr = hr->pcr;
+	vc->dpdes = hr->dpdes;
+	vcpu->arch.hfscr = hr->hfscr;
+	vcpu->arch.dawr = hr->dawr0;
+	vcpu->arch.dawrx = hr->dawrx0;
+	vcpu->arch.ciabr = hr->ciabr;
+	vcpu->arch.purr = hr->purr;
+	vcpu->arch.spurr = hr->spurr;
+	vcpu->arch.ic = hr->ic;
+	vc->vtb = hr->vtb;
+	vcpu->arch.shregs.srr0 = hr->srr0;
+	vcpu->arch.shregs.srr1 = hr->srr1;
+	vcpu->arch.shregs.sprg0 = hr->sprg[0];
+	vcpu->arch.shregs.sprg1 = hr->sprg[1];
+	vcpu->arch.shregs.sprg2 = hr->sprg[2];
+	vcpu->arch.shregs.sprg3 = hr->sprg[3];
+	vcpu->arch.pid = hr->pidr;
+	vcpu->arch.cfar = hr->cfar;
+	vcpu->arch.ppr = hr->ppr;
+}
+
+void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
+				   struct hv_guest_state *hr)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+	vc->dpdes = hr->dpdes;
+	vcpu->arch.hfscr = hr->hfscr;
+	vcpu->arch.purr = hr->purr;
+	vcpu->arch.spurr = hr->spurr;
+	vcpu->arch.ic = hr->ic;
+	vc->vtb = hr->vtb;
+	vcpu->arch.fault_dar = hr->hdar;
+	vcpu->arch.fault_dsisr = hr->hdsisr;
+	vcpu->arch.fault_gpa = hr->asdr;
+	vcpu->arch.emul_inst = hr->heir;
+	vcpu->arch.shregs.srr0 = hr->srr0;
+	vcpu->arch.shregs.srr1 = hr->srr1;
+	vcpu->arch.shregs.sprg0 = hr->sprg[0];
+	vcpu->arch.shregs.sprg1 = hr->sprg[1];
+	vcpu->arch.shregs.sprg2 = hr->sprg[2];
+	vcpu->arch.shregs.sprg3 = hr->sprg[3];
+	vcpu->arch.pid = hr->pidr;
+	vcpu->arch.cfar = hr->cfar;
+	vcpu->arch.ppr = hr->ppr;
+}
+
+long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
+{
+	long int err, r;
+	struct kvm_nested_guest *l2;
+	struct pt_regs l2_regs, saved_l1_regs;
+	struct hv_guest_state l2_hv, saved_l1_hv;
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	u64 hv_ptr, regs_ptr;
+	u64 hdec_exp;
+	s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
+	u64 mask;
+	unsigned long lpcr;
+
+	if (vcpu->kvm->arch.l1_ptcr == 0)
+		return H_NOT_AVAILABLE;
+
+	/* copy parameters in */
+	hv_ptr = kvmppc_get_gpr(vcpu, 4);
+	err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
+				  sizeof(struct hv_guest_state));
+	if (err)
+		return H_PARAMETER;
+	if (kvmppc_need_byteswap(vcpu))
+		byteswap_hv_regs(&l2_hv);
+	if (l2_hv.version != HV_GUEST_STATE_VERSION)
+		return H_P2;
+
+	regs_ptr = kvmppc_get_gpr(vcpu, 5);
+	err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
+				  sizeof(struct pt_regs));
+	if (err)
+		return H_PARAMETER;
+	if (kvmppc_need_byteswap(vcpu))
+		byteswap_pt_regs(&l2_regs);
+	if (l2_hv.vcpu_token >= NR_CPUS)
+		return H_PARAMETER;
+
+	/* translate lpid */
+	l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
+	if (!l2)
+		return H_PARAMETER;
+	if (!l2->l1_gr_to_hr) {
+		mutex_lock(&l2->tlb_lock);
+		kvmhv_update_ptbl_cache(l2);
+		mutex_unlock(&l2->tlb_lock);
+	}
+
+	/* save l1 values of things */
+	vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
+	saved_l1_regs = vcpu->arch.regs;
+	kvmhv_save_hv_regs(vcpu, &saved_l1_hv);
+
+	/* convert TB values/offsets to host (L0) values */
+	hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
+	vc->tb_offset += l2_hv.tb_offset;
+
+	/* set L1 state to L2 state */
+	vcpu->arch.nested = l2;
+	vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
+	vcpu->arch.regs = l2_regs;
+	vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
+	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
+		LPCR_LPES | LPCR_MER;
+	lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
+	sanitise_hv_regs(vcpu, &l2_hv);
+	restore_hv_regs(vcpu, &l2_hv);
+
+	vcpu->arch.ret = RESUME_GUEST;
+	vcpu->arch.trap = 0;
+	do {
+		if (mftb() >= hdec_exp) {
+			vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
+			r = RESUME_HOST;
+			break;
+		}
+		r = kvmhv_run_single_vcpu(vcpu->arch.kvm_run, vcpu, hdec_exp,
+					  lpcr);
+	} while (is_kvmppc_resume_guest(r));
+
+	/* save L2 state for return */
+	l2_regs = vcpu->arch.regs;
+	l2_regs.msr = vcpu->arch.shregs.msr;
+	delta_purr = vcpu->arch.purr - l2_hv.purr;
+	delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
+	delta_ic = vcpu->arch.ic - l2_hv.ic;
+	delta_vtb = vc->vtb - l2_hv.vtb;
+	save_hv_return_state(vcpu, vcpu->arch.trap, &l2_hv);
+
+	/* restore L1 state */
+	vcpu->arch.nested = NULL;
+	vcpu->arch.regs = saved_l1_regs;
+	vcpu->arch.shregs.msr = saved_l1_regs.msr & ~MSR_TS_MASK;
+	/* set L1 MSR TS field according to L2 transaction state */
+	if (l2_regs.msr & MSR_TS_MASK)
+		vcpu->arch.shregs.msr |= MSR_TS_S;
+	vc->tb_offset = saved_l1_hv.tb_offset;
+	restore_hv_regs(vcpu, &saved_l1_hv);
+	vcpu->arch.purr += delta_purr;
+	vcpu->arch.spurr += delta_spurr;
+	vcpu->arch.ic += delta_ic;
+	vc->vtb += delta_vtb;
+
+	kvmhv_put_nested(l2);
+
+	/* copy l2_hv_state and regs back to guest */
+	if (kvmppc_need_byteswap(vcpu)) {
+		byteswap_hv_regs(&l2_hv);
+		byteswap_pt_regs(&l2_regs);
+	}
+	err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
+				   sizeof(struct hv_guest_state));
+	if (err)
+		return H_AUTHORITY;
+	err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
+				   sizeof(struct pt_regs));
+	if (err)
+		return H_AUTHORITY;
+
+	if (r == -EINTR)
+		return H_INTERRUPT;
+
+	return vcpu->arch.trap;
+}
+
+long kvmhv_nested_init(void)
+{
+	long int ptb_order;
+	unsigned long ptcr;
+	long rc;
+
+	if (!kvmhv_on_pseries())
+		return 0;
+	if (!radix_enabled())
+		return -ENODEV;
+
+	/* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
+	ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
+	if (ptb_order < 8)
+		ptb_order = 8;
+	pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
+				       GFP_KERNEL);
+	if (!pseries_partition_tb) {
+		pr_err("kvm-hv: failed to allocated nested partition table\n");
+		return -ENOMEM;
+	}
+
+	ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
+	rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
+	if (rc != H_SUCCESS) {
+		pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
+		       rc);
+		kfree(pseries_partition_tb);
+		pseries_partition_tb = NULL;
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+void kvmhv_nested_exit(void)
+{
+	/*
+	 * N.B. the kvmhv_on_pseries() test is there because it enables
+	 * the compiler to remove the call to plpar_hcall_norets()
+	 * when CONFIG_PPC_PSERIES=n.
+	 */
+	if (kvmhv_on_pseries() && pseries_partition_tb) {
+		plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
+		kfree(pseries_partition_tb);
+		pseries_partition_tb = NULL;
+	}
+}
+
+static void kvmhv_flush_lpid(unsigned int lpid)
+{
+	long rc;
+
+	if (!kvmhv_on_pseries()) {
+		radix__flush_tlb_lpid(lpid);
+		return;
+	}
+
+	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
+				lpid, TLBIEL_INVAL_SET_LPID);
+	if (rc)
+		pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
+}
+
+void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
+{
+	if (!kvmhv_on_pseries()) {
+		mmu_partition_table_set_entry(lpid, dw0, dw1);
+		return;
+	}
+
+	pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0);
+	pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1);
+	/* L0 will do the necessary barriers */
+	kvmhv_flush_lpid(lpid);
+}
+
+static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
+{
+	unsigned long dw0;
+
+	dw0 = PATB_HR | radix__get_tree_size() |
+		__pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
+	kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
+}
+
+void kvmhv_vm_nested_init(struct kvm *kvm)
+{
+	kvm->arch.max_nested_lpid = -1;
+}
+
+/*
+ * Handle the H_SET_PARTITION_TABLE hcall.
+ * r4 = guest real address of partition table + log_2(size) - 12
+ * (formatted as for the PTCR).
+ */
+long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
+	int srcu_idx;
+	long ret = H_SUCCESS;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	/*
+	 * Limit the partition table to 4096 entries (because that's what
+	 * hardware supports), and check the base address.
+	 */
+	if ((ptcr & PRTS_MASK) > 12 - 8 ||
+	    !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
+		ret = H_PARAMETER;
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	if (ret == H_SUCCESS)
+		kvm->arch.l1_ptcr = ptcr;
+	return ret;
+}
+
+/*
+ * Reload the partition table entry for a guest.
+ * Caller must hold gp->tlb_lock.
+ */
+static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
+{
+	int ret;
+	struct patb_entry ptbl_entry;
+	unsigned long ptbl_addr;
+	struct kvm *kvm = gp->l1_host;
+
+	ret = -EFAULT;
+	ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
+	if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8)))
+		ret = kvm_read_guest(kvm, ptbl_addr,
+				     &ptbl_entry, sizeof(ptbl_entry));
+	if (ret) {
+		gp->l1_gr_to_hr = 0;
+		gp->process_table = 0;
+	} else {
+		gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0);
+		gp->process_table = be64_to_cpu(ptbl_entry.patb1);
+	}
+	kvmhv_set_nested_ptbl(gp);
+}
+
+struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
+{
+	struct kvm_nested_guest *gp;
+	long shadow_lpid;
+
+	gp = kzalloc(sizeof(*gp), GFP_KERNEL);
+	if (!gp)
+		return NULL;
+	gp->l1_host = kvm;
+	gp->l1_lpid = lpid;
+	mutex_init(&gp->tlb_lock);
+	gp->shadow_pgtable = pgd_alloc(kvm->mm);
+	if (!gp->shadow_pgtable)
+		goto out_free;
+	shadow_lpid = kvmppc_alloc_lpid();
+	if (shadow_lpid < 0)
+		goto out_free2;
+	gp->shadow_lpid = shadow_lpid;
+
+	memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));
+
+	return gp;
+
+ out_free2:
+	pgd_free(kvm->mm, gp->shadow_pgtable);
+ out_free:
+	kfree(gp);
+	return NULL;
+}
+
+/*
+ * Free up any resources allocated for a nested guest.
+ */
+static void kvmhv_release_nested(struct kvm_nested_guest *gp)
+{
+	struct kvm *kvm = gp->l1_host;
+
+	if (gp->shadow_pgtable) {
+		/*
+		 * No vcpu is using this struct and no call to
+		 * kvmhv_get_nested can find this struct,
+		 * so we don't need to hold kvm->mmu_lock.
+		 */
+		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
+					  gp->shadow_lpid);
+		pgd_free(kvm->mm, gp->shadow_pgtable);
+	}
+	kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
+	kvmppc_free_lpid(gp->shadow_lpid);
+	kfree(gp);
+}
+
+static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
+{
+	struct kvm *kvm = gp->l1_host;
+	int lpid = gp->l1_lpid;
+	long ref;
+
+	spin_lock(&kvm->mmu_lock);
+	if (gp == kvm->arch.nested_guests[lpid]) {
+		kvm->arch.nested_guests[lpid] = NULL;
+		if (lpid == kvm->arch.max_nested_lpid) {
+			while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
+				;
+			kvm->arch.max_nested_lpid = lpid;
+		}
+		--gp->refcnt;
+	}
+	ref = gp->refcnt;
+	spin_unlock(&kvm->mmu_lock);
+	if (ref == 0)
+		kvmhv_release_nested(gp);
+}
+
+/*
+ * Free up all nested resources allocated for this guest.
+ * This is called with no vcpus of the guest running, when
+ * switching the guest to HPT mode or when destroying the
+ * guest.
+ */
+void kvmhv_release_all_nested(struct kvm *kvm)
+{
+	int i;
+	struct kvm_nested_guest *gp;
+	struct kvm_nested_guest *freelist = NULL;
+	struct kvm_memory_slot *memslot;
+	int srcu_idx;
+
+	spin_lock(&kvm->mmu_lock);
+	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
+		gp = kvm->arch.nested_guests[i];
+		if (!gp)
+			continue;
+		kvm->arch.nested_guests[i] = NULL;
+		if (--gp->refcnt == 0) {
+			gp->next = freelist;
+			freelist = gp;
+		}
+	}
+	kvm->arch.max_nested_lpid = -1;
+	spin_unlock(&kvm->mmu_lock);
+	while ((gp = freelist) != NULL) {
+		freelist = gp->next;
+		kvmhv_release_nested(gp);
+	}
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	kvm_for_each_memslot(memslot, kvm_memslots(kvm))
+		kvmhv_free_memslot_nest_rmap(memslot);
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+}
+
+/* caller must hold gp->tlb_lock */
+static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
+{
+	struct kvm *kvm = gp->l1_host;
+
+	spin_lock(&kvm->mmu_lock);
+	kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
+	spin_unlock(&kvm->mmu_lock);
+	kvmhv_flush_lpid(gp->shadow_lpid);
+	kvmhv_update_ptbl_cache(gp);
+	if (gp->l1_gr_to_hr == 0)
+		kvmhv_remove_nested(gp);
+}
+
+struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
+					  bool create)
+{
+	struct kvm_nested_guest *gp, *newgp;
+
+	if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
+	    l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
+		return NULL;
+
+	spin_lock(&kvm->mmu_lock);
+	gp = kvm->arch.nested_guests[l1_lpid];
+	if (gp)
+		++gp->refcnt;
+	spin_unlock(&kvm->mmu_lock);
+
+	if (gp || !create)
+		return gp;
+
+	newgp = kvmhv_alloc_nested(kvm, l1_lpid);
+	if (!newgp)
+		return NULL;
+	spin_lock(&kvm->mmu_lock);
+	if (kvm->arch.nested_guests[l1_lpid]) {
+		/* someone else beat us to it */
+		gp = kvm->arch.nested_guests[l1_lpid];
+	} else {
+		kvm->arch.nested_guests[l1_lpid] = newgp;
+		++newgp->refcnt;
+		gp = newgp;
+		newgp = NULL;
+		if (l1_lpid > kvm->arch.max_nested_lpid)
+			kvm->arch.max_nested_lpid = l1_lpid;
+	}
+	++gp->refcnt;
+	spin_unlock(&kvm->mmu_lock);
+
+	if (newgp)
+		kvmhv_release_nested(newgp);
+
+	return gp;
+}
+
+void kvmhv_put_nested(struct kvm_nested_guest *gp)
+{
+	struct kvm *kvm = gp->l1_host;
+	long ref;
+
+	spin_lock(&kvm->mmu_lock);
+	ref = --gp->refcnt;
+	spin_unlock(&kvm->mmu_lock);
+	if (ref == 0)
+		kvmhv_release_nested(gp);
+}
+
+static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid)
+{
+	if (lpid > kvm->arch.max_nested_lpid)
+		return NULL;
+	return kvm->arch.nested_guests[lpid];
+}
+
+static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
+{
+	return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK |
+				       RMAP_NESTED_GPA_MASK));
+}
+
+void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
+			    struct rmap_nested **n_rmap)
+{
+	struct llist_node *entry = ((struct llist_head *) rmapp)->first;
+	struct rmap_nested *cursor;
+	u64 rmap, new_rmap = (*n_rmap)->rmap;
+
+	/* Are there any existing entries? */
+	if (!(*rmapp)) {
+		/* No -> use the rmap as a single entry */
+		*rmapp = new_rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
+		return;
+	}
+
+	/* Do any entries match what we're trying to insert? */
+	for_each_nest_rmap_safe(cursor, entry, &rmap) {
+		if (kvmhv_n_rmap_is_equal(rmap, new_rmap))
+			return;
+	}
+
+	/* Do we need to create a list or just add the new entry? */
+	rmap = *rmapp;
+	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
+		*rmapp = 0UL;
+	llist_add(&((*n_rmap)->list), (struct llist_head *) rmapp);
+	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
+		(*n_rmap)->list.next = (struct llist_node *) rmap;
+
+	/* Set NULL so not freed by caller */
+	*n_rmap = NULL;
+}
+
+static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
+				   unsigned long hpa, unsigned long mask)
+{
+	struct kvm_nested_guest *gp;
+	unsigned long gpa;
+	unsigned int shift, lpid;
+	pte_t *ptep;
+
+	gpa = n_rmap & RMAP_NESTED_GPA_MASK;
+	lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
+	gp = kvmhv_find_nested(kvm, lpid);
+	if (!gp)
+		return;
+
+	/* Find and invalidate the pte */
+	ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
+	/* Don't spuriously invalidate ptes if the pfn has changed */
+	if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa))
+		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
+}
+
+static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
+					unsigned long hpa, unsigned long mask)
+{
+	struct llist_node *entry = llist_del_all((struct llist_head *) rmapp);
+	struct rmap_nested *cursor;
+	unsigned long rmap;
+
+	for_each_nest_rmap_safe(cursor, entry, &rmap) {
+		kvmhv_remove_nest_rmap(kvm, rmap, hpa, mask);
+		kfree(cursor);
+	}
+}
+
+/* called with kvm->mmu_lock held */
+void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
+				  struct kvm_memory_slot *memslot,
+				  unsigned long gpa, unsigned long hpa,
+				  unsigned long nbytes)
+{
+	unsigned long gfn, end_gfn;
+	unsigned long addr_mask;
+
+	if (!memslot)
+		return;
+	gfn = (gpa >> PAGE_SHIFT) - memslot->base_gfn;
+	end_gfn = gfn + (nbytes >> PAGE_SHIFT);
+
+	addr_mask = PTE_RPN_MASK & ~(nbytes - 1);
+	hpa &= addr_mask;
+
+	for (; gfn < end_gfn; gfn++) {
+		unsigned long *rmap = &memslot->arch.rmap[gfn];
+		kvmhv_remove_nest_rmap_list(kvm, rmap, hpa, addr_mask);
+	}
+}
+
+static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free)
+{
+	unsigned long page;
+
+	for (page = 0; page < free->npages; page++) {
+		unsigned long rmap, *rmapp = &free->arch.rmap[page];
+		struct rmap_nested *cursor;
+		struct llist_node *entry;
+
+		entry = llist_del_all((struct llist_head *) rmapp);
+		for_each_nest_rmap_safe(cursor, entry, &rmap)
+			kfree(cursor);
+	}
+}
+
+static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
+					struct kvm_nested_guest *gp,
+					long gpa, int *shift_ret)
+{
+	struct kvm *kvm = vcpu->kvm;
+	bool ret = false;
+	pte_t *ptep;
+	int shift;
+
+	spin_lock(&kvm->mmu_lock);
+	ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
+	if (!shift)
+		shift = PAGE_SHIFT;
+	if (ptep && pte_present(*ptep)) {
+		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
+		ret = true;
+	}
+	spin_unlock(&kvm->mmu_lock);
+
+	if (shift_ret)
+		*shift_ret = shift;
+	return ret;
+}
+
+static inline int get_ric(unsigned int instr)
+{
+	return (instr >> 18) & 0x3;
+}
+
+static inline int get_prs(unsigned int instr)
+{
+	return (instr >> 17) & 0x1;
+}
+
+static inline int get_r(unsigned int instr)
+{
+	return (instr >> 16) & 0x1;
+}
+
+static inline int get_lpid(unsigned long r_val)
+{
+	return r_val & 0xffffffff;
+}
+
+static inline int get_is(unsigned long r_val)
+{
+	return (r_val >> 10) & 0x3;
+}
+
+static inline int get_ap(unsigned long r_val)
+{
+	return (r_val >> 5) & 0x7;
+}
+
+static inline long get_epn(unsigned long r_val)
+{
+	return r_val >> 12;
+}
+
+static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
+					int ap, long epn)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *gp;
+	long npages;
+	int shift, shadow_shift;
+	unsigned long addr;
+
+	shift = ap_to_shift(ap);
+	addr = epn << 12;
+	if (shift < 0)
+		/* Invalid ap encoding */
+		return -EINVAL;
+
+	addr &= ~((1UL << shift) - 1);
+	npages = 1UL << (shift - PAGE_SHIFT);
+
+	gp = kvmhv_get_nested(kvm, lpid, false);
+	if (!gp) /* No such guest -> nothing to do */
+		return 0;
+	mutex_lock(&gp->tlb_lock);
+
+	/* There may be more than one host page backing this single guest pte */
+	do {
+		kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shadow_shift);
+
+		npages -= 1UL << (shadow_shift - PAGE_SHIFT);
+		addr += 1UL << shadow_shift;
+	} while (npages > 0);
+
+	mutex_unlock(&gp->tlb_lock);
+	kvmhv_put_nested(gp);
+	return 0;
+}
+
+static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
+				     struct kvm_nested_guest *gp, int ric)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	mutex_lock(&gp->tlb_lock);
+	switch (ric) {
+	case 0:
+		/* Invalidate TLB */
+		spin_lock(&kvm->mmu_lock);
+		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
+					  gp->shadow_lpid);
+		kvmhv_flush_lpid(gp->shadow_lpid);
+		spin_unlock(&kvm->mmu_lock);
+		break;
+	case 1:
+		/*
+		 * Invalidate PWC
+		 * We don't cache this -> nothing to do
+		 */
+		break;
+	case 2:
+		/* Invalidate TLB, PWC and caching of partition table entries */
+		kvmhv_flush_nested(gp);
+		break;
+	default:
+		break;
+	}
+	mutex_unlock(&gp->tlb_lock);
+}
+
+static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *gp;
+	int i;
+
+	spin_lock(&kvm->mmu_lock);
+	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
+		gp = kvm->arch.nested_guests[i];
+		if (gp) {
+			spin_unlock(&kvm->mmu_lock);
+			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+			spin_lock(&kvm->mmu_lock);
+		}
+	}
+	spin_unlock(&kvm->mmu_lock);
+}
+
+static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr,
+				    unsigned long rsval, unsigned long rbval)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *gp;
+	int r, ric, prs, is, ap;
+	int lpid;
+	long epn;
+	int ret = 0;
+
+	ric = get_ric(instr);
+	prs = get_prs(instr);
+	r = get_r(instr);
+	lpid = get_lpid(rsval);
+	is = get_is(rbval);
+
+	/*
+	 * These cases are invalid and are not handled:
+	 * r   != 1 -> Only radix supported
+	 * prs == 1 -> Not HV privileged
+	 * ric == 3 -> No cluster bombs for radix
+	 * is  == 1 -> Partition scoped translations not associated with pid
+	 * (!is) && (ric == 1 || ric == 2) -> Not supported by ISA
+	 */
+	if ((!r) || (prs) || (ric == 3) || (is == 1) ||
+	    ((!is) && (ric == 1 || ric == 2)))
+		return -EINVAL;
+
+	switch (is) {
+	case 0:
+		/*
+		 * We know ric == 0
+		 * Invalidate TLB for a given target address
+		 */
+		epn = get_epn(rbval);
+		ap = get_ap(rbval);
+		ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, epn);
+		break;
+	case 2:
+		/* Invalidate matching LPID */
+		gp = kvmhv_get_nested(kvm, lpid, false);
+		if (gp) {
+			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+			kvmhv_put_nested(gp);
+		}
+		break;
+	case 3:
+		/* Invalidate ALL LPIDs */
+		kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * This handles the H_TLB_INVALIDATE hcall.
+ * Parameters are (r4) tlbie instruction code, (r5) rS contents,
+ * (r6) rB contents.
+ */
+long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
+{
+	int ret;
+
+	ret = kvmhv_emulate_priv_tlbie(vcpu, kvmppc_get_gpr(vcpu, 4),
+			kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6));
+	if (ret)
+		return H_PARAMETER;
+	return H_SUCCESS;
+}
+
+/* Used to convert a nested guest real address to a L1 guest real address */
+static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
+				       struct kvm_nested_guest *gp,
+				       unsigned long n_gpa, unsigned long dsisr,
+				       struct kvmppc_pte *gpte_p)
+{
+	u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
+	int ret;
+
+	ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
+					 &fault_addr);
+
+	if (ret) {
+		/* We didn't find a pte */
+		if (ret == -EINVAL) {
+			/* Unsupported mmu config */
+			flags |= DSISR_UNSUPP_MMU;
+		} else if (ret == -ENOENT) {
+			/* No translation found */
+			flags |= DSISR_NOHPTE;
+		} else if (ret == -EFAULT) {
+			/* Couldn't access L1 real address */
+			flags |= DSISR_PRTABLE_FAULT;
+			vcpu->arch.fault_gpa = fault_addr;
+		} else {
+			/* Unknown error */
+			return ret;
+		}
+		goto forward_to_l1;
+	} else {
+		/* We found a pte -> check permissions */
+		if (dsisr & DSISR_ISSTORE) {
+			/* Can we write? */
+			if (!gpte_p->may_write) {
+				flags |= DSISR_PROTFAULT;
+				goto forward_to_l1;
+			}
+		} else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
+			/* Can we execute? */
+			if (!gpte_p->may_execute) {
+				flags |= SRR1_ISI_N_OR_G;
+				goto forward_to_l1;
+			}
+		} else {
+			/* Can we read? */
+			if (!gpte_p->may_read && !gpte_p->may_write) {
+				flags |= DSISR_PROTFAULT;
+				goto forward_to_l1;
+			}
+		}
+	}
+
+	return 0;
+
+forward_to_l1:
+	vcpu->arch.fault_dsisr = flags;
+	if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
+		vcpu->arch.shregs.msr &= ~0x783f0000ul;
+		vcpu->arch.shregs.msr |= flags;
+	}
+	return RESUME_HOST;
+}
+
+static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
+				       struct kvm_nested_guest *gp,
+				       unsigned long n_gpa,
+				       struct kvmppc_pte gpte,
+				       unsigned long dsisr)
+{
+	struct kvm *kvm = vcpu->kvm;
+	bool writing = !!(dsisr & DSISR_ISSTORE);
+	u64 pgflags;
+	bool ret;
+
+	/* Are the rc bits set in the L1 partition scoped pte? */
+	pgflags = _PAGE_ACCESSED;
+	if (writing)
+		pgflags |= _PAGE_DIRTY;
+	if (pgflags & ~gpte.rc)
+		return RESUME_HOST;
+
+	spin_lock(&kvm->mmu_lock);
+	/* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
+	ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing,
+				     gpte.raddr, kvm->arch.lpid);
+	spin_unlock(&kvm->mmu_lock);
+	if (!ret)
+		return -EINVAL;
+
+	/* Set the rc bit in the pte of the shadow_pgtable for the nest guest */
+	ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa,
+				      gp->shadow_lpid);
+	if (!ret)
+		return -EINVAL;
+	return 0;
+}
+
+static inline int kvmppc_radix_level_to_shift(int level)
+{
+	switch (level) {
+	case 2:
+		return PUD_SHIFT;
+	case 1:
+		return PMD_SHIFT;
+	default:
+		return PAGE_SHIFT;
+	}
+}
+
+static inline int kvmppc_radix_shift_to_level(int shift)
+{
+	if (shift == PUD_SHIFT)
+		return 2;
+	if (shift == PMD_SHIFT)
+		return 1;
+	if (shift == PAGE_SHIFT)
+		return 0;
+	WARN_ON_ONCE(1);
+	return 0;
+}
+
+/* called with gp->tlb_lock held */
+static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
+					  struct kvm_nested_guest *gp)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_memory_slot *memslot;
+	struct rmap_nested *n_rmap;
+	struct kvmppc_pte gpte;
+	pte_t pte, *pte_p;
+	unsigned long mmu_seq;
+	unsigned long dsisr = vcpu->arch.fault_dsisr;
+	unsigned long ea = vcpu->arch.fault_dar;
+	unsigned long *rmapp;
+	unsigned long n_gpa, gpa, gfn, perm = 0UL;
+	unsigned int shift, l1_shift, level;
+	bool writing = !!(dsisr & DSISR_ISSTORE);
+	bool kvm_ro = false;
+	long int ret;
+
+	if (!gp->l1_gr_to_hr) {
+		kvmhv_update_ptbl_cache(gp);
+		if (!gp->l1_gr_to_hr)
+			return RESUME_HOST;
+	}
+
+	/* Convert the nested guest real address into a L1 guest real address */
+
+	n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
+	if (!(dsisr & DSISR_PRTABLE_FAULT))
+		n_gpa |= ea & 0xFFF;
+	ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);
+
+	/*
+	 * If the hardware found a translation but we don't now have a usable
+	 * translation in the l1 partition-scoped tree, remove the shadow pte
+	 * and let the guest retry.
+	 */
+	if (ret == RESUME_HOST &&
+	    (dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
+		      DSISR_BAD_COPYPASTE)))
+		goto inval;
+	if (ret)
+		return ret;
+
+	/* Failed to set the reference/change bits */
+	if (dsisr & DSISR_SET_RC) {
+		ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
+		if (ret == RESUME_HOST)
+			return ret;
+		if (ret)
+			goto inval;
+		dsisr &= ~DSISR_SET_RC;
+		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
+			       DSISR_PROTFAULT)))
+			return RESUME_GUEST;
+	}
+
+	/*
+	 * We took an HISI or HDSI while we were running a nested guest which
+	 * means we have no partition scoped translation for that. This means
+	 * we need to insert a pte for the mapping into our shadow_pgtable.
+	 */
+
+	l1_shift = gpte.page_shift;
+	if (l1_shift < PAGE_SHIFT) {
+		/* We don't support l1 using a page size smaller than our own */
+		pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
+			l1_shift, PAGE_SHIFT);
+		return -EINVAL;
+	}
+	gpa = gpte.raddr;
+	gfn = gpa >> PAGE_SHIFT;
+
+	/* 1. Get the corresponding host memslot */
+
+	memslot = gfn_to_memslot(kvm, gfn);
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
+		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
+			/* unusual error -> reflect to the guest as a DSI */
+			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
+			return RESUME_GUEST;
+		}
+		/* passthrough of emulated MMIO case... */
+		pr_err("emulated MMIO passthrough?\n");
+		return -EINVAL;
+	}
+	if (memslot->flags & KVM_MEM_READONLY) {
+		if (writing) {
+			/* Give the guest a DSI */
+			kvmppc_core_queue_data_storage(vcpu, ea,
+					DSISR_ISSTORE | DSISR_PROTFAULT);
+			return RESUME_GUEST;
+		}
+		kvm_ro = true;
+	}
+
+	/* 2. Find the host pte for this L1 guest real address */
+
+	/* Used to check for invalidations in progress */
+	mmu_seq = kvm->mmu_notifier_seq;
+	smp_rmb();
+
+	/* See if can find translation in our partition scoped tables for L1 */
+	pte = __pte(0);
+	spin_lock(&kvm->mmu_lock);
+	pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+	if (!shift)
+		shift = PAGE_SHIFT;
+	if (pte_p)
+		pte = *pte_p;
+	spin_unlock(&kvm->mmu_lock);
+
+	if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
+		/* No suitable pte found -> try to insert a mapping */
+		ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
+					writing, kvm_ro, &pte, &level);
+		if (ret == -EAGAIN)
+			return RESUME_GUEST;
+		else if (ret)
+			return ret;
+		shift = kvmppc_radix_level_to_shift(level);
+	}
+
+	/* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */
+
+	/* The permissions is the combination of the host and l1 guest ptes */
+	perm |= gpte.may_read ? 0UL : _PAGE_READ;
+	perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
+	perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
+	pte = __pte(pte_val(pte) & ~perm);
+
+	/* What size pte can we insert? */
+	if (shift > l1_shift) {
+		u64 mask;
+		unsigned int actual_shift = PAGE_SHIFT;
+		if (PMD_SHIFT < l1_shift)
+			actual_shift = PMD_SHIFT;
+		mask = (1UL << shift) - (1UL << actual_shift);
+		pte = __pte(pte_val(pte) | (gpa & mask));
+		shift = actual_shift;
+	}
+	level = kvmppc_radix_shift_to_level(shift);
+	n_gpa &= ~((1UL << shift) - 1);
+
+	/* 4. Insert the pte into our shadow_pgtable */
+
+	n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
+	if (!n_rmap)
+		return RESUME_GUEST; /* Let the guest try again */
+	n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
+		(((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
+	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+	ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
+				mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
+	if (n_rmap)
+		kfree(n_rmap);
+	if (ret == -EAGAIN)
+		ret = RESUME_GUEST;	/* Let the guest try again */
+
+	return ret;
+
+ inval:
+	kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
+	return RESUME_GUEST;
+}
+
+long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
+{
+	struct kvm_nested_guest *gp = vcpu->arch.nested;
+	long int ret;
+
+	mutex_lock(&gp->tlb_lock);
+	ret = __kvmhv_nested_page_fault(vcpu, gp);
+	mutex_unlock(&gp->tlb_lock);
+	return ret;
+}
+
+int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
+{
+	int ret = -1;
+
+	spin_lock(&kvm->mmu_lock);
+	while (++lpid <= kvm->arch.max_nested_lpid) {
+		if (kvm->arch.nested_guests[lpid]) {
+			ret = lpid;
+			break;
+		}
+	}
+	spin_unlock(&kvm->mmu_lock);
+	return ret;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index b11043b23c18..0787f12c1a1b 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -177,6 +177,7 @@ void kvmppc_subcore_enter_guest(void)
 
 	local_paca->sibling_subcore_state->in_guest[subcore_id] = 1;
 }
+EXPORT_SYMBOL_GPL(kvmppc_subcore_enter_guest);
 
 void kvmppc_subcore_exit_guest(void)
 {
@@ -187,6 +188,7 @@ void kvmppc_subcore_exit_guest(void)
 
 	local_paca->sibling_subcore_state->in_guest[subcore_id] = 0;
 }
+EXPORT_SYMBOL_GPL(kvmppc_subcore_exit_guest);
 
 static bool kvmppc_tb_resync_required(void)
 {
@@ -331,5 +333,13 @@ long kvmppc_realmode_hmi_handler(void)
 	} else {
 		wait_for_tb_resync();
 	}
+
+	/*
+	 * Reset tb_offset_applied so the guest exit code won't try
+	 * to subtract the previous timebase offset from the timebase.
+	 */
+	if (local_paca->kvm_hstate.kvm_vcore)
+		local_paca->kvm_hstate.kvm_vcore->tb_offset_applied = 0;
+
 	return 0;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 758d1d23215e..b3f5786b20dc 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -136,7 +136,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
 
 	/* Mark the target VCPU as having an interrupt pending */
 	vcpu->stat.queue_intr++;
-	set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
+	set_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
 
 	/* Kick self ? Just set MER and return */
 	if (vcpu == this_vcpu) {
@@ -170,8 +170,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
 static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
 {
 	/* Note: Only called on self ! */
-	clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
-		  &vcpu->arch.pending_exceptions);
+	clear_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
 }
 
@@ -768,6 +767,14 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
 	void __iomem *xics_phys;
 	int64_t rc;
 
+	if (kvmhv_on_pseries()) {
+		unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+		iosync();
+		plpar_hcall_raw(H_EOI, retbuf, hwirq);
+		return;
+	}
+
 	rc = pnv_opal_pci_msi_eoi(c, hwirq);
 
 	if (rc)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 1d14046124a0..9b8d50a7cbaf 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -28,6 +28,7 @@
 #include <asm/exception-64s.h>
 #include <asm/kvm_book3s_asm.h>
 #include <asm/book3s/64/mmu-hash.h>
+#include <asm/export.h>
 #include <asm/tm.h>
 #include <asm/opal.h>
 #include <asm/xive-regs.h>
@@ -46,8 +47,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 #define NAPPING_NOVCPU	2
 
 /* Stack frame offsets for kvmppc_hv_entry */
-#define SFS			160
+#define SFS			208
 #define STACK_SLOT_TRAP		(SFS-4)
+#define STACK_SLOT_SHORT_PATH	(SFS-8)
 #define STACK_SLOT_TID		(SFS-16)
 #define STACK_SLOT_PSSCR	(SFS-24)
 #define STACK_SLOT_PID		(SFS-32)
@@ -56,6 +58,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 #define STACK_SLOT_DAWR		(SFS-56)
 #define STACK_SLOT_DAWRX	(SFS-64)
 #define STACK_SLOT_HFSCR	(SFS-72)
+/* the following is used by the P9 short path */
+#define STACK_SLOT_NVGPRS	(SFS-152)	/* 18 gprs */
 
 /*
  * Call kvmppc_hv_entry in real mode.
@@ -113,45 +117,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	mtspr	SPRN_SPRG_VDSO_WRITE,r3
 
 	/* Reload the host's PMU registers */
-	lbz	r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
-	cmpwi	r4, 0
-	beq	23f			/* skip if not */
-BEGIN_FTR_SECTION
-	ld	r3, HSTATE_MMCR0(r13)
-	andi.	r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
-	cmpwi	r4, MMCR0_PMAO
-	beql	kvmppc_fix_pmao
-END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
-	lwz	r3, HSTATE_PMC1(r13)
-	lwz	r4, HSTATE_PMC2(r13)
-	lwz	r5, HSTATE_PMC3(r13)
-	lwz	r6, HSTATE_PMC4(r13)
-	lwz	r8, HSTATE_PMC5(r13)
-	lwz	r9, HSTATE_PMC6(r13)
-	mtspr	SPRN_PMC1, r3
-	mtspr	SPRN_PMC2, r4
-	mtspr	SPRN_PMC3, r5
-	mtspr	SPRN_PMC4, r6
-	mtspr	SPRN_PMC5, r8
-	mtspr	SPRN_PMC6, r9
-	ld	r3, HSTATE_MMCR0(r13)
-	ld	r4, HSTATE_MMCR1(r13)
-	ld	r5, HSTATE_MMCRA(r13)
-	ld	r6, HSTATE_SIAR(r13)
-	ld	r7, HSTATE_SDAR(r13)
-	mtspr	SPRN_MMCR1, r4
-	mtspr	SPRN_MMCRA, r5
-	mtspr	SPRN_SIAR, r6
-	mtspr	SPRN_SDAR, r7
-BEGIN_FTR_SECTION
-	ld	r8, HSTATE_MMCR2(r13)
-	ld	r9, HSTATE_SIER(r13)
-	mtspr	SPRN_MMCR2, r8
-	mtspr	SPRN_SIER, r9
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-	mtspr	SPRN_MMCR0, r3
-	isync
-23:
+	bl	kvmhv_load_host_pmu
 
 	/*
 	 * Reload DEC.  HDEC interrupts were disabled when
@@ -796,66 +762,23 @@ BEGIN_FTR_SECTION
 	b	91f
 END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/*
-	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
 	 */
 	mr      r3, r4
 	ld      r4, VCPU_MSR(r3)
+	li	r5, 0			/* don't preserve non-vol regs */
 	bl	kvmppc_restore_tm_hv
+	nop
 	ld	r4, HSTATE_KVM_VCPU(r13)
 91:
 #endif
 
-	/* Load guest PMU registers */
-	/* R4 is live here (vcpu pointer) */
-	li	r3, 1
-	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
-	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
-	isync
-BEGIN_FTR_SECTION
-	ld	r3, VCPU_MMCR(r4)
-	andi.	r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
-	cmpwi	r5, MMCR0_PMAO
-	beql	kvmppc_fix_pmao
-END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
-	lwz	r3, VCPU_PMC(r4)	/* always load up guest PMU registers */
-	lwz	r5, VCPU_PMC + 4(r4)	/* to prevent information leak */
-	lwz	r6, VCPU_PMC + 8(r4)
-	lwz	r7, VCPU_PMC + 12(r4)
-	lwz	r8, VCPU_PMC + 16(r4)
-	lwz	r9, VCPU_PMC + 20(r4)
-	mtspr	SPRN_PMC1, r3
-	mtspr	SPRN_PMC2, r5
-	mtspr	SPRN_PMC3, r6
-	mtspr	SPRN_PMC4, r7
-	mtspr	SPRN_PMC5, r8
-	mtspr	SPRN_PMC6, r9
-	ld	r3, VCPU_MMCR(r4)
-	ld	r5, VCPU_MMCR + 8(r4)
-	ld	r6, VCPU_MMCR + 16(r4)
-	ld	r7, VCPU_SIAR(r4)
-	ld	r8, VCPU_SDAR(r4)
-	mtspr	SPRN_MMCR1, r5
-	mtspr	SPRN_MMCRA, r6
-	mtspr	SPRN_SIAR, r7
-	mtspr	SPRN_SDAR, r8
-BEGIN_FTR_SECTION
-	ld	r5, VCPU_MMCR + 24(r4)
-	ld	r6, VCPU_SIER(r4)
-	mtspr	SPRN_MMCR2, r5
-	mtspr	SPRN_SIER, r6
-BEGIN_FTR_SECTION_NESTED(96)
-	lwz	r7, VCPU_PMC + 24(r4)
-	lwz	r8, VCPU_PMC + 28(r4)
-	ld	r9, VCPU_MMCR + 32(r4)
-	mtspr	SPRN_SPMC1, r7
-	mtspr	SPRN_SPMC2, r8
-	mtspr	SPRN_MMCRS, r9
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-	mtspr	SPRN_MMCR0, r3
-	isync
+	/* Load guest PMU registers; r4 = vcpu pointer here */
+	mr	r3, r4
+	bl	kvmhv_load_guest_pmu
 
 	/* Load up FP, VMX and VSX registers */
+	ld	r4, HSTATE_KVM_VCPU(r13)
 	bl	kvmppc_load_fp
 
 	ld	r14, VCPU_GPR(R14)(r4)
@@ -1100,73 +1023,40 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 no_xive:
 #endif /* CONFIG_KVM_XICS */
 
-deliver_guest_interrupt:
-	ld	r6, VCPU_CTR(r4)
-	ld	r7, VCPU_XER(r4)
-
-	mtctr	r6
-	mtxer	r7
+	li	r0, 0
+	stw	r0, STACK_SLOT_SHORT_PATH(r1)
 
-kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */
-	ld	r10, VCPU_PC(r4)
-	ld	r11, VCPU_MSR(r4)
+deliver_guest_interrupt:	/* r4 = vcpu, r13 = paca */
+	/* Check if we can deliver an external or decrementer interrupt now */
+	ld	r0, VCPU_PENDING_EXC(r4)
+BEGIN_FTR_SECTION
+	/* On POWER9, also check for emulated doorbell interrupt */
+	lbz	r3, VCPU_DBELL_REQ(r4)
+	or	r0, r0, r3
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+	cmpdi	r0, 0
+	beq	71f
+	mr	r3, r4
+	bl	kvmppc_guest_entry_inject_int
+	ld	r4, HSTATE_KVM_VCPU(r13)
+71:
 	ld	r6, VCPU_SRR0(r4)
 	ld	r7, VCPU_SRR1(r4)
 	mtspr	SPRN_SRR0, r6
 	mtspr	SPRN_SRR1, r7
 
+fast_guest_entry_c:
+	ld	r10, VCPU_PC(r4)
+	ld	r11, VCPU_MSR(r4)
 	/* r11 = vcpu->arch.msr & ~MSR_HV */
 	rldicl	r11, r11, 63 - MSR_HV_LG, 1
 	rotldi	r11, r11, 1 + MSR_HV_LG
 	ori	r11, r11, MSR_ME
 
-	/* Check if we can deliver an external or decrementer interrupt now */
-	ld	r0, VCPU_PENDING_EXC(r4)
-	rldicl	r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
-	cmpdi	cr1, r0, 0
-	andi.	r8, r11, MSR_EE
-	mfspr	r8, SPRN_LPCR
-	/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
-	rldimi	r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
-	mtspr	SPRN_LPCR, r8
-	isync
-	beq	5f
-	li	r0, BOOK3S_INTERRUPT_EXTERNAL
-	bne	cr1, 12f
-	mfspr	r0, SPRN_DEC
-BEGIN_FTR_SECTION
-	/* On POWER9 check whether the guest has large decrementer enabled */
-	andis.	r8, r8, LPCR_LD@h
-	bne	15f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-	extsw	r0, r0
-15:	cmpdi	r0, 0
-	li	r0, BOOK3S_INTERRUPT_DECREMENTER
-	bge	5f
-
-12:	mtspr	SPRN_SRR0, r10
-	mr	r10,r0
-	mtspr	SPRN_SRR1, r11
-	mr	r9, r4
-	bl	kvmppc_msr_interrupt
-5:
-BEGIN_FTR_SECTION
-	b	fast_guest_return
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-	/* On POWER9, check for pending doorbell requests */
-	lbz	r0, VCPU_DBELL_REQ(r4)
-	cmpwi	r0, 0
-	beq	fast_guest_return
-	ld	r5, HSTATE_KVM_VCORE(r13)
-	/* Set DPDES register so the CPU will take a doorbell interrupt */
-	li	r0, 1
-	mtspr	SPRN_DPDES, r0
-	std	r0, VCORE_DPDES(r5)
-	/* Make sure other cpus see vcore->dpdes set before dbell req clear */
-	lwsync
-	/* Clear the pending doorbell request */
-	li	r0, 0
-	stb	r0, VCPU_DBELL_REQ(r4)
+	ld	r6, VCPU_CTR(r4)
+	ld	r7, VCPU_XER(r4)
+	mtctr	r6
+	mtxer	r7
 
 /*
  * Required state:
@@ -1202,7 +1092,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 	ld	r5, VCPU_LR(r4)
-	lwz	r6, VCPU_CR(r4)
+	ld	r6, VCPU_CR(r4)
 	mtlr	r5
 	mtcr	r6
 
@@ -1234,6 +1124,83 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	HRFI_TO_GUEST
 	b	.
 
+/*
+ * Enter the guest on a P9 or later system where we have exactly
+ * one vcpu per vcore and we don't need to go to real mode
+ * (which implies that host and guest are both using radix MMU mode).
+ * r3 = vcpu pointer
+ * Most SPRs and all the VSRs have been loaded already.
+ */
+_GLOBAL(__kvmhv_vcpu_entry_p9)
+EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+	stdu	r1, -SFS(r1)
+
+	li	r0, 1
+	stw	r0, STACK_SLOT_SHORT_PATH(r1)
+
+	std	r3, HSTATE_KVM_VCPU(r13)
+	mfcr	r4
+	stw	r4, SFS+8(r1)
+
+	std	r1, HSTATE_HOST_R1(r13)
+
+	reg = 14
+	.rept	18
+	std	reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+	reg = reg + 1
+	.endr
+
+	reg = 14
+	.rept	18
+	ld	reg, __VCPU_GPR(reg)(r3)
+	reg = reg + 1
+	.endr
+
+	mfmsr	r10
+	std	r10, HSTATE_HOST_MSR(r13)
+
+	mr	r4, r3
+	b	fast_guest_entry_c
+guest_exit_short_path:
+
+	li	r0, KVM_GUEST_MODE_NONE
+	stb	r0, HSTATE_IN_GUEST(r13)
+
+	reg = 14
+	.rept	18
+	std	reg, __VCPU_GPR(reg)(r9)
+	reg = reg + 1
+	.endr
+
+	reg = 14
+	.rept	18
+	ld	reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+	reg = reg + 1
+	.endr
+
+	lwz	r4, SFS+8(r1)
+	mtcr	r4
+
+	mr	r3, r12		/* trap number */
+
+	addi	r1, r1, SFS
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+
+	/* If we are in real mode, do a rfid to get back to the caller */
+	mfmsr	r4
+	andi.	r5, r4, MSR_IR
+	bnelr
+	rldicl	r5, r4, 64 - MSR_TS_S_LG, 62	/* extract TS field */
+	mtspr	SPRN_SRR0, r0
+	ld	r10, HSTATE_HOST_MSR(r13)
+	rldimi	r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+	mtspr	SPRN_SRR1, r10
+	RFI_TO_KERNEL
+	b	.
+
 secondary_too_late:
 	li	r12, 0
 	stw	r12, STACK_SLOT_TRAP(r1)
@@ -1313,7 +1280,7 @@ kvmppc_interrupt_hv:
 	std	r3, VCPU_GPR(R12)(r9)
 	/* CR is in the high half of r12 */
 	srdi	r4, r12, 32
-	stw	r4, VCPU_CR(r9)
+	std	r4, VCPU_CR(r9)
 BEGIN_FTR_SECTION
 	ld	r3, HSTATE_CFAR(r13)
 	std	r3, VCPU_CFAR(r9)
@@ -1387,18 +1354,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	std	r3, VCPU_CTR(r9)
 	std	r4, VCPU_XER(r9)
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	/* For softpatch interrupt, go off and do TM instruction emulation */
-	cmpwi	r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
-	beq	kvmppc_tm_emul
-#endif
+	/* Save more register state  */
+	mfdar	r3
+	mfdsisr	r4
+	std	r3, VCPU_DAR(r9)
+	stw	r4, VCPU_DSISR(r9)
 
 	/* If this is a page table miss then see if it's theirs or ours */
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
 	beq	kvmppc_hdsi
+	std	r3, VCPU_FAULT_DAR(r9)
+	stw	r4, VCPU_FAULT_DSISR(r9)
 	cmpwi	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
 	beq	kvmppc_hisi
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/* For softpatch interrupt, go off and do TM instruction emulation */
+	cmpwi	r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
+	beq	kvmppc_tm_emul
+#endif
+
 	/* See if this is a leftover HDEC interrupt */
 	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
 	bne	2f
@@ -1418,10 +1393,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 BEGIN_FTR_SECTION
 	PPC_MSGSYNC
 	lwsync
+	/* always exit if we're running a nested guest */
+	ld	r0, VCPU_NESTED(r9)
+	cmpdi	r0, 0
+	bne	guest_exit_cont
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	lbz	r0, HSTATE_HOST_IPI(r13)
 	cmpwi	r0, 0
-	beq	4f
+	beq	maybe_reenter_guest
 	b	guest_exit_cont
 3:
 	/* If it's a hypervisor facility unavailable interrupt, save HFSCR */
@@ -1433,82 +1412,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 14:
 	/* External interrupt ? */
 	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
-	bne+	guest_exit_cont
-
-	/* External interrupt, first check for host_ipi. If this is
-	 * set, we know the host wants us out so let's do it now
-	 */
-	bl	kvmppc_read_intr
-
-	/*
-	 * Restore the active volatile registers after returning from
-	 * a C function.
-	 */
-	ld	r9, HSTATE_KVM_VCPU(r13)
-	li	r12, BOOK3S_INTERRUPT_EXTERNAL
-
-	/*
-	 * kvmppc_read_intr return codes:
-	 *
-	 * Exit to host (r3 > 0)
-	 *   1 An interrupt is pending that needs to be handled by the host
-	 *     Exit guest and return to host by branching to guest_exit_cont
-	 *
-	 *   2 Passthrough that needs completion in the host
-	 *     Exit guest and return to host by branching to guest_exit_cont
-	 *     However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
-	 *     to indicate to the host to complete handling the interrupt
-	 *
-	 * Before returning to guest, we check if any CPU is heading out
-	 * to the host and if so, we head out also. If no CPUs are heading
-	 * check return values <= 0.
-	 *
-	 * Return to guest (r3 <= 0)
-	 *  0 No external interrupt is pending
-	 * -1 A guest wakeup IPI (which has now been cleared)
-	 *    In either case, we return to guest to deliver any pending
-	 *    guest interrupts.
-	 *
-	 * -2 A PCI passthrough external interrupt was handled
-	 *    (interrupt was delivered directly to guest)
-	 *    Return to guest to deliver any pending guest interrupts.
-	 */
-
-	cmpdi	r3, 1
-	ble	1f
-
-	/* Return code = 2 */
-	li	r12, BOOK3S_INTERRUPT_HV_RM_HARD
-	stw	r12, VCPU_TRAP(r9)
-	b	guest_exit_cont
-
-1:	/* Return code <= 1 */
-	cmpdi	r3, 0
-	bgt	guest_exit_cont
-
-	/* Return code <= 0 */
-4:	ld	r5, HSTATE_KVM_VCORE(r13)
-	lwz	r0, VCORE_ENTRY_EXIT(r5)
-	cmpwi	r0, 0x100
-	mr	r4, r9
-	blt	deliver_guest_interrupt
-
-guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
-	/* Save more register state  */
-	mfdar	r6
-	mfdsisr	r7
-	std	r6, VCPU_DAR(r9)
-	stw	r7, VCPU_DSISR(r9)
-	/* don't overwrite fault_dar/fault_dsisr if HDSI */
-	cmpwi	r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
-	beq	mc_cont
-	std	r6, VCPU_FAULT_DAR(r9)
-	stw	r7, VCPU_FAULT_DSISR(r9)
-
+	beq	kvmppc_guest_external
 	/* See if it is a machine check */
 	cmpwi	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
 	beq	machine_check_realmode
-mc_cont:
+	/* Or a hypervisor maintenance interrupt */
+	cmpwi	r12, BOOK3S_INTERRUPT_HMI
+	beq	hmi_realmode
+
+guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
+
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
 	addi	r3, r9, VCPU_TB_RMEXIT
 	mr	r4, r9
@@ -1552,6 +1465,11 @@ mc_cont:
 1:
 #endif /* CONFIG_KVM_XICS */
 
+	/* If we came in through the P9 short path, go back out to C now */
+	lwz	r0, STACK_SLOT_SHORT_PATH(r1)
+	cmpwi	r0, 0
+	bne	guest_exit_short_path
+
 	/* For hash guest, read the guest SLB and save it away */
 	ld	r5, VCPU_KVM(r9)
 	lbz	r0, KVM_RADIX(r5)
@@ -1780,11 +1698,13 @@ BEGIN_FTR_SECTION
 	b	91f
 END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/*
-	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
 	 */
 	mr      r3, r9
 	ld      r4, VCPU_MSR(r3)
+	li	r5, 0			/* don't preserve non-vol regs */
 	bl	kvmppc_save_tm_hv
+	nop
 	ld	r9, HSTATE_KVM_VCPU(r13)
 91:
 #endif
@@ -1802,83 +1722,12 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 25:
 	/* Save PMU registers if requested */
 	/* r8 and cr0.eq are live here */
-BEGIN_FTR_SECTION
-	/*
-	 * POWER8 seems to have a hardware bug where setting
-	 * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
-	 * when some counters are already negative doesn't seem
-	 * to cause a performance monitor alert (and hence interrupt).
-	 * The effect of this is that when saving the PMU state,
-	 * if there is no PMU alert pending when we read MMCR0
-	 * before freezing the counters, but one becomes pending
-	 * before we read the counters, we lose it.
-	 * To work around this, we need a way to freeze the counters
-	 * before reading MMCR0.  Normally, freezing the counters
-	 * is done by writing MMCR0 (to set MMCR0[FC]) which
-	 * unavoidably writes MMCR0[PMA0] as well.  On POWER8,
-	 * we can also freeze the counters using MMCR2, by writing
-	 * 1s to all the counter freeze condition bits (there are
-	 * 9 bits each for 6 counters).
-	 */
-	li	r3, -1			/* set all freeze bits */
-	clrrdi	r3, r3, 10
-	mfspr	r10, SPRN_MMCR2
-	mtspr	SPRN_MMCR2, r3
-	isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-	li	r3, 1
-	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
-	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */
-	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
-	mfspr	r6, SPRN_MMCRA
-	/* Clear MMCRA in order to disable SDAR updates */
-	li	r7, 0
-	mtspr	SPRN_MMCRA, r7
-	isync
+	mr	r3, r9
+	li	r4, 1
 	beq	21f			/* if no VPA, save PMU stuff anyway */
-	lbz	r7, LPPACA_PMCINUSE(r8)
-	cmpwi	r7, 0			/* did they ask for PMU stuff to be saved? */
-	bne	21f
-	std	r3, VCPU_MMCR(r9)	/* if not, set saved MMCR0 to FC */
-	b	22f
-21:	mfspr	r5, SPRN_MMCR1
-	mfspr	r7, SPRN_SIAR
-	mfspr	r8, SPRN_SDAR
-	std	r4, VCPU_MMCR(r9)
-	std	r5, VCPU_MMCR + 8(r9)
-	std	r6, VCPU_MMCR + 16(r9)
-BEGIN_FTR_SECTION
-	std	r10, VCPU_MMCR + 24(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-	std	r7, VCPU_SIAR(r9)
-	std	r8, VCPU_SDAR(r9)
-	mfspr	r3, SPRN_PMC1
-	mfspr	r4, SPRN_PMC2
-	mfspr	r5, SPRN_PMC3
-	mfspr	r6, SPRN_PMC4
-	mfspr	r7, SPRN_PMC5
-	mfspr	r8, SPRN_PMC6
-	stw	r3, VCPU_PMC(r9)
-	stw	r4, VCPU_PMC + 4(r9)
-	stw	r5, VCPU_PMC + 8(r9)
-	stw	r6, VCPU_PMC + 12(r9)
-	stw	r7, VCPU_PMC + 16(r9)
-	stw	r8, VCPU_PMC + 20(r9)
-BEGIN_FTR_SECTION
-	mfspr	r5, SPRN_SIER
-	std	r5, VCPU_SIER(r9)
-BEGIN_FTR_SECTION_NESTED(96)
-	mfspr	r6, SPRN_SPMC1
-	mfspr	r7, SPRN_SPMC2
-	mfspr	r8, SPRN_MMCRS
-	stw	r6, VCPU_PMC + 24(r9)
-	stw	r7, VCPU_PMC + 28(r9)
-	std	r8, VCPU_MMCR + 32(r9)
-	lis	r4, 0x8000
-	mtspr	SPRN_MMCRS, r4
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-22:
+	lbz	r4, LPPACA_PMCINUSE(r8)
+21:	bl	kvmhv_save_guest_pmu
+	ld	r9, HSTATE_KVM_VCPU(r13)
 
 	/* Restore host values of some registers */
 BEGIN_FTR_SECTION
@@ -2010,24 +1859,6 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_DPDES, r8
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
-	/* If HMI, call kvmppc_realmode_hmi_handler() */
-	lwz	r12, STACK_SLOT_TRAP(r1)
-	cmpwi	r12, BOOK3S_INTERRUPT_HMI
-	bne	27f
-	bl	kvmppc_realmode_hmi_handler
-	nop
-	cmpdi	r3, 0
-	/*
-	 * At this point kvmppc_realmode_hmi_handler may have resync-ed
-	 * the TB, and if it has, we must not subtract the guest timebase
-	 * offset from the timebase. So, skip it.
-	 *
-	 * Also, do not call kvmppc_subcore_exit_guest() because it has
-	 * been invoked as part of kvmppc_realmode_hmi_handler().
-	 */
-	beq	30f
-
-27:
 	/* Subtract timebase offset from timebase */
 	ld	r8, VCORE_TB_OFFSET_APPL(r5)
 	cmpdi	r8,0
@@ -2045,7 +1876,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	addis	r8,r8,0x100		/* if so, increment upper 40 bits */
 	mtspr	SPRN_TBU40,r8
 
-17:	bl	kvmppc_subcore_exit_guest
+17:
+	/*
+	 * If this is an HMI, we called kvmppc_realmode_hmi_handler
+	 * above, which may or may not have already called
+	 * kvmppc_subcore_exit_guest.  Fortunately, all that
+	 * kvmppc_subcore_exit_guest does is clear a flag, so calling
+	 * it again here is benign even if kvmppc_realmode_hmi_handler
+	 * has already called it.
+	 */
+	bl	kvmppc_subcore_exit_guest
 	nop
 30:	ld	r5,HSTATE_KVM_VCORE(r13)
 	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
@@ -2099,6 +1939,67 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	mtlr	r0
 	blr
 
+kvmppc_guest_external:
+	/* External interrupt, first check for host_ipi. If this is
+	 * set, we know the host wants us out so let's do it now
+	 */
+	bl	kvmppc_read_intr
+
+	/*
+	 * Restore the active volatile registers after returning from
+	 * a C function.
+	 */
+	ld	r9, HSTATE_KVM_VCPU(r13)
+	li	r12, BOOK3S_INTERRUPT_EXTERNAL
+
+	/*
+	 * kvmppc_read_intr return codes:
+	 *
+	 * Exit to host (r3 > 0)
+	 *   1 An interrupt is pending that needs to be handled by the host
+	 *     Exit guest and return to host by branching to guest_exit_cont
+	 *
+	 *   2 Passthrough that needs completion in the host
+	 *     Exit guest and return to host by branching to guest_exit_cont
+	 *     However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
+	 *     to indicate to the host to complete handling the interrupt
+	 *
+	 * Before returning to guest, we check if any CPU is heading out
+	 * to the host and if so, we head out also. If no CPUs are heading
+	 * check return values <= 0.
+	 *
+	 * Return to guest (r3 <= 0)
+	 *  0 No external interrupt is pending
+	 * -1 A guest wakeup IPI (which has now been cleared)
+	 *    In either case, we return to guest to deliver any pending
+	 *    guest interrupts.
+	 *
+	 * -2 A PCI passthrough external interrupt was handled
+	 *    (interrupt was delivered directly to guest)
+	 *    Return to guest to deliver any pending guest interrupts.
+	 */
+
+	cmpdi	r3, 1
+	ble	1f
+
+	/* Return code = 2 */
+	li	r12, BOOK3S_INTERRUPT_HV_RM_HARD
+	stw	r12, VCPU_TRAP(r9)
+	b	guest_exit_cont
+
+1:	/* Return code <= 1 */
+	cmpdi	r3, 0
+	bgt	guest_exit_cont
+
+	/* Return code <= 0 */
+maybe_reenter_guest:
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	lwz	r0, VCORE_ENTRY_EXIT(r5)
+	cmpwi	r0, 0x100
+	mr	r4, r9
+	blt	deliver_guest_interrupt
+	b	guest_exit_cont
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 /*
  * Softpatch interrupt for transactional memory emulation cases
@@ -2302,6 +2203,10 @@ hcall_try_real_mode:
 	andi.	r0,r11,MSR_PR
 	/* sc 1 from userspace - reflect to guest syscall */
 	bne	sc_1_fast_return
+	/* sc 1 from nested guest - give it to L1 to handle */
+	ld	r0, VCPU_NESTED(r9)
+	cmpdi	r0, 0
+	bne	guest_exit_cont
 	clrrdi	r3,r3,2
 	cmpldi	r3,hcall_real_table_end - hcall_real_table
 	bge	guest_exit_cont
@@ -2561,6 +2466,7 @@ hcall_real_table:
 hcall_real_table_end:
 
 _GLOBAL(kvmppc_h_set_xdabr)
+EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr)
 	andi.	r0, r5, DABRX_USER | DABRX_KERNEL
 	beq	6f
 	li	r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI
@@ -2570,6 +2476,7 @@ _GLOBAL(kvmppc_h_set_xdabr)
 	blr
 
 _GLOBAL(kvmppc_h_set_dabr)
+EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr)
 	li	r5, DABRX_USER | DABRX_KERNEL
 3:
 BEGIN_FTR_SECTION
@@ -2682,11 +2589,13 @@ BEGIN_FTR_SECTION
 	b	91f
 END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/*
-	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
 	 */
 	ld	r3, HSTATE_KVM_VCPU(r13)
 	ld      r4, VCPU_MSR(r3)
+	li	r5, 0			/* don't preserve non-vol regs */
 	bl	kvmppc_save_tm_hv
+	nop
 91:
 #endif
 
@@ -2802,11 +2711,13 @@ BEGIN_FTR_SECTION
 	b	91f
 END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/*
-	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
 	 */
 	mr      r3, r4
 	ld      r4, VCPU_MSR(r3)
+	li	r5, 0			/* don't preserve non-vol regs */
 	bl	kvmppc_restore_tm_hv
+	nop
 	ld	r4, HSTATE_KVM_VCPU(r13)
 91:
 #endif
@@ -2874,13 +2785,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	mr	r9, r4
 	cmpdi	r3, 0
 	bgt	guest_exit_cont
-
-	/* see if any other thread is already exiting */
-	lwz	r0,VCORE_ENTRY_EXIT(r5)
-	cmpwi	r0,0x100
-	bge	guest_exit_cont
-
-	b	kvmppc_cede_reentry	/* if not go back to guest */
+	b	maybe_reenter_guest
 
 	/* cede when already previously prodded case */
 kvm_cede_prodded:
@@ -2947,12 +2852,12 @@ machine_check_realmode:
 	 */
 	ld	r11, VCPU_MSR(r9)
 	rldicl.	r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
-	bne	mc_cont			/* if so, exit to host */
+	bne	guest_exit_cont		/* if so, exit to host */
 	/* Check if guest is capable of handling NMI exit */
 	ld	r10, VCPU_KVM(r9)
 	lbz	r10, KVM_FWNMI(r10)
 	cmpdi	r10, 1			/* FWNMI capable? */
-	beq	mc_cont			/* if so, exit with KVM_EXIT_NMI. */
+	beq	guest_exit_cont		/* if so, exit with KVM_EXIT_NMI. */
 
 	/* if not, fall through for backward compatibility. */
 	andi.	r10, r11, MSR_RI	/* check for unrecoverable exception */
@@ -2966,6 +2871,21 @@ machine_check_realmode:
 2:	b	fast_interrupt_c_return
 
 /*
+ * Call C code to handle a HMI in real mode.
+ * Only the primary thread does the call, secondary threads are handled
+ * by calling hmi_exception_realmode() after kvmppc_hv_entry returns.
+ * r9 points to the vcpu on entry
+ */
+hmi_realmode:
+	lbz	r0, HSTATE_PTID(r13)
+	cmpwi	r0, 0
+	bne	guest_exit_cont
+	bl	kvmppc_realmode_hmi_handler
+	ld	r9, HSTATE_KVM_VCPU(r13)
+	li	r12, BOOK3S_INTERRUPT_HMI
+	b	guest_exit_cont
+
+/*
  * Check the reason we woke from nap, and take appropriate action.
  * Returns (in r3):
  *	0 if nothing needs to be done
@@ -3130,10 +3050,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
  * Save transactional state and TM-related registers.
  * Called with r3 pointing to the vcpu struct and r4 containing
  * the guest MSR value.
- * This can modify all checkpointed registers, but
+ * r5 is non-zero iff non-volatile register state needs to be maintained.
+ * If r5 == 0, this can modify all checkpointed registers, but
  * restores r1 and r2 before exit.
  */
-kvmppc_save_tm_hv:
+_GLOBAL_TOC(kvmppc_save_tm_hv)
+EXPORT_SYMBOL_GPL(kvmppc_save_tm_hv)
 	/* See if we need to handle fake suspend mode */
 BEGIN_FTR_SECTION
 	b	__kvmppc_save_tm
@@ -3161,12 +3083,6 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
 	nop
 
-	std	r1, HSTATE_HOST_R1(r13)
-
-	/* Clear the MSR RI since r1, r13 may be foobar. */
-	li	r5, 0
-	mtmsrd	r5, 1
-
 	/* We have to treclaim here because that's the only way to do S->N */
 	li	r3, TM_CAUSE_KVM_RESCHED
 	TRECLAIM(R3)
@@ -3175,22 +3091,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
 	 * We were in fake suspend, so we are not going to save the
 	 * register state as the guest checkpointed state (since
 	 * we already have it), therefore we can now use any volatile GPR.
+	 * In fact treclaim in fake suspend state doesn't modify
+	 * any registers.
 	 */
-	/* Reload PACA pointer, stack pointer and TOC. */
-	GET_PACA(r13)
-	ld	r1, HSTATE_HOST_R1(r13)
-	ld	r2, PACATOC(r13)
 
-	/* Set MSR RI now we have r1 and r13 back. */
-	li	r5, MSR_RI
-	mtmsrd	r5, 1
-
-	HMT_MEDIUM
-	ld	r6, HSTATE_DSCR(r13)
-	mtspr	SPRN_DSCR, r6
-BEGIN_FTR_SECTION_NESTED(96)
+BEGIN_FTR_SECTION
 	bl	pnv_power9_force_smt4_release
-END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
 	nop
 
 4:
@@ -3216,10 +3123,12 @@ END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
  * Restore transactional state and TM-related registers.
  * Called with r3 pointing to the vcpu struct
  * and r4 containing the guest MSR value.
+ * r5 is non-zero iff non-volatile register state needs to be maintained.
  * This potentially modifies all checkpointed registers.
  * It restores r1 and r2 from the PACA.
  */
-kvmppc_restore_tm_hv:
+_GLOBAL_TOC(kvmppc_restore_tm_hv)
+EXPORT_SYMBOL_GPL(kvmppc_restore_tm_hv)
 	/*
 	 * If we are doing TM emulation for the guest on a POWER9 DD2,
 	 * then we don't actually do a trechkpt -- we either set up
@@ -3424,6 +3333,194 @@ kvmppc_msr_interrupt:
 	blr
 
 /*
+ * Load up guest PMU state.  R3 points to the vcpu struct.
+ */
+_GLOBAL(kvmhv_load_guest_pmu)
+EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
+	mr	r4, r3
+	mflr	r0
+	li	r3, 1
+	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
+	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
+	isync
+BEGIN_FTR_SECTION
+	ld	r3, VCPU_MMCR(r4)
+	andi.	r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
+	cmpwi	r5, MMCR0_PMAO
+	beql	kvmppc_fix_pmao
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
+	lwz	r3, VCPU_PMC(r4)	/* always load up guest PMU registers */
+	lwz	r5, VCPU_PMC + 4(r4)	/* to prevent information leak */
+	lwz	r6, VCPU_PMC + 8(r4)
+	lwz	r7, VCPU_PMC + 12(r4)
+	lwz	r8, VCPU_PMC + 16(r4)
+	lwz	r9, VCPU_PMC + 20(r4)
+	mtspr	SPRN_PMC1, r3
+	mtspr	SPRN_PMC2, r5
+	mtspr	SPRN_PMC3, r6
+	mtspr	SPRN_PMC4, r7
+	mtspr	SPRN_PMC5, r8
+	mtspr	SPRN_PMC6, r9
+	ld	r3, VCPU_MMCR(r4)
+	ld	r5, VCPU_MMCR + 8(r4)
+	ld	r6, VCPU_MMCR + 16(r4)
+	ld	r7, VCPU_SIAR(r4)
+	ld	r8, VCPU_SDAR(r4)
+	mtspr	SPRN_MMCR1, r5
+	mtspr	SPRN_MMCRA, r6
+	mtspr	SPRN_SIAR, r7
+	mtspr	SPRN_SDAR, r8
+BEGIN_FTR_SECTION
+	ld	r5, VCPU_MMCR + 24(r4)
+	ld	r6, VCPU_SIER(r4)
+	mtspr	SPRN_MMCR2, r5
+	mtspr	SPRN_SIER, r6
+BEGIN_FTR_SECTION_NESTED(96)
+	lwz	r7, VCPU_PMC + 24(r4)
+	lwz	r8, VCPU_PMC + 28(r4)
+	ld	r9, VCPU_MMCR + 32(r4)
+	mtspr	SPRN_SPMC1, r7
+	mtspr	SPRN_SPMC2, r8
+	mtspr	SPRN_MMCRS, r9
+END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	mtspr	SPRN_MMCR0, r3
+	isync
+	mtlr	r0
+	blr
+
+/*
+ * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
+ */
+_GLOBAL(kvmhv_load_host_pmu)
+EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
+	mflr	r0
+	lbz	r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
+	cmpwi	r4, 0
+	beq	23f			/* skip if not */
+BEGIN_FTR_SECTION
+	ld	r3, HSTATE_MMCR0(r13)
+	andi.	r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
+	cmpwi	r4, MMCR0_PMAO
+	beql	kvmppc_fix_pmao
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
+	lwz	r3, HSTATE_PMC1(r13)
+	lwz	r4, HSTATE_PMC2(r13)
+	lwz	r5, HSTATE_PMC3(r13)
+	lwz	r6, HSTATE_PMC4(r13)
+	lwz	r8, HSTATE_PMC5(r13)
+	lwz	r9, HSTATE_PMC6(r13)
+	mtspr	SPRN_PMC1, r3
+	mtspr	SPRN_PMC2, r4
+	mtspr	SPRN_PMC3, r5
+	mtspr	SPRN_PMC4, r6
+	mtspr	SPRN_PMC5, r8
+	mtspr	SPRN_PMC6, r9
+	ld	r3, HSTATE_MMCR0(r13)
+	ld	r4, HSTATE_MMCR1(r13)
+	ld	r5, HSTATE_MMCRA(r13)
+	ld	r6, HSTATE_SIAR(r13)
+	ld	r7, HSTATE_SDAR(r13)
+	mtspr	SPRN_MMCR1, r4
+	mtspr	SPRN_MMCRA, r5
+	mtspr	SPRN_SIAR, r6
+	mtspr	SPRN_SDAR, r7
+BEGIN_FTR_SECTION
+	ld	r8, HSTATE_MMCR2(r13)
+	ld	r9, HSTATE_SIER(r13)
+	mtspr	SPRN_MMCR2, r8
+	mtspr	SPRN_SIER, r9
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	mtspr	SPRN_MMCR0, r3
+	isync
+	mtlr	r0
+23:	blr
+
+/*
+ * Save guest PMU state into the vcpu struct.
+ * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
+ */
+_GLOBAL(kvmhv_save_guest_pmu)
+EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
+	mr	r9, r3
+	mr	r8, r4
+BEGIN_FTR_SECTION
+	/*
+	 * POWER8 seems to have a hardware bug where setting
+	 * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
+	 * when some counters are already negative doesn't seem
+	 * to cause a performance monitor alert (and hence interrupt).
+	 * The effect of this is that when saving the PMU state,
+	 * if there is no PMU alert pending when we read MMCR0
+	 * before freezing the counters, but one becomes pending
+	 * before we read the counters, we lose it.
+	 * To work around this, we need a way to freeze the counters
+	 * before reading MMCR0.  Normally, freezing the counters
+	 * is done by writing MMCR0 (to set MMCR0[FC]) which
+	 * unavoidably writes MMCR0[PMA0] as well.  On POWER8,
+	 * we can also freeze the counters using MMCR2, by writing
+	 * 1s to all the counter freeze condition bits (there are
+	 * 9 bits each for 6 counters).
+	 */
+	li	r3, -1			/* set all freeze bits */
+	clrrdi	r3, r3, 10
+	mfspr	r10, SPRN_MMCR2
+	mtspr	SPRN_MMCR2, r3
+	isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	li	r3, 1
+	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
+	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */
+	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
+	mfspr	r6, SPRN_MMCRA
+	/* Clear MMCRA in order to disable SDAR updates */
+	li	r7, 0
+	mtspr	SPRN_MMCRA, r7
+	isync
+	cmpwi	r8, 0			/* did they ask for PMU stuff to be saved? */
+	bne	21f
+	std	r3, VCPU_MMCR(r9)	/* if not, set saved MMCR0 to FC */
+	b	22f
+21:	mfspr	r5, SPRN_MMCR1
+	mfspr	r7, SPRN_SIAR
+	mfspr	r8, SPRN_SDAR
+	std	r4, VCPU_MMCR(r9)
+	std	r5, VCPU_MMCR + 8(r9)
+	std	r6, VCPU_MMCR + 16(r9)
+BEGIN_FTR_SECTION
+	std	r10, VCPU_MMCR + 24(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	std	r7, VCPU_SIAR(r9)
+	std	r8, VCPU_SDAR(r9)
+	mfspr	r3, SPRN_PMC1
+	mfspr	r4, SPRN_PMC2
+	mfspr	r5, SPRN_PMC3
+	mfspr	r6, SPRN_PMC4
+	mfspr	r7, SPRN_PMC5
+	mfspr	r8, SPRN_PMC6
+	stw	r3, VCPU_PMC(r9)
+	stw	r4, VCPU_PMC + 4(r9)
+	stw	r5, VCPU_PMC + 8(r9)
+	stw	r6, VCPU_PMC + 12(r9)
+	stw	r7, VCPU_PMC + 16(r9)
+	stw	r8, VCPU_PMC + 20(r9)
+BEGIN_FTR_SECTION
+	mfspr	r5, SPRN_SIER
+	std	r5, VCPU_SIER(r9)
+BEGIN_FTR_SECTION_NESTED(96)
+	mfspr	r6, SPRN_SPMC1
+	mfspr	r7, SPRN_SPMC2
+	mfspr	r8, SPRN_MMCRS
+	stw	r6, VCPU_PMC + 24(r9)
+	stw	r7, VCPU_PMC + 28(r9)
+	std	r8, VCPU_MMCR + 32(r9)
+	lis	r4, 0x8000
+	mtspr	SPRN_MMCRS, r4
+END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+22:	blr
+
+/*
  * This works around a hardware bug on POWER8E processors, where
  * writing a 1 to the MMCR0[PMAO] bit doesn't generate a
  * performance monitor interrupt.  Instead, when we need to have
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
index 008285058f9b..888e2609e3f1 100644
--- a/arch/powerpc/kvm/book3s_hv_tm.c
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -130,7 +130,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 			return RESUME_GUEST;
 		}
 		/* Set CR0 to indicate previous transactional state */
-		vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+		vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
 			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
 		/* L=1 => tresume, L=0 => tsuspend */
 		if (instr & (1 << 21)) {
@@ -174,7 +174,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		copy_from_checkpoint(vcpu);
 
 		/* Set CR0 to indicate previous transactional state */
-		vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+		vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
 			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
 		vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
 		return RESUME_GUEST;
@@ -204,7 +204,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		copy_to_checkpoint(vcpu);
 
 		/* Set CR0 to indicate previous transactional state */
-		vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+		vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
 			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
 		vcpu->arch.shregs.msr = msr | MSR_TS_S;
 		return RESUME_GUEST;
diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
index b2c7c6fca4f9..3cf5863bc06e 100644
--- a/arch/powerpc/kvm/book3s_hv_tm_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
@@ -89,7 +89,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
 		if (instr & (1 << 21))
 			vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
 		/* Set CR0 to 0b0010 */
-		vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0x20000000;
+		vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
+			0x20000000;
 		return 1;
 	}
 
@@ -105,5 +106,5 @@ void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu)
 	vcpu->arch.shregs.msr &= ~MSR_TS_MASK;	/* go to N state */
 	vcpu->arch.regs.nip = vcpu->arch.tfhar;
 	copy_from_checkpoint(vcpu);
-	vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000;
+	vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | 0xa0000000;
 }
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 614ebb4261f7..4efd65d9e828 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -167,7 +167,7 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
 	svcpu->gpr[11] = vcpu->arch.regs.gpr[11];
 	svcpu->gpr[12] = vcpu->arch.regs.gpr[12];
 	svcpu->gpr[13] = vcpu->arch.regs.gpr[13];
-	svcpu->cr  = vcpu->arch.cr;
+	svcpu->cr  = vcpu->arch.regs.ccr;
 	svcpu->xer = vcpu->arch.regs.xer;
 	svcpu->ctr = vcpu->arch.regs.ctr;
 	svcpu->lr  = vcpu->arch.regs.link;
@@ -249,7 +249,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
 	vcpu->arch.regs.gpr[11] = svcpu->gpr[11];
 	vcpu->arch.regs.gpr[12] = svcpu->gpr[12];
 	vcpu->arch.regs.gpr[13] = svcpu->gpr[13];
-	vcpu->arch.cr  = svcpu->cr;
+	vcpu->arch.regs.ccr  = svcpu->cr;
 	vcpu->arch.regs.xer = svcpu->xer;
 	vcpu->arch.regs.ctr = svcpu->ctr;
 	vcpu->arch.regs.link  = svcpu->lr;
@@ -1246,7 +1246,6 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		r = RESUME_GUEST;
 		break;
 	case BOOK3S_INTERRUPT_EXTERNAL:
-	case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
 	case BOOK3S_INTERRUPT_EXTERNAL_HV:
 	case BOOK3S_INTERRUPT_H_VIRT:
 		vcpu->stat.ext_intr_exits++;
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index b8356cdc0c04..b0b2bfc2ff51 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -310,7 +310,7 @@ static inline bool icp_try_update(struct kvmppc_icp *icp,
 	 */
 	if (new.out_ee) {
 		kvmppc_book3s_queue_irqprio(icp->vcpu,
-					    BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+					    BOOK3S_INTERRUPT_EXTERNAL);
 		if (!change_self)
 			kvmppc_fast_vcpu_kick(icp->vcpu);
 	}
@@ -593,8 +593,7 @@ static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
 	u32 xirr;
 
 	/* First, remove EE from the processor */
-	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
-				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
 
 	/*
 	 * ICP State: Accept_Interrupt
@@ -754,8 +753,7 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
 	 * We can remove EE from the current processor, the update
 	 * transaction will set it again if needed
 	 */
-	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
-				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
 
 	do {
 		old_state = new_state = READ_ONCE(icp->state);
@@ -1167,8 +1165,7 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
 	 * Deassert the CPU interrupt request.
 	 * icp_try_update will reassert it if necessary.
 	 */
-	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
-				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
 
 	/*
 	 * Note that if we displace an interrupt from old_state.xisr,
@@ -1393,7 +1390,8 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
 	}
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+	if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+	    cpu_has_feature(CPU_FTR_HVMODE)) {
 		/* Enable real mode support */
 		xics->real_mode = ENABLE_REALMODE;
 		xics->real_mode_dbg = DEBUG_REALMODE;
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 30c2eb766954..ad4a370703d3 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -62,6 +62,69 @@
 #define XIVE_Q_GAP	2
 
 /*
+ * Push a vcpu's context to the XIVE on guest entry.
+ * This assumes we are in virtual mode (MMU on)
+ */
+void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
+{
+	void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+	u64 pq;
+
+	if (!tima)
+		return;
+	eieio();
+	__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
+	__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
+	vcpu->arch.xive_pushed = 1;
+	eieio();
+
+	/*
+	 * We clear the irq_pending flag. There is a small chance of a
+	 * race vs. the escalation interrupt happening on another
+	 * processor setting it again, but the only consequence is to
+	 * cause a spurious wakeup on the next H_CEDE, which is not an
+	 * issue.
+	 */
+	vcpu->arch.irq_pending = 0;
+
+	/*
+	 * In single escalation mode, if the escalation interrupt is
+	 * on, we mask it.
+	 */
+	if (vcpu->arch.xive_esc_on) {
+		pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
+						  XIVE_ESB_SET_PQ_01));
+		mb();
+
+		/*
+		 * We have a possible subtle race here: The escalation
+		 * interrupt might have fired and be on its way to the
+		 * host queue while we mask it, and if we unmask it
+		 * early enough (re-cede right away), there is a
+		 * theorical possibility that it fires again, thus
+		 * landing in the target queue more than once which is
+		 * a big no-no.
+		 *
+		 * Fortunately, solving this is rather easy. If the
+		 * above load setting PQ to 01 returns a previous
+		 * value where P is set, then we know the escalation
+		 * interrupt is somewhere on its way to the host. In
+		 * that case we simply don't clear the xive_esc_on
+		 * flag below. It will be eventually cleared by the
+		 * handler for the escalation interrupt.
+		 *
+		 * Then, when doing a cede, we check that flag again
+		 * before re-enabling the escalation interrupt, and if
+		 * set, we abort the cede.
+		 */
+		if (!(pq & XIVE_ESB_VAL_P))
+			/* Now P is 0, we can clear the flag */
+			vcpu->arch.xive_esc_on = 0;
+	}
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
+
+/*
  * This is a simple trigger for a generic XIVE IRQ. This must
  * only be called for interrupts that support a trigger page
  */
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 4171ede8722b..033363d6e764 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -280,14 +280,6 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
 	/* First collect pending bits from HW */
 	GLUE(X_PFX,ack_pending)(xc);
 
-	/*
-	 * Cleanup the old-style bits if needed (they may have been
-	 * set by pull or an escalation interrupts).
-	 */
-	if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions))
-		clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
-			  &vcpu->arch.pending_exceptions);
-
 	pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
 		 xc->pending, xc->hw_cppr, xc->cppr);
 
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index 81bd8a07aa51..051af7d97327 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -182,7 +182,7 @@
 	 */
 	PPC_LL	r4, PACACURRENT(r13)
 	PPC_LL	r4, (THREAD + THREAD_KVM_VCPU)(r4)
-	stw	r10, VCPU_CR(r4)
+	PPC_STL	r10, VCPU_CR(r4)
 	PPC_STL r11, VCPU_GPR(R4)(r4)
 	PPC_STL	r5, VCPU_GPR(R5)(r4)
 	PPC_STL	r6, VCPU_GPR(R6)(r4)
@@ -292,7 +292,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
 	PPC_STL	r4, VCPU_GPR(R4)(r11)
 	PPC_LL	r4, THREAD_NORMSAVE(0)(r10)
 	PPC_STL	r5, VCPU_GPR(R5)(r11)
-	stw	r13, VCPU_CR(r11)
+	PPC_STL	r13, VCPU_CR(r11)
 	mfspr	r5, \srr0
 	PPC_STL	r3, VCPU_GPR(R10)(r11)
 	PPC_LL	r3, THREAD_NORMSAVE(2)(r10)
@@ -319,7 +319,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
 	PPC_STL	r4, VCPU_GPR(R4)(r11)
 	PPC_LL	r4, GPR9(r8)
 	PPC_STL	r5, VCPU_GPR(R5)(r11)
-	stw	r9, VCPU_CR(r11)
+	PPC_STL	r9, VCPU_CR(r11)
 	mfspr	r5, \srr0
 	PPC_STL	r3, VCPU_GPR(R8)(r11)
 	PPC_LL	r3, GPR10(r8)
@@ -643,7 +643,7 @@ lightweight_exit:
 	PPC_LL	r3, VCPU_LR(r4)
 	PPC_LL	r5, VCPU_XER(r4)
 	PPC_LL	r6, VCPU_CTR(r4)
-	lwz	r7, VCPU_CR(r4)
+	PPC_LL	r7, VCPU_CR(r4)
 	PPC_LL	r8, VCPU_PC(r4)
 	PPC_LD(r9, VCPU_SHARED_MSR, r11)
 	PPC_LL	r0, VCPU_GPR(R0)(r4)
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 75dce1ef3bc8..f91b1309a0a8 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -117,7 +117,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 
 	emulated = EMULATE_FAIL;
 	vcpu->arch.regs.msr = vcpu->arch.shared->msr;
-	vcpu->arch.regs.ccr = vcpu->arch.cr;
 	if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) {
 		int type = op.type & INSTR_TYPE_MASK;
 		int size = GETSIZE(op.type);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index eba5756d5b41..2869a299c4ed 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -594,7 +594,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = !!(hv_enabled && radix_enabled());
 		break;
 	case KVM_CAP_PPC_MMU_HASH_V3:
-		r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300));
+		r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300) &&
+		       cpu_has_feature(CPU_FTR_HVMODE));
+		break;
+	case KVM_CAP_PPC_NESTED_HV:
+		r = !!(hv_enabled && kvmppc_hv_ops->enable_nested &&
+		       !kvmppc_hv_ops->enable_nested(NULL));
 		break;
 #endif
 	case KVM_CAP_SYNC_MMU:
@@ -2114,6 +2119,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 			r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags);
 		break;
 	}
+
+	case KVM_CAP_PPC_NESTED_HV:
+		r = -EINVAL;
+		if (!is_kvmppc_hv_enabled(kvm) ||
+		    !kvm->arch.kvm_ops->enable_nested)
+			break;
+		r = kvm->arch.kvm_ops->enable_nested(kvm);
+		break;
 #endif
 	default:
 		r = -EINVAL;
diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
index 90e330f21356..0531a1492fdf 100644
--- a/arch/powerpc/kvm/tm.S
+++ b/arch/powerpc/kvm/tm.S
@@ -28,17 +28,25 @@
  * Save transactional state and TM-related registers.
  * Called with:
  * - r3 pointing to the vcpu struct
- * - r4 points to the MSR with current TS bits:
+ * - r4 containing the MSR with current TS bits:
  * 	(For HV KVM, it is VCPU_MSR ; For PR KVM, it is host MSR).
- * This can modify all checkpointed registers, but
- * restores r1, r2 before exit.
+ * - r5 containing a flag indicating that non-volatile registers
+ *	must be preserved.
+ * If r5 == 0, this can modify all checkpointed registers, but
+ * restores r1, r2 before exit.  If r5 != 0, this restores the
+ * MSR TM/FP/VEC/VSX bits to their state on entry.
  */
 _GLOBAL(__kvmppc_save_tm)
 	mflr	r0
 	std	r0, PPC_LR_STKOFF(r1)
+	stdu    r1, -SWITCH_FRAME_SIZE(r1)
+
+	mr	r9, r3
+	cmpdi	cr7, r5, 0
 
 	/* Turn on TM. */
 	mfmsr	r8
+	mr	r10, r8
 	li	r0, 1
 	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
 	ori     r8, r8, MSR_FP
@@ -51,6 +59,27 @@ _GLOBAL(__kvmppc_save_tm)
 	std	r1, HSTATE_SCRATCH2(r13)
 	std	r3, HSTATE_SCRATCH1(r13)
 
+	/* Save CR on the stack - even if r5 == 0 we need to get cr7 back. */
+	mfcr	r6
+	SAVE_GPR(6, r1)
+
+	/* Save DSCR so we can restore it to avoid running with user value */
+	mfspr	r7, SPRN_DSCR
+	SAVE_GPR(7, r1)
+
+	/*
+	 * We are going to do treclaim., which will modify all checkpointed
+	 * registers.  Save the non-volatile registers on the stack if
+	 * preservation of non-volatile state has been requested.
+	 */
+	beq	cr7, 3f
+	SAVE_NVGPRS(r1)
+
+	/* MSR[TS] will be 0 (non-transactional) once we do treclaim. */
+	li	r0, 0
+	rldimi	r10, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+	SAVE_GPR(10, r1)	/* final MSR value */
+3:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 BEGIN_FTR_SECTION
 	/* Emulation of the treclaim instruction needs TEXASR before treclaim */
@@ -74,22 +103,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
 	std	r9, PACATMSCRATCH(r13)
 	ld	r9, HSTATE_SCRATCH1(r13)
 
-	/* Get a few more GPRs free. */
-	std	r29, VCPU_GPRS_TM(29)(r9)
-	std	r30, VCPU_GPRS_TM(30)(r9)
-	std	r31, VCPU_GPRS_TM(31)(r9)
-
-	/* Save away PPR and DSCR soon so don't run with user values. */
-	mfspr	r31, SPRN_PPR
+	/* Save away PPR soon so we don't run with user value. */
+	std	r0, VCPU_GPRS_TM(0)(r9)
+	mfspr	r0, SPRN_PPR
 	HMT_MEDIUM
-	mfspr	r30, SPRN_DSCR
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	ld	r29, HSTATE_DSCR(r13)
-	mtspr	SPRN_DSCR, r29
-#endif
 
-	/* Save all but r9, r13 & r29-r31 */
-	reg = 0
+	/* Reload stack pointer. */
+	std	r1, VCPU_GPRS_TM(1)(r9)
+	ld	r1, HSTATE_SCRATCH2(r13)
+
+	/* Set MSR RI now we have r1 and r13 back. */
+	std	r2, VCPU_GPRS_TM(2)(r9)
+	li	r2, MSR_RI
+	mtmsrd	r2, 1
+
+	/* Reload TOC pointer. */
+	ld	r2, PACATOC(r13)
+
+	/* Save all but r0-r2, r9 & r13 */
+	reg = 3
 	.rept	29
 	.if (reg != 9) && (reg != 13)
 	std	reg, VCPU_GPRS_TM(reg)(r9)
@@ -103,33 +135,29 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
 	ld	r4, PACATMSCRATCH(r13)
 	std	r4, VCPU_GPRS_TM(9)(r9)
 
-	/* Reload stack pointer and TOC. */
-	ld	r1, HSTATE_SCRATCH2(r13)
-	ld	r2, PACATOC(r13)
-
-	/* Set MSR RI now we have r1 and r13 back. */
-	li	r5, MSR_RI
-	mtmsrd	r5, 1
+	/* Restore host DSCR and CR values, after saving guest values */
+	mfcr	r6
+	mfspr	r7, SPRN_DSCR
+	stw	r6, VCPU_CR_TM(r9)
+	std	r7, VCPU_DSCR_TM(r9)
+	REST_GPR(6, r1)
+	REST_GPR(7, r1)
+	mtcr	r6
+	mtspr	SPRN_DSCR, r7
 
-	/* Save away checkpinted SPRs. */
-	std	r31, VCPU_PPR_TM(r9)
-	std	r30, VCPU_DSCR_TM(r9)
+	/* Save away checkpointed SPRs. */
+	std	r0, VCPU_PPR_TM(r9)
 	mflr	r5
-	mfcr	r6
 	mfctr	r7
 	mfspr	r8, SPRN_AMR
 	mfspr	r10, SPRN_TAR
 	mfxer	r11
 	std	r5, VCPU_LR_TM(r9)
-	stw	r6, VCPU_CR_TM(r9)
 	std	r7, VCPU_CTR_TM(r9)
 	std	r8, VCPU_AMR_TM(r9)
 	std	r10, VCPU_TAR_TM(r9)
 	std	r11, VCPU_XER_TM(r9)
 
-	/* Restore r12 as trap number. */
-	lwz	r12, VCPU_TRAP(r9)
-
 	/* Save FP/VSX. */
 	addi	r3, r9, VCPU_FPRS_TM
 	bl	store_fp_state
@@ -137,6 +165,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
 	bl	store_vr_state
 	mfspr	r6, SPRN_VRSAVE
 	stw	r6, VCPU_VRSAVE_TM(r9)
+
+	/* Restore non-volatile registers if requested to */
+	beq	cr7, 1f
+	REST_NVGPRS(r1)
+	REST_GPR(10, r1)
 1:
 	/*
 	 * We need to save these SPRs after the treclaim so that the software
@@ -146,12 +179,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
 	 */
 	mfspr	r7, SPRN_TEXASR
 	std	r7, VCPU_TEXASR(r9)
-11:
 	mfspr	r5, SPRN_TFHAR
 	mfspr	r6, SPRN_TFIAR
 	std	r5, VCPU_TFHAR(r9)
 	std	r6, VCPU_TFIAR(r9)
 
+	/* Restore MSR state if requested */
+	beq	cr7, 2f
+	mtmsrd	r10, 0
+2:
+	addi	r1, r1, SWITCH_FRAME_SIZE
 	ld	r0, PPC_LR_STKOFF(r1)
 	mtlr	r0
 	blr
@@ -161,49 +198,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
  * be invoked from C function by PR KVM only.
  */
 _GLOBAL(_kvmppc_save_tm_pr)
-	mflr	r5
-	std	r5, PPC_LR_STKOFF(r1)
-	stdu    r1, -SWITCH_FRAME_SIZE(r1)
-	SAVE_NVGPRS(r1)
-
-	/* save MSR since TM/math bits might be impacted
-	 * by __kvmppc_save_tm().
-	 */
-	mfmsr	r5
-	SAVE_GPR(5, r1)
-
-	/* also save DSCR/CR/TAR so that it can be recovered later */
-	mfspr   r6, SPRN_DSCR
-	SAVE_GPR(6, r1)
-
-	mfcr    r7
-	stw     r7, _CCR(r1)
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+	stdu    r1, -PPC_MIN_STKFRM(r1)
 
 	mfspr   r8, SPRN_TAR
-	SAVE_GPR(8, r1)
+	std	r8, PPC_MIN_STKFRM-8(r1)
 
+	li	r5, 1		/* preserve non-volatile registers */
 	bl	__kvmppc_save_tm
 
-	REST_GPR(8, r1)
+	ld	r8, PPC_MIN_STKFRM-8(r1)
 	mtspr   SPRN_TAR, r8
 
-	ld      r7, _CCR(r1)
-	mtcr	r7
-
-	REST_GPR(6, r1)
-	mtspr   SPRN_DSCR, r6
-
-	/* need preserve current MSR's MSR_TS bits */
-	REST_GPR(5, r1)
-	mfmsr   r6
-	rldicl  r6, r6, 64 - MSR_TS_S_LG, 62
-	rldimi  r5, r6, MSR_TS_S_LG, 63 - MSR_TS_T_LG
-	mtmsrd  r5
-
-	REST_NVGPRS(r1)
-	addi    r1, r1, SWITCH_FRAME_SIZE
-	ld	r5, PPC_LR_STKOFF(r1)
-	mtlr	r5
+	addi    r1, r1, PPC_MIN_STKFRM
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
 	blr
 
 EXPORT_SYMBOL_GPL(_kvmppc_save_tm_pr);
@@ -215,15 +225,21 @@ EXPORT_SYMBOL_GPL(_kvmppc_save_tm_pr);
  *  - r4 is the guest MSR with desired TS bits:
  * 	For HV KVM, it is VCPU_MSR
  * 	For PR KVM, it is provided by caller
- * This potentially modifies all checkpointed registers.
- * It restores r1, r2 from the PACA.
+ * - r5 containing a flag indicating that non-volatile registers
+ *	must be preserved.
+ * If r5 == 0, this potentially modifies all checkpointed registers, but
+ * restores r1, r2 from the PACA before exit.
+ * If r5 != 0, this restores the MSR TM/FP/VEC/VSX bits to their state on entry.
  */
 _GLOBAL(__kvmppc_restore_tm)
 	mflr	r0
 	std	r0, PPC_LR_STKOFF(r1)
 
+	cmpdi	cr7, r5, 0
+
 	/* Turn on TM/FP/VSX/VMX so we can restore them. */
 	mfmsr	r5
+	mr	r10, r5
 	li	r6, MSR_TM >> 32
 	sldi	r6, r6, 32
 	or	r5, r5, r6
@@ -244,8 +260,7 @@ _GLOBAL(__kvmppc_restore_tm)
 
 	mr	r5, r4
 	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
-	beqlr		/* TM not active in guest */
-	std	r1, HSTATE_SCRATCH2(r13)
+	beq	9f		/* TM not active in guest */
 
 	/* Make sure the failure summary is set, otherwise we'll program check
 	 * when we trechkpt.  It's possible that this might have been not set
@@ -256,6 +271,26 @@ _GLOBAL(__kvmppc_restore_tm)
 	mtspr	SPRN_TEXASR, r7
 
 	/*
+	 * Make a stack frame and save non-volatile registers if requested.
+	 */
+	stdu	r1, -SWITCH_FRAME_SIZE(r1)
+	std	r1, HSTATE_SCRATCH2(r13)
+
+	mfcr	r6
+	mfspr	r7, SPRN_DSCR
+	SAVE_GPR(2, r1)
+	SAVE_GPR(6, r1)
+	SAVE_GPR(7, r1)
+
+	beq	cr7, 4f
+	SAVE_NVGPRS(r1)
+
+	/* MSR[TS] will be 1 (suspended) once we do trechkpt */
+	li	r0, 1
+	rldimi	r10, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+	SAVE_GPR(10, r1)	/* final MSR value */
+4:
+	/*
 	 * We need to load up the checkpointed state for the guest.
 	 * We need to do this early as it will blow away any GPRs, VSRs and
 	 * some SPRs.
@@ -291,8 +326,6 @@ _GLOBAL(__kvmppc_restore_tm)
 	ld	r29, VCPU_DSCR_TM(r3)
 	ld	r30, VCPU_PPR_TM(r3)
 
-	std	r2, PACATMSCRATCH(r13) /* Save TOC */
-
 	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
 	li	r5, 0
 	mtmsrd	r5, 1
@@ -318,18 +351,31 @@ _GLOBAL(__kvmppc_restore_tm)
 	/* Now let's get back the state we need. */
 	HMT_MEDIUM
 	GET_PACA(r13)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	ld	r29, HSTATE_DSCR(r13)
-	mtspr	SPRN_DSCR, r29
-#endif
 	ld	r1, HSTATE_SCRATCH2(r13)
-	ld	r2, PACATMSCRATCH(r13)
+	REST_GPR(7, r1)
+	mtspr	SPRN_DSCR, r7
 
 	/* Set the MSR RI since we have our registers back. */
 	li	r5, MSR_RI
 	mtmsrd	r5, 1
+
+	/* Restore TOC pointer and CR */
+	REST_GPR(2, r1)
+	REST_GPR(6, r1)
+	mtcr	r6
+
+	/* Restore non-volatile registers if requested to. */
+	beq	cr7, 5f
+	REST_GPR(10, r1)
+	REST_NVGPRS(r1)
+
+5:	addi	r1, r1, SWITCH_FRAME_SIZE
 	ld	r0, PPC_LR_STKOFF(r1)
 	mtlr	r0
+
+9:	/* Restore MSR bits if requested */
+	beqlr	cr7
+	mtmsrd	r10, 0
 	blr
 
 /*
@@ -337,47 +383,23 @@ _GLOBAL(__kvmppc_restore_tm)
  * can be invoked from C function by PR KVM only.
  */
 _GLOBAL(_kvmppc_restore_tm_pr)
-	mflr	r5
-	std	r5, PPC_LR_STKOFF(r1)
-	stdu    r1, -SWITCH_FRAME_SIZE(r1)
-	SAVE_NVGPRS(r1)
-
-	/* save MSR to avoid TM/math bits change */
-	mfmsr	r5
-	SAVE_GPR(5, r1)
-
-	/* also save DSCR/CR/TAR so that it can be recovered later */
-	mfspr   r6, SPRN_DSCR
-	SAVE_GPR(6, r1)
-
-	mfcr    r7
-	stw     r7, _CCR(r1)
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+	stdu    r1, -PPC_MIN_STKFRM(r1)
 
+	/* save TAR so that it can be recovered later */
 	mfspr   r8, SPRN_TAR
-	SAVE_GPR(8, r1)
+	std	r8, PPC_MIN_STKFRM-8(r1)
 
+	li	r5, 1
 	bl	__kvmppc_restore_tm
 
-	REST_GPR(8, r1)
+	ld	r8, PPC_MIN_STKFRM-8(r1)
 	mtspr   SPRN_TAR, r8
 
-	ld      r7, _CCR(r1)
-	mtcr	r7
-
-	REST_GPR(6, r1)
-	mtspr   SPRN_DSCR, r6
-
-	/* need preserve current MSR's MSR_TS bits */
-	REST_GPR(5, r1)
-	mfmsr   r6
-	rldicl  r6, r6, 64 - MSR_TS_S_LG, 62
-	rldimi  r5, r6, MSR_TS_S_LG, 63 - MSR_TS_T_LG
-	mtmsrd  r5
-
-	REST_NVGPRS(r1)
-	addi    r1, r1, SWITCH_FRAME_SIZE
-	ld	r5, PPC_LR_STKOFF(r1)
-	mtlr	r5
+	addi    r1, r1, PPC_MIN_STKFRM
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
 	blr
 
 EXPORT_SYMBOL_GPL(_kvmppc_restore_tm_pr);
diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
index f3b23759e017..372a82fa2de3 100644
--- a/arch/powerpc/kvm/trace_book3s.h
+++ b/arch/powerpc/kvm/trace_book3s.h
@@ -14,7 +14,6 @@
 	{0x400, "INST_STORAGE"}, \
 	{0x480, "INST_SEGMENT"}, \
 	{0x500, "EXTERNAL"}, \
-	{0x501, "EXTERNAL_LEVEL"}, \
 	{0x502, "EXTERNAL_HV"}, \
 	{0x600, "ALIGNMENT"}, \
 	{0x700, "PROGRAM"}, \
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index fef3e1eb3a19..4c4dfc473800 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -833,6 +833,15 @@ EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
 /*
  * Flush partition scoped translations from LPID (=LPIDR)
  */
+void radix__flush_tlb_lpid(unsigned int lpid)
+{
+	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
+}
+EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid);
+
+/*
+ * Flush partition scoped translations from LPID (=LPIDR)
+ */
 void radix__local_flush_tlb_lpid(unsigned int lpid)
 {
 	_tlbiel_lpid(lpid, RIC_FLUSH_ALL);
diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c
index ad054dd0d666..5df6290d1ccc 100644
--- a/arch/powerpc/oprofile/backtrace.c
+++ b/arch/powerpc/oprofile/backtrace.c
@@ -7,7 +7,7 @@
  * 2 of the License, or (at your option) any later version.
 **/
 
-#include <linux/compat_time.h>
+#include <linux/time.h>
 #include <linux/oprofile.h>
 #include <linux/sched.h>
 #include <asm/processor.h>
diff --git a/arch/powerpc/platforms/4xx/soc.c b/arch/powerpc/platforms/4xx/soc.c
index 5e36508b2a70..1844bf502fcf 100644
--- a/arch/powerpc/platforms/4xx/soc.c
+++ b/arch/powerpc/platforms/4xx/soc.c
@@ -200,7 +200,7 @@ void ppc4xx_reset_system(char *cmd)
 	u32 reset_type = DBCR0_RST_SYSTEM;
 	const u32 *prop;
 
-	np = of_find_node_by_type(NULL, "cpu");
+	np = of_get_cpu_node(0, NULL);
 	if (np) {
 		prop = of_get_property(np, "reset-type", NULL);
 
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 027c42d8966c..f1c805c8adbc 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -66,7 +66,7 @@ static int __init get_freq(char *name, unsigned long *val)
 	int found = 0;
 
 	/* The cpu node should have timebase and clock frequency properties */
-	cpu = of_find_node_by_type(NULL, "cpu");
+	cpu = of_get_cpu_node(0, NULL);
 
 	if (cpu) {
 		fp = of_get_property(cpu, name, NULL);
@@ -147,8 +147,9 @@ void __init mpc8xx_calibrate_decr(void)
 	 * we have to enable the timebase).  The decrementer interrupt
 	 * is wired into the vector table, nothing to do here for that.
 	 */
-	cpu = of_find_node_by_type(NULL, "cpu");
+	cpu = of_get_cpu_node(0, NULL);
 	virq= irq_of_parse_and_map(cpu, 0);
+	of_node_put(cpu);
 	irq = virq_to_hw(virq);
 
 	sys_tmr2 = immr_map(im_sit);
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 4eb8cb38fc69..ed2f54b3f173 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -1049,7 +1049,6 @@ core99_reset_cpu(struct device_node *node, long param, long value)
 	unsigned long flags;
 	struct macio_chip *macio;
 	struct device_node *np;
-	struct device_node *cpus;
 	const int dflt_reset_lines[] = {	KL_GPIO_RESET_CPU0,
 						KL_GPIO_RESET_CPU1,
 						KL_GPIO_RESET_CPU2,
@@ -1059,10 +1058,7 @@ core99_reset_cpu(struct device_node *node, long param, long value)
 	if (macio->type != macio_keylargo)
 		return -ENODEV;
 
-	cpus = of_find_node_by_path("/cpus");
-	if (cpus == NULL)
-		return -ENODEV;
-	for (np = cpus->child; np != NULL; np = np->sibling) {
+	for_each_of_cpu_node(np) {
 		const u32 *num = of_get_property(np, "reg", NULL);
 		const u32 *rst = of_get_property(np, "soft-reset", NULL);
 		if (num == NULL || rst == NULL)
@@ -1072,7 +1068,6 @@ core99_reset_cpu(struct device_node *node, long param, long value)
 			break;
 		}
 	}
-	of_node_put(cpus);
 	if (np == NULL || reset_io == 0)
 		reset_io = dflt_reset_lines[param];
 
@@ -1504,16 +1499,12 @@ static long g5_reset_cpu(struct device_node *node, long param, long value)
 	unsigned long flags;
 	struct macio_chip *macio;
 	struct device_node *np;
-	struct device_node *cpus;
 
 	macio = &macio_chips[0];
 	if (macio->type != macio_keylargo2 && macio->type != macio_shasta)
 		return -ENODEV;
 
-	cpus = of_find_node_by_path("/cpus");
-	if (cpus == NULL)
-		return -ENODEV;
-	for (np = cpus->child; np != NULL; np = np->sibling) {
+	for_each_of_cpu_node(np) {
 		const u32 *num = of_get_property(np, "reg", NULL);
 		const u32 *rst = of_get_property(np, "soft-reset", NULL);
 		if (num == NULL || rst == NULL)
@@ -1523,7 +1514,6 @@ static long g5_reset_cpu(struct device_node *node, long param, long value)
 			break;
 		}
 	}
-	of_node_put(cpus);
 	if (np == NULL || reset_io == 0)
 		return -ENODEV;
 
@@ -2515,31 +2505,26 @@ found:
 	 * supposed to be set when not supported, but I'm not very confident
 	 * that all Apple OF revs did it properly, I do it the paranoid way.
 	 */
-	while (uninorth_base && uninorth_rev > 3) {
-		struct device_node *cpus = of_find_node_by_path("/cpus");
+	if (uninorth_base && uninorth_rev > 3) {
 		struct device_node *np;
 
-		if (!cpus || !cpus->child) {
-			printk(KERN_WARNING "Can't find CPU(s) in device tree !\n");
-			of_node_put(cpus);
-			break;
-		}
-		np = cpus->child;
-		/* Nap mode not supported on SMP */
-		if (np->sibling) {
-			of_node_put(cpus);
-			break;
-		}
-		/* Nap mode not supported if flush-on-lock property is present */
-		if (of_get_property(np, "flush-on-lock", NULL)) {
-			of_node_put(cpus);
-			break;
+		for_each_of_cpu_node(np) {
+			int cpu_count = 1;
+
+			/* Nap mode not supported on SMP */
+			if (of_get_property(np, "flush-on-lock", NULL) ||
+			    (cpu_count > 1)) {
+				powersave_nap = 0;
+				of_node_put(np);
+				break;
+			}
+
+			cpu_count++;
+			powersave_nap = 1;
 		}
-		of_node_put(cpus);
-		powersave_nap = 1;
-		printk(KERN_DEBUG "Processor NAP mode on idle enabled.\n");
-		break;
 	}
+	if (powersave_nap)
+		printk(KERN_DEBUG "Processor NAP mode on idle enabled.\n");
 
 	/* On CPUs that support it (750FX), lowspeed by default during
 	 * NAP mode
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 3a529fcdae97..2f00e3daafb0 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -243,10 +243,9 @@ static void __init l2cr_init(void)
 {
 	/* Checks "l2cr-value" property in the registry */
 	if (cpu_has_feature(CPU_FTR_L2CR)) {
-		struct device_node *np = of_find_node_by_name(NULL, "cpus");
-		if (!np)
-			np = of_find_node_by_type(NULL, "cpu");
-		if (np) {
+		struct device_node *np;
+
+		for_each_of_cpu_node(np) {
 			const unsigned int *l2cr =
 				of_get_property(np, "l2cr-value", NULL);
 			if (l2cr) {
@@ -256,6 +255,7 @@ static void __init l2cr_init(void)
 				_set_L2CR(ppc_override_l2cr_value);
 			}
 			of_node_put(np);
+			break;
 		}
 	}
 
@@ -279,8 +279,8 @@ static void __init pmac_setup_arch(void)
 	/* Set loops_per_jiffy to a half-way reasonable value,
 	   for use until calibrate_delay gets called. */
 	loops_per_jiffy = 50000000 / HZ;
-	cpu = of_find_node_by_type(NULL, "cpu");
-	if (cpu != NULL) {
+
+	for_each_of_cpu_node(cpu) {
 		fp = of_get_property(cpu, "clock-frequency", NULL);
 		if (fp != NULL) {
 			if (pvr >= 0x30 && pvr < 0x80)
@@ -292,8 +292,9 @@ static void __init pmac_setup_arch(void)
 			else
 				/* 601, 603, etc. */
 				loops_per_jiffy = *fp / (2 * HZ);
+			of_node_put(cpu);
+			break;
 		}
-		of_node_put(cpu);
 	}
 
 	/* See if newworld or oldworld */
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a344980287a5..fe451348ae57 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -31,6 +31,7 @@ config RISCV
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_DMA_CONTIGUOUS
+	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_PERF_EVENTS
 	select IRQ_DOMAIN
@@ -108,10 +109,12 @@ config ARCH_RV32I
 	select GENERIC_LIB_ASHRDI3
 	select GENERIC_LIB_LSHRDI3
 	select GENERIC_LIB_UCMPDI2
+	select GENERIC_LIB_UMODDI3
 
 config ARCH_RV64I
 	bool "RV64I"
 	select 64BIT
+	select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FTRACE_MCOUNT_RECORD
@@ -208,14 +211,61 @@ config RISCV_BASE_PMU
 
 endmenu
 
+config FPU
+	bool "FPU support"
+	default y
+	help
+	  Say N here if you want to disable all floating-point related procedure
+	  in the kernel.
+
+	  If you don't know what to do here, say Y.
+
 endmenu
 
-menu "Kernel type"
+menu "Kernel features"
 
 source "kernel/Kconfig.hz"
 
 endmenu
 
+menu "Boot options"
+
+config CMDLINE_BOOL
+	bool "Built-in kernel command line"
+	help
+	  For most platforms, it is firmware or second stage bootloader
+	  that by default specifies the kernel command line options.
+	  However, it might be necessary or advantageous to either override
+	  the default kernel command line or add a few extra options to it.
+	  For such cases, this option allows hardcoding command line options
+	  directly into the kernel.
+
+	  For that, choose 'Y' here and fill in the extra boot parameters
+	  in CONFIG_CMDLINE.
+
+	  The built-in options will be concatenated to the default command
+	  line if CMDLINE_FORCE is set to 'N'. Otherwise, the default
+	  command line will be ignored and replaced by the built-in string.
+
+config CMDLINE
+	string "Built-in kernel command string"
+	depends on CMDLINE_BOOL
+	default ""
+	help
+	  Supply command-line options at build time by entering them here.
+
+config CMDLINE_FORCE
+	bool "Built-in command line overrides bootloader arguments"
+	depends on CMDLINE_BOOL
+	help
+	  Set this option to 'Y' to have the kernel ignore the bootloader
+	  or firmware command line.  Instead, the built-in command line
+	  will be used exclusively.
+
+	  If you don't know what to do here, say N.
+
+endmenu
+
 menu "Bus support"
 
 config PCI
diff --git a/arch/riscv/Kconfig.debug b/arch/riscv/Kconfig.debug
index 3224ff6ecf6e..c5a72f17c469 100644
--- a/arch/riscv/Kconfig.debug
+++ b/arch/riscv/Kconfig.debug
@@ -1,37 +1,2 @@
-
-config CMDLINE_BOOL
-	bool "Built-in kernel command line"
-	help
-	  For most platforms, it is firmware or second stage bootloader
-	  that by default specifies the kernel command line options.
-	  However, it might be necessary or advantageous to either override
-	  the default kernel command line or add a few extra options to it.
-	  For such cases, this option allows hardcoding command line options
-	  directly into the kernel.
-
-	  For that, choose 'Y' here and fill in the extra boot parameters
-	  in CONFIG_CMDLINE.
-
-	  The built-in options will be concatenated to the default command
-	  line if CMDLINE_FORCE is set to 'N'. Otherwise, the default
-	  command line will be ignored and replaced by the built-in string.
-
-config CMDLINE
-	string "Built-in kernel command string"
-	depends on CMDLINE_BOOL
-	default ""
-	help
-	  Supply command-line options at build time by entering them here.
-
-config CMDLINE_FORCE
-	bool "Built-in command line overrides bootloader arguments"
-	depends on CMDLINE_BOOL
-	help
-	  Set this option to 'Y' to have the kernel ignore the bootloader
-	  or firmware command line.  Instead, the built-in command line
-	  will be used exclusively.
-
-	  If you don't know what to do here, say N.
-
 config EARLY_PRINTK
 	def_bool y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 61ec42405ec9..d10146197533 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -25,10 +25,7 @@ ifeq ($(CONFIG_ARCH_RV64I),y)
 
 	KBUILD_CFLAGS += -mabi=lp64
 	KBUILD_AFLAGS += -mabi=lp64
-	
-	KBUILD_CFLAGS	+= $(call cc-ifversion, -ge, 0500, -DCONFIG_ARCH_SUPPORTS_INT128)
 
-	KBUILD_MARCH = rv64im
 	KBUILD_LDFLAGS += -melf64lriscv
 else
 	BITS := 32
@@ -36,22 +33,20 @@ else
 
 	KBUILD_CFLAGS += -mabi=ilp32
 	KBUILD_AFLAGS += -mabi=ilp32
-	KBUILD_MARCH = rv32im
 	KBUILD_LDFLAGS += -melf32lriscv
 endif
 
 KBUILD_CFLAGS += -Wall
 
-ifeq ($(CONFIG_RISCV_ISA_A),y)
-	KBUILD_ARCH_A = a
-endif
-ifeq ($(CONFIG_RISCV_ISA_C),y)
-	KBUILD_ARCH_C = c
-endif
-
-KBUILD_AFLAGS += -march=$(KBUILD_MARCH)$(KBUILD_ARCH_A)fd$(KBUILD_ARCH_C)
+# ISA string setting
+riscv-march-$(CONFIG_ARCH_RV32I)	:= rv32im
+riscv-march-$(CONFIG_ARCH_RV64I)	:= rv64im
+riscv-march-$(CONFIG_RISCV_ISA_A)	:= $(riscv-march-y)a
+riscv-march-$(CONFIG_FPU)		:= $(riscv-march-y)fd
+riscv-march-$(CONFIG_RISCV_ISA_C)	:= $(riscv-march-y)c
+KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y))
+KBUILD_AFLAGS += -march=$(riscv-march-y)
 
-KBUILD_CFLAGS += -march=$(KBUILD_MARCH)$(KBUILD_ARCH_A)$(KBUILD_ARCH_C)
 KBUILD_CFLAGS += -mno-save-restore
 KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET)
 
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index efdbe311e936..6a646d9ea780 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -13,7 +13,6 @@ generic-y += errno.h
 generic-y += exec.h
 generic-y += fb.h
 generic-y += fcntl.h
-generic-y += futex.h
 generic-y += hardirq.h
 generic-y += hash.h
 generic-y += hw_irq.h
diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
new file mode 100644
index 000000000000..3b19eba1bc8e
--- /dev/null
+++ b/arch/riscv/include/asm/futex.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2006  Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (c) 2018  Jim Wilson (jimw@sifive.com)
+ */
+
+#ifndef _ASM_FUTEX_H
+#define _ASM_FUTEX_H
+
+#ifndef CONFIG_RISCV_ISA_A
+/*
+ * Use the generic interrupt disabling versions if the A extension
+ * is not supported.
+ */
+#ifdef CONFIG_SMP
+#error "Can't support generic futex calls without A extension on SMP"
+#endif
+#include <asm-generic/futex.h>
+
+#else /* CONFIG_RISCV_ISA_A */
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <linux/errno.h>
+#include <asm/asm.h>
+
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)	\
+{								\
+	uintptr_t tmp;						\
+	__enable_user_access();					\
+	__asm__ __volatile__ (					\
+	"1:	" insn "				\n"	\
+	"2:						\n"	\
+	"	.section .fixup,\"ax\"			\n"	\
+	"	.balign 4				\n"	\
+	"3:	li %[r],%[e]				\n"	\
+	"	jump 2b,%[t]				\n"	\
+	"	.previous				\n"	\
+	"	.section __ex_table,\"a\"		\n"	\
+	"	.balign " RISCV_SZPTR "			\n"	\
+	"	" RISCV_PTR " 1b, 3b			\n"	\
+	"	.previous				\n"	\
+	: [r] "+r" (ret), [ov] "=&r" (oldval),			\
+	  [u] "+m" (*uaddr), [t] "=&r" (tmp)			\
+	: [op] "Jr" (oparg), [e] "i" (-EFAULT)			\
+	: "memory");						\
+	__disable_user_access();				\
+}
+
+static inline int
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
+{
+	int oldval = 0, ret = 0;
+
+	pagefault_disable();
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		__futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]",
+				  ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op("amoadd.w.aqrl %[ov],%z[op],%[u]",
+				  ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op("amoor.w.aqrl %[ov],%z[op],%[u]",
+				  ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op("amoand.w.aqrl %[ov],%z[op],%[u]",
+				  ret, oldval, uaddr, ~oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]",
+				  ret, oldval, uaddr, oparg);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	pagefault_enable();
+
+	if (!ret)
+		*oval = oldval;
+
+	return ret;
+}
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval)
+{
+	int ret = 0;
+	u32 val;
+	uintptr_t tmp;
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	__enable_user_access();
+	__asm__ __volatile__ (
+	"1:	lr.w.aqrl %[v],%[u]			\n"
+	"	bne %[v],%z[ov],3f			\n"
+	"2:	sc.w.aqrl %[t],%z[nv],%[u]		\n"
+	"	bnez %[t],1b				\n"
+	"3:						\n"
+	"	.section .fixup,\"ax\"			\n"
+	"	.balign 4				\n"
+	"4:	li %[r],%[e]				\n"
+	"	jump 3b,%[t]				\n"
+	"	.previous				\n"
+	"	.section __ex_table,\"a\"		\n"
+	"	.balign " RISCV_SZPTR "			\n"
+	"	" RISCV_PTR " 1b, 4b			\n"
+	"	" RISCV_PTR " 2b, 4b			\n"
+	"	.previous				\n"
+	: [r] "+r" (ret), [v] "=&r" (val), [u] "+m" (*uaddr), [t] "=&r" (tmp)
+	: [ov] "Jr" (oldval), [nv] "Jr" (newval), [e] "i" (-EFAULT)
+	: "memory");
+	__disable_user_access();
+
+	*uval = val;
+	return ret;
+}
+
+#endif /* CONFIG_RISCV_ISA_A */
+#endif /* _ASM_FUTEX_H */
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 3fe4af8147d2..50de774d827a 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -88,7 +88,7 @@ static inline void wait_for_interrupt(void)
 }
 
 struct device_node;
-extern int riscv_of_processor_hart(struct device_node *node);
+int riscv_of_processor_hartid(struct device_node *node);
 
 extern void riscv_fill_hwcap(void);
 
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index 36016845461d..41aa73b476f4 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -14,16 +14,24 @@
 #ifndef _ASM_RISCV_SMP_H
 #define _ASM_RISCV_SMP_H
 
-/* This both needs asm-offsets.h and is used when generating it. */
-#ifndef GENERATING_ASM_OFFSETS
-#include <asm/asm-offsets.h>
-#endif
-
 #include <linux/cpumask.h>
 #include <linux/irqreturn.h>
+#include <linux/thread_info.h>
+
+#define INVALID_HARTID ULONG_MAX
+/*
+ * Mapping between linux logical cpu index and hartid.
+ */
+extern unsigned long __cpuid_to_hartid_map[NR_CPUS];
+#define cpuid_to_hartid_map(cpu)    __cpuid_to_hartid_map[cpu]
+
+struct seq_file;
 
 #ifdef CONFIG_SMP
 
+/* print IPI stats */
+void show_ipi_stats(struct seq_file *p, int prec);
+
 /* SMP initialization hook for setup_arch */
 void __init setup_smp(void);
 
@@ -33,14 +41,31 @@ void arch_send_call_function_ipi_mask(struct cpumask *mask);
 /* Hook for the generic smp_call_function_single() routine. */
 void arch_send_call_function_single_ipi(int cpu);
 
+int riscv_hartid_to_cpuid(int hartid);
+void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out);
+
 /*
- * This is particularly ugly: it appears we can't actually get the definition
- * of task_struct here, but we need access to the CPU this task is running on.
- * Instead of using C we're using asm-offsets.h to get the current processor
- * ID.
+ * Obtains the hart ID of the currently executing task.  This relies on
+ * THREAD_INFO_IN_TASK, but we define that unconditionally.
  */
-#define raw_smp_processor_id() (*((int*)((char*)get_current() + TASK_TI_CPU)))
+#define raw_smp_processor_id() (current_thread_info()->cpu)
 
-#endif /* CONFIG_SMP */
+#else
+
+static inline void show_ipi_stats(struct seq_file *p, int prec)
+{
+}
 
+static inline int riscv_hartid_to_cpuid(int hartid)
+{
+	return 0;
+}
+
+static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in,
+					      struct cpumask *out)
+{
+	cpumask_set_cpu(cpuid_to_hartid_map(0), out);
+}
+
+#endif /* CONFIG_SMP */
 #endif /* _ASM_RISCV_SMP_H */
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index dd6b05bff75b..733559083f24 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -18,6 +18,7 @@
 #include <asm/ptrace.h>
 #include <asm/csr.h>
 
+#ifdef CONFIG_FPU
 extern void __fstate_save(struct task_struct *save_to);
 extern void __fstate_restore(struct task_struct *restore_from);
 
@@ -55,6 +56,14 @@ static inline void __switch_to_aux(struct task_struct *prev,
 	fstate_restore(next, task_pt_regs(next));
 }
 
+extern bool has_fpu;
+#else
+#define has_fpu false
+#define fstate_save(task, regs) do { } while (0)
+#define fstate_restore(task, regs) do { } while (0)
+#define __switch_to_aux(__prev, __next) do { } while (0)
+#endif
+
 extern struct task_struct *__switch_to(struct task_struct *,
 				       struct task_struct *);
 
@@ -62,7 +71,8 @@ extern struct task_struct *__switch_to(struct task_struct *,
 do {							\
 	struct task_struct *__prev = (prev);		\
 	struct task_struct *__next = (next);		\
-	__switch_to_aux(__prev, __next);		\
+	if (has_fpu)					\
+		__switch_to_aux(__prev, __next);	\
 	((last) = __switch_to(__prev, __next));		\
 } while (0)
 
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 85c2d8bae957..54fee0cadb1e 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -16,6 +16,7 @@
 #define _ASM_RISCV_TLBFLUSH_H
 
 #include <linux/mm_types.h>
+#include <asm/smp.h>
 
 /*
  * Flush entire local TLB.  'sfence.vma' implicitly fences with the instruction
@@ -49,13 +50,22 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 
 #include <asm/sbi.h>
 
+static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start,
+				     unsigned long size)
+{
+	struct cpumask hmask;
+
+	cpumask_clear(&hmask);
+	riscv_cpuid_to_hartid_mask(cmask, &hmask);
+	sbi_remote_sfence_vma(hmask.bits, start, size);
+}
+
 #define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1)
 #define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0)
 #define flush_tlb_range(vma, start, end) \
-	sbi_remote_sfence_vma(mm_cpumask((vma)->vm_mm)->bits, \
-			      start, (end) - (start))
+	remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start))
 #define flush_tlb_mm(mm) \
-	sbi_remote_sfence_vma(mm_cpumask(mm)->bits, 0, -1)
+	remote_sfence_vma(mm_cpumask(mm), 0, -1)
 
 #endif /* CONFIG_SMP */
 
diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h
index 0caea01d5cca..eff7aa9aa163 100644
--- a/arch/riscv/include/asm/unistd.h
+++ b/arch/riscv/include/asm/unistd.h
@@ -16,6 +16,7 @@
  * be included multiple times.  See uapi/asm/syscalls.h for more info.
  */
 
+#define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
 #include <uapi/asm/syscalls.h>
diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h
index 1e0dfc36aab9..644a00ce6e2e 100644
--- a/arch/riscv/include/uapi/asm/elf.h
+++ b/arch/riscv/include/uapi/asm/elf.h
@@ -19,7 +19,10 @@ typedef unsigned long elf_greg_t;
 typedef struct user_regs_struct elf_gregset_t;
 #define ELF_NGREG (sizeof(elf_gregset_t) / sizeof(elf_greg_t))
 
+/* We don't support f without d, or q.  */
+typedef __u64 elf_fpreg_t;
 typedef union __riscv_fp_state elf_fpregset_t;
+#define ELF_NFPREG (sizeof(struct __riscv_d_ext_state) / sizeof(elf_fpreg_t))
 
 #if __riscv_xlen == 64
 #define ELF_RISCV_R_SYM(r_info)		ELF64_R_SYM(r_info)
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index e1274fc03af4..f13f7f276639 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -31,6 +31,7 @@ obj-y	+= vdso/
 
 CFLAGS_setup.o := -mcmodel=medany
 
+obj-$(CONFIG_FPU)		+= fpu.o
 obj-$(CONFIG_SMP)		+= smpboot.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_MODULES)		+= module.o
diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index 0bc86e5f8f3f..cb35ffd8ec6b 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -22,13 +22,6 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
 {
 	this_leaf->level = level;
 	this_leaf->type = type;
-	/* not a sector cache */
-	this_leaf->physical_line_partition = 1;
-	/* TODO: Add to DTS */
-	this_leaf->attributes =
-		CACHE_WRITE_BACK
-		| CACHE_READ_ALLOCATE
-		| CACHE_WRITE_ALLOCATE;
 }
 
 static int __init_cache_level(unsigned int cpu)
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index ca6c81e54e37..3a5a2ee31547 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -14,9 +14,13 @@
 #include <linux/init.h>
 #include <linux/seq_file.h>
 #include <linux/of.h>
+#include <asm/smp.h>
 
-/* Return -1 if not a valid hart */
-int riscv_of_processor_hart(struct device_node *node)
+/*
+ * Returns the hart ID of the given device tree node, or -1 if the device tree
+ * node isn't a RISC-V hart.
+ */
+int riscv_of_processor_hartid(struct device_node *node)
 {
 	const char *isa, *status;
 	u32 hart;
@@ -58,6 +62,64 @@ int riscv_of_processor_hart(struct device_node *node)
 
 #ifdef CONFIG_PROC_FS
 
+static void print_isa(struct seq_file *f, const char *orig_isa)
+{
+	static const char *ext = "mafdc";
+	const char *isa = orig_isa;
+	const char *e;
+
+	/*
+	 * Linux doesn't support rv32e or rv128i, and we only support booting
+	 * kernels on harts with the same ISA that the kernel is compiled for.
+	 */
+#if defined(CONFIG_32BIT)
+	if (strncmp(isa, "rv32i", 5) != 0)
+		return;
+#elif defined(CONFIG_64BIT)
+	if (strncmp(isa, "rv64i", 5) != 0)
+		return;
+#endif
+
+	/* Print the base ISA, as we already know it's legal. */
+	seq_puts(f, "isa\t\t: ");
+	seq_write(f, isa, 5);
+	isa += 5;
+
+	/*
+	 * Check the rest of the ISA string for valid extensions, printing those
+	 * we find.  RISC-V ISA strings define an order, so we only print the
+	 * extension bits when they're in order.
+	 */
+	for (e = ext; *e != '\0'; ++e) {
+		if (isa[0] == e[0]) {
+			seq_write(f, isa, 1);
+			isa++;
+		}
+	}
+	seq_puts(f, "\n");
+
+	/*
+	 * If we were given an unsupported ISA in the device tree then print
+	 * a bit of info describing what went wrong.
+	 */
+	if (isa[0] != '\0')
+		pr_info("unsupported ISA \"%s\" in device tree", orig_isa);
+}
+
+static void print_mmu(struct seq_file *f, const char *mmu_type)
+{
+#if defined(CONFIG_32BIT)
+	if (strcmp(mmu_type, "riscv,sv32") != 0)
+		return;
+#elif defined(CONFIG_64BIT)
+	if (strcmp(mmu_type, "riscv,sv39") != 0 &&
+	    strcmp(mmu_type, "riscv,sv48") != 0)
+		return;
+#endif
+
+	seq_printf(f, "mmu\t\t: %s\n", mmu_type+6);
+}
+
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
 	*pos = cpumask_next(*pos - 1, cpu_online_mask);
@@ -78,21 +140,20 @@ static void c_stop(struct seq_file *m, void *v)
 
 static int c_show(struct seq_file *m, void *v)
 {
-	unsigned long hart_id = (unsigned long)v - 1;
-	struct device_node *node = of_get_cpu_node(hart_id, NULL);
+	unsigned long cpu_id = (unsigned long)v - 1;
+	struct device_node *node = of_get_cpu_node(cpuid_to_hartid_map(cpu_id),
+						   NULL);
 	const char *compat, *isa, *mmu;
 
-	seq_printf(m, "hart\t: %lu\n", hart_id);
-	if (!of_property_read_string(node, "riscv,isa", &isa)
-	    && isa[0] == 'r'
-	    && isa[1] == 'v')
-		seq_printf(m, "isa\t: %s\n", isa);
-	if (!of_property_read_string(node, "mmu-type", &mmu)
-	    && !strncmp(mmu, "riscv,", 6))
-		seq_printf(m, "mmu\t: %s\n", mmu+6);
+	seq_printf(m, "processor\t: %lu\n", cpu_id);
+	seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id));
+	if (!of_property_read_string(node, "riscv,isa", &isa))
+		print_isa(m, isa);
+	if (!of_property_read_string(node, "mmu-type", &mmu))
+		print_mmu(m, mmu);
 	if (!of_property_read_string(node, "compatible", &compat)
 	    && strcmp(compat, "riscv"))
-		seq_printf(m, "uarch\t: %s\n", compat);
+		seq_printf(m, "uarch\t\t: %s\n", compat);
 	seq_puts(m, "\n");
 
 	return 0;
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 17011a870044..5493f3228704 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -22,6 +22,9 @@
 #include <asm/hwcap.h>
 
 unsigned long elf_hwcap __read_mostly;
+#ifdef CONFIG_FPU
+bool has_fpu __read_mostly;
+#endif
 
 void riscv_fill_hwcap(void)
 {
@@ -57,5 +60,17 @@ void riscv_fill_hwcap(void)
 	for (i = 0; i < strlen(isa); ++i)
 		elf_hwcap |= isa2hwcap[(unsigned char)(isa[i])];
 
+	/* We don't support systems with F but without D, so mask those out
+	 * here. */
+	if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & COMPAT_HWCAP_ISA_D)) {
+		pr_info("This kernel does not support systems with F but not D");
+		elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
+	}
+
 	pr_info("elf_hwcap is 0x%lx", elf_hwcap);
+
+#ifdef CONFIG_FPU
+	if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D))
+		has_fpu = true;
+#endif
 }
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index fa2c08e3c05e..13d4826ab2a1 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -168,7 +168,6 @@ ENTRY(handle_exception)
 
 	/* Handle interrupts */
 	move a0, sp /* pt_regs */
-	move a1, s4 /* scause */
 	tail do_IRQ
 1:
 	/* Exceptions run with interrupts enabled */
@@ -357,93 +356,6 @@ ENTRY(__switch_to)
 	ret
 ENDPROC(__switch_to)
 
-ENTRY(__fstate_save)
-	li  a2,  TASK_THREAD_F0
-	add a0, a0, a2
-	li t1, SR_FS
-	csrs sstatus, t1
-	frcsr t0
-	fsd f0,  TASK_THREAD_F0_F0(a0)
-	fsd f1,  TASK_THREAD_F1_F0(a0)
-	fsd f2,  TASK_THREAD_F2_F0(a0)
-	fsd f3,  TASK_THREAD_F3_F0(a0)
-	fsd f4,  TASK_THREAD_F4_F0(a0)
-	fsd f5,  TASK_THREAD_F5_F0(a0)
-	fsd f6,  TASK_THREAD_F6_F0(a0)
-	fsd f7,  TASK_THREAD_F7_F0(a0)
-	fsd f8,  TASK_THREAD_F8_F0(a0)
-	fsd f9,  TASK_THREAD_F9_F0(a0)
-	fsd f10, TASK_THREAD_F10_F0(a0)
-	fsd f11, TASK_THREAD_F11_F0(a0)
-	fsd f12, TASK_THREAD_F12_F0(a0)
-	fsd f13, TASK_THREAD_F13_F0(a0)
-	fsd f14, TASK_THREAD_F14_F0(a0)
-	fsd f15, TASK_THREAD_F15_F0(a0)
-	fsd f16, TASK_THREAD_F16_F0(a0)
-	fsd f17, TASK_THREAD_F17_F0(a0)
-	fsd f18, TASK_THREAD_F18_F0(a0)
-	fsd f19, TASK_THREAD_F19_F0(a0)
-	fsd f20, TASK_THREAD_F20_F0(a0)
-	fsd f21, TASK_THREAD_F21_F0(a0)
-	fsd f22, TASK_THREAD_F22_F0(a0)
-	fsd f23, TASK_THREAD_F23_F0(a0)
-	fsd f24, TASK_THREAD_F24_F0(a0)
-	fsd f25, TASK_THREAD_F25_F0(a0)
-	fsd f26, TASK_THREAD_F26_F0(a0)
-	fsd f27, TASK_THREAD_F27_F0(a0)
-	fsd f28, TASK_THREAD_F28_F0(a0)
-	fsd f29, TASK_THREAD_F29_F0(a0)
-	fsd f30, TASK_THREAD_F30_F0(a0)
-	fsd f31, TASK_THREAD_F31_F0(a0)
-	sw t0, TASK_THREAD_FCSR_F0(a0)
-	csrc sstatus, t1
-	ret
-ENDPROC(__fstate_save)
-
-ENTRY(__fstate_restore)
-	li  a2,  TASK_THREAD_F0
-	add a0, a0, a2
-	li t1, SR_FS
-	lw t0, TASK_THREAD_FCSR_F0(a0)
-	csrs sstatus, t1
-	fld f0,  TASK_THREAD_F0_F0(a0)
-	fld f1,  TASK_THREAD_F1_F0(a0)
-	fld f2,  TASK_THREAD_F2_F0(a0)
-	fld f3,  TASK_THREAD_F3_F0(a0)
-	fld f4,  TASK_THREAD_F4_F0(a0)
-	fld f5,  TASK_THREAD_F5_F0(a0)
-	fld f6,  TASK_THREAD_F6_F0(a0)
-	fld f7,  TASK_THREAD_F7_F0(a0)
-	fld f8,  TASK_THREAD_F8_F0(a0)
-	fld f9,  TASK_THREAD_F9_F0(a0)
-	fld f10, TASK_THREAD_F10_F0(a0)
-	fld f11, TASK_THREAD_F11_F0(a0)
-	fld f12, TASK_THREAD_F12_F0(a0)
-	fld f13, TASK_THREAD_F13_F0(a0)
-	fld f14, TASK_THREAD_F14_F0(a0)
-	fld f15, TASK_THREAD_F15_F0(a0)
-	fld f16, TASK_THREAD_F16_F0(a0)
-	fld f17, TASK_THREAD_F17_F0(a0)
-	fld f18, TASK_THREAD_F18_F0(a0)
-	fld f19, TASK_THREAD_F19_F0(a0)
-	fld f20, TASK_THREAD_F20_F0(a0)
-	fld f21, TASK_THREAD_F21_F0(a0)
-	fld f22, TASK_THREAD_F22_F0(a0)
-	fld f23, TASK_THREAD_F23_F0(a0)
-	fld f24, TASK_THREAD_F24_F0(a0)
-	fld f25, TASK_THREAD_F25_F0(a0)
-	fld f26, TASK_THREAD_F26_F0(a0)
-	fld f27, TASK_THREAD_F27_F0(a0)
-	fld f28, TASK_THREAD_F28_F0(a0)
-	fld f29, TASK_THREAD_F29_F0(a0)
-	fld f30, TASK_THREAD_F30_F0(a0)
-	fld f31, TASK_THREAD_F31_F0(a0)
-	fscsr t0
-	csrc sstatus, t1
-	ret
-ENDPROC(__fstate_restore)
-
-
 	.section ".rodata"
 	/* Exception vector table */
 ENTRY(excp_vect_table)
diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S
new file mode 100644
index 000000000000..1defb0618aff
--- /dev/null
+++ b/arch/riscv/kernel/fpu.S
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm.h>
+#include <asm/csr.h>
+#include <asm/asm-offsets.h>
+
+ENTRY(__fstate_save)
+	li  a2,  TASK_THREAD_F0
+	add a0, a0, a2
+	li t1, SR_FS
+	csrs sstatus, t1
+	frcsr t0
+	fsd f0,  TASK_THREAD_F0_F0(a0)
+	fsd f1,  TASK_THREAD_F1_F0(a0)
+	fsd f2,  TASK_THREAD_F2_F0(a0)
+	fsd f3,  TASK_THREAD_F3_F0(a0)
+	fsd f4,  TASK_THREAD_F4_F0(a0)
+	fsd f5,  TASK_THREAD_F5_F0(a0)
+	fsd f6,  TASK_THREAD_F6_F0(a0)
+	fsd f7,  TASK_THREAD_F7_F0(a0)
+	fsd f8,  TASK_THREAD_F8_F0(a0)
+	fsd f9,  TASK_THREAD_F9_F0(a0)
+	fsd f10, TASK_THREAD_F10_F0(a0)
+	fsd f11, TASK_THREAD_F11_F0(a0)
+	fsd f12, TASK_THREAD_F12_F0(a0)
+	fsd f13, TASK_THREAD_F13_F0(a0)
+	fsd f14, TASK_THREAD_F14_F0(a0)
+	fsd f15, TASK_THREAD_F15_F0(a0)
+	fsd f16, TASK_THREAD_F16_F0(a0)
+	fsd f17, TASK_THREAD_F17_F0(a0)
+	fsd f18, TASK_THREAD_F18_F0(a0)
+	fsd f19, TASK_THREAD_F19_F0(a0)
+	fsd f20, TASK_THREAD_F20_F0(a0)
+	fsd f21, TASK_THREAD_F21_F0(a0)
+	fsd f22, TASK_THREAD_F22_F0(a0)
+	fsd f23, TASK_THREAD_F23_F0(a0)
+	fsd f24, TASK_THREAD_F24_F0(a0)
+	fsd f25, TASK_THREAD_F25_F0(a0)
+	fsd f26, TASK_THREAD_F26_F0(a0)
+	fsd f27, TASK_THREAD_F27_F0(a0)
+	fsd f28, TASK_THREAD_F28_F0(a0)
+	fsd f29, TASK_THREAD_F29_F0(a0)
+	fsd f30, TASK_THREAD_F30_F0(a0)
+	fsd f31, TASK_THREAD_F31_F0(a0)
+	sw t0, TASK_THREAD_FCSR_F0(a0)
+	csrc sstatus, t1
+	ret
+ENDPROC(__fstate_save)
+
+ENTRY(__fstate_restore)
+	li  a2,  TASK_THREAD_F0
+	add a0, a0, a2
+	li t1, SR_FS
+	lw t0, TASK_THREAD_FCSR_F0(a0)
+	csrs sstatus, t1
+	fld f0,  TASK_THREAD_F0_F0(a0)
+	fld f1,  TASK_THREAD_F1_F0(a0)
+	fld f2,  TASK_THREAD_F2_F0(a0)
+	fld f3,  TASK_THREAD_F3_F0(a0)
+	fld f4,  TASK_THREAD_F4_F0(a0)
+	fld f5,  TASK_THREAD_F5_F0(a0)
+	fld f6,  TASK_THREAD_F6_F0(a0)
+	fld f7,  TASK_THREAD_F7_F0(a0)
+	fld f8,  TASK_THREAD_F8_F0(a0)
+	fld f9,  TASK_THREAD_F9_F0(a0)
+	fld f10, TASK_THREAD_F10_F0(a0)
+	fld f11, TASK_THREAD_F11_F0(a0)
+	fld f12, TASK_THREAD_F12_F0(a0)
+	fld f13, TASK_THREAD_F13_F0(a0)
+	fld f14, TASK_THREAD_F14_F0(a0)
+	fld f15, TASK_THREAD_F15_F0(a0)
+	fld f16, TASK_THREAD_F16_F0(a0)
+	fld f17, TASK_THREAD_F17_F0(a0)
+	fld f18, TASK_THREAD_F18_F0(a0)
+	fld f19, TASK_THREAD_F19_F0(a0)
+	fld f20, TASK_THREAD_F20_F0(a0)
+	fld f21, TASK_THREAD_F21_F0(a0)
+	fld f22, TASK_THREAD_F22_F0(a0)
+	fld f23, TASK_THREAD_F23_F0(a0)
+	fld f24, TASK_THREAD_F24_F0(a0)
+	fld f25, TASK_THREAD_F25_F0(a0)
+	fld f26, TASK_THREAD_F26_F0(a0)
+	fld f27, TASK_THREAD_F27_F0(a0)
+	fld f28, TASK_THREAD_F28_F0(a0)
+	fld f29, TASK_THREAD_F29_F0(a0)
+	fld f30, TASK_THREAD_F30_F0(a0)
+	fld f31, TASK_THREAD_F31_F0(a0)
+	fscsr t0
+	csrc sstatus, t1
+	ret
+ENDPROC(__fstate_restore)
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index c4d2c63f9a29..711190d473d4 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -47,6 +47,8 @@ ENTRY(_start)
 	/* Save hart ID and DTB physical address */
 	mv s0, a0
 	mv s1, a1
+	la a2, boot_cpu_hartid
+	REG_S a0, (a2)
 
 	/* Initialize page tables and relocate to virtual addresses */
 	la sp, init_thread_union + THREAD_SIZE
@@ -55,7 +57,7 @@ ENTRY(_start)
 
 	/* Restore C environment */
 	la tp, init_task
-	sw s0, TASK_TI_CPU(tp)
+	sw zero, TASK_TI_CPU(tp)
 
 	la sp, init_thread_union
 	li a0, ASM_THREAD_SIZE
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index 0cfac48a1272..48e6b7db83a1 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -8,6 +8,8 @@
 #include <linux/interrupt.h>
 #include <linux/irqchip.h>
 #include <linux/irqdomain.h>
+#include <linux/seq_file.h>
+#include <asm/smp.h>
 
 /*
  * Possible interrupt causes:
@@ -24,12 +26,18 @@
  */
 #define INTERRUPT_CAUSE_FLAG	(1UL << (__riscv_xlen - 1))
 
-asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs, unsigned long cause)
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+	show_ipi_stats(p, prec);
+	return 0;
+}
+
+asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	irq_enter();
-	switch (cause & ~INTERRUPT_CAUSE_FLAG) {
+	switch (regs->scause & ~INTERRUPT_CAUSE_FLAG) {
 	case INTERRUPT_CAUSE_TIMER:
 		riscv_timer_interrupt();
 		break;
diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S
index 5721624886a1..8a5593ff9ff3 100644
--- a/arch/riscv/kernel/mcount.S
+++ b/arch/riscv/kernel/mcount.S
@@ -75,7 +75,6 @@ ENTRY(return_to_handler)
 	RESTORE_RET_ABI_STATE
 	jalr	a1
 ENDPROC(return_to_handler)
-EXPORT_SYMBOL(return_to_handler)
 #endif
 
 #ifndef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index d7c6ca7c95ae..bef19993ea92 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -76,7 +76,9 @@ void show_regs(struct pt_regs *regs)
 void start_thread(struct pt_regs *regs, unsigned long pc,
 	unsigned long sp)
 {
-	regs->sstatus = SR_SPIE /* User mode, irqs on */ | SR_FS_INITIAL;
+	regs->sstatus = SR_SPIE;
+	if (has_fpu)
+		regs->sstatus |= SR_FS_INITIAL;
 	regs->sepc = pc;
 	regs->sp = sp;
 	set_fs(USER_DS);
@@ -84,12 +86,14 @@ void start_thread(struct pt_regs *regs, unsigned long pc,
 
 void flush_thread(void)
 {
+#ifdef CONFIG_FPU
 	/*
 	 * Reset FPU context
 	 *	frm: round to nearest, ties to even (IEEE default)
 	 *	fflags: accrued exceptions cleared
 	 */
 	memset(&current->thread.fstate, 0, sizeof(current->thread.fstate));
+#endif
 }
 
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 9f82a7e34c64..60f1e02eed36 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -28,6 +28,9 @@
 
 enum riscv_regset {
 	REGSET_X,
+#ifdef CONFIG_FPU
+	REGSET_F,
+#endif
 };
 
 static int riscv_gpr_get(struct task_struct *target,
@@ -54,6 +57,45 @@ static int riscv_gpr_set(struct task_struct *target,
 	return ret;
 }
 
+#ifdef CONFIG_FPU
+static int riscv_fpr_get(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 void *kbuf, void __user *ubuf)
+{
+	int ret;
+	struct __riscv_d_ext_state *fstate = &target->thread.fstate;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, fstate, 0,
+				  offsetof(struct __riscv_d_ext_state, fcsr));
+	if (!ret) {
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, fstate, 0,
+					  offsetof(struct __riscv_d_ext_state, fcsr) +
+					  sizeof(fstate->fcsr));
+	}
+
+	return ret;
+}
+
+static int riscv_fpr_set(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	struct __riscv_d_ext_state *fstate = &target->thread.fstate;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, fstate, 0,
+				 offsetof(struct __riscv_d_ext_state, fcsr));
+	if (!ret) {
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, fstate, 0,
+					 offsetof(struct __riscv_d_ext_state, fcsr) +
+					 sizeof(fstate->fcsr));
+	}
+
+	return ret;
+}
+#endif
 
 static const struct user_regset riscv_user_regset[] = {
 	[REGSET_X] = {
@@ -64,6 +106,16 @@ static const struct user_regset riscv_user_regset[] = {
 		.get = &riscv_gpr_get,
 		.set = &riscv_gpr_set,
 	},
+#ifdef CONFIG_FPU
+	[REGSET_F] = {
+		.core_note_type = NT_PRFPREG,
+		.n = ELF_NFPREG,
+		.size = sizeof(elf_fpreg_t),
+		.align = sizeof(elf_fpreg_t),
+		.get = &riscv_fpr_get,
+		.set = &riscv_fpr_set,
+	},
+#endif
 };
 
 static const struct user_regset_view riscv_user_native_view = {
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index b2d26d9d8489..2c290e6aaa6e 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -81,6 +81,16 @@ EXPORT_SYMBOL(empty_zero_page);
 
 /* The lucky hart to first increment this variable will boot the other cores */
 atomic_t hart_lottery;
+unsigned long boot_cpu_hartid;
+
+unsigned long __cpuid_to_hartid_map[NR_CPUS] = {
+	[0 ... NR_CPUS-1] = INVALID_HARTID
+};
+
+void __init smp_setup_processor_id(void)
+{
+	cpuid_to_hartid_map(0) = boot_cpu_hartid;
+}
 
 #ifdef CONFIG_BLK_DEV_INITRD
 static void __init setup_initrd(void)
@@ -227,7 +237,10 @@ void __init setup_arch(char **cmdline_p)
 	setup_bootmem();
 	paging_init();
 	unflatten_device_tree();
+
+#ifdef CONFIG_SWIOTLB
 	swiotlb_init(1);
+#endif
 
 #ifdef CONFIG_SMP
 	setup_smp();
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 718d0c984ef0..f9b5e7e352ef 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -37,45 +37,69 @@ struct rt_sigframe {
 	struct ucontext uc;
 };
 
-static long restore_d_state(struct pt_regs *regs,
-	struct __riscv_d_ext_state __user *state)
+#ifdef CONFIG_FPU
+static long restore_fp_state(struct pt_regs *regs,
+			     union __riscv_fp_state *sc_fpregs)
 {
 	long err;
+	struct __riscv_d_ext_state __user *state = &sc_fpregs->d;
+	size_t i;
+
 	err = __copy_from_user(&current->thread.fstate, state, sizeof(*state));
-	if (likely(!err))
-		fstate_restore(current, regs);
+	if (unlikely(err))
+		return err;
+
+	fstate_restore(current, regs);
+
+	/* We support no other extension state at this time. */
+	for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) {
+		u32 value;
+
+		err = __get_user(value, &sc_fpregs->q.reserved[i]);
+		if (unlikely(err))
+			break;
+		if (value != 0)
+			return -EINVAL;
+	}
+
 	return err;
 }
 
-static long save_d_state(struct pt_regs *regs,
-	struct __riscv_d_ext_state __user *state)
+static long save_fp_state(struct pt_regs *regs,
+			  union __riscv_fp_state *sc_fpregs)
 {
+	long err;
+	struct __riscv_d_ext_state __user *state = &sc_fpregs->d;
+	size_t i;
+
 	fstate_save(current, regs);
-	return __copy_to_user(state, &current->thread.fstate, sizeof(*state));
+	err = __copy_to_user(state, &current->thread.fstate, sizeof(*state));
+	if (unlikely(err))
+		return err;
+
+	/* We support no other extension state at this time. */
+	for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) {
+		err = __put_user(0, &sc_fpregs->q.reserved[i]);
+		if (unlikely(err))
+			break;
+	}
+
+	return err;
 }
+#else
+#define save_fp_state(task, regs) (0)
+#define restore_fp_state(task, regs) (0)
+#endif
 
 static long restore_sigcontext(struct pt_regs *regs,
 	struct sigcontext __user *sc)
 {
 	long err;
-	size_t i;
 	/* sc_regs is structured the same as the start of pt_regs */
 	err = __copy_from_user(regs, &sc->sc_regs, sizeof(sc->sc_regs));
-	if (unlikely(err))
-		return err;
 	/* Restore the floating-point state. */
-	err = restore_d_state(regs, &sc->sc_fpregs.d);
-	if (unlikely(err))
-		return err;
-	/* We support no other extension state at this time. */
-	for (i = 0; i < ARRAY_SIZE(sc->sc_fpregs.q.reserved); i++) {
-		u32 value;
-		err = __get_user(value, &sc->sc_fpregs.q.reserved[i]);
-		if (unlikely(err))
-			break;
-		if (value != 0)
-			return -EINVAL;
-	}
+	if (has_fpu)
+		err |= restore_fp_state(regs, &sc->sc_fpregs);
 	return err;
 }
 
@@ -124,14 +148,11 @@ static long setup_sigcontext(struct rt_sigframe __user *frame,
 {
 	struct sigcontext __user *sc = &frame->uc.uc_mcontext;
 	long err;
-	size_t i;
 	/* sc_regs is structured the same as the start of pt_regs */
 	err = __copy_to_user(&sc->sc_regs, regs, sizeof(sc->sc_regs));
 	/* Save the floating-point state. */
-	err |= save_d_state(regs, &sc->sc_fpregs.d);
-	/* We support no other extension state at this time. */
-	for (i = 0; i < ARRAY_SIZE(sc->sc_fpregs.q.reserved); i++)
-		err |= __put_user(0, &sc->sc_fpregs.q.reserved[i]);
+	if (has_fpu)
+		err |= save_fp_state(regs, &sc->sc_fpregs);
 	return err;
 }
 
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 906fe21ea21b..57b1383e5ef7 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -22,23 +22,44 @@
 #include <linux/interrupt.h>
 #include <linux/smp.h>
 #include <linux/sched.h>
+#include <linux/seq_file.h>
 
 #include <asm/sbi.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 
-/* A collection of single bit ipi messages.  */
-static struct {
-	unsigned long bits ____cacheline_aligned;
-} ipi_data[NR_CPUS] __cacheline_aligned;
-
 enum ipi_message_type {
 	IPI_RESCHEDULE,
 	IPI_CALL_FUNC,
 	IPI_MAX
 };
 
+/* A collection of single bit ipi messages.  */
+static struct {
+	unsigned long stats[IPI_MAX] ____cacheline_aligned;
+	unsigned long bits ____cacheline_aligned;
+} ipi_data[NR_CPUS] __cacheline_aligned;
+
+int riscv_hartid_to_cpuid(int hartid)
+{
+	int i = -1;
+
+	for (i = 0; i < NR_CPUS; i++)
+		if (cpuid_to_hartid_map(i) == hartid)
+			return i;
 
+	pr_err("Couldn't find cpu id for hartid [%d]\n", hartid);
+	BUG();
+	return i;
+}
+
+void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
+{
+	int cpu;
+
+	for_each_cpu(cpu, in)
+		cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
+}
 /* Unsupported */
 int setup_profiling_timer(unsigned int multiplier)
 {
@@ -48,6 +69,7 @@ int setup_profiling_timer(unsigned int multiplier)
 void riscv_software_interrupt(void)
 {
 	unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
+	unsigned long *stats = ipi_data[smp_processor_id()].stats;
 
 	/* Clear pending IPI */
 	csr_clear(sip, SIE_SSIE);
@@ -62,11 +84,15 @@ void riscv_software_interrupt(void)
 		if (ops == 0)
 			return;
 
-		if (ops & (1 << IPI_RESCHEDULE))
+		if (ops & (1 << IPI_RESCHEDULE)) {
+			stats[IPI_RESCHEDULE]++;
 			scheduler_ipi();
+		}
 
-		if (ops & (1 << IPI_CALL_FUNC))
+		if (ops & (1 << IPI_CALL_FUNC)) {
+			stats[IPI_CALL_FUNC]++;
 			generic_smp_call_function_interrupt();
+		}
 
 		BUG_ON((ops >> IPI_MAX) != 0);
 
@@ -78,14 +104,36 @@ void riscv_software_interrupt(void)
 static void
 send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation)
 {
-	int i;
+	int cpuid, hartid;
+	struct cpumask hartid_mask;
 
+	cpumask_clear(&hartid_mask);
 	mb();
-	for_each_cpu(i, to_whom)
-		set_bit(operation, &ipi_data[i].bits);
-
+	for_each_cpu(cpuid, to_whom) {
+		set_bit(operation, &ipi_data[cpuid].bits);
+		hartid = cpuid_to_hartid_map(cpuid);
+		cpumask_set_cpu(hartid, &hartid_mask);
+	}
 	mb();
-	sbi_send_ipi(cpumask_bits(to_whom));
+	sbi_send_ipi(cpumask_bits(&hartid_mask));
+}
+
+static const char * const ipi_names[] = {
+	[IPI_RESCHEDULE]	= "Rescheduling interrupts",
+	[IPI_CALL_FUNC]		= "Function call interrupts",
+};
+
+void show_ipi_stats(struct seq_file *p, int prec)
+{
+	unsigned int cpu, i;
+
+	for (i = 0; i < IPI_MAX; i++) {
+		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
+			   prec >= 4 ? " " : "");
+		for_each_online_cpu(cpu)
+			seq_printf(p, "%10lu ", ipi_data[cpu].stats[i]);
+		seq_printf(p, " %s\n", ipi_names[i]);
+	}
 }
 
 void arch_send_call_function_ipi_mask(struct cpumask *mask)
@@ -127,7 +175,7 @@ void smp_send_reschedule(int cpu)
 void flush_icache_mm(struct mm_struct *mm, bool local)
 {
 	unsigned int cpu;
-	cpumask_t others, *mask;
+	cpumask_t others, hmask, *mask;
 
 	preempt_disable();
 
@@ -145,9 +193,11 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
 	 */
 	cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
 	local |= cpumask_empty(&others);
-	if (mm != current->active_mm || !local)
-		sbi_remote_fence_i(others.bits);
-	else {
+	if (mm != current->active_mm || !local) {
+		cpumask_clear(&hmask);
+		riscv_cpuid_to_hartid_mask(&others, &hmask);
+		sbi_remote_fence_i(hmask.bits);
+	} else {
 		/*
 		 * It's assumed that at least one strongly ordered operation is
 		 * performed on this hart between setting a hart's cpumask bit
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 56abab6a9812..18cda0e8cf94 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -30,6 +30,7 @@
 #include <linux/irq.h>
 #include <linux/of.h>
 #include <linux/sched/task_stack.h>
+#include <linux/sched/mm.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
@@ -50,25 +51,33 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 void __init setup_smp(void)
 {
 	struct device_node *dn = NULL;
-	int hart, im_okay_therefore_i_am = 0;
+	int hart;
+	bool found_boot_cpu = false;
+	int cpuid = 1;
 
 	while ((dn = of_find_node_by_type(dn, "cpu"))) {
-		hart = riscv_of_processor_hart(dn);
-		if (hart >= 0) {
-			set_cpu_possible(hart, true);
-			set_cpu_present(hart, true);
-			if (hart == smp_processor_id()) {
-				BUG_ON(im_okay_therefore_i_am);
-				im_okay_therefore_i_am = 1;
-			}
+		hart = riscv_of_processor_hartid(dn);
+		if (hart < 0)
+			continue;
+
+		if (hart == cpuid_to_hartid_map(0)) {
+			BUG_ON(found_boot_cpu);
+			found_boot_cpu = 1;
+			continue;
 		}
+
+		cpuid_to_hartid_map(cpuid) = hart;
+		set_cpu_possible(cpuid, true);
+		set_cpu_present(cpuid, true);
+		cpuid++;
 	}
 
-	BUG_ON(!im_okay_therefore_i_am);
+	BUG_ON(!found_boot_cpu);
 }
 
 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
+	int hartid = cpuid_to_hartid_map(cpu);
 	tidle->thread_info.cpu = cpu;
 
 	/*
@@ -79,8 +88,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	 * the spinning harts that they can continue the boot process.
 	 */
 	smp_mb();
-	__cpu_up_stack_pointer[cpu] = task_stack_page(tidle) + THREAD_SIZE;
-	__cpu_up_task_pointer[cpu] = tidle;
+	WRITE_ONCE(__cpu_up_stack_pointer[hartid],
+		  task_stack_page(tidle) + THREAD_SIZE);
+	WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle);
 
 	while (!cpu_online(cpu))
 		cpu_relax();
@@ -100,14 +110,22 @@ asmlinkage void __init smp_callin(void)
 	struct mm_struct *mm = &init_mm;
 
 	/* All kernel threads share the same mm context.  */
-	atomic_inc(&mm->mm_count);
+	mmgrab(mm);
 	current->active_mm = mm;
 
 	trap_init();
 	notify_cpu_starting(smp_processor_id());
 	set_cpu_online(smp_processor_id(), 1);
+	/*
+	 * Remote TLB flushes are ignored while the CPU is offline, so emit
+	 * a local TLB flush right now just in case.
+	 */
 	local_flush_tlb_all();
-	local_irq_enable();
+	/*
+	 * Disable preemption before enabling interrupts, so we don't try to
+	 * schedule a CPU that hasn't actually started yet.
+	 */
 	preempt_disable();
+	local_irq_enable();
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 445ec84f9a47..5739bd05d289 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -2,6 +2,7 @@ lib-y	+= delay.o
 lib-y	+= memcpy.o
 lib-y	+= memset.o
 lib-y	+= uaccess.o
-lib-y	+= tishift.o
+
+lib-(CONFIG_64BIT) += tishift.o
 
 lib-$(CONFIG_32BIT) += udivdi3.o
diff --git a/arch/riscv/mm/ioremap.c b/arch/riscv/mm/ioremap.c
index 70ef2724cdf6..bd2f2db557cc 100644
--- a/arch/riscv/mm/ioremap.c
+++ b/arch/riscv/mm/ioremap.c
@@ -42,7 +42,7 @@ static void __iomem *__ioremap_caller(phys_addr_t addr, size_t size,
 
 	/* Page-align mappings */
 	offset = addr & (~PAGE_MASK);
-	addr &= PAGE_MASK;
+	addr -= offset;
 	size = PAGE_ALIGN(size + offset);
 
 	area = get_vm_area_caller(size, VM_IOREMAP, caller);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 039a3417dfc4..8b25e1f45b27 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -783,6 +783,17 @@ config VFIO_CCW
 	  To compile this driver as a module, choose M here: the
 	  module will be called vfio_ccw.
 
+config VFIO_AP
+	def_tristate n
+	prompt "VFIO support for AP devices"
+	depends on S390_AP_IOMMU && VFIO_MDEV_DEVICE && KVM
+	help
+		This driver grants access to Adjunct Processor (AP) devices
+		via the VFIO mediated device interface.
+
+		To compile this driver as a module, choose M here: the module
+		will be called vfio_ap.
+
 endmenu
 
 menu "Dump support"
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 941d8cc6c9f5..259d1698ac50 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -668,7 +668,6 @@ CONFIG_CRYPTO_USER=m
 # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
 CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_LRW=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index eb6f75f24208..37fd60c20e22 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -610,7 +610,6 @@ CONFIG_CRYPTO_USER=m
 # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
 CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_LRW=m
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index c54cb26eb7f5..812d9498d97b 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -44,7 +44,7 @@ struct s390_aes_ctx {
 	int key_len;
 	unsigned long fc;
 	union {
-		struct crypto_skcipher *blk;
+		struct crypto_sync_skcipher *blk;
 		struct crypto_cipher *cip;
 	} fallback;
 };
@@ -54,7 +54,7 @@ struct s390_xts_ctx {
 	u8 pcc_key[32];
 	int key_len;
 	unsigned long fc;
-	struct crypto_skcipher *fallback;
+	struct crypto_sync_skcipher *fallback;
 };
 
 struct gcm_sg_walk {
@@ -184,14 +184,15 @@ static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key,
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 	unsigned int ret;
 
-	crypto_skcipher_clear_flags(sctx->fallback.blk, CRYPTO_TFM_REQ_MASK);
-	crypto_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
+	crypto_sync_skcipher_clear_flags(sctx->fallback.blk,
+					 CRYPTO_TFM_REQ_MASK);
+	crypto_sync_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
 						      CRYPTO_TFM_REQ_MASK);
 
-	ret = crypto_skcipher_setkey(sctx->fallback.blk, key, len);
+	ret = crypto_sync_skcipher_setkey(sctx->fallback.blk, key, len);
 
 	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-	tfm->crt_flags |= crypto_skcipher_get_flags(sctx->fallback.blk) &
+	tfm->crt_flags |= crypto_sync_skcipher_get_flags(sctx->fallback.blk) &
 			  CRYPTO_TFM_RES_MASK;
 
 	return ret;
@@ -204,9 +205,9 @@ static int fallback_blk_dec(struct blkcipher_desc *desc,
 	unsigned int ret;
 	struct crypto_blkcipher *tfm = desc->tfm;
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
-	SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
 
-	skcipher_request_set_tfm(req, sctx->fallback.blk);
+	skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 
@@ -223,9 +224,9 @@ static int fallback_blk_enc(struct blkcipher_desc *desc,
 	unsigned int ret;
 	struct crypto_blkcipher *tfm = desc->tfm;
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
-	SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
 
-	skcipher_request_set_tfm(req, sctx->fallback.blk);
+	skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 
@@ -306,8 +307,7 @@ static int fallback_init_blk(struct crypto_tfm *tfm)
 	const char *name = tfm->__crt_alg->cra_name;
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
-	sctx->fallback.blk = crypto_alloc_skcipher(name, 0,
-						   CRYPTO_ALG_ASYNC |
+	sctx->fallback.blk = crypto_alloc_sync_skcipher(name, 0,
 						   CRYPTO_ALG_NEED_FALLBACK);
 
 	if (IS_ERR(sctx->fallback.blk)) {
@@ -323,7 +323,7 @@ static void fallback_exit_blk(struct crypto_tfm *tfm)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
-	crypto_free_skcipher(sctx->fallback.blk);
+	crypto_free_sync_skcipher(sctx->fallback.blk);
 }
 
 static struct crypto_alg ecb_aes_alg = {
@@ -453,14 +453,15 @@ static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
 	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
 	unsigned int ret;
 
-	crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK);
-	crypto_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
+	crypto_sync_skcipher_clear_flags(xts_ctx->fallback,
+					 CRYPTO_TFM_REQ_MASK);
+	crypto_sync_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
 						     CRYPTO_TFM_REQ_MASK);
 
-	ret = crypto_skcipher_setkey(xts_ctx->fallback, key, len);
+	ret = crypto_sync_skcipher_setkey(xts_ctx->fallback, key, len);
 
 	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-	tfm->crt_flags |= crypto_skcipher_get_flags(xts_ctx->fallback) &
+	tfm->crt_flags |= crypto_sync_skcipher_get_flags(xts_ctx->fallback) &
 			  CRYPTO_TFM_RES_MASK;
 
 	return ret;
@@ -472,10 +473,10 @@ static int xts_fallback_decrypt(struct blkcipher_desc *desc,
 {
 	struct crypto_blkcipher *tfm = desc->tfm;
 	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
-	SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
 	unsigned int ret;
 
-	skcipher_request_set_tfm(req, xts_ctx->fallback);
+	skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 
@@ -491,10 +492,10 @@ static int xts_fallback_encrypt(struct blkcipher_desc *desc,
 {
 	struct crypto_blkcipher *tfm = desc->tfm;
 	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
-	SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
 	unsigned int ret;
 
-	skcipher_request_set_tfm(req, xts_ctx->fallback);
+	skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
 	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 
@@ -611,8 +612,7 @@ static int xts_fallback_init(struct crypto_tfm *tfm)
 	const char *name = tfm->__crt_alg->cra_name;
 	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
 
-	xts_ctx->fallback = crypto_alloc_skcipher(name, 0,
-						  CRYPTO_ALG_ASYNC |
+	xts_ctx->fallback = crypto_alloc_sync_skcipher(name, 0,
 						  CRYPTO_ALG_NEED_FALLBACK);
 
 	if (IS_ERR(xts_ctx->fallback)) {
@@ -627,7 +627,7 @@ static void xts_fallback_exit(struct crypto_tfm *tfm)
 {
 	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
 
-	crypto_free_skcipher(xts_ctx->fallback);
+	crypto_free_sync_skcipher(xts_ctx->fallback);
 }
 
 static struct crypto_alg xts_aes_alg = {
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 20add000dd6d..7cb6a52f727d 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -221,7 +221,6 @@ CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_DEFLATE=m
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 97db2fba546a..63b46e30b2c3 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -9,6 +9,8 @@
 #include <linux/sched/task_stack.h>
 #include <linux/thread_info.h>
 
+#include <asm-generic/compat.h>
+
 #define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \
 				typeof(0?(__force t)0:0ULL), u64))
 
@@ -51,34 +53,18 @@
 #define COMPAT_USER_HZ		100
 #define COMPAT_UTS_MACHINE	"s390\0\0\0\0"
 
-typedef u32		compat_size_t;
-typedef s32		compat_ssize_t;
-typedef s32		compat_clock_t;
-typedef s32		compat_pid_t;
 typedef u16		__compat_uid_t;
 typedef u16		__compat_gid_t;
 typedef u32		__compat_uid32_t;
 typedef u32		__compat_gid32_t;
 typedef u16		compat_mode_t;
-typedef u32		compat_ino_t;
 typedef u16		compat_dev_t;
-typedef s32		compat_off_t;
-typedef s64		compat_loff_t;
 typedef u16		compat_nlink_t;
 typedef u16		compat_ipc_pid_t;
-typedef s32		compat_daddr_t;
 typedef u32		compat_caddr_t;
 typedef __kernel_fsid_t	compat_fsid_t;
-typedef s32		compat_key_t;
-typedef s32		compat_timer_t;
-
-typedef s32		compat_int_t;
-typedef s32		compat_long_t;
 typedef s64		compat_s64;
-typedef u32		compat_uint_t;
-typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
-typedef u32		compat_uptr_t;
 
 typedef struct {
 	u32 mask;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 29c940bf8506..d5d24889c3bc 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -44,6 +44,7 @@
 #define KVM_REQ_ICPT_OPEREXC	KVM_ARCH_REQ(2)
 #define KVM_REQ_START_MIGRATION KVM_ARCH_REQ(3)
 #define KVM_REQ_STOP_MIGRATION  KVM_ARCH_REQ(4)
+#define KVM_REQ_VSIE_RESTART	KVM_ARCH_REQ(5)
 
 #define SIGP_CTRL_C		0x80
 #define SIGP_CTRL_SCN_MASK	0x3f
@@ -186,6 +187,7 @@ struct kvm_s390_sie_block {
 #define ECA_AIV		0x00200000
 #define ECA_VX		0x00020000
 #define ECA_PROTEXCI	0x00002000
+#define ECA_APIE	0x00000008
 #define ECA_SII		0x00000001
 	__u32	eca;			/* 0x004c */
 #define ICPT_INST	0x04
@@ -237,7 +239,11 @@ struct kvm_s390_sie_block {
 	psw_t	gpsw;			/* 0x0090 */
 	__u64	gg14;			/* 0x00a0 */
 	__u64	gg15;			/* 0x00a8 */
-	__u8	reservedb0[20];		/* 0x00b0 */
+	__u8	reservedb0[8];		/* 0x00b0 */
+#define HPID_KVM	0x4
+#define HPID_VSIE	0x5
+	__u8	hpid;			/* 0x00b8 */
+	__u8	reservedb9[11];		/* 0x00b9 */
 	__u16	extcpuaddr;		/* 0x00c4 */
 	__u16	eic;			/* 0x00c6 */
 	__u32	reservedc8;		/* 0x00c8 */
@@ -255,6 +261,8 @@ struct kvm_s390_sie_block {
 	__u8	reservede4[4];		/* 0x00e4 */
 	__u64	tecmc;			/* 0x00e8 */
 	__u8	reservedf0[12];		/* 0x00f0 */
+#define CRYCB_FORMAT_MASK 0x00000003
+#define CRYCB_FORMAT0 0x00000000
 #define CRYCB_FORMAT1 0x00000001
 #define CRYCB_FORMAT2 0x00000003
 	__u32	crycbd;			/* 0x00fc */
@@ -715,6 +723,7 @@ struct kvm_s390_crypto {
 	__u32 crycbd;
 	__u8 aes_kw;
 	__u8 dea_kw;
+	__u8 apie;
 };
 
 #define APCB0_MASK_SIZE 1
@@ -855,6 +864,10 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 				 struct kvm_async_pf *work);
 
+void kvm_arch_crypto_clear_masks(struct kvm *kvm);
+void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
+			       unsigned long *aqm, unsigned long *adm);
+
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
 extern char sie_exit;
 
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index fd79c0d35dc4..a1fbf15d53aa 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -15,6 +15,7 @@
 #define __IGNORE_pkey_alloc
 #define __IGNORE_pkey_free
 
+#define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_GETHOSTNAME
@@ -25,7 +26,6 @@
 #define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_FADVISE64
 #define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
 #define __ARCH_WANT_SYS_OLD_MMAP
@@ -34,6 +34,7 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 # ifdef CONFIG_COMPAT
 #   define __ARCH_WANT_COMPAT_SYS_TIME
+#   define __ARCH_WANT_SYS_UTIME32
 # endif
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 9a50f02b9894..16511d97e8dc 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -160,6 +160,8 @@ struct kvm_s390_vm_cpu_subfunc {
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW	1
 #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW	2
 #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW	3
+#define KVM_S390_VM_CRYPTO_ENABLE_APIE		4
+#define KVM_S390_VM_CRYPTO_DISABLE_APIE		5
 
 /* kvm attributes for migration mode */
 #define KVM_S390_VM_MIGRATION_STOP	0
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ac5da6b0b862..fe24150ff666 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -40,6 +40,7 @@
 #include <asm/sclp.h>
 #include <asm/cpacf.h>
 #include <asm/timex.h>
+#include <asm/ap.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 
@@ -844,20 +845,24 @@ void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 
 	kvm_s390_vcpu_block_all(kvm);
 
-	kvm_for_each_vcpu(i, vcpu, kvm)
+	kvm_for_each_vcpu(i, vcpu, kvm) {
 		kvm_s390_vcpu_crypto_setup(vcpu);
+		/* recreate the shadow crycb by leaving the VSIE handler */
+		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
+	}
 
 	kvm_s390_vcpu_unblock_all(kvm);
 }
 
 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 {
-	if (!test_kvm_facility(kvm, 76))
-		return -EINVAL;
-
 	mutex_lock(&kvm->lock);
 	switch (attr->attr) {
 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
+		if (!test_kvm_facility(kvm, 76)) {
+			mutex_unlock(&kvm->lock);
+			return -EINVAL;
+		}
 		get_random_bytes(
 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
@@ -865,6 +870,10 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 		break;
 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
+		if (!test_kvm_facility(kvm, 76)) {
+			mutex_unlock(&kvm->lock);
+			return -EINVAL;
+		}
 		get_random_bytes(
 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
@@ -872,17 +881,39 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 		break;
 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
+		if (!test_kvm_facility(kvm, 76)) {
+			mutex_unlock(&kvm->lock);
+			return -EINVAL;
+		}
 		kvm->arch.crypto.aes_kw = 0;
 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 		break;
 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
+		if (!test_kvm_facility(kvm, 76)) {
+			mutex_unlock(&kvm->lock);
+			return -EINVAL;
+		}
 		kvm->arch.crypto.dea_kw = 0;
 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 		break;
+	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
+		if (!ap_instructions_available()) {
+			mutex_unlock(&kvm->lock);
+			return -EOPNOTSUPP;
+		}
+		kvm->arch.crypto.apie = 1;
+		break;
+	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
+		if (!ap_instructions_available()) {
+			mutex_unlock(&kvm->lock);
+			return -EOPNOTSUPP;
+		}
+		kvm->arch.crypto.apie = 0;
+		break;
 	default:
 		mutex_unlock(&kvm->lock);
 		return -ENXIO;
@@ -1491,6 +1522,10 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 			ret = 0;
 			break;
+		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
+		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
+			ret = ap_instructions_available() ? 0 : -ENXIO;
+			break;
 		default:
 			ret = -ENXIO;
 			break;
@@ -1992,55 +2027,101 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	return r;
 }
 
-static int kvm_s390_query_ap_config(u8 *config)
-{
-	u32 fcn_code = 0x04000000UL;
-	u32 cc = 0;
-
-	memset(config, 0, 128);
-	asm volatile(
-		"lgr 0,%1\n"
-		"lgr 2,%2\n"
-		".long 0xb2af0000\n"		/* PQAP(QCI) */
-		"0: ipm %0\n"
-		"srl %0,28\n"
-		"1:\n"
-		EX_TABLE(0b, 1b)
-		: "+r" (cc)
-		: "r" (fcn_code), "r" (config)
-		: "cc", "0", "2", "memory"
-	);
-
-	return cc;
-}
-
 static int kvm_s390_apxa_installed(void)
 {
-	u8 config[128];
-	int cc;
+	struct ap_config_info info;
 
-	if (test_facility(12)) {
-		cc = kvm_s390_query_ap_config(config);
-
-		if (cc)
-			pr_err("PQAP(QCI) failed with cc=%d", cc);
-		else
-			return config[0] & 0x40;
+	if (ap_instructions_available()) {
+		if (ap_qci(&info) == 0)
+			return info.apxa;
 	}
 
 	return 0;
 }
 
+/*
+ * The format of the crypto control block (CRYCB) is specified in the 3 low
+ * order bits of the CRYCB designation (CRYCBD) field as follows:
+ * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
+ *	     AP extended addressing (APXA) facility are installed.
+ * Format 1: The APXA facility is not installed but the MSAX3 facility is.
+ * Format 2: Both the APXA and MSAX3 facilities are installed
+ */
 static void kvm_s390_set_crycb_format(struct kvm *kvm)
 {
 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
 
+	/* Clear the CRYCB format bits - i.e., set format 0 by default */
+	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
+
+	/* Check whether MSAX3 is installed */
+	if (!test_kvm_facility(kvm, 76))
+		return;
+
 	if (kvm_s390_apxa_installed())
 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
 	else
 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
 }
 
+void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
+			       unsigned long *aqm, unsigned long *adm)
+{
+	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
+
+	mutex_lock(&kvm->lock);
+	kvm_s390_vcpu_block_all(kvm);
+
+	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
+	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
+		memcpy(crycb->apcb1.apm, apm, 32);
+		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
+			 apm[0], apm[1], apm[2], apm[3]);
+		memcpy(crycb->apcb1.aqm, aqm, 32);
+		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
+			 aqm[0], aqm[1], aqm[2], aqm[3]);
+		memcpy(crycb->apcb1.adm, adm, 32);
+		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
+			 adm[0], adm[1], adm[2], adm[3]);
+		break;
+	case CRYCB_FORMAT1:
+	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
+		memcpy(crycb->apcb0.apm, apm, 8);
+		memcpy(crycb->apcb0.aqm, aqm, 2);
+		memcpy(crycb->apcb0.adm, adm, 2);
+		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
+			 apm[0], *((unsigned short *)aqm),
+			 *((unsigned short *)adm));
+		break;
+	default:	/* Can not happen */
+		break;
+	}
+
+	/* recreate the shadow crycb for each vcpu */
+	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
+	kvm_s390_vcpu_unblock_all(kvm);
+	mutex_unlock(&kvm->lock);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
+
+void kvm_arch_crypto_clear_masks(struct kvm *kvm)
+{
+	mutex_lock(&kvm->lock);
+	kvm_s390_vcpu_block_all(kvm);
+
+	memset(&kvm->arch.crypto.crycb->apcb0, 0,
+	       sizeof(kvm->arch.crypto.crycb->apcb0));
+	memset(&kvm->arch.crypto.crycb->apcb1, 0,
+	       sizeof(kvm->arch.crypto.crycb->apcb1));
+
+	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
+	/* recreate the shadow crycb for each vcpu */
+	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
+	kvm_s390_vcpu_unblock_all(kvm);
+	mutex_unlock(&kvm->lock);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
+
 static u64 kvm_s390_get_initial_cpuid(void)
 {
 	struct cpuid cpuid;
@@ -2052,12 +2133,12 @@ static u64 kvm_s390_get_initial_cpuid(void)
 
 static void kvm_s390_crypto_init(struct kvm *kvm)
 {
-	if (!test_kvm_facility(kvm, 76))
-		return;
-
 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
 	kvm_s390_set_crycb_format(kvm);
 
+	if (!test_kvm_facility(kvm, 76))
+		return;
+
 	/* Enable AES/DEA protected key functions by default */
 	kvm->arch.crypto.aes_kw = 1;
 	kvm->arch.crypto.dea_kw = 1;
@@ -2583,17 +2664,25 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 
 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
 {
-	if (!test_kvm_facility(vcpu->kvm, 76))
+	/*
+	 * If the AP instructions are not being interpreted and the MSAX3
+	 * facility is not configured for the guest, there is nothing to set up.
+	 */
+	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
 		return;
 
+	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
+	vcpu->arch.sie_block->eca &= ~ECA_APIE;
+
+	if (vcpu->kvm->arch.crypto.apie)
+		vcpu->arch.sie_block->eca |= ECA_APIE;
 
+	/* Set up protected key support */
 	if (vcpu->kvm->arch.crypto.aes_kw)
 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
 	if (vcpu->kvm->arch.crypto.dea_kw)
 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
-
-	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
 }
 
 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
@@ -2685,6 +2774,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
 
+	vcpu->arch.sie_block->hpid = HPID_KVM;
+
 	kvm_s390_vcpu_crypto_setup(vcpu);
 
 	return rc;
@@ -2768,18 +2859,25 @@ static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
 	exit_sie(vcpu);
 }
 
+bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
+{
+	return atomic_read(&vcpu->arch.sie_block->prog20) &
+	       (PROG_BLOCK_SIE | PROG_REQUEST);
+}
+
 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
 {
 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
 }
 
 /*
- * Kick a guest cpu out of SIE and wait until SIE is not running.
+ * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
  * If the CPU is not running (e.g. waiting as idle) the function will
  * return immediately. */
 void exit_sie(struct kvm_vcpu *vcpu)
 {
 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
+	kvm_s390_vsie_kick(vcpu);
 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
 		cpu_relax();
 }
@@ -3196,6 +3294,8 @@ retry:
 
 	/* nothing to do, just clear the request */
 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+	/* we left the vsie handler, nothing to do, just clear the request */
+	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
 
 	return 0;
 }
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 981e3ba97461..1f6e36cdce0d 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -290,6 +290,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu);
+bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu);
 void exit_sie(struct kvm_vcpu *vcpu);
 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index a2b28cd1e3fe..a153257bf7d9 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -135,14 +135,148 @@ static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	atomic_set(&scb_s->cpuflags, newflags);
 	return 0;
 }
+/* Copy to APCB FORMAT1 from APCB FORMAT0 */
+static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
+			unsigned long apcb_o, struct kvm_s390_apcb1 *apcb_h)
+{
+	struct kvm_s390_apcb0 tmp;
 
-/*
+	if (read_guest_real(vcpu, apcb_o, &tmp, sizeof(struct kvm_s390_apcb0)))
+		return -EFAULT;
+
+	apcb_s->apm[0] = apcb_h->apm[0] & tmp.apm[0];
+	apcb_s->aqm[0] = apcb_h->aqm[0] & tmp.aqm[0] & 0xffff000000000000UL;
+	apcb_s->adm[0] = apcb_h->adm[0] & tmp.adm[0] & 0xffff000000000000UL;
+
+	return 0;
+
+}
+
+/**
+ * setup_apcb00 - Copy to APCB FORMAT0 from APCB FORMAT0
+ * @vcpu: pointer to the virtual CPU
+ * @apcb_s: pointer to start of apcb in the shadow crycb
+ * @apcb_o: pointer to start of original apcb in the guest2
+ * @apcb_h: pointer to start of apcb in the guest1
+ *
+ * Returns 0 and -EFAULT on error reading guest apcb
+ */
+static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
+			unsigned long apcb_o, unsigned long *apcb_h)
+{
+	if (read_guest_real(vcpu, apcb_o, apcb_s,
+			    sizeof(struct kvm_s390_apcb0)))
+		return -EFAULT;
+
+	bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb0));
+
+	return 0;
+}
+
+/**
+ * setup_apcb11 - Copy the FORMAT1 APCB from the guest to the shadow CRYCB
+ * @vcpu: pointer to the virtual CPU
+ * @apcb_s: pointer to start of apcb in the shadow crycb
+ * @apcb_o: pointer to start of original guest apcb
+ * @apcb_h: pointer to start of apcb in the host
+ *
+ * Returns 0 and -EFAULT on error reading guest apcb
+ */
+static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
+			unsigned long apcb_o,
+			unsigned long *apcb_h)
+{
+	if (read_guest_real(vcpu, apcb_o, apcb_s,
+			    sizeof(struct kvm_s390_apcb1)))
+		return -EFAULT;
+
+	bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb1));
+
+	return 0;
+}
+
+/**
+ * setup_apcb - Create a shadow copy of the apcb.
+ * @vcpu: pointer to the virtual CPU
+ * @crycb_s: pointer to shadow crycb
+ * @crycb_o: pointer to original guest crycb
+ * @crycb_h: pointer to the host crycb
+ * @fmt_o: format of the original guest crycb.
+ * @fmt_h: format of the host crycb.
+ *
+ * Checks the compatibility between the guest and host crycb and calls the
+ * appropriate copy function.
+ *
+ * Return 0 or an error number if the guest and host crycb are incompatible.
+ */
+static int setup_apcb(struct kvm_vcpu *vcpu, struct kvm_s390_crypto_cb *crycb_s,
+	       const u32 crycb_o,
+	       struct kvm_s390_crypto_cb *crycb_h,
+	       int fmt_o, int fmt_h)
+{
+	struct kvm_s390_crypto_cb *crycb;
+
+	crycb = (struct kvm_s390_crypto_cb *) (unsigned long)crycb_o;
+
+	switch (fmt_o) {
+	case CRYCB_FORMAT2:
+		if ((crycb_o & PAGE_MASK) != ((crycb_o + 256) & PAGE_MASK))
+			return -EACCES;
+		if (fmt_h != CRYCB_FORMAT2)
+			return -EINVAL;
+		return setup_apcb11(vcpu, (unsigned long *)&crycb_s->apcb1,
+				    (unsigned long) &crycb->apcb1,
+				    (unsigned long *)&crycb_h->apcb1);
+	case CRYCB_FORMAT1:
+		switch (fmt_h) {
+		case CRYCB_FORMAT2:
+			return setup_apcb10(vcpu, &crycb_s->apcb1,
+					    (unsigned long) &crycb->apcb0,
+					    &crycb_h->apcb1);
+		case CRYCB_FORMAT1:
+			return setup_apcb00(vcpu,
+					    (unsigned long *) &crycb_s->apcb0,
+					    (unsigned long) &crycb->apcb0,
+					    (unsigned long *) &crycb_h->apcb0);
+		}
+		break;
+	case CRYCB_FORMAT0:
+		if ((crycb_o & PAGE_MASK) != ((crycb_o + 32) & PAGE_MASK))
+			return -EACCES;
+
+		switch (fmt_h) {
+		case CRYCB_FORMAT2:
+			return setup_apcb10(vcpu, &crycb_s->apcb1,
+					    (unsigned long) &crycb->apcb0,
+					    &crycb_h->apcb1);
+		case CRYCB_FORMAT1:
+		case CRYCB_FORMAT0:
+			return setup_apcb00(vcpu,
+					    (unsigned long *) &crycb_s->apcb0,
+					    (unsigned long) &crycb->apcb0,
+					    (unsigned long *) &crycb_h->apcb0);
+		}
+	}
+	return -EINVAL;
+}
+
+/**
+ * shadow_crycb - Create a shadow copy of the crycb block
+ * @vcpu: a pointer to the virtual CPU
+ * @vsie_page: a pointer to internal date used for the vSIE
+ *
  * Create a shadow copy of the crycb block and setup key wrapping, if
  * requested for guest 3 and enabled for guest 2.
  *
- * We only accept format-1 (no AP in g2), but convert it into format-2
+ * We accept format-1 or format-2, but we convert format-1 into format-2
+ * in the shadow CRYCB.
+ * Using format-2 enables the firmware to choose the right format when
+ * scheduling the SIE.
  * There is nothing to do for format-0.
  *
+ * This function centralize the issuing of set_validity_icpt() for all
+ * the subfunctions working on the crycb.
+ *
  * Returns: - 0 if shadowed or nothing to do
  *          - > 0 if control has to be given to guest 2
  */
@@ -154,23 +288,40 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
 	unsigned long *b1, *b2;
 	u8 ecb3_flags;
+	int apie_h;
+	int key_msk = test_kvm_facility(vcpu->kvm, 76);
+	int fmt_o = crycbd_o & CRYCB_FORMAT_MASK;
+	int fmt_h = vcpu->arch.sie_block->crycbd & CRYCB_FORMAT_MASK;
+	int ret = 0;
 
 	scb_s->crycbd = 0;
-	if (!(crycbd_o & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
-		return 0;
-	/* format-1 is supported with message-security-assist extension 3 */
-	if (!test_kvm_facility(vcpu->kvm, 76))
+
+	apie_h = vcpu->arch.sie_block->eca & ECA_APIE;
+	if (!apie_h && !key_msk)
 		return 0;
+
+	if (!crycb_addr)
+		return set_validity_icpt(scb_s, 0x0039U);
+
+	if (fmt_o == CRYCB_FORMAT1)
+		if ((crycb_addr & PAGE_MASK) !=
+		    ((crycb_addr + 128) & PAGE_MASK))
+			return set_validity_icpt(scb_s, 0x003CU);
+
+	if (apie_h && (scb_o->eca & ECA_APIE)) {
+		ret = setup_apcb(vcpu, &vsie_page->crycb, crycb_addr,
+				 vcpu->kvm->arch.crypto.crycb,
+				 fmt_o, fmt_h);
+		if (ret)
+			goto end;
+		scb_s->eca |= scb_o->eca & ECA_APIE;
+	}
+
 	/* we may only allow it if enabled for guest 2 */
 	ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
 		     (ECB3_AES | ECB3_DEA);
 	if (!ecb3_flags)
-		return 0;
-
-	if ((crycb_addr & PAGE_MASK) != ((crycb_addr + 128) & PAGE_MASK))
-		return set_validity_icpt(scb_s, 0x003CU);
-	else if (!crycb_addr)
-		return set_validity_icpt(scb_s, 0x0039U);
+		goto end;
 
 	/* copy only the wrapping keys */
 	if (read_guest_real(vcpu, crycb_addr + 72,
@@ -178,8 +329,6 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		return set_validity_icpt(scb_s, 0x0035U);
 
 	scb_s->ecb3 |= ecb3_flags;
-	scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT1 |
-			CRYCB_FORMAT2;
 
 	/* xor both blocks in one run */
 	b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
@@ -187,6 +336,16 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 			    vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
 	/* as 56%8 == 0, bitmap_xor won't overwrite any data */
 	bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
+end:
+	switch (ret) {
+	case -EINVAL:
+		return set_validity_icpt(scb_s, 0x0020U);
+	case -EFAULT:
+		return set_validity_icpt(scb_s, 0x0035U);
+	case -EACCES:
+		return set_validity_icpt(scb_s, 0x003CU);
+	}
+	scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT2;
 	return 0;
 }
 
@@ -383,6 +542,8 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	if (test_kvm_facility(vcpu->kvm, 156))
 		scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
 
+	scb_s->hpid = HPID_VSIE;
+
 	prepare_ibc(vcpu, vsie_page);
 	rc = shadow_crycb(vcpu, vsie_page);
 out:
@@ -830,7 +991,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
 	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
 	int guest_bp_isolation;
-	int rc;
+	int rc = 0;
 
 	handle_last_fault(vcpu, vsie_page);
 
@@ -858,7 +1019,18 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	guest_enter_irqoff();
 	local_irq_enable();
 
-	rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
+	/*
+	 * Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking
+	 * and VCPU requests also hinder the vSIE from running and lead
+	 * to an immediate exit. kvm_s390_vsie_kick() has to be used to
+	 * also kick the vSIE.
+	 */
+	vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
+	barrier();
+	if (!kvm_s390_vcpu_sie_inhibited(vcpu))
+		rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
+	barrier();
+	vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
 
 	local_irq_disable();
 	guest_exit_irqoff();
@@ -1005,7 +1177,8 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		if (rc == -EAGAIN)
 			rc = 0;
 		if (rc || scb_s->icptcode || signal_pending(current) ||
-		    kvm_s390_vcpu_has_irq(vcpu, 0))
+		    kvm_s390_vcpu_has_irq(vcpu, 0) ||
+		    kvm_s390_vcpu_sie_inhibited(vcpu))
 			break;
 	}
 
@@ -1122,7 +1295,8 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
 	if (unlikely(scb_addr & 0x1ffUL))
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0))
+	if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
+	    kvm_s390_vcpu_sie_inhibited(vcpu))
 		return 0;
 
 	vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 911c7ded35f1..1e668b95e0c6 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -907,10 +907,16 @@ static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
 	pmd_t *pmdp;
 
 	BUG_ON(gmap_is_shadow(gmap));
-	spin_lock(&gmap->guest_table_lock);
 	pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
+	if (!pmdp)
+		return NULL;
 
-	if (!pmdp || pmd_none(*pmdp)) {
+	/* without huge pages, there is no need to take the table lock */
+	if (!gmap->mm->context.allow_gmap_hpage_1m)
+		return pmd_none(*pmdp) ? NULL : pmdp;
+
+	spin_lock(&gmap->guest_table_lock);
+	if (pmd_none(*pmdp)) {
 		spin_unlock(&gmap->guest_table_lock);
 		return NULL;
 	}
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 0c85aedcf9b3..fd788e0f2e5b 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -106,6 +106,8 @@ static struct facility_def facility_defs[] = {
 
 		.name = "FACILITIES_KVM_CPUMODEL",
 		.bits = (int[]){
+			12, /* AP Query Configuration Information */
+			15, /* AP Facilities Test */
 			156, /* etoken facility */
 			-1  /* END */
 		}
diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c
index 26789ad28193..cde370cad4ae 100644
--- a/arch/sh/boards/of-generic.c
+++ b/arch/sh/boards/of-generic.c
@@ -64,7 +64,7 @@ static void sh_of_smp_probe(void)
 
 	init_cpu_possible(cpumask_of(0));
 
-	for_each_node_by_type(np, "cpu") {
+	for_each_of_cpu_node(np) {
 		const __be32 *cell = of_get_property(np, "reg", NULL);
 		u64 id = -1;
 		if (cell) id = of_read_number(cell, of_n_addr_cells(np));
diff --git a/arch/sh/include/asm/unistd.h b/arch/sh/include/asm/unistd.h
index b36200af9ce7..a99234b61051 100644
--- a/arch/sh/include/asm/unistd.h
+++ b/arch/sh/include/asm/unistd.h
@@ -5,6 +5,7 @@
 #  include <asm/unistd_64.h>
 # endif
 
+# define __ARCH_WANT_NEW_STAT
 # define __ARCH_WANT_OLD_READDIR
 # define __ARCH_WANT_OLD_STAT
 # define __ARCH_WANT_STAT64
@@ -19,7 +20,6 @@
 # define __ARCH_WANT_SYS_SOCKETCALL
 # define __ARCH_WANT_SYS_FADVISE64
 # define __ARCH_WANT_SYS_GETPGRP
-# define __ARCH_WANT_SYS_LLSEEK
 # define __ARCH_WANT_SYS_NICE
 # define __ARCH_WANT_SYS_OLD_GETRLIMIT
 # define __ARCH_WANT_SYS_OLD_UNAME
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index 4eb51d2dae98..30b1763580b1 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -6,38 +6,23 @@
  */
 #include <linux/types.h>
 
+#include <asm-generic/compat.h>
+
 #define COMPAT_USER_HZ		100
 #define COMPAT_UTS_MACHINE	"sparc\0\0"
 
-typedef u32		compat_size_t;
-typedef s32		compat_ssize_t;
-typedef s32		compat_clock_t;
-typedef s32		compat_pid_t;
 typedef u16		__compat_uid_t;
 typedef u16		__compat_gid_t;
 typedef u32		__compat_uid32_t;
 typedef u32		__compat_gid32_t;
 typedef u16		compat_mode_t;
-typedef u32		compat_ino_t;
 typedef u16		compat_dev_t;
-typedef s32		compat_off_t;
-typedef s64		compat_loff_t;
 typedef s16		compat_nlink_t;
 typedef u16		compat_ipc_pid_t;
-typedef s32		compat_daddr_t;
 typedef u32		compat_caddr_t;
 typedef __kernel_fsid_t	compat_fsid_t;
-typedef s32		compat_key_t;
-typedef s32		compat_timer_t;
-
-typedef s32		compat_int_t;
-typedef s32		compat_long_t;
 typedef s64		compat_s64;
-typedef u32		compat_uint_t;
-typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
-typedef u32		compat_uptr_t;
-
 struct compat_stat {
 	compat_dev_t	st_dev;
 	compat_ino_t	st_ino;
@@ -47,11 +32,11 @@ struct compat_stat {
 	__compat_gid_t	st_gid;
 	compat_dev_t	st_rdev;
 	compat_off_t	st_size;
-	compat_time_t	st_atime;
+	old_time32_t	st_atime;
 	compat_ulong_t	st_atime_nsec;
-	compat_time_t	st_mtime;
+	old_time32_t	st_mtime;
 	compat_ulong_t	st_mtime_nsec;
-	compat_time_t	st_ctime;
+	old_time32_t	st_ctime;
 	compat_ulong_t	st_ctime_nsec;
 	compat_off_t	st_blksize;
 	compat_off_t	st_blocks;
diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h
index d955c8df62d6..1902db27ff4b 100644
--- a/arch/sparc/include/asm/prom.h
+++ b/arch/sparc/include/asm/prom.h
@@ -24,9 +24,6 @@
 #include <linux/atomic.h>
 #include <linux/irqdomain.h>
 
-#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT	2
-#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT	1
-
 #define of_compat_cmp(s1, s2, l)	strncmp((s1), (s2), (l))
 #define of_prop_cmp(s1, s2)		strcasecmp((s1), (s2))
 #define of_node_cmp(s1, s2)		strcmp((s1), (s2))
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index b2a6a955113e..00f87dbd0b17 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -21,6 +21,7 @@
 #else
 #define __NR_time		231 /* Linux sparc32                               */
 #endif
+#define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
@@ -33,7 +34,6 @@
 #define __ARCH_WANT_SYS_SOCKETCALL
 #define __ARCH_WANT_SYS_FADVISE64
 #define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
@@ -42,6 +42,7 @@
 #define __ARCH_WANT_SYS_IPC
 #else
 #define __ARCH_WANT_COMPAT_SYS_TIME
+#define __ARCH_WANT_SYS_UTIME32
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
 #endif
 
diff --git a/arch/sparc/include/asm/vdso.h b/arch/sparc/include/asm/vdso.h
index 56836eb01787..59e79d35cd73 100644
--- a/arch/sparc/include/asm/vdso.h
+++ b/arch/sparc/include/asm/vdso.h
@@ -9,8 +9,6 @@ struct vdso_image {
 	void *data;
 	unsigned long size;   /* Always a multiple of PAGE_SIZE */
 
-	unsigned long tick_patch, tick_patch_len;
-
 	long sym_vvar_start;  /* Negative offset to the vvar area */
 };
 
diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c
index 7b539ceebe13..55662c3b4513 100644
--- a/arch/sparc/vdso/vclock_gettime.c
+++ b/arch/sparc/vdso/vclock_gettime.c
@@ -90,16 +90,15 @@ notrace static __always_inline u64 vread_tick(void)
 {
 	u64	ret;
 
-	__asm__ __volatile__("1:\n\t"
-			     "rd		%%tick, %0\n\t"
-			     ".pushsection	.tick_patch, \"a\"\n\t"
-			     ".word		1b - ., 1f - .\n\t"
-			     ".popsection\n\t"
-			     ".pushsection	.tick_patch_replacement, \"ax\"\n\t"
-			     "1:\n\t"
-			     "rd		%%asr24, %0\n\t"
-			     ".popsection\n"
-			     : "=r" (ret));
+	__asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
+	return ret;
+}
+
+notrace static __always_inline u64 vread_tick_stick(void)
+{
+	u64	ret;
+
+	__asm__ __volatile__("rd %%asr24, %0" : "=r" (ret));
 	return ret;
 }
 #else
@@ -107,16 +106,18 @@ notrace static __always_inline u64 vread_tick(void)
 {
 	register unsigned long long ret asm("o4");
 
-	__asm__ __volatile__("1:\n\t"
-			     "rd		%%tick, %L0\n\t"
-			     "srlx		%L0, 32, %H0\n\t"
-			     ".pushsection	.tick_patch, \"a\"\n\t"
-			     ".word		1b - ., 1f - .\n\t"
-			     ".popsection\n\t"
-			     ".pushsection	.tick_patch_replacement, \"ax\"\n\t"
-			     "1:\n\t"
-			     "rd		%%asr24, %L0\n\t"
-			     ".popsection\n"
+	__asm__ __volatile__("rd %%tick, %L0\n\t"
+			     "srlx %L0, 32, %H0"
+			     : "=r" (ret));
+	return ret;
+}
+
+notrace static __always_inline u64 vread_tick_stick(void)
+{
+	register unsigned long long ret asm("o4");
+
+	__asm__ __volatile__("rd %%asr24, %L0\n\t"
+			     "srlx %L0, 32, %H0"
 			     : "=r" (ret));
 	return ret;
 }
@@ -132,6 +133,16 @@ notrace static __always_inline u64 vgetsns(struct vvar_data *vvar)
 	return v * vvar->clock.mult;
 }
 
+notrace static __always_inline u64 vgetsns_stick(struct vvar_data *vvar)
+{
+	u64 v;
+	u64 cycles;
+
+	cycles = vread_tick_stick();
+	v = (cycles - vvar->clock.cycle_last) & vvar->clock.mask;
+	return v * vvar->clock.mult;
+}
+
 notrace static __always_inline int do_realtime(struct vvar_data *vvar,
 					       struct timespec *ts)
 {
@@ -152,6 +163,26 @@ notrace static __always_inline int do_realtime(struct vvar_data *vvar,
 	return 0;
 }
 
+notrace static __always_inline int do_realtime_stick(struct vvar_data *vvar,
+						     struct timespec *ts)
+{
+	unsigned long seq;
+	u64 ns;
+
+	do {
+		seq = vvar_read_begin(vvar);
+		ts->tv_sec = vvar->wall_time_sec;
+		ns = vvar->wall_time_snsec;
+		ns += vgetsns_stick(vvar);
+		ns >>= vvar->clock.shift;
+	} while (unlikely(vvar_read_retry(vvar, seq)));
+
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
+	return 0;
+}
+
 notrace static __always_inline int do_monotonic(struct vvar_data *vvar,
 						struct timespec *ts)
 {
@@ -172,6 +203,26 @@ notrace static __always_inline int do_monotonic(struct vvar_data *vvar,
 	return 0;
 }
 
+notrace static __always_inline int do_monotonic_stick(struct vvar_data *vvar,
+						      struct timespec *ts)
+{
+	unsigned long seq;
+	u64 ns;
+
+	do {
+		seq = vvar_read_begin(vvar);
+		ts->tv_sec = vvar->monotonic_time_sec;
+		ns = vvar->monotonic_time_snsec;
+		ns += vgetsns_stick(vvar);
+		ns >>= vvar->clock.shift;
+	} while (unlikely(vvar_read_retry(vvar, seq)));
+
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
+	return 0;
+}
+
 notrace static int do_realtime_coarse(struct vvar_data *vvar,
 				      struct timespec *ts)
 {
@@ -228,6 +279,31 @@ clock_gettime(clockid_t, struct timespec *)
 	__attribute__((weak, alias("__vdso_clock_gettime")));
 
 notrace int
+__vdso_clock_gettime_stick(clockid_t clock, struct timespec *ts)
+{
+	struct vvar_data *vvd = get_vvar_data();
+
+	switch (clock) {
+	case CLOCK_REALTIME:
+		if (unlikely(vvd->vclock_mode == VCLOCK_NONE))
+			break;
+		return do_realtime_stick(vvd, ts);
+	case CLOCK_MONOTONIC:
+		if (unlikely(vvd->vclock_mode == VCLOCK_NONE))
+			break;
+		return do_monotonic_stick(vvd, ts);
+	case CLOCK_REALTIME_COARSE:
+		return do_realtime_coarse(vvd, ts);
+	case CLOCK_MONOTONIC_COARSE:
+		return do_monotonic_coarse(vvd, ts);
+	}
+	/*
+	 * Unknown clock ID ? Fall back to the syscall.
+	 */
+	return vdso_fallback_gettime(clock, ts);
+}
+
+notrace int
 __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
 	struct vvar_data *vvd = get_vvar_data();
@@ -262,3 +338,36 @@ __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 int
 gettimeofday(struct timeval *, struct timezone *)
 	__attribute__((weak, alias("__vdso_gettimeofday")));
+
+notrace int
+__vdso_gettimeofday_stick(struct timeval *tv, struct timezone *tz)
+{
+	struct vvar_data *vvd = get_vvar_data();
+
+	if (likely(vvd->vclock_mode != VCLOCK_NONE)) {
+		if (likely(tv != NULL)) {
+			union tstv_t {
+				struct timespec ts;
+				struct timeval tv;
+			} *tstv = (union tstv_t *) tv;
+			do_realtime_stick(vvd, &tstv->ts);
+			/*
+			 * Assign before dividing to ensure that the division is
+			 * done in the type of tv_usec, not tv_nsec.
+			 *
+			 * There cannot be > 1 billion usec in a second:
+			 * do_realtime() has already distributed such overflow
+			 * into tv_sec.  So we can assign it to an int safely.
+			 */
+			tstv->tv.tv_usec = tstv->ts.tv_nsec;
+			tstv->tv.tv_usec /= 1000;
+		}
+		if (unlikely(tz != NULL)) {
+			/* Avoid memcpy. Some old compilers fail to inline it */
+			tz->tz_minuteswest = vvd->tz_minuteswest;
+			tz->tz_dsttime = vvd->tz_dsttime;
+		}
+		return 0;
+	}
+	return vdso_fallback_gettimeofday(tv, tz);
+}
diff --git a/arch/sparc/vdso/vdso-layout.lds.S b/arch/sparc/vdso/vdso-layout.lds.S
index ed36d49e1617..d31e57e8a3bb 100644
--- a/arch/sparc/vdso/vdso-layout.lds.S
+++ b/arch/sparc/vdso/vdso-layout.lds.S
@@ -73,9 +73,6 @@ SECTIONS
 
 	.text		: { *(.text*) }			:text	=0x90909090,
 
-	.tick_patch 	  : { *(.tick_patch) }		:text
-	.tick_patch_insns : { *(.tick_patch_insns) }	:text
-
 	/DISCARD/ : {
 		*(.discard)
 		*(.discard.*)
diff --git a/arch/sparc/vdso/vdso.lds.S b/arch/sparc/vdso/vdso.lds.S
index f3caa29a331c..629ab6900df7 100644
--- a/arch/sparc/vdso/vdso.lds.S
+++ b/arch/sparc/vdso/vdso.lds.S
@@ -18,8 +18,10 @@ VERSION {
 	global:
 		clock_gettime;
 		__vdso_clock_gettime;
+		__vdso_clock_gettime_stick;
 		gettimeofday;
 		__vdso_gettimeofday;
+		__vdso_gettimeofday_stick;
 	local: *;
 	};
 }
diff --git a/arch/sparc/vdso/vdso2c.h b/arch/sparc/vdso/vdso2c.h
index 4df005cf98c0..60d69acc748f 100644
--- a/arch/sparc/vdso/vdso2c.h
+++ b/arch/sparc/vdso/vdso2c.h
@@ -17,11 +17,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
 	unsigned long mapping_size;
 	int i;
 	unsigned long j;
-	ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
-		*patch_sec = NULL;
+	ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr;
 	ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
 	ELF(Dyn) *dyn = 0, *dyn_end = 0;
-	const char *secstrings;
 	INT_BITS syms[NSYMS] = {};
 
 	ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_BE(&hdr->e_phoff));
@@ -64,18 +62,11 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
 	}
 
 	/* Walk the section table */
-	secstrings_hdr = raw_addr + GET_BE(&hdr->e_shoff) +
-		GET_BE(&hdr->e_shentsize)*GET_BE(&hdr->e_shstrndx);
-	secstrings = raw_addr + GET_BE(&secstrings_hdr->sh_offset);
 	for (i = 0; i < GET_BE(&hdr->e_shnum); i++) {
 		ELF(Shdr) *sh = raw_addr + GET_BE(&hdr->e_shoff) +
 			GET_BE(&hdr->e_shentsize) * i;
 		if (GET_BE(&sh->sh_type) == SHT_SYMTAB)
 			symtab_hdr = sh;
-
-		if (!strcmp(secstrings + GET_BE(&sh->sh_name),
-			    ".tick_patch"))
-			patch_sec = sh;
 	}
 
 	if (!symtab_hdr)
@@ -142,12 +133,6 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
 	fprintf(outfile, "const struct vdso_image %s_builtin = {\n", name);
 	fprintf(outfile, "\t.data = raw_data,\n");
 	fprintf(outfile, "\t.size = %lu,\n", mapping_size);
-	if (patch_sec) {
-		fprintf(outfile, "\t.tick_patch = %lu,\n",
-			(unsigned long)GET_BE(&patch_sec->sh_offset));
-		fprintf(outfile, "\t.tick_patch_len = %lu,\n",
-			(unsigned long)GET_BE(&patch_sec->sh_size));
-	}
 	for (i = 0; i < NSYMS; i++) {
 		if (required_syms[i].export && syms[i])
 			fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n",
diff --git a/arch/sparc/vdso/vdso32/vdso32.lds.S b/arch/sparc/vdso/vdso32/vdso32.lds.S
index 53575ee154c4..218930fdff03 100644
--- a/arch/sparc/vdso/vdso32/vdso32.lds.S
+++ b/arch/sparc/vdso/vdso32/vdso32.lds.S
@@ -17,8 +17,10 @@ VERSION {
 	global:
 		clock_gettime;
 		__vdso_clock_gettime;
+		__vdso_clock_gettime_stick;
 		gettimeofday;
 		__vdso_gettimeofday;
+		__vdso_gettimeofday_stick;
 	local: *;
 	};
 }
diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c
index 8874a27d8adc..154fe8adc090 100644
--- a/arch/sparc/vdso/vma.c
+++ b/arch/sparc/vdso/vma.c
@@ -42,24 +42,201 @@ static struct vm_special_mapping vdso_mapping32 = {
 
 struct vvar_data *vvar_data;
 
-struct tick_patch_entry {
-	s32 orig, repl;
+struct vdso_elfinfo32 {
+	Elf32_Ehdr	*hdr;
+	Elf32_Sym	*dynsym;
+	unsigned long	dynsymsize;
+	const char	*dynstr;
+	unsigned long	text;
 };
 
-static void stick_patch(const struct vdso_image *image)
+struct vdso_elfinfo64 {
+	Elf64_Ehdr	*hdr;
+	Elf64_Sym	*dynsym;
+	unsigned long	dynsymsize;
+	const char	*dynstr;
+	unsigned long	text;
+};
+
+struct vdso_elfinfo {
+	union {
+		struct vdso_elfinfo32 elf32;
+		struct vdso_elfinfo64 elf64;
+	} u;
+};
+
+static void *one_section64(struct vdso_elfinfo64 *e, const char *name,
+			   unsigned long *size)
+{
+	const char *snames;
+	Elf64_Shdr *shdrs;
+	unsigned int i;
+
+	shdrs = (void *)e->hdr + e->hdr->e_shoff;
+	snames = (void *)e->hdr + shdrs[e->hdr->e_shstrndx].sh_offset;
+	for (i = 1; i < e->hdr->e_shnum; i++) {
+		if (!strcmp(snames+shdrs[i].sh_name, name)) {
+			if (size)
+				*size = shdrs[i].sh_size;
+			return (void *)e->hdr + shdrs[i].sh_offset;
+		}
+	}
+	return NULL;
+}
+
+static int find_sections64(const struct vdso_image *image, struct vdso_elfinfo *_e)
+{
+	struct vdso_elfinfo64 *e = &_e->u.elf64;
+
+	e->hdr = image->data;
+	e->dynsym = one_section64(e, ".dynsym", &e->dynsymsize);
+	e->dynstr = one_section64(e, ".dynstr", NULL);
+
+	if (!e->dynsym || !e->dynstr) {
+		pr_err("VDSO64: Missing symbol sections.\n");
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static Elf64_Sym *find_sym64(const struct vdso_elfinfo64 *e, const char *name)
+{
+	unsigned int i;
+
+	for (i = 0; i < (e->dynsymsize / sizeof(Elf64_Sym)); i++) {
+		Elf64_Sym *s = &e->dynsym[i];
+		if (s->st_name == 0)
+			continue;
+		if (!strcmp(e->dynstr + s->st_name, name))
+			return s;
+	}
+	return NULL;
+}
+
+static int patchsym64(struct vdso_elfinfo *_e, const char *orig,
+		      const char *new)
+{
+	struct vdso_elfinfo64 *e = &_e->u.elf64;
+	Elf64_Sym *osym = find_sym64(e, orig);
+	Elf64_Sym *nsym = find_sym64(e, new);
+
+	if (!nsym || !osym) {
+		pr_err("VDSO64: Missing symbols.\n");
+		return -ENODEV;
+	}
+	osym->st_value = nsym->st_value;
+	osym->st_size = nsym->st_size;
+	osym->st_info = nsym->st_info;
+	osym->st_other = nsym->st_other;
+	osym->st_shndx = nsym->st_shndx;
+
+	return 0;
+}
+
+static void *one_section32(struct vdso_elfinfo32 *e, const char *name,
+			   unsigned long *size)
+{
+	const char *snames;
+	Elf32_Shdr *shdrs;
+	unsigned int i;
+
+	shdrs = (void *)e->hdr + e->hdr->e_shoff;
+	snames = (void *)e->hdr + shdrs[e->hdr->e_shstrndx].sh_offset;
+	for (i = 1; i < e->hdr->e_shnum; i++) {
+		if (!strcmp(snames+shdrs[i].sh_name, name)) {
+			if (size)
+				*size = shdrs[i].sh_size;
+			return (void *)e->hdr + shdrs[i].sh_offset;
+		}
+	}
+	return NULL;
+}
+
+static int find_sections32(const struct vdso_image *image, struct vdso_elfinfo *_e)
+{
+	struct vdso_elfinfo32 *e = &_e->u.elf32;
+
+	e->hdr = image->data;
+	e->dynsym = one_section32(e, ".dynsym", &e->dynsymsize);
+	e->dynstr = one_section32(e, ".dynstr", NULL);
+
+	if (!e->dynsym || !e->dynstr) {
+		pr_err("VDSO32: Missing symbol sections.\n");
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static Elf32_Sym *find_sym32(const struct vdso_elfinfo32 *e, const char *name)
 {
-	struct tick_patch_entry *p, *p_end;
+	unsigned int i;
+
+	for (i = 0; i < (e->dynsymsize / sizeof(Elf32_Sym)); i++) {
+		Elf32_Sym *s = &e->dynsym[i];
+		if (s->st_name == 0)
+			continue;
+		if (!strcmp(e->dynstr + s->st_name, name))
+			return s;
+	}
+	return NULL;
+}
 
-	p = image->data + image->tick_patch;
-	p_end = (void *)p + image->tick_patch_len;
-	while (p < p_end) {
-		u32 *instr = (void *)&p->orig + p->orig;
-		u32 *repl = (void *)&p->repl + p->repl;
+static int patchsym32(struct vdso_elfinfo *_e, const char *orig,
+		      const char *new)
+{
+	struct vdso_elfinfo32 *e = &_e->u.elf32;
+	Elf32_Sym *osym = find_sym32(e, orig);
+	Elf32_Sym *nsym = find_sym32(e, new);
 
-		*instr = *repl;
-		flushi(instr);
-		p++;
+	if (!nsym || !osym) {
+		pr_err("VDSO32: Missing symbols.\n");
+		return -ENODEV;
 	}
+	osym->st_value = nsym->st_value;
+	osym->st_size = nsym->st_size;
+	osym->st_info = nsym->st_info;
+	osym->st_other = nsym->st_other;
+	osym->st_shndx = nsym->st_shndx;
+
+	return 0;
+}
+
+static int find_sections(const struct vdso_image *image, struct vdso_elfinfo *e,
+			 bool elf64)
+{
+	if (elf64)
+		return find_sections64(image, e);
+	else
+		return find_sections32(image, e);
+}
+
+static int patch_one_symbol(struct vdso_elfinfo *e, const char *orig,
+			    const char *new_target, bool elf64)
+{
+	if (elf64)
+		return patchsym64(e, orig, new_target);
+	else
+		return patchsym32(e, orig, new_target);
+}
+
+static int stick_patch(const struct vdso_image *image, struct vdso_elfinfo *e, bool elf64)
+{
+	int err;
+
+	err = find_sections(image, e, elf64);
+	if (err)
+		return err;
+
+	err = patch_one_symbol(e,
+			       "__vdso_gettimeofday",
+			       "__vdso_gettimeofday_stick", elf64);
+	if (err)
+		return err;
+
+	return patch_one_symbol(e,
+				"__vdso_clock_gettime",
+				"__vdso_clock_gettime_stick", elf64);
+	return 0;
 }
 
 /*
@@ -67,13 +244,19 @@ static void stick_patch(const struct vdso_image *image)
  * kernel image.
  */
 int __init init_vdso_image(const struct vdso_image *image,
-		struct vm_special_mapping *vdso_mapping)
+			   struct vm_special_mapping *vdso_mapping, bool elf64)
 {
-	int i;
+	int cnpages = (image->size) / PAGE_SIZE;
 	struct page *dp, **dpp = NULL;
-	int dnpages = 0;
 	struct page *cp, **cpp = NULL;
-	int cnpages = (image->size) / PAGE_SIZE;
+	struct vdso_elfinfo ei;
+	int i, dnpages = 0;
+
+	if (tlb_type != spitfire) {
+		int err = stick_patch(image, &ei, elf64);
+		if (err)
+			return err;
+	}
 
 	/*
 	 * First, the vdso text.  This is initialied data, an integral number of
@@ -88,9 +271,6 @@ int __init init_vdso_image(const struct vdso_image *image,
 	if (!cpp)
 		goto oom;
 
-	if (tlb_type != spitfire)
-		stick_patch(image);
-
 	for (i = 0; i < cnpages; i++) {
 		cp = alloc_page(GFP_KERNEL);
 		if (!cp)
@@ -153,13 +333,13 @@ static int __init init_vdso(void)
 {
 	int err = 0;
 #ifdef CONFIG_SPARC64
-	err = init_vdso_image(&vdso_image_64_builtin, &vdso_mapping64);
+	err = init_vdso_image(&vdso_image_64_builtin, &vdso_mapping64, true);
 	if (err)
 		return err;
 #endif
 
 #ifdef CONFIG_COMPAT
-	err = init_vdso_image(&vdso_image_32_builtin, &vdso_mapping32);
+	err = init_vdso_image(&vdso_image_32_builtin, &vdso_mapping32, false);
 #endif
 	return err;
 
diff --git a/arch/unicore32/include/uapi/asm/unistd.h b/arch/unicore32/include/uapi/asm/unistd.h
index 65856eaab163..1e8fe5941b8a 100644
--- a/arch/unicore32/include/uapi/asm/unistd.h
+++ b/arch/unicore32/include/uapi/asm/unistd.h
@@ -15,4 +15,5 @@
 
 /* Use the standard ABI for syscalls. */
 #include <asm-generic/unistd.h>
+#define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index a450ad573dcb..a4b0007a54e1 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -60,9 +60,6 @@ endif
 ifeq ($(avx2_supported),yes)
 	obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
 	obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
-	obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/
-	obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/
-	obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/
 
 	obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
 endif
@@ -106,7 +103,7 @@ ifeq ($(avx2_supported),yes)
 	morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
 endif
 
-aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
 aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index acbe7e8336d8..661f7daf43da 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -102,9 +102,6 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
 asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
 
-int crypto_fpu_init(void);
-void crypto_fpu_exit(void);
-
 #define AVX_GEN2_OPTSIZE 640
 #define AVX_GEN4_OPTSIZE 4096
 
@@ -817,7 +814,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 	/* Linearize assoc, if not already linear */
 	if (req->src->length >= assoclen && req->src->length &&
 		(!PageHighMem(sg_page(req->src)) ||
-			req->src->offset + req->src->length < PAGE_SIZE)) {
+			req->src->offset + req->src->length <= PAGE_SIZE)) {
 		scatterwalk_start(&assoc_sg_walk, req->src);
 		assoc = scatterwalk_map(&assoc_sg_walk);
 	} else {
@@ -1253,22 +1250,6 @@ static struct skcipher_alg aesni_skciphers[] = {
 static
 struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
 
-static struct {
-	const char *algname;
-	const char *drvname;
-	const char *basename;
-	struct simd_skcipher_alg *simd;
-} aesni_simd_skciphers2[] = {
-#if (defined(MODULE) && IS_ENABLED(CONFIG_CRYPTO_PCBC)) || \
-    IS_BUILTIN(CONFIG_CRYPTO_PCBC)
-	{
-		.algname	= "pcbc(aes)",
-		.drvname	= "pcbc-aes-aesni",
-		.basename	= "fpu(pcbc(__aes-aesni))",
-	},
-#endif
-};
-
 #ifdef CONFIG_X86_64
 static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
 				  unsigned int key_len)
@@ -1422,10 +1403,6 @@ static void aesni_free_simds(void)
 	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) &&
 		    aesni_simd_skciphers[i]; i++)
 		simd_skcipher_free(aesni_simd_skciphers[i]);
-
-	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++)
-		if (aesni_simd_skciphers2[i].simd)
-			simd_skcipher_free(aesni_simd_skciphers2[i].simd);
 }
 
 static int __init aesni_init(void)
@@ -1469,13 +1446,9 @@ static int __init aesni_init(void)
 #endif
 #endif
 
-	err = crypto_fpu_init();
-	if (err)
-		return err;
-
 	err = crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
 	if (err)
-		goto fpu_exit;
+		return err;
 
 	err = crypto_register_skciphers(aesni_skciphers,
 					ARRAY_SIZE(aesni_skciphers));
@@ -1499,18 +1472,6 @@ static int __init aesni_init(void)
 		aesni_simd_skciphers[i] = simd;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) {
-		algname = aesni_simd_skciphers2[i].algname;
-		drvname = aesni_simd_skciphers2[i].drvname;
-		basename = aesni_simd_skciphers2[i].basename;
-		simd = simd_skcipher_create_compat(algname, drvname, basename);
-		err = PTR_ERR(simd);
-		if (IS_ERR(simd))
-			continue;
-
-		aesni_simd_skciphers2[i].simd = simd;
-	}
-
 	return 0;
 
 unregister_simds:
@@ -1521,8 +1482,6 @@ unregister_skciphers:
 				    ARRAY_SIZE(aesni_skciphers));
 unregister_algs:
 	crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
-fpu_exit:
-	crypto_fpu_exit();
 	return err;
 }
 
@@ -1533,8 +1492,6 @@ static void __exit aesni_exit(void)
 	crypto_unregister_skciphers(aesni_skciphers,
 				    ARRAY_SIZE(aesni_skciphers));
 	crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
-
-	crypto_fpu_exit();
 }
 
 late_initcall(aesni_init);
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
deleted file mode 100644
index 406680476c52..000000000000
--- a/arch/x86/crypto/fpu.c
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * FPU: Wrapper for blkcipher touching fpu
- *
- * Copyright (c) Intel Corp.
- *   Author: Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/internal/skcipher.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <asm/fpu/api.h>
-
-struct crypto_fpu_ctx {
-	struct crypto_skcipher *child;
-};
-
-static int crypto_fpu_setkey(struct crypto_skcipher *parent, const u8 *key,
-			     unsigned int keylen)
-{
-	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(parent);
-	struct crypto_skcipher *child = ctx->child;
-	int err;
-
-	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
-					 CRYPTO_TFM_REQ_MASK);
-	err = crypto_skcipher_setkey(child, key, keylen);
-	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
-					  CRYPTO_TFM_RES_MASK);
-	return err;
-}
-
-static int crypto_fpu_encrypt(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct crypto_skcipher *child = ctx->child;
-	SKCIPHER_REQUEST_ON_STACK(subreq, child);
-	int err;
-
-	skcipher_request_set_tfm(subreq, child);
-	skcipher_request_set_callback(subreq, 0, NULL, NULL);
-	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
-				   req->iv);
-
-	kernel_fpu_begin();
-	err = crypto_skcipher_encrypt(subreq);
-	kernel_fpu_end();
-
-	skcipher_request_zero(subreq);
-	return err;
-}
-
-static int crypto_fpu_decrypt(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct crypto_skcipher *child = ctx->child;
-	SKCIPHER_REQUEST_ON_STACK(subreq, child);
-	int err;
-
-	skcipher_request_set_tfm(subreq, child);
-	skcipher_request_set_callback(subreq, 0, NULL, NULL);
-	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
-				   req->iv);
-
-	kernel_fpu_begin();
-	err = crypto_skcipher_decrypt(subreq);
-	kernel_fpu_end();
-
-	skcipher_request_zero(subreq);
-	return err;
-}
-
-static int crypto_fpu_init_tfm(struct crypto_skcipher *tfm)
-{
-	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
-	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct crypto_skcipher_spawn *spawn;
-	struct crypto_skcipher *cipher;
-
-	spawn = skcipher_instance_ctx(inst);
-	cipher = crypto_spawn_skcipher(spawn);
-	if (IS_ERR(cipher))
-		return PTR_ERR(cipher);
-
-	ctx->child = cipher;
-
-	return 0;
-}
-
-static void crypto_fpu_exit_tfm(struct crypto_skcipher *tfm)
-{
-	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
-
-	crypto_free_skcipher(ctx->child);
-}
-
-static void crypto_fpu_free(struct skcipher_instance *inst)
-{
-	crypto_drop_skcipher(skcipher_instance_ctx(inst));
-	kfree(inst);
-}
-
-static int crypto_fpu_create(struct crypto_template *tmpl, struct rtattr **tb)
-{
-	struct crypto_skcipher_spawn *spawn;
-	struct skcipher_instance *inst;
-	struct crypto_attr_type *algt;
-	struct skcipher_alg *alg;
-	const char *cipher_name;
-	int err;
-
-	algt = crypto_get_attr_type(tb);
-	if (IS_ERR(algt))
-		return PTR_ERR(algt);
-
-	if ((algt->type ^ (CRYPTO_ALG_INTERNAL | CRYPTO_ALG_TYPE_SKCIPHER)) &
-	    algt->mask)
-		return -EINVAL;
-
-	if (!(algt->mask & CRYPTO_ALG_INTERNAL))
-		return -EINVAL;
-
-	cipher_name = crypto_attr_alg_name(tb[1]);
-	if (IS_ERR(cipher_name))
-		return PTR_ERR(cipher_name);
-
-	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
-	if (!inst)
-		return -ENOMEM;
-
-	spawn = skcipher_instance_ctx(inst);
-
-	crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
-	err = crypto_grab_skcipher(spawn, cipher_name, CRYPTO_ALG_INTERNAL,
-				   CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC);
-	if (err)
-		goto out_free_inst;
-
-	alg = crypto_skcipher_spawn_alg(spawn);
-
-	err = crypto_inst_setname(skcipher_crypto_instance(inst), "fpu",
-				  &alg->base);
-	if (err)
-		goto out_drop_skcipher;
-
-	inst->alg.base.cra_flags = CRYPTO_ALG_INTERNAL;
-	inst->alg.base.cra_priority = alg->base.cra_priority;
-	inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
-	inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
-
-	inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg);
-	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
-	inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg);
-
-	inst->alg.base.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
-
-	inst->alg.init = crypto_fpu_init_tfm;
-	inst->alg.exit = crypto_fpu_exit_tfm;
-
-	inst->alg.setkey = crypto_fpu_setkey;
-	inst->alg.encrypt = crypto_fpu_encrypt;
-	inst->alg.decrypt = crypto_fpu_decrypt;
-
-	inst->free = crypto_fpu_free;
-
-	err = skcipher_register_instance(tmpl, inst);
-	if (err)
-		goto out_drop_skcipher;
-
-out:
-	return err;
-
-out_drop_skcipher:
-	crypto_drop_skcipher(spawn);
-out_free_inst:
-	kfree(inst);
-	goto out;
-}
-
-static struct crypto_template crypto_fpu_tmpl = {
-	.name = "fpu",
-	.create = crypto_fpu_create,
-	.module = THIS_MODULE,
-};
-
-int __init crypto_fpu_init(void)
-{
-	return crypto_register_template(&crypto_fpu_tmpl);
-}
-
-void crypto_fpu_exit(void)
-{
-	crypto_unregister_template(&crypto_fpu_tmpl);
-}
-
-MODULE_ALIAS_CRYPTO("fpu");
diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile
deleted file mode 100644
index 815ded3ba90e..000000000000
--- a/arch/x86/crypto/sha1-mb/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Arch-specific CryptoAPI modules.
-#
-
-OBJECT_FILES_NON_STANDARD := y
-
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
-                                $(comma)4)$(comma)%ymm2,yes,no)
-ifeq ($(avx2_supported),yes)
-	obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o
-	sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \
-	     sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o
-endif
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c
deleted file mode 100644
index b93805664c1d..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb.c
+++ /dev/null
@@ -1,1011 +0,0 @@
-/*
- * Multi buffer SHA1 algorithm Glue Code
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <asm/byteorder.h>
-#include <linux/hardirq.h>
-#include <asm/fpu/api.h>
-#include "sha1_mb_ctx.h"
-
-#define FLUSH_INTERVAL 1000 /* in usec */
-
-static struct mcryptd_alg_state sha1_mb_alg_state;
-
-struct sha1_mb_ctx {
-	struct mcryptd_ahash *mcryptd_tfm;
-};
-
-static inline struct mcryptd_hash_request_ctx
-		*cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx)
-{
-	struct ahash_request *areq;
-
-	areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
-	return container_of(areq, struct mcryptd_hash_request_ctx, areq);
-}
-
-static inline struct ahash_request
-		*cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
-{
-	return container_of((void *) ctx, struct ahash_request, __ctx);
-}
-
-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
-				struct ahash_request *areq)
-{
-	rctx->flag = HASH_UPDATE;
-}
-
-static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_submit)
-			(struct sha1_mb_mgr *state, struct job_sha1 *job);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)
-						(struct sha1_mb_mgr *state);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)
-						(struct sha1_mb_mgr *state);
-
-static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
-			 uint64_t total_len)
-{
-	uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1);
-
-	memset(&padblock[i], 0, SHA1_BLOCK_SIZE);
-	padblock[i] = 0x80;
-
-	i += ((SHA1_BLOCK_SIZE - 1) &
-	      (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1)))
-	     + 1 + SHA1_PADLENGTHFIELD_SIZE;
-
-#if SHA1_PADLENGTHFIELD_SIZE == 16
-	*((uint64_t *) &padblock[i - 16]) = 0;
-#endif
-
-	*((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
-
-	/* Number of extra blocks to hash */
-	return i >> SHA1_LOG2_BLOCK_SIZE;
-}
-
-static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr,
-						struct sha1_hash_ctx *ctx)
-{
-	while (ctx) {
-		if (ctx->status & HASH_CTX_STS_COMPLETE) {
-			/* Clear PROCESSING bit */
-			ctx->status = HASH_CTX_STS_COMPLETE;
-			return ctx;
-		}
-
-		/*
-		 * If the extra blocks are empty, begin hashing what remains
-		 * in the user's buffer.
-		 */
-		if (ctx->partial_block_buffer_length == 0 &&
-		    ctx->incoming_buffer_length) {
-
-			const void *buffer = ctx->incoming_buffer;
-			uint32_t len = ctx->incoming_buffer_length;
-			uint32_t copy_len;
-
-			/*
-			 * Only entire blocks can be hashed.
-			 * Copy remainder to extra blocks buffer.
-			 */
-			copy_len = len & (SHA1_BLOCK_SIZE-1);
-
-			if (copy_len) {
-				len -= copy_len;
-				memcpy(ctx->partial_block_buffer,
-				       ((const char *) buffer + len),
-				       copy_len);
-				ctx->partial_block_buffer_length = copy_len;
-			}
-
-			ctx->incoming_buffer_length = 0;
-
-			/* len should be a multiple of the block size now */
-			assert((len % SHA1_BLOCK_SIZE) == 0);
-
-			/* Set len to the number of blocks to be hashed */
-			len >>= SHA1_LOG2_BLOCK_SIZE;
-
-			if (len) {
-
-				ctx->job.buffer = (uint8_t *) buffer;
-				ctx->job.len = len;
-				ctx = (struct sha1_hash_ctx *)sha1_job_mgr_submit(&mgr->mgr,
-										&ctx->job);
-				continue;
-			}
-		}
-
-		/*
-		 * If the extra blocks are not empty, then we are
-		 * either on the last block(s) or we need more
-		 * user input before continuing.
-		 */
-		if (ctx->status & HASH_CTX_STS_LAST) {
-
-			uint8_t *buf = ctx->partial_block_buffer;
-			uint32_t n_extra_blocks =
-					sha1_pad(buf, ctx->total_length);
-
-			ctx->status = (HASH_CTX_STS_PROCESSING |
-				       HASH_CTX_STS_COMPLETE);
-			ctx->job.buffer = buf;
-			ctx->job.len = (uint32_t) n_extra_blocks;
-			ctx = (struct sha1_hash_ctx *)
-				sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
-			continue;
-		}
-
-		ctx->status = HASH_CTX_STS_IDLE;
-		return ctx;
-	}
-
-	return NULL;
-}
-
-static struct sha1_hash_ctx
-			*sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr)
-{
-	/*
-	 * If get_comp_job returns NULL, there are no jobs complete.
-	 * If get_comp_job returns a job, verify that it is safe to return to
-	 * the user.
-	 * If it is not ready, resubmit the job to finish processing.
-	 * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned.
-	 * Otherwise, all jobs currently being managed by the hash_ctx_mgr
-	 * still need processing.
-	 */
-	struct sha1_hash_ctx *ctx;
-
-	ctx = (struct sha1_hash_ctx *) sha1_job_mgr_get_comp_job(&mgr->mgr);
-	return sha1_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static void sha1_ctx_mgr_init(struct sha1_ctx_mgr *mgr)
-{
-	sha1_job_mgr_init(&mgr->mgr);
-}
-
-static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
-					  struct sha1_hash_ctx *ctx,
-					  const void *buffer,
-					  uint32_t len,
-					  int flags)
-{
-	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
-		/* User should not pass anything other than UPDATE or LAST */
-		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
-		return ctx;
-	}
-
-	if (ctx->status & HASH_CTX_STS_PROCESSING) {
-		/* Cannot submit to a currently processing job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
-		return ctx;
-	}
-
-	if (ctx->status & HASH_CTX_STS_COMPLETE) {
-		/* Cannot update a finished job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
-		return ctx;
-	}
-
-	/*
-	 * If we made it here, there were no errors during this call to
-	 * submit
-	 */
-	ctx->error = HASH_CTX_ERROR_NONE;
-
-	/* Store buffer ptr info from user */
-	ctx->incoming_buffer = buffer;
-	ctx->incoming_buffer_length = len;
-
-	/*
-	 * Store the user's request flags and mark this ctx as currently
-	 * being processed.
-	 */
-	ctx->status = (flags & HASH_LAST) ?
-			(HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
-			HASH_CTX_STS_PROCESSING;
-
-	/* Advance byte counter */
-	ctx->total_length += len;
-
-	/*
-	 * If there is anything currently buffered in the extra blocks,
-	 * append to it until it contains a whole block.
-	 * Or if the user's buffer contains less than a whole block,
-	 * append as much as possible to the extra block.
-	 */
-	if (ctx->partial_block_buffer_length || len < SHA1_BLOCK_SIZE) {
-		/*
-		 * Compute how many bytes to copy from user buffer into
-		 * extra block
-		 */
-		uint32_t copy_len = SHA1_BLOCK_SIZE -
-					ctx->partial_block_buffer_length;
-		if (len < copy_len)
-			copy_len = len;
-
-		if (copy_len) {
-			/* Copy and update relevant pointers and counters */
-			memcpy(&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
-				buffer, copy_len);
-
-			ctx->partial_block_buffer_length += copy_len;
-			ctx->incoming_buffer = (const void *)
-					((const char *)buffer + copy_len);
-			ctx->incoming_buffer_length = len - copy_len;
-		}
-
-		/*
-		 * The extra block should never contain more than 1 block
-		 * here
-		 */
-		assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE);
-
-		/*
-		 * If the extra block buffer contains exactly 1 block, it can
-		 * be hashed.
-		 */
-		if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) {
-			ctx->partial_block_buffer_length = 0;
-
-			ctx->job.buffer = ctx->partial_block_buffer;
-			ctx->job.len = 1;
-			ctx = (struct sha1_hash_ctx *)
-				sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
-		}
-	}
-
-	return sha1_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr)
-{
-	struct sha1_hash_ctx *ctx;
-
-	while (1) {
-		ctx = (struct sha1_hash_ctx *) sha1_job_mgr_flush(&mgr->mgr);
-
-		/* If flush returned 0, there are no more jobs in flight. */
-		if (!ctx)
-			return NULL;
-
-		/*
-		 * If flush returned a job, resubmit the job to finish
-		 * processing.
-		 */
-		ctx = sha1_ctx_mgr_resubmit(mgr, ctx);
-
-		/*
-		 * If sha1_ctx_mgr_resubmit returned a job, it is ready to be
-		 * returned. Otherwise, all jobs currently being managed by the
-		 * sha1_ctx_mgr still need processing. Loop.
-		 */
-		if (ctx)
-			return ctx;
-	}
-}
-
-static int sha1_mb_init(struct ahash_request *areq)
-{
-	struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	hash_ctx_init(sctx);
-	sctx->job.result_digest[0] = SHA1_H0;
-	sctx->job.result_digest[1] = SHA1_H1;
-	sctx->job.result_digest[2] = SHA1_H2;
-	sctx->job.result_digest[3] = SHA1_H3;
-	sctx->job.result_digest[4] = SHA1_H4;
-	sctx->total_length = 0;
-	sctx->partial_block_buffer_length = 0;
-	sctx->status = HASH_CTX_STS_IDLE;
-
-	return 0;
-}
-
-static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
-{
-	int	i;
-	struct	sha1_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
-	__be32	*dst = (__be32 *) rctx->out;
-
-	for (i = 0; i < 5; ++i)
-		dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
-
-	return 0;
-}
-
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
-			struct mcryptd_alg_cstate *cstate, bool flush)
-{
-	int	flag = HASH_UPDATE;
-	int	nbytes, err = 0;
-	struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
-	struct sha1_hash_ctx *sha_ctx;
-
-	/* more work ? */
-	while (!(rctx->flag & HASH_DONE)) {
-		nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
-		if (nbytes < 0) {
-			err = nbytes;
-			goto out;
-		}
-		/* check if the walk is done */
-		if (crypto_ahash_walk_last(&rctx->walk)) {
-			rctx->flag |= HASH_DONE;
-			if (rctx->flag & HASH_FINAL)
-				flag |= HASH_LAST;
-
-		}
-		sha_ctx = (struct sha1_hash_ctx *)
-						ahash_request_ctx(&rctx->areq);
-		kernel_fpu_begin();
-		sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx,
-						rctx->walk.data, nbytes, flag);
-		if (!sha_ctx) {
-			if (flush)
-				sha_ctx = sha1_ctx_mgr_flush(cstate->mgr);
-		}
-		kernel_fpu_end();
-		if (sha_ctx)
-			rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		else {
-			rctx = NULL;
-			goto out;
-		}
-	}
-
-	/* copy the results */
-	if (rctx->flag & HASH_FINAL)
-		sha1_mb_set_results(rctx);
-
-out:
-	*ret_rctx = rctx;
-	return err;
-}
-
-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
-			    struct mcryptd_alg_cstate *cstate,
-			    int err)
-{
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha1_hash_ctx *sha_ctx;
-	struct mcryptd_hash_request_ctx *req_ctx;
-	int ret;
-
-	/* remove from work list */
-	spin_lock(&cstate->work_lock);
-	list_del(&rctx->waiter);
-	spin_unlock(&cstate->work_lock);
-
-	if (irqs_disabled())
-		rctx->complete(&req->base, err);
-	else {
-		local_bh_disable();
-		rctx->complete(&req->base, err);
-		local_bh_enable();
-	}
-
-	/* check to see if there are other jobs that are done */
-	sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
-	while (sha_ctx) {
-		req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		ret = sha_finish_walk(&req_ctx, cstate, false);
-		if (req_ctx) {
-			spin_lock(&cstate->work_lock);
-			list_del(&req_ctx->waiter);
-			spin_unlock(&cstate->work_lock);
-
-			req = cast_mcryptd_ctx_to_req(req_ctx);
-			if (irqs_disabled())
-				req_ctx->complete(&req->base, ret);
-			else {
-				local_bh_disable();
-				req_ctx->complete(&req->base, ret);
-				local_bh_enable();
-			}
-		}
-		sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
-	}
-
-	return 0;
-}
-
-static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
-			     struct mcryptd_alg_cstate *cstate)
-{
-	unsigned long next_flush;
-	unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
-
-	/* initialize tag */
-	rctx->tag.arrival = jiffies;    /* tag the arrival time */
-	rctx->tag.seq_num = cstate->next_seq_num++;
-	next_flush = rctx->tag.arrival + delay;
-	rctx->tag.expire = next_flush;
-
-	spin_lock(&cstate->work_lock);
-	list_add_tail(&rctx->waiter, &cstate->work_list);
-	spin_unlock(&cstate->work_lock);
-
-	mcryptd_arm_flusher(cstate, delay);
-}
-
-static int sha1_mb_update(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha1_hash_ctx *sha_ctx;
-	int ret = 0, nbytes;
-
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk))
-		rctx->flag |= HASH_DONE;
-
-	/* submit */
-	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
-	sha1_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
-							nbytes, HASH_UPDATE);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha1_mb_finup(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha1_hash_ctx *sha_ctx;
-	int ret = 0, flag = HASH_UPDATE, nbytes;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk)) {
-		rctx->flag |= HASH_DONE;
-		flag = HASH_LAST;
-	}
-
-	/* submit */
-	rctx->flag |= HASH_FINAL;
-	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
-	sha1_mb_add_list(rctx, cstate);
-
-	kernel_fpu_begin();
-	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
-								nbytes, flag);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha1_mb_final(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
-
-	struct sha1_hash_ctx *sha_ctx;
-	int ret = 0;
-	u8 data;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	rctx->flag |= HASH_DONE | HASH_FINAL;
-
-	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
-	/* flag HASH_FINAL and 0 data size */
-	sha1_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
-								HASH_LAST);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha1_mb_export(struct ahash_request *areq, void *out)
-{
-	struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(out, sctx, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha1_mb_import(struct ahash_request *areq, const void *in)
-{
-	struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
-	struct mcryptd_ahash *mcryptd_tfm;
-	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct mcryptd_hash_ctx *mctx;
-
-	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
-						CRYPTO_ALG_INTERNAL,
-						CRYPTO_ALG_INTERNAL);
-	if (IS_ERR(mcryptd_tfm))
-		return PTR_ERR(mcryptd_tfm);
-	mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
-	mctx->alg_state = &sha1_mb_alg_state;
-	ctx->mcryptd_tfm = mcryptd_tfm;
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				sizeof(struct ahash_request) +
-				crypto_ahash_reqsize(&mcryptd_tfm->base));
-
-	return 0;
-}
-
-static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static int sha1_mb_areq_init_tfm(struct crypto_tfm *tfm)
-{
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				sizeof(struct ahash_request) +
-				sizeof(struct sha1_hash_ctx));
-
-	return 0;
-}
-
-static void sha1_mb_areq_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static struct ahash_alg sha1_mb_areq_alg = {
-	.init		=	sha1_mb_init,
-	.update		=	sha1_mb_update,
-	.final		=	sha1_mb_final,
-	.finup		=	sha1_mb_finup,
-	.export		=	sha1_mb_export,
-	.import		=	sha1_mb_import,
-	.halg		=	{
-		.digestsize	=	SHA1_DIGEST_SIZE,
-		.statesize	=	sizeof(struct sha1_hash_ctx),
-		.base		=	{
-			.cra_name	 = "__sha1-mb",
-			.cra_driver_name = "__intel_sha1-mb",
-			.cra_priority	 = 100,
-			/*
-			 * use ASYNC flag as some buffers in multi-buffer
-			 * algo may not have completed before hashing thread
-			 * sleep
-			 */
-			.cra_flags	= CRYPTO_ALG_ASYNC |
-					  CRYPTO_ALG_INTERNAL,
-			.cra_blocksize	= SHA1_BLOCK_SIZE,
-			.cra_module	= THIS_MODULE,
-			.cra_list	= LIST_HEAD_INIT
-					(sha1_mb_areq_alg.halg.base.cra_list),
-			.cra_init	= sha1_mb_areq_init_tfm,
-			.cra_exit	= sha1_mb_areq_exit_tfm,
-			.cra_ctxsize	= sizeof(struct sha1_hash_ctx),
-		}
-	}
-};
-
-static int sha1_mb_async_init(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_init(mcryptd_req);
-}
-
-static int sha1_mb_async_update(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_update(mcryptd_req);
-}
-
-static int sha1_mb_async_finup(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_finup(mcryptd_req);
-}
-
-static int sha1_mb_async_final(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_final(mcryptd_req);
-}
-
-static int sha1_mb_async_digest(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_digest(mcryptd_req);
-}
-
-static int sha1_mb_async_export(struct ahash_request *req, void *out)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_export(mcryptd_req, out);
-}
-
-static int sha1_mb_async_import(struct ahash_request *req, const void *in)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-	struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
-	struct mcryptd_hash_request_ctx *rctx;
-	struct ahash_request *areq;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	rctx = ahash_request_ctx(mcryptd_req);
-	areq = &rctx->areq;
-
-	ahash_request_set_tfm(areq, child);
-	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
-					rctx->complete, req);
-
-	return crypto_ahash_import(mcryptd_req, in);
-}
-
-static struct ahash_alg sha1_mb_async_alg = {
-	.init           = sha1_mb_async_init,
-	.update         = sha1_mb_async_update,
-	.final          = sha1_mb_async_final,
-	.finup          = sha1_mb_async_finup,
-	.digest         = sha1_mb_async_digest,
-	.export		= sha1_mb_async_export,
-	.import		= sha1_mb_async_import,
-	.halg = {
-		.digestsize     = SHA1_DIGEST_SIZE,
-		.statesize	= sizeof(struct sha1_hash_ctx),
-		.base = {
-			.cra_name               = "sha1",
-			.cra_driver_name        = "sha1_mb",
-			/*
-			 * Low priority, since with few concurrent hash requests
-			 * this is extremely slow due to the flush delay.  Users
-			 * whose workloads would benefit from this can request
-			 * it explicitly by driver name, or can increase its
-			 * priority at runtime using NETLINK_CRYPTO.
-			 */
-			.cra_priority           = 50,
-			.cra_flags              = CRYPTO_ALG_ASYNC,
-			.cra_blocksize          = SHA1_BLOCK_SIZE,
-			.cra_module             = THIS_MODULE,
-			.cra_list               = LIST_HEAD_INIT(sha1_mb_async_alg.halg.base.cra_list),
-			.cra_init               = sha1_mb_async_init_tfm,
-			.cra_exit               = sha1_mb_async_exit_tfm,
-			.cra_ctxsize		= sizeof(struct sha1_mb_ctx),
-			.cra_alignmask		= 0,
-		},
-	},
-};
-
-static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate)
-{
-	struct mcryptd_hash_request_ctx *rctx;
-	unsigned long cur_time;
-	unsigned long next_flush = 0;
-	struct sha1_hash_ctx *sha_ctx;
-
-
-	cur_time = jiffies;
-
-	while (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		if (time_before(cur_time, rctx->tag.expire))
-			break;
-		kernel_fpu_begin();
-		sha_ctx = (struct sha1_hash_ctx *)
-					sha1_ctx_mgr_flush(cstate->mgr);
-		kernel_fpu_end();
-		if (!sha_ctx) {
-			pr_err("sha1_mb error: nothing got flushed for non-empty list\n");
-			break;
-		}
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		sha_finish_walk(&rctx, cstate, true);
-		sha_complete_job(rctx, cstate, 0);
-	}
-
-	if (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		/* get the hash context and then flush time */
-		next_flush = rctx->tag.expire;
-		mcryptd_arm_flusher(cstate, get_delay(next_flush));
-	}
-	return next_flush;
-}
-
-static int __init sha1_mb_mod_init(void)
-{
-
-	int cpu;
-	int err;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	/* check for dependent cpu features */
-	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
-	    !boot_cpu_has(X86_FEATURE_BMI2))
-		return -ENODEV;
-
-	/* initialize multibuffer structures */
-	sha1_mb_alg_state.alg_cstate = alloc_percpu(struct mcryptd_alg_cstate);
-
-	sha1_job_mgr_init = sha1_mb_mgr_init_avx2;
-	sha1_job_mgr_submit = sha1_mb_mgr_submit_avx2;
-	sha1_job_mgr_flush = sha1_mb_mgr_flush_avx2;
-	sha1_job_mgr_get_comp_job = sha1_mb_mgr_get_comp_job_avx2;
-
-	if (!sha1_mb_alg_state.alg_cstate)
-		return -ENOMEM;
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
-		cpu_state->next_flush = 0;
-		cpu_state->next_seq_num = 0;
-		cpu_state->flusher_engaged = false;
-		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
-		cpu_state->cpu = cpu;
-		cpu_state->alg_state = &sha1_mb_alg_state;
-		cpu_state->mgr = kzalloc(sizeof(struct sha1_ctx_mgr),
-					GFP_KERNEL);
-		if (!cpu_state->mgr)
-			goto err2;
-		sha1_ctx_mgr_init(cpu_state->mgr);
-		INIT_LIST_HEAD(&cpu_state->work_list);
-		spin_lock_init(&cpu_state->work_lock);
-	}
-	sha1_mb_alg_state.flusher = &sha1_mb_flusher;
-
-	err = crypto_register_ahash(&sha1_mb_areq_alg);
-	if (err)
-		goto err2;
-	err = crypto_register_ahash(&sha1_mb_async_alg);
-	if (err)
-		goto err1;
-
-
-	return 0;
-err1:
-	crypto_unregister_ahash(&sha1_mb_areq_alg);
-err2:
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha1_mb_alg_state.alg_cstate);
-	return -ENODEV;
-}
-
-static void __exit sha1_mb_mod_fini(void)
-{
-	int cpu;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	crypto_unregister_ahash(&sha1_mb_async_alg);
-	crypto_unregister_ahash(&sha1_mb_areq_alg);
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha1_mb_alg_state.alg_cstate);
-}
-
-module_init(sha1_mb_mod_init);
-module_exit(sha1_mb_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer accelerated");
-
-MODULE_ALIAS_CRYPTO("sha1");
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
deleted file mode 100644
index 9454bd16f9f8..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Header file for multi buffer SHA context
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SHA_MB_CTX_INTERNAL_H
-#define _SHA_MB_CTX_INTERNAL_H
-
-#include "sha1_mb_mgr.h"
-
-#define HASH_UPDATE          0x00
-#define HASH_LAST            0x01
-#define HASH_DONE	     0x02
-#define HASH_FINAL	     0x04
-
-#define HASH_CTX_STS_IDLE       0x00
-#define HASH_CTX_STS_PROCESSING 0x01
-#define HASH_CTX_STS_LAST       0x02
-#define HASH_CTX_STS_COMPLETE   0x04
-
-enum hash_ctx_error {
-	HASH_CTX_ERROR_NONE               =  0,
-	HASH_CTX_ERROR_INVALID_FLAGS      = -1,
-	HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
-	HASH_CTX_ERROR_ALREADY_COMPLETED  = -3,
-
-#ifdef HASH_CTX_DEBUG
-	HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
-#endif
-};
-
-
-#define hash_ctx_user_data(ctx)  ((ctx)->user_data)
-#define hash_ctx_digest(ctx)     ((ctx)->job.result_digest)
-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
-#define hash_ctx_complete(ctx)   ((ctx)->status == HASH_CTX_STS_COMPLETE)
-#define hash_ctx_status(ctx)     ((ctx)->status)
-#define hash_ctx_error(ctx)      ((ctx)->error)
-#define hash_ctx_init(ctx) \
-	do { \
-		(ctx)->error = HASH_CTX_ERROR_NONE; \
-		(ctx)->status = HASH_CTX_STS_COMPLETE; \
-	} while (0)
-
-
-/* Hash Constants and Typedefs */
-#define SHA1_DIGEST_LENGTH          5
-#define SHA1_LOG2_BLOCK_SIZE        6
-
-#define SHA1_PADLENGTHFIELD_SIZE    8
-
-#ifdef SHA_MB_DEBUG
-#define assert(expr) \
-do { \
-	if (unlikely(!(expr))) { \
-		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
-		#expr, __FILE__, __func__, __LINE__); \
-	} \
-} while (0)
-#else
-#define assert(expr) do {} while (0)
-#endif
-
-struct sha1_ctx_mgr {
-	struct sha1_mb_mgr mgr;
-};
-
-/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */
-
-struct sha1_hash_ctx {
-	/* Must be at struct offset 0 */
-	struct job_sha1       job;
-	/* status flag */
-	int status;
-	/* error flag */
-	int error;
-
-	uint64_t	total_length;
-	const void	*incoming_buffer;
-	uint32_t	incoming_buffer_length;
-	uint8_t		partial_block_buffer[SHA1_BLOCK_SIZE * 2];
-	uint32_t	partial_block_buffer_length;
-	void		*user_data;
-};
-
-#endif
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h b/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h
deleted file mode 100644
index 08ad1a9acfd7..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Header file for multi buffer SHA1 algorithm manager
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      James Guilford <james.guilford@intel.com>
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef __SHA_MB_MGR_H
-#define __SHA_MB_MGR_H
-
-
-#include <linux/types.h>
-
-#define NUM_SHA1_DIGEST_WORDS 5
-
-enum job_sts {	STS_UNKNOWN = 0,
-		STS_BEING_PROCESSED = 1,
-		STS_COMPLETED = 2,
-		STS_INTERNAL_ERROR = 3,
-		STS_ERROR = 4
-};
-
-struct job_sha1 {
-	u8	*buffer;
-	u32	len;
-	u32	result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32);
-	enum	job_sts status;
-	void	*user_data;
-};
-
-/* SHA1 out-of-order scheduler */
-
-/* typedef uint32_t sha1_digest_array[5][8]; */
-
-struct sha1_args_x8 {
-	uint32_t	digest[5][8];
-	uint8_t		*data_ptr[8];
-};
-
-struct sha1_lane_data {
-	struct job_sha1 *job_in_lane;
-};
-
-struct sha1_mb_mgr {
-	struct sha1_args_x8 args;
-
-	uint32_t lens[8];
-
-	/* each byte is index (0...7) of unused lanes */
-	uint64_t unused_lanes;
-	/* byte 4 is set to FF as a flag */
-	struct sha1_lane_data ldata[8];
-};
-
-
-#define SHA1_MB_MGR_NUM_LANES_AVX2 8
-
-void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state);
-struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state,
-					 struct job_sha1 *job);
-struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state);
-struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state);
-
-#endif
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S
deleted file mode 100644
index 86688c6e7a25..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * Header file for multi buffer SHA1 algorithm data structure
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      James Guilford <james.guilford@intel.com>
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# Macros for defining data structures
-
-# Usage example
-
-#START_FIELDS	# JOB_AES
-###	name		size	align
-#FIELD	_plaintext,	8,	8	# pointer to plaintext
-#FIELD	_ciphertext,	8,	8	# pointer to ciphertext
-#FIELD	_IV,		16,	8	# IV
-#FIELD	_keys,		8,	8	# pointer to keys
-#FIELD	_len,		4,	4	# length in bytes
-#FIELD	_status,	4,	4	# status enumeration
-#FIELD	_user_data,	8,	8	# pointer to user data
-#UNION  _union,         size1,  align1, \
-#	                size2,  align2, \
-#	                size3,  align3, \
-#	                ...
-#END_FIELDS
-#%assign _JOB_AES_size	_FIELD_OFFSET
-#%assign _JOB_AES_align	_STRUCT_ALIGN
-
-#########################################################################
-
-# Alternate "struc-like" syntax:
-#	STRUCT job_aes2
-#	RES_Q	.plaintext,	1
-#	RES_Q	.ciphertext,	1
-#	RES_DQ	.IV,		1
-#	RES_B	.nested,	_JOB_AES_SIZE, _JOB_AES_ALIGN
-#	RES_U	.union,		size1, align1, \
-#				size2, align2, \
-#				...
-#	ENDSTRUCT
-#	# Following only needed if nesting
-#	%assign job_aes2_size	_FIELD_OFFSET
-#	%assign job_aes2_align	_STRUCT_ALIGN
-#
-# RES_* macros take a name, a count and an optional alignment.
-# The count in in terms of the base size of the macro, and the
-# default alignment is the base size.
-# The macros are:
-# Macro    Base size
-# RES_B	    1
-# RES_W	    2
-# RES_D     4
-# RES_Q     8
-# RES_DQ   16
-# RES_Y    32
-# RES_Z    64
-#
-# RES_U defines a union. It's arguments are a name and two or more
-# pairs of "size, alignment"
-#
-# The two assigns are only needed if this structure is being nested
-# within another. Even if the assigns are not done, one can still use
-# STRUCT_NAME_size as the size of the structure.
-#
-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
-#
-# The differences between this and using "struc" directly are that each
-# type is implicitly aligned to its natural length (although this can be
-# over-ridden with an explicit third parameter), and that the structure
-# is padded at the end to its overall alignment.
-#
-
-#########################################################################
-
-#ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_
-#define _SHA1_MB_MGR_DATASTRUCT_ASM_
-
-## START_FIELDS
-.macro START_FIELDS
- _FIELD_OFFSET = 0
- _STRUCT_ALIGN = 0
-.endm
-
-## FIELD name size align
-.macro FIELD name size align
- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
- \name	= _FIELD_OFFSET
- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
-.if (\align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = \align
-.endif
-.endm
-
-## END_FIELDS
-.macro END_FIELDS
- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
-.endm
-
-########################################################################
-
-.macro STRUCT p1
-START_FIELDS
-.struc \p1
-.endm
-
-.macro ENDSTRUCT
- tmp = _FIELD_OFFSET
- END_FIELDS
- tmp = (_FIELD_OFFSET - %%tmp)
-.if (tmp > 0)
-	.lcomm	tmp
-.endif
-.endstruc
-.endm
-
-## RES_int name size align
-.macro RES_int p1 p2 p3
- name = \p1
- size = \p2
- align = .\p3
-
- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
-.align align
-.lcomm name size
- _FIELD_OFFSET = _FIELD_OFFSET + (size)
-.if (align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = align
-.endif
-.endm
-
-
-
-# macro RES_B name, size [, align]
-.macro RES_B _name, _size, _align=1
-RES_int _name _size _align
-.endm
-
-# macro RES_W name, size [, align]
-.macro RES_W _name, _size, _align=2
-RES_int _name 2*(_size) _align
-.endm
-
-# macro RES_D name, size [, align]
-.macro RES_D _name, _size, _align=4
-RES_int _name 4*(_size) _align
-.endm
-
-# macro RES_Q name, size [, align]
-.macro RES_Q _name, _size, _align=8
-RES_int _name 8*(_size) _align
-.endm
-
-# macro RES_DQ name, size [, align]
-.macro RES_DQ _name, _size, _align=16
-RES_int _name 16*(_size) _align
-.endm
-
-# macro RES_Y name, size [, align]
-.macro RES_Y _name, _size, _align=32
-RES_int _name 32*(_size) _align
-.endm
-
-# macro RES_Z name, size [, align]
-.macro RES_Z _name, _size, _align=64
-RES_int _name 64*(_size) _align
-.endm
-
-
-#endif
-
-########################################################################
-#### Define constants
-########################################################################
-
-########################################################################
-#### Define SHA1 Out Of Order Data Structures
-########################################################################
-
-START_FIELDS    # LANE_DATA
-###     name            size    align
-FIELD   _job_in_lane,   8,      8       # pointer to job object
-END_FIELDS
-
-_LANE_DATA_size = _FIELD_OFFSET
-_LANE_DATA_align = _STRUCT_ALIGN
-
-########################################################################
-
-START_FIELDS    # SHA1_ARGS_X8
-###     name            size    align
-FIELD   _digest,        4*5*8,  16      # transposed digest
-FIELD   _data_ptr,      8*8,    8       # array of pointers to data
-END_FIELDS
-
-_SHA1_ARGS_X4_size =     _FIELD_OFFSET
-_SHA1_ARGS_X4_align =    _STRUCT_ALIGN
-_SHA1_ARGS_X8_size =     _FIELD_OFFSET
-_SHA1_ARGS_X8_align =    _STRUCT_ALIGN
-
-########################################################################
-
-START_FIELDS    # MB_MGR
-###     name            size    align
-FIELD   _args,          _SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align
-FIELD   _lens,          4*8,    8
-FIELD   _unused_lanes,  8,      8
-FIELD   _ldata,         _LANE_DATA_size*8, _LANE_DATA_align
-END_FIELDS
-
-_MB_MGR_size =   _FIELD_OFFSET
-_MB_MGR_align =  _STRUCT_ALIGN
-
-_args_digest    =     _args + _digest
-_args_data_ptr  =     _args + _data_ptr
-
-
-########################################################################
-#### Define constants
-########################################################################
-
-#define STS_UNKNOWN             0
-#define STS_BEING_PROCESSED     1
-#define STS_COMPLETED           2
-
-########################################################################
-#### Define JOB_SHA1 structure
-########################################################################
-
-START_FIELDS    # JOB_SHA1
-
-###     name                            size    align
-FIELD   _buffer,                        8,      8       # pointer to buffer
-FIELD   _len,                           4,      4       # length in bytes
-FIELD   _result_digest,                 5*4,    32      # Digest (output)
-FIELD   _status,                        4,      4
-FIELD   _user_data,                     8,      8
-END_FIELDS
-
-_JOB_SHA1_size =  _FIELD_OFFSET
-_JOB_SHA1_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
deleted file mode 100644
index 7cfba738f104..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Flush routine for SHA1 multibuffer
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      James Guilford <james.guilford@intel.com>
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha1_mb_mgr_datastruct.S"
-
-
-.extern sha1_x8_avx2
-
-# LINUX register definitions
-#define arg1    %rdi
-#define arg2    %rsi
-
-# Common definitions
-#define state   arg1
-#define job     arg2
-#define len2    arg2
-
-# idx must be a register not clobbered by sha1_x8_avx2
-#define idx		%r8
-#define DWORD_idx	%r8d
-
-#define unused_lanes    %rbx
-#define lane_data       %rbx
-#define tmp2            %rbx
-#define tmp2_w		%ebx
-
-#define job_rax         %rax
-#define tmp1            %rax
-#define size_offset     %rax
-#define tmp             %rax
-#define start_offset    %rax
-
-#define tmp3            %arg1
-
-#define extra_blocks    %arg2
-#define p               %arg2
-
-.macro LABEL prefix n
-\prefix\n\():
-.endm
-
-.macro JNE_SKIP i
-jne     skip_\i
-.endm
-
-.altmacro
-.macro SET_OFFSET _offset
-offset = \_offset
-.endm
-.noaltmacro
-
-# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state)
-# arg 1 : rcx : state
-ENTRY(sha1_mb_mgr_flush_avx2)
-	FRAME_BEGIN
-	push	%rbx
-
-	# If bit (32+3) is set, then all lanes are empty
-	mov     _unused_lanes(state), unused_lanes
-	bt      $32+3, unused_lanes
-	jc      return_null
-
-	# find a lane with a non-null job
-	xor     idx, idx
-	offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-	cmovne  one(%rip), idx
-	offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-	cmovne  two(%rip), idx
-	offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-	cmovne  three(%rip), idx
-	offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-	cmovne  four(%rip), idx
-	offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-	cmovne  five(%rip), idx
-	offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-	cmovne  six(%rip), idx
-	offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-	cmovne  seven(%rip), idx
-
-	# copy idx to empty lanes
-copy_lane_data:
-	offset =  (_args + _data_ptr)
-	mov     offset(state,idx,8), tmp
-
-	I = 0
-.rep 8
-	offset =  (_ldata + I * _LANE_DATA_size + _job_in_lane)
-	cmpq    $0, offset(state)
-.altmacro
-	JNE_SKIP %I
-	offset =  (_args + _data_ptr + 8*I)
-	mov     tmp, offset(state)
-	offset =  (_lens + 4*I)
-	movl    $0xFFFFFFFF, offset(state)
-LABEL skip_ %I
-	I = (I+1)
-.noaltmacro
-.endr
-
-	# Find min length
-	vmovdqu _lens+0*16(state), %xmm0
-	vmovdqu _lens+1*16(state), %xmm1
-
-	vpminud %xmm1, %xmm0, %xmm2     # xmm2 has {D,C,B,A}
-	vpalignr $8, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,D,C}
-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has {x,x,E,F}
-	vpalignr $4, %xmm2, %xmm3, %xmm3    # xmm3 has {x,x,x,E}
-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has min value in low dword
-
-	vmovd   %xmm2, DWORD_idx
-	mov	idx, len2
-	and	$0xF, idx
-	shr	$4, len2
-	jz	len_is_0
-
-	vpand   clear_low_nibble(%rip), %xmm2, %xmm2
-	vpshufd $0, %xmm2, %xmm2
-
-	vpsubd  %xmm2, %xmm0, %xmm0
-	vpsubd  %xmm2, %xmm1, %xmm1
-
-	vmovdqu %xmm0, _lens+0*16(state)
-	vmovdqu %xmm1, _lens+1*16(state)
-
-	# "state" and "args" are the same address, arg1
-	# len is arg2
-	call	sha1_x8_avx2
-	# state and idx are intact
-
-
-len_is_0:
-	# process completed job "idx"
-	imul    $_LANE_DATA_size, idx, lane_data
-	lea     _ldata(state, lane_data), lane_data
-
-	mov     _job_in_lane(lane_data), job_rax
-	movq    $0, _job_in_lane(lane_data)
-	movl    $STS_COMPLETED, _status(job_rax)
-	mov     _unused_lanes(state), unused_lanes
-	shl     $4, unused_lanes
-	or      idx, unused_lanes
-	mov     unused_lanes, _unused_lanes(state)
-
-	movl	$0xFFFFFFFF, _lens(state, idx, 4)
-
-	vmovd    _args_digest(state , idx, 4) , %xmm0
-	vpinsrd  $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd  $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd  $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
-	movl    _args_digest+4*32(state, idx, 4), tmp2_w
-
-	vmovdqu  %xmm0, _result_digest(job_rax)
-	offset =  (_result_digest + 1*16)
-	mov     tmp2_w, offset(job_rax)
-
-return:
-	pop	%rbx
-	FRAME_END
-	ret
-
-return_null:
-	xor     job_rax, job_rax
-	jmp     return
-ENDPROC(sha1_mb_mgr_flush_avx2)
-
-
-#################################################################
-
-.align 16
-ENTRY(sha1_mb_mgr_get_comp_job_avx2)
-	push    %rbx
-
-	## if bit 32+3 is set, then all lanes are empty
-	mov     _unused_lanes(state), unused_lanes
-	bt      $(32+3), unused_lanes
-	jc      .return_null
-
-	# Find min length
-	vmovdqu _lens(state), %xmm0
-	vmovdqu _lens+1*16(state), %xmm1
-
-	vpminud %xmm1, %xmm0, %xmm2        # xmm2 has {D,C,B,A}
-	vpalignr $8, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,D,C}
-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has {x,x,E,F}
-	vpalignr $4, %xmm2, %xmm3, %xmm3    # xmm3 has {x,x,x,E}
-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has min value in low dword
-
-	vmovd   %xmm2, DWORD_idx
-	test    $~0xF, idx
-	jnz     .return_null
-
-	# process completed job "idx"
-	imul    $_LANE_DATA_size, idx, lane_data
-	lea     _ldata(state, lane_data), lane_data
-
-	mov     _job_in_lane(lane_data), job_rax
-	movq    $0,  _job_in_lane(lane_data)
-	movl    $STS_COMPLETED, _status(job_rax)
-	mov     _unused_lanes(state), unused_lanes
-	shl     $4, unused_lanes
-	or      idx, unused_lanes
-	mov     unused_lanes, _unused_lanes(state)
-
-	movl    $0xFFFFFFFF, _lens(state,  idx, 4)
-
-	vmovd   _args_digest(state, idx, 4), %xmm0
-	vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
-	movl    _args_digest+4*32(state, idx, 4), tmp2_w
-
-	vmovdqu %xmm0, _result_digest(job_rax)
-	movl    tmp2_w, _result_digest+1*16(job_rax)
-
-	pop     %rbx
-
-	ret
-
-.return_null:
-	xor     job_rax, job_rax
-	pop     %rbx
-	ret
-ENDPROC(sha1_mb_mgr_get_comp_job_avx2)
-
-.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
-.octa	0x000000000000000000000000FFFFFFF0
-
-.section	.rodata.cst8, "aM", @progbits, 8
-.align 8
-one:
-.quad  1
-two:
-.quad  2
-three:
-.quad  3
-four:
-.quad  4
-five:
-.quad  5
-six:
-.quad  6
-seven:
-.quad  7
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c
deleted file mode 100644
index d2add0d35f43..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Initialization code for multi buffer SHA1 algorithm for AVX2
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sha1_mb_mgr.h"
-
-void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
-{
-	unsigned int j;
-	state->unused_lanes = 0xF76543210ULL;
-	for (j = 0; j < 8; j++) {
-		state->lens[j] = 0xFFFFFFFF;
-		state->ldata[j].job_in_lane = NULL;
-	}
-}
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
deleted file mode 100644
index 7a93b1c0d69a..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Buffer submit code for multi buffer SHA1 algorithm
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      James Guilford <james.guilford@intel.com>
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha1_mb_mgr_datastruct.S"
-
-
-.extern sha1_x8_avx
-
-# LINUX register definitions
-arg1    = %rdi
-arg2    = %rsi
-size_offset	= %rcx
-tmp2		= %rcx
-extra_blocks	= %rdx
-
-# Common definitions
-#define state   arg1
-#define job     %rsi
-#define len2    arg2
-#define p2      arg2
-
-# idx must be a register not clobberred by sha1_x8_avx2
-idx		= %r8
-DWORD_idx	= %r8d
-last_len	= %r8
-
-p               = %r11
-start_offset    = %r11
-
-unused_lanes    = %rbx
-BYTE_unused_lanes = %bl
-
-job_rax         = %rax
-len             = %rax
-DWORD_len	= %eax
-
-lane            = %r12
-tmp3            = %r12
-
-tmp             = %r9
-DWORD_tmp	= %r9d
-
-lane_data       = %r10
-
-# JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
-# arg 1 : rcx : state
-# arg 2 : rdx : job
-ENTRY(sha1_mb_mgr_submit_avx2)
-	FRAME_BEGIN
-	push	%rbx
-	push	%r12
-
-	mov     _unused_lanes(state), unused_lanes
-	mov	unused_lanes, lane
-	and	$0xF, lane
-	shr     $4, unused_lanes
-	imul    $_LANE_DATA_size, lane, lane_data
-	movl    $STS_BEING_PROCESSED, _status(job)
-	lea     _ldata(state, lane_data), lane_data
-	mov     unused_lanes, _unused_lanes(state)
-	movl    _len(job),  DWORD_len
-
-	mov	job, _job_in_lane(lane_data)
-	shl	$4, len
-	or	lane, len
-
-	movl    DWORD_len,  _lens(state , lane, 4)
-
-	# Load digest words from result_digest
-	vmovdqu	_result_digest(job), %xmm0
-	mov	_result_digest+1*16(job), DWORD_tmp
-	vmovd    %xmm0, _args_digest(state, lane, 4)
-	vpextrd  $1, %xmm0, _args_digest+1*32(state , lane, 4)
-	vpextrd  $2, %xmm0, _args_digest+2*32(state , lane, 4)
-	vpextrd  $3, %xmm0, _args_digest+3*32(state , lane, 4)
-	movl    DWORD_tmp, _args_digest+4*32(state , lane, 4)
-
-	mov     _buffer(job), p
-	mov     p, _args_data_ptr(state, lane, 8)
-
-	cmp     $0xF, unused_lanes
-	jne     return_null
-
-start_loop:
-	# Find min length
-	vmovdqa _lens(state), %xmm0
-	vmovdqa _lens+1*16(state), %xmm1
-
-	vpminud %xmm1, %xmm0, %xmm2        # xmm2 has {D,C,B,A}
-	vpalignr $8, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,D,C}
-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has {x,x,E,F}
-	vpalignr $4, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,x,E}
-	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has min value in low dword
-
-	vmovd   %xmm2, DWORD_idx
-	mov    idx, len2
-	and    $0xF, idx
-	shr    $4, len2
-	jz     len_is_0
-
-	vpand   clear_low_nibble(%rip), %xmm2, %xmm2
-	vpshufd $0, %xmm2, %xmm2
-
-	vpsubd  %xmm2, %xmm0, %xmm0
-	vpsubd  %xmm2, %xmm1, %xmm1
-
-	vmovdqa %xmm0, _lens + 0*16(state)
-	vmovdqa %xmm1, _lens + 1*16(state)
-
-
-	# "state" and "args" are the same address, arg1
-	# len is arg2
-	call    sha1_x8_avx2
-
-	# state and idx are intact
-
-len_is_0:
-	# process completed job "idx"
-	imul    $_LANE_DATA_size, idx, lane_data
-	lea     _ldata(state, lane_data), lane_data
-
-	mov     _job_in_lane(lane_data), job_rax
-	mov     _unused_lanes(state), unused_lanes
-	movq    $0, _job_in_lane(lane_data)
-	movl    $STS_COMPLETED, _status(job_rax)
-	shl     $4, unused_lanes
-	or      idx, unused_lanes
-	mov     unused_lanes, _unused_lanes(state)
-
-	movl	$0xFFFFFFFF, _lens(state, idx, 4)
-
-	vmovd    _args_digest(state, idx, 4), %xmm0
-	vpinsrd  $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
-	vpinsrd  $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
-	vpinsrd  $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
-	movl     _args_digest+4*32(state, idx, 4), DWORD_tmp
-
-	vmovdqu  %xmm0, _result_digest(job_rax)
-	movl    DWORD_tmp, _result_digest+1*16(job_rax)
-
-return:
-	pop	%r12
-	pop	%rbx
-	FRAME_END
-	ret
-
-return_null:
-	xor     job_rax, job_rax
-	jmp     return
-
-ENDPROC(sha1_mb_mgr_submit_avx2)
-
-.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
-	.octa	0x000000000000000000000000FFFFFFF0
diff --git a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S
deleted file mode 100644
index 20f77aa633de..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S
+++ /dev/null
@@ -1,492 +0,0 @@
-/*
- * Multi-buffer SHA1 algorithm hash compute routine
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      James Guilford <james.guilford@intel.com>
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2014 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include "sha1_mb_mgr_datastruct.S"
-
-## code to compute oct SHA1 using SSE-256
-## outer calling routine takes care of save and restore of XMM registers
-
-## Function clobbers: rax, rcx, rdx,   rbx, rsi, rdi, r9-r15# ymm0-15
-##
-## Linux clobbers:    rax rbx rcx rdx rsi            r9 r10 r11 r12 r13 r14 r15
-## Linux preserves:                       rdi rbp r8
-##
-## clobbers ymm0-15
-
-
-# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
-# "transpose" data in {r0...r7} using temps {t0...t1}
-# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {a7 a6 a5 a4   a3 a2 a1 a0}
-# r1 = {b7 b6 b5 b4   b3 b2 b1 b0}
-# r2 = {c7 c6 c5 c4   c3 c2 c1 c0}
-# r3 = {d7 d6 d5 d4   d3 d2 d1 d0}
-# r4 = {e7 e6 e5 e4   e3 e2 e1 e0}
-# r5 = {f7 f6 f5 f4   f3 f2 f1 f0}
-# r6 = {g7 g6 g5 g4   g3 g2 g1 g0}
-# r7 = {h7 h6 h5 h4   h3 h2 h1 h0}
-#
-# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {h0 g0 f0 e0   d0 c0 b0 a0}
-# r1 = {h1 g1 f1 e1   d1 c1 b1 a1}
-# r2 = {h2 g2 f2 e2   d2 c2 b2 a2}
-# r3 = {h3 g3 f3 e3   d3 c3 b3 a3}
-# r4 = {h4 g4 f4 e4   d4 c4 b4 a4}
-# r5 = {h5 g5 f5 e5   d5 c5 b5 a5}
-# r6 = {h6 g6 f6 e6   d6 c6 b6 a6}
-# r7 = {h7 g7 f7 e7   d7 c7 b7 a7}
-#
-
-.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
-	# process top half (r0..r3) {a...d}
-	vshufps  $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4   b1 b0 a1 a0}
-	vshufps  $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6   b3 b2 a3 a2}
-	vshufps  $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4   d1 d0 c1 c0}
-	vshufps  $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6   d3 d2 c3 c2}
-	vshufps  $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5   d1 c1 b1 a1}
-	vshufps  $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6   d2 c2 b2 a2}
-	vshufps  $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7   d3 c3 b3 a3}
-	vshufps  $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4   d0 c0 b0 a0}
-
-	# use r2 in place of t0
-	# process bottom half (r4..r7) {e...h}
-	vshufps  $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4   f1 f0 e1 e0}
-	vshufps  $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6   f3 f2 e3 e2}
-	vshufps  $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4   h1 h0 g1 g0}
-	vshufps  $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6   h3 h2 g3 g2}
-	vshufps  $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5   h1 g1 f1 e1}
-	vshufps  $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6   h2 g2 f2 e2}
-	vshufps  $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7   h3 g3 f3 e3}
-	vshufps  $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4   h0 g0 f0 e0}
-
-	vperm2f128      $0x13, \r1, \r5, \r6  # h6...a6
-	vperm2f128      $0x02, \r1, \r5, \r2  # h2...a2
-	vperm2f128      $0x13, \r3, \r7, \r5  # h5...a5
-	vperm2f128      $0x02, \r3, \r7, \r1  # h1...a1
-	vperm2f128      $0x13, \r0, \r4, \r7  # h7...a7
-	vperm2f128      $0x02, \r0, \r4, \r3  # h3...a3
-	vperm2f128      $0x13, \t0, \t1, \r4  # h4...a4
-	vperm2f128      $0x02, \t0, \t1, \r0  # h0...a0
-
-.endm
-##
-## Magic functions defined in FIPS 180-1
-##
-# macro MAGIC_F0 F,B,C,D,T   ## F = (D ^ (B & (C ^ D)))
-.macro MAGIC_F0 regF regB regC regD regT
-    vpxor \regD, \regC, \regF
-    vpand \regB, \regF, \regF
-    vpxor \regD, \regF, \regF
-.endm
-
-# macro MAGIC_F1 F,B,C,D,T   ## F = (B ^ C ^ D)
-.macro MAGIC_F1 regF regB regC regD regT
-    vpxor  \regC, \regD, \regF
-    vpxor  \regB, \regF, \regF
-.endm
-
-# macro MAGIC_F2 F,B,C,D,T   ## F = ((B & C) | (B & D) | (C & D))
-.macro MAGIC_F2 regF regB regC regD regT
-    vpor  \regC, \regB, \regF
-    vpand \regC, \regB, \regT
-    vpand \regD, \regF, \regF
-    vpor  \regT, \regF, \regF
-.endm
-
-# macro MAGIC_F3 F,B,C,D,T   ## F = (B ^ C ^ D)
-.macro MAGIC_F3 regF regB regC regD regT
-    MAGIC_F1 \regF,\regB,\regC,\regD,\regT
-.endm
-
-# PROLD reg, imm, tmp
-.macro PROLD reg imm tmp
-	vpsrld  $(32-\imm), \reg, \tmp
-	vpslld  $\imm, \reg, \reg
-	vpor    \tmp, \reg, \reg
-.endm
-
-.macro PROLD_nd reg imm tmp src
-	vpsrld  $(32-\imm), \src, \tmp
-	vpslld  $\imm, \src, \reg
-	vpor	\tmp, \reg, \reg
-.endm
-
-.macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC
-	vpaddd	\immCNT, \regE, \regE
-	vpaddd	\memW*32(%rsp), \regE, \regE
-	PROLD_nd \regT, 5, \regF, \regA
-	vpaddd	\regT, \regE, \regE
-	\MAGIC  \regF, \regB, \regC, \regD, \regT
-        PROLD   \regB, 30, \regT
-        vpaddd  \regF, \regE, \regE
-.endm
-
-.macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC
-	vpaddd	\immCNT, \regE, \regE
-	offset = ((\memW - 14) & 15) * 32
-	vmovdqu offset(%rsp), W14
-	vpxor	W14, W16, W16
-	offset = ((\memW -  8) & 15) * 32
-	vpxor	offset(%rsp), W16, W16
-	offset = ((\memW -  3) & 15) * 32
-	vpxor	offset(%rsp), W16, W16
-	vpsrld	$(32-1), W16, \regF
-	vpslld	$1, W16, W16
-	vpor	W16, \regF, \regF
-
-	ROTATE_W
-
-	offset = ((\memW - 0) & 15) * 32
-	vmovdqu	\regF, offset(%rsp)
-	vpaddd	\regF, \regE, \regE
-	PROLD_nd \regT, 5, \regF, \regA
-	vpaddd	\regT, \regE, \regE
-	\MAGIC \regF,\regB,\regC,\regD,\regT      ## FUN  = MAGIC_Fi(B,C,D)
-	PROLD   \regB,30, \regT
-	vpaddd  \regF, \regE, \regE
-.endm
-
-########################################################################
-########################################################################
-########################################################################
-
-## FRAMESZ plus pushes must be an odd multiple of 8
-YMM_SAVE = (15-15)*32
-FRAMESZ = 32*16 + YMM_SAVE
-_YMM  =   FRAMESZ - YMM_SAVE
-
-#define VMOVPS   vmovups
-
-IDX  = %rax
-inp0 = %r9
-inp1 = %r10
-inp2 = %r11
-inp3 = %r12
-inp4 = %r13
-inp5 = %r14
-inp6 = %r15
-inp7 = %rcx
-arg1 = %rdi
-arg2 = %rsi
-RSP_SAVE = %rdx
-
-# ymm0 A
-# ymm1 B
-# ymm2 C
-# ymm3 D
-# ymm4 E
-# ymm5         F       AA
-# ymm6         T0      BB
-# ymm7         T1      CC
-# ymm8         T2      DD
-# ymm9         T3      EE
-# ymm10                T4      TMP
-# ymm11                T5      FUN
-# ymm12                T6      K
-# ymm13                T7      W14
-# ymm14                T8      W15
-# ymm15                T9      W16
-
-
-A  =     %ymm0
-B  =     %ymm1
-C  =     %ymm2
-D  =     %ymm3
-E  =     %ymm4
-F  =     %ymm5
-T0 =	 %ymm6
-T1 =     %ymm7
-T2 =     %ymm8
-T3 =     %ymm9
-T4 =     %ymm10
-T5 =     %ymm11
-T6 =     %ymm12
-T7 =     %ymm13
-T8  =     %ymm14
-T9  =     %ymm15
-
-AA  =     %ymm5
-BB  =     %ymm6
-CC  =     %ymm7
-DD  =     %ymm8
-EE  =     %ymm9
-TMP =     %ymm10
-FUN =     %ymm11
-K   =     %ymm12
-W14 =     %ymm13
-W15 =     %ymm14
-W16 =     %ymm15
-
-.macro ROTATE_ARGS
- TMP_ = E
- E = D
- D = C
- C = B
- B = A
- A = TMP_
-.endm
-
-.macro ROTATE_W
-TMP_  = W16
-W16  = W15
-W15  = W14
-W14  = TMP_
-.endm
-
-# 8 streams x 5 32bit words per digest x 4 bytes per word
-#define DIGEST_SIZE (8*5*4)
-
-.align 32
-
-# void sha1_x8_avx2(void **input_data, UINT128 *digest, UINT32 size)
-# arg 1 : pointer to array[4] of pointer to input data
-# arg 2 : size (in blocks) ;; assumed to be >= 1
-#
-ENTRY(sha1_x8_avx2)
-
-	# save callee-saved clobbered registers to comply with C function ABI
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	#save rsp
-	mov	%rsp, RSP_SAVE
-	sub     $FRAMESZ, %rsp
-
-	#align rsp to 32 Bytes
-	and	$~0x1F, %rsp
-
-	## Initialize digests
-	vmovdqu  0*32(arg1), A
-	vmovdqu  1*32(arg1), B
-	vmovdqu  2*32(arg1), C
-	vmovdqu  3*32(arg1), D
-	vmovdqu  4*32(arg1), E
-
-	## transpose input onto stack
-	mov     _data_ptr+0*8(arg1),inp0
-	mov     _data_ptr+1*8(arg1),inp1
-	mov     _data_ptr+2*8(arg1),inp2
-	mov     _data_ptr+3*8(arg1),inp3
-	mov     _data_ptr+4*8(arg1),inp4
-	mov     _data_ptr+5*8(arg1),inp5
-	mov     _data_ptr+6*8(arg1),inp6
-	mov     _data_ptr+7*8(arg1),inp7
-
-	xor     IDX, IDX
-lloop:
-	vmovdqu  PSHUFFLE_BYTE_FLIP_MASK(%rip), F
-	I=0
-.rep 2
-	VMOVPS   (inp0, IDX), T0
-	VMOVPS   (inp1, IDX), T1
-	VMOVPS   (inp2, IDX), T2
-	VMOVPS   (inp3, IDX), T3
-	VMOVPS   (inp4, IDX), T4
-	VMOVPS   (inp5, IDX), T5
-	VMOVPS   (inp6, IDX), T6
-	VMOVPS   (inp7, IDX), T7
-
-	TRANSPOSE8       T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
-	vpshufb  F, T0, T0
-	vmovdqu  T0, (I*8)*32(%rsp)
-	vpshufb  F, T1, T1
-	vmovdqu  T1, (I*8+1)*32(%rsp)
-	vpshufb  F, T2, T2
-	vmovdqu  T2, (I*8+2)*32(%rsp)
-	vpshufb  F, T3, T3
-	vmovdqu  T3, (I*8+3)*32(%rsp)
-	vpshufb  F, T4, T4
-	vmovdqu  T4, (I*8+4)*32(%rsp)
-	vpshufb  F, T5, T5
-	vmovdqu  T5, (I*8+5)*32(%rsp)
-	vpshufb  F, T6, T6
-	vmovdqu  T6, (I*8+6)*32(%rsp)
-	vpshufb  F, T7, T7
-	vmovdqu  T7, (I*8+7)*32(%rsp)
-	add     $32, IDX
-	I = (I+1)
-.endr
-	# save old digests
-	vmovdqu  A,AA
-	vmovdqu  B,BB
-	vmovdqu  C,CC
-	vmovdqu  D,DD
-	vmovdqu  E,EE
-
-##
-## perform 0-79 steps
-##
-	vmovdqu  K00_19(%rip), K
-## do rounds 0...15
-	I = 0
-.rep 16
-	SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
-	ROTATE_ARGS
-	I = (I+1)
-.endr
-
-## do rounds 16...19
-	vmovdqu  ((16 - 16) & 15) * 32 (%rsp), W16
-	vmovdqu  ((16 - 15) & 15) * 32 (%rsp), W15
-.rep 4
-	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
-	ROTATE_ARGS
-	I = (I+1)
-.endr
-
-## do rounds 20...39
-	vmovdqu  K20_39(%rip), K
-.rep 20
-	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1
-	ROTATE_ARGS
-	I = (I+1)
-.endr
-
-## do rounds 40...59
-	vmovdqu  K40_59(%rip), K
-.rep 20
-	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2
-	ROTATE_ARGS
-	I = (I+1)
-.endr
-
-## do rounds 60...79
-	vmovdqu  K60_79(%rip), K
-.rep 20
-	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3
-	ROTATE_ARGS
-	I = (I+1)
-.endr
-
-	vpaddd   AA,A,A
-	vpaddd   BB,B,B
-	vpaddd   CC,C,C
-	vpaddd   DD,D,D
-	vpaddd   EE,E,E
-
-	sub     $1, arg2
-	jne     lloop
-
-	# write out digests
-	vmovdqu  A, 0*32(arg1)
-	vmovdqu  B, 1*32(arg1)
-	vmovdqu  C, 2*32(arg1)
-	vmovdqu  D, 3*32(arg1)
-	vmovdqu  E, 4*32(arg1)
-
-	# update input pointers
-	add     IDX, inp0
-	add     IDX, inp1
-	add     IDX, inp2
-	add     IDX, inp3
-	add     IDX, inp4
-	add     IDX, inp5
-	add     IDX, inp6
-	add     IDX, inp7
-	mov     inp0, _data_ptr (arg1)
-	mov     inp1, _data_ptr + 1*8(arg1)
-	mov     inp2, _data_ptr + 2*8(arg1)
-	mov     inp3, _data_ptr + 3*8(arg1)
-	mov     inp4, _data_ptr + 4*8(arg1)
-	mov     inp5, _data_ptr + 5*8(arg1)
-	mov     inp6, _data_ptr + 6*8(arg1)
-	mov     inp7, _data_ptr + 7*8(arg1)
-
-	################
-	## Postamble
-
-	mov     RSP_SAVE, %rsp
-
-	# restore callee-saved clobbered registers
-	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-
-	ret
-ENDPROC(sha1_x8_avx2)
-
-
-.section	.rodata.cst32.K00_19, "aM", @progbits, 32
-.align 32
-K00_19:
-.octa 0x5A8279995A8279995A8279995A827999
-.octa 0x5A8279995A8279995A8279995A827999
-
-.section	.rodata.cst32.K20_39, "aM", @progbits, 32
-.align 32
-K20_39:
-.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
-.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
-
-.section	.rodata.cst32.K40_59, "aM", @progbits, 32
-.align 32
-K40_59:
-.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
-.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
-
-.section	.rodata.cst32.K60_79, "aM", @progbits, 32
-.align 32
-K60_79:
-.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
-.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
-
-.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
-.align 32
-PSHUFFLE_BYTE_FLIP_MASK:
-.octa 0x0c0d0e0f08090a0b0405060700010203
-.octa 0x0c0d0e0f08090a0b0405060700010203
diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile
deleted file mode 100644
index 53ad6e7db747..000000000000
--- a/arch/x86/crypto/sha256-mb/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Arch-specific CryptoAPI modules.
-#
-
-OBJECT_FILES_NON_STANDARD := y
-
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
-                                $(comma)4)$(comma)%ymm2,yes,no)
-ifeq ($(avx2_supported),yes)
-	obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o
-	sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \
-	     sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o
-endif
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c
deleted file mode 100644
index 97c5fc43e115..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb.c
+++ /dev/null
@@ -1,1013 +0,0 @@
-/*
- * Multi buffer SHA256 algorithm Glue Code
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *	Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <asm/byteorder.h>
-#include <linux/hardirq.h>
-#include <asm/fpu/api.h>
-#include "sha256_mb_ctx.h"
-
-#define FLUSH_INTERVAL 1000 /* in usec */
-
-static struct mcryptd_alg_state sha256_mb_alg_state;
-
-struct sha256_mb_ctx {
-	struct mcryptd_ahash *mcryptd_tfm;
-};
-
-static inline struct mcryptd_hash_request_ctx
-		*cast_hash_to_mcryptd_ctx(struct sha256_hash_ctx *hash_ctx)
-{
-	struct ahash_request *areq;
-
-	areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
-	return container_of(areq, struct mcryptd_hash_request_ctx, areq);
-}
-
-static inline struct ahash_request
-		*cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
-{
-	return container_of((void *) ctx, struct ahash_request, __ctx);
-}
-
-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
-				struct ahash_request *areq)
-{
-	rctx->flag = HASH_UPDATE;
-}
-
-static asmlinkage void (*sha256_job_mgr_init)(struct sha256_mb_mgr *state);
-static asmlinkage struct job_sha256* (*sha256_job_mgr_submit)
-			(struct sha256_mb_mgr *state, struct job_sha256 *job);
-static asmlinkage struct job_sha256* (*sha256_job_mgr_flush)
-			(struct sha256_mb_mgr *state);
-static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job)
-			(struct sha256_mb_mgr *state);
-
-inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
-			 uint64_t total_len)
-{
-	uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1);
-
-	memset(&padblock[i], 0, SHA256_BLOCK_SIZE);
-	padblock[i] = 0x80;
-
-	i += ((SHA256_BLOCK_SIZE - 1) &
-	      (0 - (total_len + SHA256_PADLENGTHFIELD_SIZE + 1)))
-	     + 1 + SHA256_PADLENGTHFIELD_SIZE;
-
-#if SHA256_PADLENGTHFIELD_SIZE == 16
-	*((uint64_t *) &padblock[i - 16]) = 0;
-#endif
-
-	*((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
-
-	/* Number of extra blocks to hash */
-	return i >> SHA256_LOG2_BLOCK_SIZE;
-}
-
-static struct sha256_hash_ctx
-		*sha256_ctx_mgr_resubmit(struct sha256_ctx_mgr *mgr,
-					struct sha256_hash_ctx *ctx)
-{
-	while (ctx) {
-		if (ctx->status & HASH_CTX_STS_COMPLETE) {
-			/* Clear PROCESSING bit */
-			ctx->status = HASH_CTX_STS_COMPLETE;
-			return ctx;
-		}
-
-		/*
-		 * If the extra blocks are empty, begin hashing what remains
-		 * in the user's buffer.
-		 */
-		if (ctx->partial_block_buffer_length == 0 &&
-		    ctx->incoming_buffer_length) {
-
-			const void *buffer = ctx->incoming_buffer;
-			uint32_t len = ctx->incoming_buffer_length;
-			uint32_t copy_len;
-
-			/*
-			 * Only entire blocks can be hashed.
-			 * Copy remainder to extra blocks buffer.
-			 */
-			copy_len = len & (SHA256_BLOCK_SIZE-1);
-
-			if (copy_len) {
-				len -= copy_len;
-				memcpy(ctx->partial_block_buffer,
-				       ((const char *) buffer + len),
-				       copy_len);
-				ctx->partial_block_buffer_length = copy_len;
-			}
-
-			ctx->incoming_buffer_length = 0;
-
-			/* len should be a multiple of the block size now */
-			assert((len % SHA256_BLOCK_SIZE) == 0);
-
-			/* Set len to the number of blocks to be hashed */
-			len >>= SHA256_LOG2_BLOCK_SIZE;
-
-			if (len) {
-
-				ctx->job.buffer = (uint8_t *) buffer;
-				ctx->job.len = len;
-				ctx = (struct sha256_hash_ctx *)
-				sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
-				continue;
-			}
-		}
-
-		/*
-		 * If the extra blocks are not empty, then we are
-		 * either on the last block(s) or we need more
-		 * user input before continuing.
-		 */
-		if (ctx->status & HASH_CTX_STS_LAST) {
-
-			uint8_t *buf = ctx->partial_block_buffer;
-			uint32_t n_extra_blocks =
-				sha256_pad(buf, ctx->total_length);
-
-			ctx->status = (HASH_CTX_STS_PROCESSING |
-				       HASH_CTX_STS_COMPLETE);
-			ctx->job.buffer = buf;
-			ctx->job.len = (uint32_t) n_extra_blocks;
-			ctx = (struct sha256_hash_ctx *)
-				sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
-			continue;
-		}
-
-		ctx->status = HASH_CTX_STS_IDLE;
-		return ctx;
-	}
-
-	return NULL;
-}
-
-static struct sha256_hash_ctx
-		*sha256_ctx_mgr_get_comp_ctx(struct sha256_ctx_mgr *mgr)
-{
-	/*
-	 * If get_comp_job returns NULL, there are no jobs complete.
-	 * If get_comp_job returns a job, verify that it is safe to return to
-	 * the user. If it is not ready, resubmit the job to finish processing.
-	 * If sha256_ctx_mgr_resubmit returned a job, it is ready to be
-	 * returned. Otherwise, all jobs currently being managed by the
-	 * hash_ctx_mgr still need processing.
-	 */
-	struct sha256_hash_ctx *ctx;
-
-	ctx = (struct sha256_hash_ctx *) sha256_job_mgr_get_comp_job(&mgr->mgr);
-	return sha256_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static void sha256_ctx_mgr_init(struct sha256_ctx_mgr *mgr)
-{
-	sha256_job_mgr_init(&mgr->mgr);
-}
-
-static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
-					  struct sha256_hash_ctx *ctx,
-					  const void *buffer,
-					  uint32_t len,
-					  int flags)
-{
-	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
-		/* User should not pass anything other than UPDATE or LAST */
-		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
-		return ctx;
-	}
-
-	if (ctx->status & HASH_CTX_STS_PROCESSING) {
-		/* Cannot submit to a currently processing job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
-		return ctx;
-	}
-
-	if (ctx->status & HASH_CTX_STS_COMPLETE) {
-		/* Cannot update a finished job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
-		return ctx;
-	}
-
-	/* If we made it here, there was no error during this call to submit */
-	ctx->error = HASH_CTX_ERROR_NONE;
-
-	/* Store buffer ptr info from user */
-	ctx->incoming_buffer = buffer;
-	ctx->incoming_buffer_length = len;
-
-	/*
-	 * Store the user's request flags and mark this ctx as currently
-	 * being processed.
-	 */
-	ctx->status = (flags & HASH_LAST) ?
-			(HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
-			HASH_CTX_STS_PROCESSING;
-
-	/* Advance byte counter */
-	ctx->total_length += len;
-
-	/*
-	 * If there is anything currently buffered in the extra blocks,
-	 * append to it until it contains a whole block.
-	 * Or if the user's buffer contains less than a whole block,
-	 * append as much as possible to the extra block.
-	 */
-	if (ctx->partial_block_buffer_length || len < SHA256_BLOCK_SIZE) {
-		/*
-		 * Compute how many bytes to copy from user buffer into
-		 * extra block
-		 */
-		uint32_t copy_len = SHA256_BLOCK_SIZE -
-					ctx->partial_block_buffer_length;
-		if (len < copy_len)
-			copy_len = len;
-
-		if (copy_len) {
-			/* Copy and update relevant pointers and counters */
-			memcpy(
-		&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
-				buffer, copy_len);
-
-			ctx->partial_block_buffer_length += copy_len;
-			ctx->incoming_buffer = (const void *)
-					((const char *)buffer + copy_len);
-			ctx->incoming_buffer_length = len - copy_len;
-		}
-
-		/* The extra block should never contain more than 1 block */
-		assert(ctx->partial_block_buffer_length <= SHA256_BLOCK_SIZE);
-
-		/*
-		 * If the extra block buffer contains exactly 1 block,
-		 * it can be hashed.
-		 */
-		if (ctx->partial_block_buffer_length >= SHA256_BLOCK_SIZE) {
-			ctx->partial_block_buffer_length = 0;
-
-			ctx->job.buffer = ctx->partial_block_buffer;
-			ctx->job.len = 1;
-			ctx = (struct sha256_hash_ctx *)
-				sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
-		}
-	}
-
-	return sha256_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static struct sha256_hash_ctx *sha256_ctx_mgr_flush(struct sha256_ctx_mgr *mgr)
-{
-	struct sha256_hash_ctx *ctx;
-
-	while (1) {
-		ctx = (struct sha256_hash_ctx *)
-					sha256_job_mgr_flush(&mgr->mgr);
-
-		/* If flush returned 0, there are no more jobs in flight. */
-		if (!ctx)
-			return NULL;
-
-		/*
-		 * If flush returned a job, resubmit the job to finish
-		 * processing.
-		 */
-		ctx = sha256_ctx_mgr_resubmit(mgr, ctx);
-
-		/*
-		 * If sha256_ctx_mgr_resubmit returned a job, it is ready to
-		 * be returned. Otherwise, all jobs currently being managed by
-		 * the sha256_ctx_mgr still need processing. Loop.
-		 */
-		if (ctx)
-			return ctx;
-	}
-}
-
-static int sha256_mb_init(struct ahash_request *areq)
-{
-	struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	hash_ctx_init(sctx);
-	sctx->job.result_digest[0] = SHA256_H0;
-	sctx->job.result_digest[1] = SHA256_H1;
-	sctx->job.result_digest[2] = SHA256_H2;
-	sctx->job.result_digest[3] = SHA256_H3;
-	sctx->job.result_digest[4] = SHA256_H4;
-	sctx->job.result_digest[5] = SHA256_H5;
-	sctx->job.result_digest[6] = SHA256_H6;
-	sctx->job.result_digest[7] = SHA256_H7;
-	sctx->total_length = 0;
-	sctx->partial_block_buffer_length = 0;
-	sctx->status = HASH_CTX_STS_IDLE;
-
-	return 0;
-}
-
-static int sha256_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
-{
-	int	i;
-	struct	sha256_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
-	__be32	*dst = (__be32 *) rctx->out;
-
-	for (i = 0; i < 8; ++i)
-		dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
-
-	return 0;
-}
-
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
-			struct mcryptd_alg_cstate *cstate, bool flush)
-{
-	int	flag = HASH_UPDATE;
-	int	nbytes, err = 0;
-	struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
-	struct sha256_hash_ctx *sha_ctx;
-
-	/* more work ? */
-	while (!(rctx->flag & HASH_DONE)) {
-		nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
-		if (nbytes < 0) {
-			err = nbytes;
-			goto out;
-		}
-		/* check if the walk is done */
-		if (crypto_ahash_walk_last(&rctx->walk)) {
-			rctx->flag |= HASH_DONE;
-			if (rctx->flag & HASH_FINAL)
-				flag |= HASH_LAST;
-
-		}
-		sha_ctx = (struct sha256_hash_ctx *)
-						ahash_request_ctx(&rctx->areq);
-		kernel_fpu_begin();
-		sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx,
-						rctx->walk.data, nbytes, flag);
-		if (!sha_ctx) {
-			if (flush)
-				sha_ctx = sha256_ctx_mgr_flush(cstate->mgr);
-		}
-		kernel_fpu_end();
-		if (sha_ctx)
-			rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		else {
-			rctx = NULL;
-			goto out;
-		}
-	}
-
-	/* copy the results */
-	if (rctx->flag & HASH_FINAL)
-		sha256_mb_set_results(rctx);
-
-out:
-	*ret_rctx = rctx;
-	return err;
-}
-
-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
-			    struct mcryptd_alg_cstate *cstate,
-			    int err)
-{
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha256_hash_ctx *sha_ctx;
-	struct mcryptd_hash_request_ctx *req_ctx;
-	int ret;
-
-	/* remove from work list */
-	spin_lock(&cstate->work_lock);
-	list_del(&rctx->waiter);
-	spin_unlock(&cstate->work_lock);
-
-	if (irqs_disabled())
-		rctx->complete(&req->base, err);
-	else {
-		local_bh_disable();
-		rctx->complete(&req->base, err);
-		local_bh_enable();
-	}
-
-	/* check to see if there are other jobs that are done */
-	sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
-	while (sha_ctx) {
-		req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		ret = sha_finish_walk(&req_ctx, cstate, false);
-		if (req_ctx) {
-			spin_lock(&cstate->work_lock);
-			list_del(&req_ctx->waiter);
-			spin_unlock(&cstate->work_lock);
-
-			req = cast_mcryptd_ctx_to_req(req_ctx);
-			if (irqs_disabled())
-				req_ctx->complete(&req->base, ret);
-			else {
-				local_bh_disable();
-				req_ctx->complete(&req->base, ret);
-				local_bh_enable();
-			}
-		}
-		sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
-	}
-
-	return 0;
-}
-
-static void sha256_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
-			     struct mcryptd_alg_cstate *cstate)
-{
-	unsigned long next_flush;
-	unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
-
-	/* initialize tag */
-	rctx->tag.arrival = jiffies;    /* tag the arrival time */
-	rctx->tag.seq_num = cstate->next_seq_num++;
-	next_flush = rctx->tag.arrival + delay;
-	rctx->tag.expire = next_flush;
-
-	spin_lock(&cstate->work_lock);
-	list_add_tail(&rctx->waiter, &cstate->work_list);
-	spin_unlock(&cstate->work_lock);
-
-	mcryptd_arm_flusher(cstate, delay);
-}
-
-static int sha256_mb_update(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha256_hash_ctx *sha_ctx;
-	int ret = 0, nbytes;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk))
-		rctx->flag |= HASH_DONE;
-
-	/* submit */
-	sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
-	sha256_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
-							nbytes, HASH_UPDATE);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha256_mb_finup(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha256_hash_ctx *sha_ctx;
-	int ret = 0, flag = HASH_UPDATE, nbytes;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk)) {
-		rctx->flag |= HASH_DONE;
-		flag = HASH_LAST;
-	}
-
-	/* submit */
-	rctx->flag |= HASH_FINAL;
-	sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
-	sha256_mb_add_list(rctx, cstate);
-
-	kernel_fpu_begin();
-	sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
-								nbytes, flag);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha256_mb_final(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-			container_of(areq, struct mcryptd_hash_request_ctx,
-			areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
-
-	struct sha256_hash_ctx *sha_ctx;
-	int ret = 0;
-	u8 data;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	rctx->flag |= HASH_DONE | HASH_FINAL;
-
-	sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
-	/* flag HASH_FINAL and 0 data size */
-	sha256_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
-								HASH_LAST);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha256_mb_export(struct ahash_request *areq, void *out)
-{
-	struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(out, sctx, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha256_mb_import(struct ahash_request *areq, const void *in)
-{
-	struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha256_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
-	struct mcryptd_ahash *mcryptd_tfm;
-	struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct mcryptd_hash_ctx *mctx;
-
-	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha256-mb",
-						CRYPTO_ALG_INTERNAL,
-						CRYPTO_ALG_INTERNAL);
-	if (IS_ERR(mcryptd_tfm))
-		return PTR_ERR(mcryptd_tfm);
-	mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
-	mctx->alg_state = &sha256_mb_alg_state;
-	ctx->mcryptd_tfm = mcryptd_tfm;
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				sizeof(struct ahash_request) +
-				crypto_ahash_reqsize(&mcryptd_tfm->base));
-
-	return 0;
-}
-
-static void sha256_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static int sha256_mb_areq_init_tfm(struct crypto_tfm *tfm)
-{
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				sizeof(struct ahash_request) +
-				sizeof(struct sha256_hash_ctx));
-
-	return 0;
-}
-
-static void sha256_mb_areq_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static struct ahash_alg sha256_mb_areq_alg = {
-	.init		=	sha256_mb_init,
-	.update		=	sha256_mb_update,
-	.final		=	sha256_mb_final,
-	.finup		=	sha256_mb_finup,
-	.export		=	sha256_mb_export,
-	.import		=	sha256_mb_import,
-	.halg		=	{
-	.digestsize	=	SHA256_DIGEST_SIZE,
-	.statesize	=	sizeof(struct sha256_hash_ctx),
-		.base		=	{
-			.cra_name	 = "__sha256-mb",
-			.cra_driver_name = "__intel_sha256-mb",
-			.cra_priority	 = 100,
-			/*
-			 * use ASYNC flag as some buffers in multi-buffer
-			 * algo may not have completed before hashing thread
-			 * sleep
-			 */
-			.cra_flags	= CRYPTO_ALG_ASYNC |
-					  CRYPTO_ALG_INTERNAL,
-			.cra_blocksize	= SHA256_BLOCK_SIZE,
-			.cra_module	= THIS_MODULE,
-			.cra_list	= LIST_HEAD_INIT
-					(sha256_mb_areq_alg.halg.base.cra_list),
-			.cra_init	= sha256_mb_areq_init_tfm,
-			.cra_exit	= sha256_mb_areq_exit_tfm,
-			.cra_ctxsize	= sizeof(struct sha256_hash_ctx),
-		}
-	}
-};
-
-static int sha256_mb_async_init(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_init(mcryptd_req);
-}
-
-static int sha256_mb_async_update(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_update(mcryptd_req);
-}
-
-static int sha256_mb_async_finup(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_finup(mcryptd_req);
-}
-
-static int sha256_mb_async_final(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_final(mcryptd_req);
-}
-
-static int sha256_mb_async_digest(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_digest(mcryptd_req);
-}
-
-static int sha256_mb_async_export(struct ahash_request *req, void *out)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_export(mcryptd_req, out);
-}
-
-static int sha256_mb_async_import(struct ahash_request *req, const void *in)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-	struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
-	struct mcryptd_hash_request_ctx *rctx;
-	struct ahash_request *areq;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	rctx = ahash_request_ctx(mcryptd_req);
-	areq = &rctx->areq;
-
-	ahash_request_set_tfm(areq, child);
-	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
-					rctx->complete, req);
-
-	return crypto_ahash_import(mcryptd_req, in);
-}
-
-static struct ahash_alg sha256_mb_async_alg = {
-	.init           = sha256_mb_async_init,
-	.update         = sha256_mb_async_update,
-	.final          = sha256_mb_async_final,
-	.finup          = sha256_mb_async_finup,
-	.export         = sha256_mb_async_export,
-	.import         = sha256_mb_async_import,
-	.digest         = sha256_mb_async_digest,
-	.halg = {
-		.digestsize     = SHA256_DIGEST_SIZE,
-		.statesize      = sizeof(struct sha256_hash_ctx),
-		.base = {
-			.cra_name               = "sha256",
-			.cra_driver_name        = "sha256_mb",
-			/*
-			 * Low priority, since with few concurrent hash requests
-			 * this is extremely slow due to the flush delay.  Users
-			 * whose workloads would benefit from this can request
-			 * it explicitly by driver name, or can increase its
-			 * priority at runtime using NETLINK_CRYPTO.
-			 */
-			.cra_priority           = 50,
-			.cra_flags              = CRYPTO_ALG_ASYNC,
-			.cra_blocksize          = SHA256_BLOCK_SIZE,
-			.cra_module             = THIS_MODULE,
-			.cra_list               = LIST_HEAD_INIT
-				(sha256_mb_async_alg.halg.base.cra_list),
-			.cra_init               = sha256_mb_async_init_tfm,
-			.cra_exit               = sha256_mb_async_exit_tfm,
-			.cra_ctxsize		= sizeof(struct sha256_mb_ctx),
-			.cra_alignmask		= 0,
-		},
-	},
-};
-
-static unsigned long sha256_mb_flusher(struct mcryptd_alg_cstate *cstate)
-{
-	struct mcryptd_hash_request_ctx *rctx;
-	unsigned long cur_time;
-	unsigned long next_flush = 0;
-	struct sha256_hash_ctx *sha_ctx;
-
-
-	cur_time = jiffies;
-
-	while (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		if (time_before(cur_time, rctx->tag.expire))
-			break;
-		kernel_fpu_begin();
-		sha_ctx = (struct sha256_hash_ctx *)
-					sha256_ctx_mgr_flush(cstate->mgr);
-		kernel_fpu_end();
-		if (!sha_ctx) {
-			pr_err("sha256_mb error: nothing got"
-					" flushed for non-empty list\n");
-			break;
-		}
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		sha_finish_walk(&rctx, cstate, true);
-		sha_complete_job(rctx, cstate, 0);
-	}
-
-	if (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		/* get the hash context and then flush time */
-		next_flush = rctx->tag.expire;
-		mcryptd_arm_flusher(cstate, get_delay(next_flush));
-	}
-	return next_flush;
-}
-
-static int __init sha256_mb_mod_init(void)
-{
-
-	int cpu;
-	int err;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	/* check for dependent cpu features */
-	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
-	    !boot_cpu_has(X86_FEATURE_BMI2))
-		return -ENODEV;
-
-	/* initialize multibuffer structures */
-	sha256_mb_alg_state.alg_cstate = alloc_percpu
-						(struct mcryptd_alg_cstate);
-
-	sha256_job_mgr_init = sha256_mb_mgr_init_avx2;
-	sha256_job_mgr_submit = sha256_mb_mgr_submit_avx2;
-	sha256_job_mgr_flush = sha256_mb_mgr_flush_avx2;
-	sha256_job_mgr_get_comp_job = sha256_mb_mgr_get_comp_job_avx2;
-
-	if (!sha256_mb_alg_state.alg_cstate)
-		return -ENOMEM;
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
-		cpu_state->next_flush = 0;
-		cpu_state->next_seq_num = 0;
-		cpu_state->flusher_engaged = false;
-		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
-		cpu_state->cpu = cpu;
-		cpu_state->alg_state = &sha256_mb_alg_state;
-		cpu_state->mgr = kzalloc(sizeof(struct sha256_ctx_mgr),
-					GFP_KERNEL);
-		if (!cpu_state->mgr)
-			goto err2;
-		sha256_ctx_mgr_init(cpu_state->mgr);
-		INIT_LIST_HEAD(&cpu_state->work_list);
-		spin_lock_init(&cpu_state->work_lock);
-	}
-	sha256_mb_alg_state.flusher = &sha256_mb_flusher;
-
-	err = crypto_register_ahash(&sha256_mb_areq_alg);
-	if (err)
-		goto err2;
-	err = crypto_register_ahash(&sha256_mb_async_alg);
-	if (err)
-		goto err1;
-
-
-	return 0;
-err1:
-	crypto_unregister_ahash(&sha256_mb_areq_alg);
-err2:
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha256_mb_alg_state.alg_cstate);
-	return -ENODEV;
-}
-
-static void __exit sha256_mb_mod_fini(void)
-{
-	int cpu;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	crypto_unregister_ahash(&sha256_mb_async_alg);
-	crypto_unregister_ahash(&sha256_mb_areq_alg);
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha256_mb_alg_state.alg_cstate);
-}
-
-module_init(sha256_mb_mod_init);
-module_exit(sha256_mb_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, multi buffer accelerated");
-
-MODULE_ALIAS_CRYPTO("sha256");
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
deleted file mode 100644
index 7c432543dc7f..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Header file for multi buffer SHA256 context
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *	Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SHA_MB_CTX_INTERNAL_H
-#define _SHA_MB_CTX_INTERNAL_H
-
-#include "sha256_mb_mgr.h"
-
-#define HASH_UPDATE          0x00
-#define HASH_LAST            0x01
-#define HASH_DONE	     0x02
-#define HASH_FINAL	     0x04
-
-#define HASH_CTX_STS_IDLE       0x00
-#define HASH_CTX_STS_PROCESSING 0x01
-#define HASH_CTX_STS_LAST       0x02
-#define HASH_CTX_STS_COMPLETE   0x04
-
-enum hash_ctx_error {
-	HASH_CTX_ERROR_NONE               =  0,
-	HASH_CTX_ERROR_INVALID_FLAGS      = -1,
-	HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
-	HASH_CTX_ERROR_ALREADY_COMPLETED  = -3,
-
-#ifdef HASH_CTX_DEBUG
-	HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
-#endif
-};
-
-
-#define hash_ctx_user_data(ctx)  ((ctx)->user_data)
-#define hash_ctx_digest(ctx)     ((ctx)->job.result_digest)
-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
-#define hash_ctx_complete(ctx)   ((ctx)->status == HASH_CTX_STS_COMPLETE)
-#define hash_ctx_status(ctx)     ((ctx)->status)
-#define hash_ctx_error(ctx)      ((ctx)->error)
-#define hash_ctx_init(ctx) \
-	do { \
-		(ctx)->error = HASH_CTX_ERROR_NONE; \
-		(ctx)->status = HASH_CTX_STS_COMPLETE; \
-	} while (0)
-
-
-/* Hash Constants and Typedefs */
-#define SHA256_DIGEST_LENGTH        8
-#define SHA256_LOG2_BLOCK_SIZE        6
-
-#define SHA256_PADLENGTHFIELD_SIZE    8
-
-#ifdef SHA_MB_DEBUG
-#define assert(expr) \
-do { \
-	if (unlikely(!(expr))) { \
-		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
-		#expr, __FILE__, __func__, __LINE__); \
-	} \
-} while (0)
-#else
-#define assert(expr) do {} while (0)
-#endif
-
-struct sha256_ctx_mgr {
-	struct sha256_mb_mgr mgr;
-};
-
-/* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */
-
-struct sha256_hash_ctx {
-	/* Must be at struct offset 0 */
-	struct job_sha256       job;
-	/* status flag */
-	int status;
-	/* error flag */
-	int error;
-
-	uint64_t	total_length;
-	const void	*incoming_buffer;
-	uint32_t	incoming_buffer_length;
-	uint8_t		partial_block_buffer[SHA256_BLOCK_SIZE * 2];
-	uint32_t	partial_block_buffer_length;
-	void		*user_data;
-};
-
-#endif
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h
deleted file mode 100644
index b01ae408c56d..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Header file for multi buffer SHA256 algorithm manager
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *	Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef __SHA_MB_MGR_H
-#define __SHA_MB_MGR_H
-
-#include <linux/types.h>
-
-#define NUM_SHA256_DIGEST_WORDS 8
-
-enum job_sts {	STS_UNKNOWN = 0,
-		STS_BEING_PROCESSED = 1,
-		STS_COMPLETED = 2,
-		STS_INTERNAL_ERROR = 3,
-		STS_ERROR = 4
-};
-
-struct job_sha256 {
-	u8	*buffer;
-	u32	len;
-	u32	result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32);
-	enum	job_sts status;
-	void	*user_data;
-};
-
-/* SHA256 out-of-order scheduler */
-
-/* typedef uint32_t sha8_digest_array[8][8]; */
-
-struct sha256_args_x8 {
-	uint32_t	digest[8][8];
-	uint8_t		*data_ptr[8];
-};
-
-struct sha256_lane_data {
-	struct job_sha256 *job_in_lane;
-};
-
-struct sha256_mb_mgr {
-	struct sha256_args_x8 args;
-
-	uint32_t lens[8];
-
-	/* each byte is index (0...7) of unused lanes */
-	uint64_t unused_lanes;
-	/* byte 4 is set to FF as a flag */
-	struct sha256_lane_data ldata[8];
-};
-
-
-#define SHA256_MB_MGR_NUM_LANES_AVX2 8
-
-void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state);
-struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state,
-					 struct job_sha256 *job);
-struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state);
-struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state);
-
-#endif
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S
deleted file mode 100644
index 5c377bac21d0..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Header file for multi buffer SHA256 algorithm data structure
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *     Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# Macros for defining data structures
-
-# Usage example
-
-#START_FIELDS	# JOB_AES
-###	name		size	align
-#FIELD	_plaintext,	8,	8	# pointer to plaintext
-#FIELD	_ciphertext,	8,	8	# pointer to ciphertext
-#FIELD	_IV,		16,	8	# IV
-#FIELD	_keys,		8,	8	# pointer to keys
-#FIELD	_len,		4,	4	# length in bytes
-#FIELD	_status,	4,	4	# status enumeration
-#FIELD	_user_data,	8,	8	# pointer to user data
-#UNION  _union,         size1,  align1, \
-#	                size2,  align2, \
-#	                size3,  align3, \
-#	                ...
-#END_FIELDS
-#%assign _JOB_AES_size	_FIELD_OFFSET
-#%assign _JOB_AES_align	_STRUCT_ALIGN
-
-#########################################################################
-
-# Alternate "struc-like" syntax:
-#	STRUCT job_aes2
-#	RES_Q	.plaintext,	1
-#	RES_Q	.ciphertext, 	1
-#	RES_DQ	.IV,		1
-#	RES_B	.nested,	_JOB_AES_SIZE, _JOB_AES_ALIGN
-#	RES_U	.union,		size1, align1, \
-#				size2, align2, \
-#				...
-#	ENDSTRUCT
-#	# Following only needed if nesting
-#	%assign job_aes2_size	_FIELD_OFFSET
-#	%assign job_aes2_align	_STRUCT_ALIGN
-#
-# RES_* macros take a name, a count and an optional alignment.
-# The count in in terms of the base size of the macro, and the
-# default alignment is the base size.
-# The macros are:
-# Macro    Base size
-# RES_B	    1
-# RES_W	    2
-# RES_D     4
-# RES_Q     8
-# RES_DQ   16
-# RES_Y    32
-# RES_Z    64
-#
-# RES_U defines a union. It's arguments are a name and two or more
-# pairs of "size, alignment"
-#
-# The two assigns are only needed if this structure is being nested
-# within another. Even if the assigns are not done, one can still use
-# STRUCT_NAME_size as the size of the structure.
-#
-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
-#
-# The differences between this and using "struc" directly are that each
-# type is implicitly aligned to its natural length (although this can be
-# over-ridden with an explicit third parameter), and that the structure
-# is padded at the end to its overall alignment.
-#
-
-#########################################################################
-
-#ifndef _DATASTRUCT_ASM_
-#define _DATASTRUCT_ASM_
-
-#define SZ8			8*SHA256_DIGEST_WORD_SIZE
-#define ROUNDS			64*SZ8
-#define PTR_SZ                  8
-#define SHA256_DIGEST_WORD_SIZE 4
-#define MAX_SHA256_LANES        8
-#define SHA256_DIGEST_WORDS 8
-#define SHA256_DIGEST_ROW_SIZE  (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE)
-#define SHA256_DIGEST_SIZE      (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS)
-#define SHA256_BLK_SZ           64
-
-# START_FIELDS
-.macro START_FIELDS
- _FIELD_OFFSET = 0
- _STRUCT_ALIGN = 0
-.endm
-
-# FIELD name size align
-.macro FIELD name size align
- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
- \name	= _FIELD_OFFSET
- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
-.if (\align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = \align
-.endif
-.endm
-
-# END_FIELDS
-.macro END_FIELDS
- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
-.endm
-
-########################################################################
-
-.macro STRUCT p1
-START_FIELDS
-.struc \p1
-.endm
-
-.macro ENDSTRUCT
- tmp = _FIELD_OFFSET
- END_FIELDS
- tmp = (_FIELD_OFFSET - %%tmp)
-.if (tmp > 0)
-	.lcomm	tmp
-.endif
-.endstruc
-.endm
-
-## RES_int name size align
-.macro RES_int p1 p2 p3
- name = \p1
- size = \p2
- align = .\p3
-
- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
-.align align
-.lcomm name size
- _FIELD_OFFSET = _FIELD_OFFSET + (size)
-.if (align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = align
-.endif
-.endm
-
-# macro RES_B name, size [, align]
-.macro RES_B _name, _size, _align=1
-RES_int _name _size _align
-.endm
-
-# macro RES_W name, size [, align]
-.macro RES_W _name, _size, _align=2
-RES_int _name 2*(_size) _align
-.endm
-
-# macro RES_D name, size [, align]
-.macro RES_D _name, _size, _align=4
-RES_int _name 4*(_size) _align
-.endm
-
-# macro RES_Q name, size [, align]
-.macro RES_Q _name, _size, _align=8
-RES_int _name 8*(_size) _align
-.endm
-
-# macro RES_DQ name, size [, align]
-.macro RES_DQ _name, _size, _align=16
-RES_int _name 16*(_size) _align
-.endm
-
-# macro RES_Y name, size [, align]
-.macro RES_Y _name, _size, _align=32
-RES_int _name 32*(_size) _align
-.endm
-
-# macro RES_Z name, size [, align]
-.macro RES_Z _name, _size, _align=64
-RES_int _name 64*(_size) _align
-.endm
-
-#endif
-
-
-########################################################################
-#### Define SHA256 Out Of Order Data Structures
-########################################################################
-
-START_FIELDS    # LANE_DATA
-###     name            size    align
-FIELD   _job_in_lane,   8,      8       # pointer to job object
-END_FIELDS
-
- _LANE_DATA_size = _FIELD_OFFSET
- _LANE_DATA_align = _STRUCT_ALIGN
-
-########################################################################
-
-START_FIELDS    # SHA256_ARGS_X4
-###     name            size    align
-FIELD   _digest,        4*8*8,  4       # transposed digest
-FIELD   _data_ptr,      8*8,    8       # array of pointers to data
-END_FIELDS
-
- _SHA256_ARGS_X4_size  =  _FIELD_OFFSET
- _SHA256_ARGS_X4_align = _STRUCT_ALIGN
- _SHA256_ARGS_X8_size  =	_FIELD_OFFSET
- _SHA256_ARGS_X8_align =	_STRUCT_ALIGN
-
-#######################################################################
-
-START_FIELDS    # MB_MGR
-###     name            size    align
-FIELD   _args,          _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align
-FIELD   _lens,          4*8,    8
-FIELD   _unused_lanes,  8,      8
-FIELD   _ldata,         _LANE_DATA_size*8, _LANE_DATA_align
-END_FIELDS
-
- _MB_MGR_size  =  _FIELD_OFFSET
- _MB_MGR_align =  _STRUCT_ALIGN
-
-_args_digest   =     _args + _digest
-_args_data_ptr =     _args + _data_ptr
-
-#######################################################################
-
-START_FIELDS    #STACK_FRAME
-###     name            size    align
-FIELD   _data,		16*SZ8,   1       # transposed digest
-FIELD   _digest,         8*SZ8,   1       # array of pointers to data
-FIELD   _ytmp,           4*SZ8,   1
-FIELD   _rsp,            8,       1
-END_FIELDS
-
- _STACK_FRAME_size  =  _FIELD_OFFSET
- _STACK_FRAME_align =  _STRUCT_ALIGN
-
-#######################################################################
-
-########################################################################
-#### Define constants
-########################################################################
-
-#define STS_UNKNOWN             0
-#define STS_BEING_PROCESSED     1
-#define STS_COMPLETED           2
-
-########################################################################
-#### Define JOB_SHA256 structure
-########################################################################
-
-START_FIELDS    # JOB_SHA256
-
-###     name                            size    align
-FIELD   _buffer,                        8,      8       # pointer to buffer
-FIELD   _len,                           8,      8       # length in bytes
-FIELD   _result_digest,                 8*4,    32      # Digest (output)
-FIELD   _status,                        4,      4
-FIELD   _user_data,                     8,      8
-END_FIELDS
-
- _JOB_SHA256_size = _FIELD_OFFSET
- _JOB_SHA256_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
deleted file mode 100644
index d2364c55bbde..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Flush routine for SHA256 multibuffer
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha256_mb_mgr_datastruct.S"
-
-.extern sha256_x8_avx2
-
-#LINUX register definitions
-#define arg1	%rdi
-#define arg2	%rsi
-
-# Common register definitions
-#define state	arg1
-#define job	arg2
-#define len2	arg2
-
-# idx must be a register not clobberred by sha1_mult
-#define idx		%r8
-#define DWORD_idx	%r8d
-
-#define unused_lanes	%rbx
-#define lane_data	%rbx
-#define tmp2		%rbx
-#define tmp2_w		%ebx
-
-#define job_rax		%rax
-#define tmp1		%rax
-#define size_offset	%rax
-#define tmp		%rax
-#define start_offset	%rax
-
-#define tmp3		%arg1
-
-#define extra_blocks	%arg2
-#define p		%arg2
-
-.macro LABEL prefix n
-\prefix\n\():
-.endm
-
-.macro JNE_SKIP i
-jne     skip_\i
-.endm
-
-.altmacro
-.macro SET_OFFSET _offset
-offset = \_offset
-.endm
-.noaltmacro
-
-# JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state)
-# arg 1 : rcx : state
-ENTRY(sha256_mb_mgr_flush_avx2)
-	FRAME_BEGIN
-        push    %rbx
-
-	# If bit (32+3) is set, then all lanes are empty
-	mov	_unused_lanes(state), unused_lanes
-	bt	$32+3, unused_lanes
-	jc	return_null
-
-	# find a lane with a non-null job
-	xor	idx, idx
-	offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-	cmovne	one(%rip), idx
-	offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-	cmovne	two(%rip), idx
-	offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-	cmovne	three(%rip), idx
-	offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-	cmovne	four(%rip), idx
-	offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-	cmovne	five(%rip), idx
-	offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-	cmovne	six(%rip), idx
-	offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-	cmovne	seven(%rip), idx
-
-	# copy idx to empty lanes
-copy_lane_data:
-	offset =  (_args + _data_ptr)
-	mov	offset(state,idx,8), tmp
-
-	I = 0
-.rep 8
-	offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
-	cmpq	$0, offset(state)
-.altmacro
-	JNE_SKIP %I
-	offset =  (_args + _data_ptr + 8*I)
-	mov	tmp, offset(state)
-	offset =  (_lens + 4*I)
-	movl	$0xFFFFFFFF, offset(state)
-LABEL skip_ %I
-	I = (I+1)
-.noaltmacro
-.endr
-
-	# Find min length
-	vmovdqu _lens+0*16(state), %xmm0
-	vmovdqu _lens+1*16(state), %xmm1
-
-	vpminud %xmm1, %xmm0, %xmm2		# xmm2 has {D,C,B,A}
-	vpalignr $8, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,D,C}
-	vpminud %xmm3, %xmm2, %xmm2		# xmm2 has {x,x,E,F}
-	vpalignr $4, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,x,E}
-	vpminud %xmm3, %xmm2, %xmm2		# xmm2 has min val in low dword
-
-	vmovd	%xmm2, DWORD_idx
-	mov	idx, len2
-	and	$0xF, idx
-	shr	$4, len2
-	jz	len_is_0
-
-	vpand	clear_low_nibble(%rip), %xmm2, %xmm2
-	vpshufd	$0, %xmm2, %xmm2
-
-	vpsubd	%xmm2, %xmm0, %xmm0
-	vpsubd	%xmm2, %xmm1, %xmm1
-
-	vmovdqu	%xmm0, _lens+0*16(state)
-	vmovdqu	%xmm1, _lens+1*16(state)
-
-	# "state" and "args" are the same address, arg1
-	# len is arg2
-	call	sha256_x8_avx2
-	# state and idx are intact
-
-len_is_0:
-	# process completed job "idx"
-	imul	$_LANE_DATA_size, idx, lane_data
-	lea	_ldata(state, lane_data), lane_data
-
-	mov	_job_in_lane(lane_data), job_rax
-	movq	$0, _job_in_lane(lane_data)
-	movl	$STS_COMPLETED, _status(job_rax)
-	mov	_unused_lanes(state), unused_lanes
-	shl	$4, unused_lanes
-	or	idx, unused_lanes
-
-	mov	unused_lanes, _unused_lanes(state)
-	movl	$0xFFFFFFFF, _lens(state,idx,4)
-
-	vmovd	_args_digest(state , idx, 4) , %xmm0
-	vpinsrd	$1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd	$2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd	$3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
-	vmovd	_args_digest+4*32(state, idx, 4), %xmm1
-	vpinsrd	$1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
-	vpinsrd	$2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
-	vpinsrd	$3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
-
-	vmovdqu	%xmm0, _result_digest(job_rax)
-	offset =  (_result_digest + 1*16)
-	vmovdqu	%xmm1, offset(job_rax)
-
-return:
-	pop     %rbx
-	FRAME_END
-	ret
-
-return_null:
-	xor	job_rax, job_rax
-	jmp	return
-ENDPROC(sha256_mb_mgr_flush_avx2)
-
-##############################################################################
-
-.align 16
-ENTRY(sha256_mb_mgr_get_comp_job_avx2)
-	push	%rbx
-
-	## if bit 32+3 is set, then all lanes are empty
-	mov	_unused_lanes(state), unused_lanes
-	bt	$(32+3), unused_lanes
-	jc	.return_null
-
-	# Find min length
-	vmovdqu	_lens(state), %xmm0
-	vmovdqu	_lens+1*16(state), %xmm1
-
-	vpminud	%xmm1, %xmm0, %xmm2		# xmm2 has {D,C,B,A}
-	vpalignr $8, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,D,C}
-	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has {x,x,E,F}
-	vpalignr $4, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,x,E}
-	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has min val in low dword
-
-	vmovd	%xmm2, DWORD_idx
-	test	$~0xF, idx
-	jnz	.return_null
-
-	# process completed job "idx"
-	imul	$_LANE_DATA_size, idx, lane_data
-	lea	_ldata(state, lane_data), lane_data
-
-	mov	_job_in_lane(lane_data), job_rax
-	movq	$0,  _job_in_lane(lane_data)
-	movl	$STS_COMPLETED, _status(job_rax)
-	mov	_unused_lanes(state), unused_lanes
-	shl	$4, unused_lanes
-	or	idx, unused_lanes
-	mov	unused_lanes, _unused_lanes(state)
-
-	movl	$0xFFFFFFFF, _lens(state,  idx, 4)
-
-	vmovd	_args_digest(state, idx, 4), %xmm0
-	vpinsrd	$1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd	$2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
-	vpinsrd	$3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
-	vmovd	_args_digest+4*32(state, idx, 4), %xmm1
-	vpinsrd	$1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
-	vpinsrd	$2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
-	vpinsrd	$3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
-
-        vmovdqu %xmm0, _result_digest(job_rax)
-        offset =  (_result_digest + 1*16)
-        vmovdqu %xmm1, offset(job_rax)
-
-	pop	%rbx
-
-	ret
-
-.return_null:
-	xor	job_rax, job_rax
-	pop	%rbx
-	ret
-ENDPROC(sha256_mb_mgr_get_comp_job_avx2)
-
-.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
-.octa	0x000000000000000000000000FFFFFFF0
-
-.section	.rodata.cst8, "aM", @progbits, 8
-.align 8
-one:
-.quad	1
-two:
-.quad	2
-three:
-.quad	3
-four:
-.quad	4
-five:
-.quad	5
-six:
-.quad	6
-seven:
-.quad  7
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c
deleted file mode 100644
index b0c498371e67..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Initialization code for multi buffer SHA256 algorithm for AVX2
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sha256_mb_mgr.h"
-
-void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state)
-{
-	unsigned int j;
-
-	state->unused_lanes = 0xF76543210ULL;
-	for (j = 0; j < 8; j++) {
-		state->lens[j] = 0xFFFFFFFF;
-		state->ldata[j].job_in_lane = NULL;
-	}
-}
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S
deleted file mode 100644
index b36ae7454084..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Buffer submit code for multi buffer SHA256 algorithm
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha256_mb_mgr_datastruct.S"
-
-.extern sha256_x8_avx2
-
-# LINUX register definitions
-arg1		= %rdi
-arg2		= %rsi
-size_offset	= %rcx
-tmp2		= %rcx
-extra_blocks	= %rdx
-
-# Common definitions
-#define state	arg1
-#define job	%rsi
-#define len2	arg2
-#define p2	arg2
-
-# idx must be a register not clobberred by sha1_x8_avx2
-idx		= %r8
-DWORD_idx	= %r8d
-last_len	= %r8
-
-p		= %r11
-start_offset	= %r11
-
-unused_lanes	= %rbx
-BYTE_unused_lanes = %bl
-
-job_rax		= %rax
-len		= %rax
-DWORD_len	= %eax
-
-lane		= %r12
-tmp3		= %r12
-
-tmp		= %r9
-DWORD_tmp	= %r9d
-
-lane_data	= %r10
-
-# JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job)
-# arg 1 : rcx : state
-# arg 2 : rdx : job
-ENTRY(sha256_mb_mgr_submit_avx2)
-	FRAME_BEGIN
-	push	%rbx
-	push	%r12
-
-	mov	_unused_lanes(state), unused_lanes
-	mov	unused_lanes, lane
-	and	$0xF, lane
-	shr	$4, unused_lanes
-	imul	$_LANE_DATA_size, lane, lane_data
-	movl	$STS_BEING_PROCESSED, _status(job)
-	lea	_ldata(state, lane_data), lane_data
-	mov	unused_lanes, _unused_lanes(state)
-	movl	_len(job),  DWORD_len
-
-	mov	job, _job_in_lane(lane_data)
-	shl	$4, len
-	or	lane, len
-
-	movl	DWORD_len,  _lens(state , lane, 4)
-
-	# Load digest words from result_digest
-	vmovdqu	_result_digest(job), %xmm0
-	vmovdqu	_result_digest+1*16(job), %xmm1
-	vmovd	%xmm0, _args_digest(state, lane, 4)
-	vpextrd	$1, %xmm0, _args_digest+1*32(state , lane, 4)
-	vpextrd	$2, %xmm0, _args_digest+2*32(state , lane, 4)
-	vpextrd	$3, %xmm0, _args_digest+3*32(state , lane, 4)
-	vmovd	%xmm1, _args_digest+4*32(state , lane, 4)
-
-	vpextrd	$1, %xmm1, _args_digest+5*32(state , lane, 4)
-	vpextrd	$2, %xmm1, _args_digest+6*32(state , lane, 4)
-	vpextrd	$3, %xmm1, _args_digest+7*32(state , lane, 4)
-
-	mov	_buffer(job), p
-	mov	p, _args_data_ptr(state, lane, 8)
-
-	cmp	$0xF, unused_lanes
-	jne	return_null
-
-start_loop:
-	# Find min length
-	vmovdqa	_lens(state), %xmm0
-	vmovdqa	_lens+1*16(state), %xmm1
-
-	vpminud	%xmm1, %xmm0, %xmm2		# xmm2 has {D,C,B,A}
-	vpalignr $8, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,D,C}
-	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has {x,x,E,F}
-	vpalignr $4, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,x,E}
-	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has min val in low dword
-
-	vmovd	%xmm2, DWORD_idx
-	mov	idx, len2
-	and	$0xF, idx
-	shr	$4, len2
-	jz	len_is_0
-
-	vpand	clear_low_nibble(%rip), %xmm2, %xmm2
-	vpshufd	$0, %xmm2, %xmm2
-
-	vpsubd	%xmm2, %xmm0, %xmm0
-	vpsubd	%xmm2, %xmm1, %xmm1
-
-	vmovdqa	%xmm0, _lens + 0*16(state)
-	vmovdqa	%xmm1, _lens + 1*16(state)
-
-	# "state" and "args" are the same address, arg1
-	# len is arg2
-	call	sha256_x8_avx2
-
-	# state and idx are intact
-
-len_is_0:
-	# process completed job "idx"
-	imul	$_LANE_DATA_size, idx, lane_data
-	lea	_ldata(state, lane_data), lane_data
-
-	mov	_job_in_lane(lane_data), job_rax
-	mov	_unused_lanes(state), unused_lanes
-	movq	$0, _job_in_lane(lane_data)
-	movl	$STS_COMPLETED, _status(job_rax)
-	shl	$4, unused_lanes
-	or	idx, unused_lanes
-	mov	unused_lanes, _unused_lanes(state)
-
-	movl	$0xFFFFFFFF, _lens(state,idx,4)
-
-	vmovd	_args_digest(state, idx, 4), %xmm0
-	vpinsrd	$1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
-	vpinsrd	$2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
-	vpinsrd	$3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
-	vmovd	_args_digest+4*32(state, idx, 4), %xmm1
-
-	vpinsrd	$1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1
-	vpinsrd	$2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1
-	vpinsrd	$3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1
-
-	vmovdqu	%xmm0, _result_digest(job_rax)
-	vmovdqu	%xmm1, _result_digest+1*16(job_rax)
-
-return:
-	pop     %r12
-        pop     %rbx
-        FRAME_END
-	ret
-
-return_null:
-	xor	job_rax, job_rax
-	jmp	return
-
-ENDPROC(sha256_mb_mgr_submit_avx2)
-
-.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
-	.octa	0x000000000000000000000000FFFFFFF0
diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S
deleted file mode 100644
index 1687c80c5995..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S
+++ /dev/null
@@ -1,598 +0,0 @@
-/*
- * Multi-buffer SHA256 algorithm hash compute routine
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *	Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include "sha256_mb_mgr_datastruct.S"
-
-## code to compute oct SHA256 using SSE-256
-## outer calling routine takes care of save and restore of XMM registers
-## Logic designed/laid out by JDG
-
-## Function clobbers: rax, rcx, rdx,   rbx, rsi, rdi, r9-r15; %ymm0-15
-## Linux clobbers:    rax rbx rcx rdx rsi            r9 r10 r11 r12 r13 r14 r15
-## Linux preserves:                       rdi rbp r8
-##
-## clobbers %ymm0-15
-
-arg1 = %rdi
-arg2 = %rsi
-reg3 = %rcx
-reg4 = %rdx
-
-# Common definitions
-STATE = arg1
-INP_SIZE = arg2
-
-IDX = %rax
-ROUND = %rbx
-TBL = reg3
-
-inp0 = %r9
-inp1 = %r10
-inp2 = %r11
-inp3 = %r12
-inp4 = %r13
-inp5 = %r14
-inp6 = %r15
-inp7 = reg4
-
-a = %ymm0
-b = %ymm1
-c = %ymm2
-d = %ymm3
-e = %ymm4
-f = %ymm5
-g = %ymm6
-h = %ymm7
-
-T1 = %ymm8
-
-a0 = %ymm12
-a1 = %ymm13
-a2 = %ymm14
-TMP = %ymm15
-TMP0 = %ymm6
-TMP1 = %ymm7
-
-TT0 = %ymm8
-TT1 = %ymm9
-TT2 = %ymm10
-TT3 = %ymm11
-TT4 = %ymm12
-TT5 = %ymm13
-TT6 = %ymm14
-TT7 = %ymm15
-
-# Define stack usage
-
-# Assume stack aligned to 32 bytes before call
-# Therefore FRAMESZ mod 32 must be 32-8 = 24
-
-#define FRAMESZ	0x388
-
-#define VMOVPS	vmovups
-
-# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
-# "transpose" data in {r0...r7} using temps {t0...t1}
-# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {a7 a6 a5 a4   a3 a2 a1 a0}
-# r1 = {b7 b6 b5 b4   b3 b2 b1 b0}
-# r2 = {c7 c6 c5 c4   c3 c2 c1 c0}
-# r3 = {d7 d6 d5 d4   d3 d2 d1 d0}
-# r4 = {e7 e6 e5 e4   e3 e2 e1 e0}
-# r5 = {f7 f6 f5 f4   f3 f2 f1 f0}
-# r6 = {g7 g6 g5 g4   g3 g2 g1 g0}
-# r7 = {h7 h6 h5 h4   h3 h2 h1 h0}
-#
-# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {h0 g0 f0 e0   d0 c0 b0 a0}
-# r1 = {h1 g1 f1 e1   d1 c1 b1 a1}
-# r2 = {h2 g2 f2 e2   d2 c2 b2 a2}
-# r3 = {h3 g3 f3 e3   d3 c3 b3 a3}
-# r4 = {h4 g4 f4 e4   d4 c4 b4 a4}
-# r5 = {h5 g5 f5 e5   d5 c5 b5 a5}
-# r6 = {h6 g6 f6 e6   d6 c6 b6 a6}
-# r7 = {h7 g7 f7 e7   d7 c7 b7 a7}
-#
-
-.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
-	# process top half (r0..r3) {a...d}
-	vshufps	$0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4   b1 b0 a1 a0}
-	vshufps	$0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6   b3 b2 a3 a2}
-	vshufps	$0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4   d1 d0 c1 c0}
-	vshufps	$0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6   d3 d2 c3 c2}
-	vshufps	$0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5   d1 c1 b1 a1}
-	vshufps	$0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6   d2 c2 b2 a2}
-	vshufps	$0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7   d3 c3 b3 a3}
-	vshufps	$0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4   d0 c0 b0 a0}
-
-	# use r2 in place of t0
-	# process bottom half (r4..r7) {e...h}
-	vshufps	$0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4   f1 f0 e1 e0}
-	vshufps	$0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6   f3 f2 e3 e2}
-	vshufps	$0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4   h1 h0 g1 g0}
-	vshufps	$0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6   h3 h2 g3 g2}
-	vshufps	$0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5   h1 g1 f1 e1}
-	vshufps	$0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6   h2 g2 f2 e2}
-	vshufps	$0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7   h3 g3 f3 e3}
-	vshufps	$0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4   h0 g0 f0 e0}
-
-	vperm2f128	$0x13, \r1, \r5, \r6  # h6...a6
-	vperm2f128	$0x02, \r1, \r5, \r2  # h2...a2
-	vperm2f128	$0x13, \r3, \r7, \r5  # h5...a5
-	vperm2f128	$0x02, \r3, \r7, \r1  # h1...a1
-	vperm2f128	$0x13, \r0, \r4, \r7  # h7...a7
-	vperm2f128	$0x02, \r0, \r4, \r3  # h3...a3
-	vperm2f128	$0x13, \t0, \t1, \r4  # h4...a4
-	vperm2f128	$0x02, \t0, \t1, \r0  # h0...a0
-
-.endm
-
-.macro ROTATE_ARGS
-TMP_ = h
-h = g
-g = f
-f = e
-e = d
-d = c
-c = b
-b = a
-a = TMP_
-.endm
-
-.macro _PRORD reg imm tmp
-	vpslld	$(32-\imm),\reg,\tmp
-	vpsrld	$\imm,\reg, \reg
-	vpor	\tmp,\reg, \reg
-.endm
-
-# PRORD_nd reg, imm, tmp, src
-.macro _PRORD_nd reg imm tmp src
-	vpslld	$(32-\imm), \src, \tmp
-	vpsrld	$\imm, \src, \reg
-	vpor	\tmp, \reg, \reg
-.endm
-
-# PRORD dst/src, amt
-.macro PRORD reg imm
-	_PRORD	\reg,\imm,TMP
-.endm
-
-# PRORD_nd dst, src, amt
-.macro PRORD_nd reg tmp imm
-	_PRORD_nd	\reg, \imm, TMP, \tmp
-.endm
-
-# arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_00_15 _T1 i
-	PRORD_nd	a0,e,5	# sig1: a0 = (e >> 5)
-
-	vpxor	g, f, a2	# ch: a2 = f^g
-	vpand	e,a2, a2	# ch: a2 = (f^g)&e
-	vpxor	g, a2, a2	# a2 = ch
-
-	PRORD_nd	a1,e,25	# sig1: a1 = (e >> 25)
-
-	vmovdqu	\_T1,(SZ8*(\i & 0xf))(%rsp)
-	vpaddd	(TBL,ROUND,1), \_T1, \_T1	# T1 = W + K
-	vpxor	e,a0, a0	# sig1: a0 = e ^ (e >> 5)
-	PRORD	a0, 6		# sig1: a0 = (e >> 6) ^ (e >> 11)
-	vpaddd	a2, h, h	# h = h + ch
-	PRORD_nd	a2,a,11	# sig0: a2 = (a >> 11)
-	vpaddd	\_T1,h, h 	# h = h + ch + W + K
-	vpxor	a1, a0, a0	# a0 = sigma1
-	PRORD_nd	a1,a,22	# sig0: a1 = (a >> 22)
-	vpxor	c, a, \_T1	# maj: T1 = a^c
-	add	$SZ8, ROUND	# ROUND++
-	vpand	b, \_T1, \_T1	# maj: T1 = (a^c)&b
-	vpaddd	a0, h, h
-	vpaddd	h, d, d
-	vpxor	a, a2, a2	# sig0: a2 = a ^ (a >> 11)
-	PRORD	a2,2		# sig0: a2 = (a >> 2) ^ (a >> 13)
-	vpxor	a1, a2, a2	# a2 = sig0
-	vpand	c, a, a1	# maj: a1 = a&c
-	vpor	\_T1, a1, a1 	# a1 = maj
-	vpaddd	a1, h, h	# h = h + ch + W + K + maj
-	vpaddd	a2, h, h	# h = h + ch + W + K + maj + sigma0
-	ROTATE_ARGS
-.endm
-
-# arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_16_XX _T1 i
-	vmovdqu	(SZ8*((\i-15)&0xf))(%rsp), \_T1
-	vmovdqu	(SZ8*((\i-2)&0xf))(%rsp), a1
-	vmovdqu	\_T1, a0
-	PRORD	\_T1,11
-	vmovdqu	a1, a2
-	PRORD	a1,2
-	vpxor	a0, \_T1, \_T1
-	PRORD	\_T1, 7
-	vpxor	a2, a1, a1
-	PRORD	a1, 17
-	vpsrld	$3, a0, a0
-	vpxor	a0, \_T1, \_T1
-	vpsrld	$10, a2, a2
-	vpxor	a2, a1, a1
-	vpaddd	(SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1
-	vpaddd	(SZ8*((\i-7)&0xf))(%rsp), a1, a1
-	vpaddd	a1, \_T1, \_T1
-
-	ROUND_00_15 \_T1,\i
-.endm
-
-# SHA256_ARGS:
-#   UINT128 digest[8];  // transposed digests
-#   UINT8  *data_ptr[4];
-
-# void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes);
-# arg 1 : STATE : pointer to array of pointers to input data
-# arg 2 : INP_SIZE  : size of input in blocks
-	# general registers preserved in outer calling routine
-	# outer calling routine saves all the XMM registers
-	# save rsp, allocate 32-byte aligned for local variables
-ENTRY(sha256_x8_avx2)
-
-	# save callee-saved clobbered registers to comply with C function ABI
-	push    %r12
-	push    %r13
-	push    %r14
-	push    %r15
-
-	mov	%rsp, IDX
-	sub	$FRAMESZ, %rsp
-	and	$~0x1F, %rsp
-	mov	IDX, _rsp(%rsp)
-
-	# Load the pre-transposed incoming digest.
-	vmovdqu	0*SHA256_DIGEST_ROW_SIZE(STATE),a
-	vmovdqu	1*SHA256_DIGEST_ROW_SIZE(STATE),b
-	vmovdqu	2*SHA256_DIGEST_ROW_SIZE(STATE),c
-	vmovdqu	3*SHA256_DIGEST_ROW_SIZE(STATE),d
-	vmovdqu	4*SHA256_DIGEST_ROW_SIZE(STATE),e
-	vmovdqu	5*SHA256_DIGEST_ROW_SIZE(STATE),f
-	vmovdqu	6*SHA256_DIGEST_ROW_SIZE(STATE),g
-	vmovdqu	7*SHA256_DIGEST_ROW_SIZE(STATE),h
-
-	lea	K256_8(%rip),TBL
-
-	# load the address of each of the 4 message lanes
-	# getting ready to transpose input onto stack
-	mov	_args_data_ptr+0*PTR_SZ(STATE),inp0
-	mov	_args_data_ptr+1*PTR_SZ(STATE),inp1
-	mov	_args_data_ptr+2*PTR_SZ(STATE),inp2
-	mov	_args_data_ptr+3*PTR_SZ(STATE),inp3
-	mov	_args_data_ptr+4*PTR_SZ(STATE),inp4
-	mov	_args_data_ptr+5*PTR_SZ(STATE),inp5
-	mov	_args_data_ptr+6*PTR_SZ(STATE),inp6
-	mov	_args_data_ptr+7*PTR_SZ(STATE),inp7
-
-	xor	IDX, IDX
-lloop:
-	xor	ROUND, ROUND
-
-	# save old digest
-	vmovdqu	a, _digest(%rsp)
-	vmovdqu	b, _digest+1*SZ8(%rsp)
-	vmovdqu	c, _digest+2*SZ8(%rsp)
-	vmovdqu	d, _digest+3*SZ8(%rsp)
-	vmovdqu	e, _digest+4*SZ8(%rsp)
-	vmovdqu	f, _digest+5*SZ8(%rsp)
-	vmovdqu	g, _digest+6*SZ8(%rsp)
-	vmovdqu	h, _digest+7*SZ8(%rsp)
-	i = 0
-.rep 2
-	VMOVPS	i*32(inp0, IDX), TT0
-	VMOVPS	i*32(inp1, IDX), TT1
-	VMOVPS	i*32(inp2, IDX), TT2
-	VMOVPS	i*32(inp3, IDX), TT3
-	VMOVPS	i*32(inp4, IDX), TT4
-	VMOVPS	i*32(inp5, IDX), TT5
-	VMOVPS	i*32(inp6, IDX), TT6
-	VMOVPS	i*32(inp7, IDX), TT7
-	vmovdqu	g, _ytmp(%rsp)
-	vmovdqu	h, _ytmp+1*SZ8(%rsp)
-	TRANSPOSE8	TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7,   TMP0, TMP1
-	vmovdqu	PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1
-	vmovdqu	_ytmp(%rsp), g
-	vpshufb	TMP1, TT0, TT0
-	vpshufb	TMP1, TT1, TT1
-	vpshufb	TMP1, TT2, TT2
-	vpshufb	TMP1, TT3, TT3
-	vpshufb	TMP1, TT4, TT4
-	vpshufb	TMP1, TT5, TT5
-	vpshufb	TMP1, TT6, TT6
-	vpshufb	TMP1, TT7, TT7
-	vmovdqu	_ytmp+1*SZ8(%rsp), h
-	vmovdqu	TT4, _ytmp(%rsp)
-	vmovdqu	TT5, _ytmp+1*SZ8(%rsp)
-	vmovdqu	TT6, _ytmp+2*SZ8(%rsp)
-	vmovdqu	TT7, _ytmp+3*SZ8(%rsp)
-	ROUND_00_15	TT0,(i*8+0)
-	vmovdqu	_ytmp(%rsp), TT0
-	ROUND_00_15	TT1,(i*8+1)
-	vmovdqu	_ytmp+1*SZ8(%rsp), TT1
-	ROUND_00_15	TT2,(i*8+2)
-	vmovdqu	_ytmp+2*SZ8(%rsp), TT2
-	ROUND_00_15	TT3,(i*8+3)
-	vmovdqu	_ytmp+3*SZ8(%rsp), TT3
-	ROUND_00_15	TT0,(i*8+4)
-	ROUND_00_15	TT1,(i*8+5)
-	ROUND_00_15	TT2,(i*8+6)
-	ROUND_00_15	TT3,(i*8+7)
-	i = (i+1)
-.endr
-	add	$64, IDX
-	i = (i*8)
-
-	jmp	Lrounds_16_xx
-.align 16
-Lrounds_16_xx:
-.rep 16
-	ROUND_16_XX	T1, i
-	i = (i+1)
-.endr
-
-	cmp	$ROUNDS,ROUND
-	jb	Lrounds_16_xx
-
-	# add old digest
-	vpaddd	_digest+0*SZ8(%rsp), a, a
-	vpaddd	_digest+1*SZ8(%rsp), b, b
-	vpaddd	_digest+2*SZ8(%rsp), c, c
-	vpaddd	_digest+3*SZ8(%rsp), d, d
-	vpaddd	_digest+4*SZ8(%rsp), e, e
-	vpaddd	_digest+5*SZ8(%rsp), f, f
-	vpaddd	_digest+6*SZ8(%rsp), g, g
-	vpaddd	_digest+7*SZ8(%rsp), h, h
-
-	sub	$1, INP_SIZE  # unit is blocks
-	jne	lloop
-
-	# write back to memory (state object) the transposed digest
-	vmovdqu	a, 0*SHA256_DIGEST_ROW_SIZE(STATE)
-	vmovdqu	b, 1*SHA256_DIGEST_ROW_SIZE(STATE)
-	vmovdqu	c, 2*SHA256_DIGEST_ROW_SIZE(STATE)
-	vmovdqu	d, 3*SHA256_DIGEST_ROW_SIZE(STATE)
-	vmovdqu	e, 4*SHA256_DIGEST_ROW_SIZE(STATE)
-	vmovdqu	f, 5*SHA256_DIGEST_ROW_SIZE(STATE)
-	vmovdqu	g, 6*SHA256_DIGEST_ROW_SIZE(STATE)
-	vmovdqu	h, 7*SHA256_DIGEST_ROW_SIZE(STATE)
-
-	# update input pointers
-	add	IDX, inp0
-	mov	inp0, _args_data_ptr+0*8(STATE)
-	add	IDX, inp1
-	mov	inp1, _args_data_ptr+1*8(STATE)
-	add	IDX, inp2
-	mov	inp2, _args_data_ptr+2*8(STATE)
-	add	IDX, inp3
-	mov	inp3, _args_data_ptr+3*8(STATE)
-	add	IDX, inp4
-	mov	inp4, _args_data_ptr+4*8(STATE)
-	add	IDX, inp5
-	mov	inp5, _args_data_ptr+5*8(STATE)
-	add	IDX, inp6
-	mov	inp6, _args_data_ptr+6*8(STATE)
-	add	IDX, inp7
-	mov	inp7, _args_data_ptr+7*8(STATE)
-
-	# Postamble
-	mov	_rsp(%rsp), %rsp
-
-	# restore callee-saved clobbered registers
-	pop     %r15
-	pop     %r14
-	pop     %r13
-	pop     %r12
-
-	ret
-ENDPROC(sha256_x8_avx2)
-
-.section	.rodata.K256_8, "a", @progbits
-.align 64
-K256_8:
-	.octa	0x428a2f98428a2f98428a2f98428a2f98
-	.octa	0x428a2f98428a2f98428a2f98428a2f98
-	.octa	0x71374491713744917137449171374491
-	.octa	0x71374491713744917137449171374491
-	.octa	0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
-	.octa	0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
-	.octa	0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
-	.octa	0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
-	.octa	0x3956c25b3956c25b3956c25b3956c25b
-	.octa	0x3956c25b3956c25b3956c25b3956c25b
-	.octa	0x59f111f159f111f159f111f159f111f1
-	.octa	0x59f111f159f111f159f111f159f111f1
-	.octa	0x923f82a4923f82a4923f82a4923f82a4
-	.octa	0x923f82a4923f82a4923f82a4923f82a4
-	.octa	0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
-	.octa	0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
-	.octa	0xd807aa98d807aa98d807aa98d807aa98
-	.octa	0xd807aa98d807aa98d807aa98d807aa98
-	.octa	0x12835b0112835b0112835b0112835b01
-	.octa	0x12835b0112835b0112835b0112835b01
-	.octa	0x243185be243185be243185be243185be
-	.octa	0x243185be243185be243185be243185be
-	.octa	0x550c7dc3550c7dc3550c7dc3550c7dc3
-	.octa	0x550c7dc3550c7dc3550c7dc3550c7dc3
-	.octa	0x72be5d7472be5d7472be5d7472be5d74
-	.octa	0x72be5d7472be5d7472be5d7472be5d74
-	.octa	0x80deb1fe80deb1fe80deb1fe80deb1fe
-	.octa	0x80deb1fe80deb1fe80deb1fe80deb1fe
-	.octa	0x9bdc06a79bdc06a79bdc06a79bdc06a7
-	.octa	0x9bdc06a79bdc06a79bdc06a79bdc06a7
-	.octa	0xc19bf174c19bf174c19bf174c19bf174
-	.octa	0xc19bf174c19bf174c19bf174c19bf174
-	.octa	0xe49b69c1e49b69c1e49b69c1e49b69c1
-	.octa	0xe49b69c1e49b69c1e49b69c1e49b69c1
-	.octa	0xefbe4786efbe4786efbe4786efbe4786
-	.octa	0xefbe4786efbe4786efbe4786efbe4786
-	.octa	0x0fc19dc60fc19dc60fc19dc60fc19dc6
-	.octa	0x0fc19dc60fc19dc60fc19dc60fc19dc6
-	.octa	0x240ca1cc240ca1cc240ca1cc240ca1cc
-	.octa	0x240ca1cc240ca1cc240ca1cc240ca1cc
-	.octa	0x2de92c6f2de92c6f2de92c6f2de92c6f
-	.octa	0x2de92c6f2de92c6f2de92c6f2de92c6f
-	.octa	0x4a7484aa4a7484aa4a7484aa4a7484aa
-	.octa	0x4a7484aa4a7484aa4a7484aa4a7484aa
-	.octa	0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
-	.octa	0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
-	.octa	0x76f988da76f988da76f988da76f988da
-	.octa	0x76f988da76f988da76f988da76f988da
-	.octa	0x983e5152983e5152983e5152983e5152
-	.octa	0x983e5152983e5152983e5152983e5152
-	.octa	0xa831c66da831c66da831c66da831c66d
-	.octa	0xa831c66da831c66da831c66da831c66d
-	.octa	0xb00327c8b00327c8b00327c8b00327c8
-	.octa	0xb00327c8b00327c8b00327c8b00327c8
-	.octa	0xbf597fc7bf597fc7bf597fc7bf597fc7
-	.octa	0xbf597fc7bf597fc7bf597fc7bf597fc7
-	.octa	0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
-	.octa	0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
-	.octa	0xd5a79147d5a79147d5a79147d5a79147
-	.octa	0xd5a79147d5a79147d5a79147d5a79147
-	.octa	0x06ca635106ca635106ca635106ca6351
-	.octa	0x06ca635106ca635106ca635106ca6351
-	.octa	0x14292967142929671429296714292967
-	.octa	0x14292967142929671429296714292967
-	.octa	0x27b70a8527b70a8527b70a8527b70a85
-	.octa	0x27b70a8527b70a8527b70a8527b70a85
-	.octa	0x2e1b21382e1b21382e1b21382e1b2138
-	.octa	0x2e1b21382e1b21382e1b21382e1b2138
-	.octa	0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
-	.octa	0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
-	.octa	0x53380d1353380d1353380d1353380d13
-	.octa	0x53380d1353380d1353380d1353380d13
-	.octa	0x650a7354650a7354650a7354650a7354
-	.octa	0x650a7354650a7354650a7354650a7354
-	.octa	0x766a0abb766a0abb766a0abb766a0abb
-	.octa	0x766a0abb766a0abb766a0abb766a0abb
-	.octa	0x81c2c92e81c2c92e81c2c92e81c2c92e
-	.octa	0x81c2c92e81c2c92e81c2c92e81c2c92e
-	.octa	0x92722c8592722c8592722c8592722c85
-	.octa	0x92722c8592722c8592722c8592722c85
-	.octa	0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
-	.octa	0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
-	.octa	0xa81a664ba81a664ba81a664ba81a664b
-	.octa	0xa81a664ba81a664ba81a664ba81a664b
-	.octa	0xc24b8b70c24b8b70c24b8b70c24b8b70
-	.octa	0xc24b8b70c24b8b70c24b8b70c24b8b70
-	.octa	0xc76c51a3c76c51a3c76c51a3c76c51a3
-	.octa	0xc76c51a3c76c51a3c76c51a3c76c51a3
-	.octa	0xd192e819d192e819d192e819d192e819
-	.octa	0xd192e819d192e819d192e819d192e819
-	.octa	0xd6990624d6990624d6990624d6990624
-	.octa	0xd6990624d6990624d6990624d6990624
-	.octa	0xf40e3585f40e3585f40e3585f40e3585
-	.octa	0xf40e3585f40e3585f40e3585f40e3585
-	.octa	0x106aa070106aa070106aa070106aa070
-	.octa	0x106aa070106aa070106aa070106aa070
-	.octa	0x19a4c11619a4c11619a4c11619a4c116
-	.octa	0x19a4c11619a4c11619a4c11619a4c116
-	.octa	0x1e376c081e376c081e376c081e376c08
-	.octa	0x1e376c081e376c081e376c081e376c08
-	.octa	0x2748774c2748774c2748774c2748774c
-	.octa	0x2748774c2748774c2748774c2748774c
-	.octa	0x34b0bcb534b0bcb534b0bcb534b0bcb5
-	.octa	0x34b0bcb534b0bcb534b0bcb534b0bcb5
-	.octa	0x391c0cb3391c0cb3391c0cb3391c0cb3
-	.octa	0x391c0cb3391c0cb3391c0cb3391c0cb3
-	.octa	0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
-	.octa	0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
-	.octa	0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
-	.octa	0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
-	.octa	0x682e6ff3682e6ff3682e6ff3682e6ff3
-	.octa	0x682e6ff3682e6ff3682e6ff3682e6ff3
-	.octa	0x748f82ee748f82ee748f82ee748f82ee
-	.octa	0x748f82ee748f82ee748f82ee748f82ee
-	.octa	0x78a5636f78a5636f78a5636f78a5636f
-	.octa	0x78a5636f78a5636f78a5636f78a5636f
-	.octa	0x84c8781484c8781484c8781484c87814
-	.octa	0x84c8781484c8781484c8781484c87814
-	.octa	0x8cc702088cc702088cc702088cc70208
-	.octa	0x8cc702088cc702088cc702088cc70208
-	.octa	0x90befffa90befffa90befffa90befffa
-	.octa	0x90befffa90befffa90befffa90befffa
-	.octa	0xa4506ceba4506ceba4506ceba4506ceb
-	.octa	0xa4506ceba4506ceba4506ceba4506ceb
-	.octa	0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
-	.octa	0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
-	.octa	0xc67178f2c67178f2c67178f2c67178f2
-	.octa	0xc67178f2c67178f2c67178f2c67178f2
-
-.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
-.align 32
-PSHUFFLE_BYTE_FLIP_MASK:
-.octa 0x0c0d0e0f08090a0b0405060700010203
-.octa 0x0c0d0e0f08090a0b0405060700010203
-
-.section	.rodata.cst256.K256, "aM", @progbits, 256
-.align 64
-.global K256
-K256:
-	.int	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
-	.int	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
-	.int	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
-	.int	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
-	.int	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
-	.int	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
-	.int	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
-	.int	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
-	.int	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
-	.int	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
-	.int	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
-	.int	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
-	.int	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
-	.int	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
-	.int	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
-	.int	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
diff --git a/arch/x86/crypto/sha512-mb/Makefile b/arch/x86/crypto/sha512-mb/Makefile
deleted file mode 100644
index 90f1ef69152e..000000000000
--- a/arch/x86/crypto/sha512-mb/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Arch-specific CryptoAPI modules.
-#
-
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
-                                $(comma)4)$(comma)%ymm2,yes,no)
-ifeq ($(avx2_supported),yes)
-	obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o
-	sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \
-	     sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o
-endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c
deleted file mode 100644
index 26b85678012d..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb.c
+++ /dev/null
@@ -1,1047 +0,0 @@
-/*
- * Multi buffer SHA512 algorithm Glue Code
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *	Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <asm/byteorder.h>
-#include <linux/hardirq.h>
-#include <asm/fpu/api.h>
-#include "sha512_mb_ctx.h"
-
-#define FLUSH_INTERVAL 1000 /* in usec */
-
-static struct mcryptd_alg_state sha512_mb_alg_state;
-
-struct sha512_mb_ctx {
-	struct mcryptd_ahash *mcryptd_tfm;
-};
-
-static inline struct mcryptd_hash_request_ctx
-		*cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx)
-{
-	struct ahash_request *areq;
-
-	areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
-	return container_of(areq, struct mcryptd_hash_request_ctx, areq);
-}
-
-static inline struct ahash_request
-		*cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
-{
-	return container_of((void *) ctx, struct ahash_request, __ctx);
-}
-
-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
-				struct ahash_request *areq)
-{
-	rctx->flag = HASH_UPDATE;
-}
-
-static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_submit)
-						(struct sha512_mb_mgr *state,
-						struct job_sha512 *job);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_flush)
-						(struct sha512_mb_mgr *state);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
-						(struct sha512_mb_mgr *state);
-
-inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
-			 uint64_t total_len)
-{
-	uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1);
-
-	memset(&padblock[i], 0, SHA512_BLOCK_SIZE);
-	padblock[i] = 0x80;
-
-	i += ((SHA512_BLOCK_SIZE - 1) &
-	      (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1)))
-	     + 1 + SHA512_PADLENGTHFIELD_SIZE;
-
-#if SHA512_PADLENGTHFIELD_SIZE == 16
-	*((uint64_t *) &padblock[i - 16]) = 0;
-#endif
-
-	*((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
-
-	/* Number of extra blocks to hash */
-	return i >> SHA512_LOG2_BLOCK_SIZE;
-}
-
-static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit
-		(struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx)
-{
-	while (ctx) {
-		if (ctx->status & HASH_CTX_STS_COMPLETE) {
-			/* Clear PROCESSING bit */
-			ctx->status = HASH_CTX_STS_COMPLETE;
-			return ctx;
-		}
-
-		/*
-		 * If the extra blocks are empty, begin hashing what remains
-		 * in the user's buffer.
-		 */
-		if (ctx->partial_block_buffer_length == 0 &&
-		    ctx->incoming_buffer_length) {
-
-			const void *buffer = ctx->incoming_buffer;
-			uint32_t len = ctx->incoming_buffer_length;
-			uint32_t copy_len;
-
-			/*
-			 * Only entire blocks can be hashed.
-			 * Copy remainder to extra blocks buffer.
-			 */
-			copy_len = len & (SHA512_BLOCK_SIZE-1);
-
-			if (copy_len) {
-				len -= copy_len;
-				memcpy(ctx->partial_block_buffer,
-				       ((const char *) buffer + len),
-				       copy_len);
-				ctx->partial_block_buffer_length = copy_len;
-			}
-
-			ctx->incoming_buffer_length = 0;
-
-			/* len should be a multiple of the block size now */
-			assert((len % SHA512_BLOCK_SIZE) == 0);
-
-			/* Set len to the number of blocks to be hashed */
-			len >>= SHA512_LOG2_BLOCK_SIZE;
-
-			if (len) {
-
-				ctx->job.buffer = (uint8_t *) buffer;
-				ctx->job.len = len;
-				ctx = (struct sha512_hash_ctx *)
-					sha512_job_mgr_submit(&mgr->mgr,
-					&ctx->job);
-				continue;
-			}
-		}
-
-		/*
-		 * If the extra blocks are not empty, then we are
-		 * either on the last block(s) or we need more
-		 * user input before continuing.
-		 */
-		if (ctx->status & HASH_CTX_STS_LAST) {
-
-			uint8_t *buf = ctx->partial_block_buffer;
-			uint32_t n_extra_blocks =
-					sha512_pad(buf, ctx->total_length);
-
-			ctx->status = (HASH_CTX_STS_PROCESSING |
-				       HASH_CTX_STS_COMPLETE);
-			ctx->job.buffer = buf;
-			ctx->job.len = (uint32_t) n_extra_blocks;
-			ctx = (struct sha512_hash_ctx *)
-				sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
-			continue;
-		}
-
-		if (ctx)
-			ctx->status = HASH_CTX_STS_IDLE;
-		return ctx;
-	}
-
-	return NULL;
-}
-
-static struct sha512_hash_ctx
-		*sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate)
-{
-	/*
-	 * If get_comp_job returns NULL, there are no jobs complete.
-	 * If get_comp_job returns a job, verify that it is safe to return to
-	 * the user.
-	 * If it is not ready, resubmit the job to finish processing.
-	 * If sha512_ctx_mgr_resubmit returned a job, it is ready to be
-	 * returned.
-	 * Otherwise, all jobs currently being managed by the hash_ctx_mgr
-	 * still need processing.
-	 */
-	struct sha512_ctx_mgr *mgr;
-	struct sha512_hash_ctx *ctx;
-	unsigned long flags;
-
-	mgr = cstate->mgr;
-	spin_lock_irqsave(&cstate->work_lock, flags);
-	ctx = (struct sha512_hash_ctx *)
-				sha512_job_mgr_get_comp_job(&mgr->mgr);
-	ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
-	spin_unlock_irqrestore(&cstate->work_lock, flags);
-	return ctx;
-}
-
-static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
-{
-	sha512_job_mgr_init(&mgr->mgr);
-}
-
-static struct sha512_hash_ctx
-			*sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate,
-					  struct sha512_hash_ctx *ctx,
-					  const void *buffer,
-					  uint32_t len,
-					  int flags)
-{
-	struct sha512_ctx_mgr *mgr;
-	unsigned long irqflags;
-
-	mgr = cstate->mgr;
-	spin_lock_irqsave(&cstate->work_lock, irqflags);
-	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
-		/* User should not pass anything other than UPDATE or LAST */
-		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
-		goto unlock;
-	}
-
-	if (ctx->status & HASH_CTX_STS_PROCESSING) {
-		/* Cannot submit to a currently processing job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
-		goto unlock;
-	}
-
-	if (ctx->status & HASH_CTX_STS_COMPLETE) {
-		/* Cannot update a finished job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
-		goto unlock;
-	}
-
-	/*
-	 * If we made it here, there were no errors during this call to
-	 * submit
-	 */
-	ctx->error = HASH_CTX_ERROR_NONE;
-
-	/* Store buffer ptr info from user */
-	ctx->incoming_buffer = buffer;
-	ctx->incoming_buffer_length = len;
-
-	/*
-	 * Store the user's request flags and mark this ctx as currently being
-	 * processed.
-	 */
-	ctx->status = (flags & HASH_LAST) ?
-			(HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
-			HASH_CTX_STS_PROCESSING;
-
-	/* Advance byte counter */
-	ctx->total_length += len;
-
-	/*
-	 * If there is anything currently buffered in the extra blocks,
-	 * append to it until it contains a whole block.
-	 * Or if the user's buffer contains less than a whole block,
-	 * append as much as possible to the extra block.
-	 */
-	if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) {
-		/* Compute how many bytes to copy from user buffer into extra
-		 * block
-		 */
-		uint32_t copy_len = SHA512_BLOCK_SIZE -
-					ctx->partial_block_buffer_length;
-		if (len < copy_len)
-			copy_len = len;
-
-		if (copy_len) {
-			/* Copy and update relevant pointers and counters */
-			memcpy
-		(&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
-				buffer, copy_len);
-
-			ctx->partial_block_buffer_length += copy_len;
-			ctx->incoming_buffer = (const void *)
-					((const char *)buffer + copy_len);
-			ctx->incoming_buffer_length = len - copy_len;
-		}
-
-		/* The extra block should never contain more than 1 block
-		 * here
-		 */
-		assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE);
-
-		/* If the extra block buffer contains exactly 1 block, it can
-		 * be hashed.
-		 */
-		if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) {
-			ctx->partial_block_buffer_length = 0;
-
-			ctx->job.buffer = ctx->partial_block_buffer;
-			ctx->job.len = 1;
-			ctx = (struct sha512_hash_ctx *)
-				sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
-		}
-	}
-
-	ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
-unlock:
-	spin_unlock_irqrestore(&cstate->work_lock, irqflags);
-	return ctx;
-}
-
-static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate)
-{
-	struct sha512_ctx_mgr *mgr;
-	struct sha512_hash_ctx *ctx;
-	unsigned long flags;
-
-	mgr = cstate->mgr;
-	spin_lock_irqsave(&cstate->work_lock, flags);
-	while (1) {
-		ctx = (struct sha512_hash_ctx *)
-					sha512_job_mgr_flush(&mgr->mgr);
-
-		/* If flush returned 0, there are no more jobs in flight. */
-		if (!ctx)
-			break;
-
-		/*
-		 * If flush returned a job, resubmit the job to finish
-		 * processing.
-		 */
-		ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
-
-		/*
-		 * If sha512_ctx_mgr_resubmit returned a job, it is ready to
-		 * be returned. Otherwise, all jobs currently being managed by
-		 * the sha512_ctx_mgr still need processing. Loop.
-		 */
-		if (ctx)
-			break;
-	}
-	spin_unlock_irqrestore(&cstate->work_lock, flags);
-	return ctx;
-}
-
-static int sha512_mb_init(struct ahash_request *areq)
-{
-	struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	hash_ctx_init(sctx);
-	sctx->job.result_digest[0] = SHA512_H0;
-	sctx->job.result_digest[1] = SHA512_H1;
-	sctx->job.result_digest[2] = SHA512_H2;
-	sctx->job.result_digest[3] = SHA512_H3;
-	sctx->job.result_digest[4] = SHA512_H4;
-	sctx->job.result_digest[5] = SHA512_H5;
-	sctx->job.result_digest[6] = SHA512_H6;
-	sctx->job.result_digest[7] = SHA512_H7;
-	sctx->total_length = 0;
-	sctx->partial_block_buffer_length = 0;
-	sctx->status = HASH_CTX_STS_IDLE;
-
-	return 0;
-}
-
-static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
-{
-	int	i;
-	struct	sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
-	__be64	*dst = (__be64 *) rctx->out;
-
-	for (i = 0; i < 8; ++i)
-		dst[i] = cpu_to_be64(sctx->job.result_digest[i]);
-
-	return 0;
-}
-
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
-			struct mcryptd_alg_cstate *cstate, bool flush)
-{
-	int	flag = HASH_UPDATE;
-	int	nbytes, err = 0;
-	struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
-	struct sha512_hash_ctx *sha_ctx;
-
-	/* more work ? */
-	while (!(rctx->flag & HASH_DONE)) {
-		nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
-		if (nbytes < 0) {
-			err = nbytes;
-			goto out;
-		}
-		/* check if the walk is done */
-		if (crypto_ahash_walk_last(&rctx->walk)) {
-			rctx->flag |= HASH_DONE;
-			if (rctx->flag & HASH_FINAL)
-				flag |= HASH_LAST;
-
-		}
-		sha_ctx = (struct sha512_hash_ctx *)
-						ahash_request_ctx(&rctx->areq);
-		kernel_fpu_begin();
-		sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx,
-						rctx->walk.data, nbytes, flag);
-		if (!sha_ctx) {
-			if (flush)
-				sha_ctx = sha512_ctx_mgr_flush(cstate);
-		}
-		kernel_fpu_end();
-		if (sha_ctx)
-			rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		else {
-			rctx = NULL;
-			goto out;
-		}
-	}
-
-	/* copy the results */
-	if (rctx->flag & HASH_FINAL)
-		sha512_mb_set_results(rctx);
-
-out:
-	*ret_rctx = rctx;
-	return err;
-}
-
-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
-			    struct mcryptd_alg_cstate *cstate,
-			    int err)
-{
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha512_hash_ctx *sha_ctx;
-	struct mcryptd_hash_request_ctx *req_ctx;
-	int ret;
-	unsigned long flags;
-
-	/* remove from work list */
-	spin_lock_irqsave(&cstate->work_lock, flags);
-	list_del(&rctx->waiter);
-	spin_unlock_irqrestore(&cstate->work_lock, flags);
-
-	if (irqs_disabled())
-		rctx->complete(&req->base, err);
-	else {
-		local_bh_disable();
-		rctx->complete(&req->base, err);
-		local_bh_enable();
-	}
-
-	/* check to see if there are other jobs that are done */
-	sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
-	while (sha_ctx) {
-		req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		ret = sha_finish_walk(&req_ctx, cstate, false);
-		if (req_ctx) {
-			spin_lock_irqsave(&cstate->work_lock, flags);
-			list_del(&req_ctx->waiter);
-			spin_unlock_irqrestore(&cstate->work_lock, flags);
-
-			req = cast_mcryptd_ctx_to_req(req_ctx);
-			if (irqs_disabled())
-				req_ctx->complete(&req->base, ret);
-			else {
-				local_bh_disable();
-				req_ctx->complete(&req->base, ret);
-				local_bh_enable();
-			}
-		}
-		sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
-	}
-
-	return 0;
-}
-
-static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
-			     struct mcryptd_alg_cstate *cstate)
-{
-	unsigned long next_flush;
-	unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
-	unsigned long flags;
-
-	/* initialize tag */
-	rctx->tag.arrival = jiffies;    /* tag the arrival time */
-	rctx->tag.seq_num = cstate->next_seq_num++;
-	next_flush = rctx->tag.arrival + delay;
-	rctx->tag.expire = next_flush;
-
-	spin_lock_irqsave(&cstate->work_lock, flags);
-	list_add_tail(&rctx->waiter, &cstate->work_list);
-	spin_unlock_irqrestore(&cstate->work_lock, flags);
-
-	mcryptd_arm_flusher(cstate, delay);
-}
-
-static int sha512_mb_update(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-			container_of(areq, struct mcryptd_hash_request_ctx,
-									areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha512_hash_ctx *sha_ctx;
-	int ret = 0, nbytes;
-
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk))
-		rctx->flag |= HASH_DONE;
-
-	/* submit */
-	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
-	sha512_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
-							nbytes, HASH_UPDATE);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha512_mb_finup(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-			container_of(areq, struct mcryptd_hash_request_ctx,
-									areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha512_hash_ctx *sha_ctx;
-	int ret = 0, flag = HASH_UPDATE, nbytes;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk)) {
-		rctx->flag |= HASH_DONE;
-		flag = HASH_LAST;
-	}
-
-	/* submit */
-	rctx->flag |= HASH_FINAL;
-	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
-	sha512_mb_add_list(rctx, cstate);
-
-	kernel_fpu_begin();
-	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
-								nbytes, flag);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha512_mb_final(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-			container_of(areq, struct mcryptd_hash_request_ctx,
-									areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
-	struct sha512_hash_ctx *sha_ctx;
-	int ret = 0;
-	u8 data;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	rctx->flag |= HASH_DONE | HASH_FINAL;
-
-	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
-	/* flag HASH_FINAL and 0 data size */
-	sha512_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha512_mb_export(struct ahash_request *areq, void *out)
-{
-	struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(out, sctx, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha512_mb_import(struct ahash_request *areq, const void *in)
-{
-	struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
-	struct mcryptd_ahash *mcryptd_tfm;
-	struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct mcryptd_hash_ctx *mctx;
-
-	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb",
-						CRYPTO_ALG_INTERNAL,
-						CRYPTO_ALG_INTERNAL);
-	if (IS_ERR(mcryptd_tfm))
-		return PTR_ERR(mcryptd_tfm);
-	mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
-	mctx->alg_state = &sha512_mb_alg_state;
-	ctx->mcryptd_tfm = mcryptd_tfm;
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				sizeof(struct ahash_request) +
-				crypto_ahash_reqsize(&mcryptd_tfm->base));
-
-	return 0;
-}
-
-static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm)
-{
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				sizeof(struct ahash_request) +
-				sizeof(struct sha512_hash_ctx));
-
-	return 0;
-}
-
-static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static struct ahash_alg sha512_mb_areq_alg = {
-	.init		=	sha512_mb_init,
-	.update		=	sha512_mb_update,
-	.final		=	sha512_mb_final,
-	.finup		=	sha512_mb_finup,
-	.export		=	sha512_mb_export,
-	.import		=	sha512_mb_import,
-	.halg		=	{
-	.digestsize	=	SHA512_DIGEST_SIZE,
-	.statesize	=	sizeof(struct sha512_hash_ctx),
-	.base		=	{
-			.cra_name	 = "__sha512-mb",
-			.cra_driver_name = "__intel_sha512-mb",
-			.cra_priority	 = 100,
-			/*
-			 * use ASYNC flag as some buffers in multi-buffer
-			 * algo may not have completed before hashing thread
-			 * sleep
-			 */
-			.cra_flags	= CRYPTO_ALG_ASYNC |
-					  CRYPTO_ALG_INTERNAL,
-			.cra_blocksize	= SHA512_BLOCK_SIZE,
-			.cra_module	= THIS_MODULE,
-			.cra_list	= LIST_HEAD_INIT
-					(sha512_mb_areq_alg.halg.base.cra_list),
-			.cra_init	= sha512_mb_areq_init_tfm,
-			.cra_exit	= sha512_mb_areq_exit_tfm,
-			.cra_ctxsize	= sizeof(struct sha512_hash_ctx),
-		}
-	}
-};
-
-static int sha512_mb_async_init(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_init(mcryptd_req);
-}
-
-static int sha512_mb_async_update(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_update(mcryptd_req);
-}
-
-static int sha512_mb_async_finup(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_finup(mcryptd_req);
-}
-
-static int sha512_mb_async_final(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_final(mcryptd_req);
-}
-
-static int sha512_mb_async_digest(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_digest(mcryptd_req);
-}
-
-static int sha512_mb_async_export(struct ahash_request *req, void *out)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_export(mcryptd_req, out);
-}
-
-static int sha512_mb_async_import(struct ahash_request *req, const void *in)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-	struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
-	struct mcryptd_hash_request_ctx *rctx;
-	struct ahash_request *areq;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	rctx = ahash_request_ctx(mcryptd_req);
-
-	areq = &rctx->areq;
-
-	ahash_request_set_tfm(areq, child);
-	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
-					rctx->complete, req);
-
-	return crypto_ahash_import(mcryptd_req, in);
-}
-
-static struct ahash_alg sha512_mb_async_alg = {
-	.init           = sha512_mb_async_init,
-	.update         = sha512_mb_async_update,
-	.final          = sha512_mb_async_final,
-	.finup          = sha512_mb_async_finup,
-	.digest         = sha512_mb_async_digest,
-	.export		= sha512_mb_async_export,
-	.import		= sha512_mb_async_import,
-	.halg = {
-		.digestsize     = SHA512_DIGEST_SIZE,
-		.statesize      = sizeof(struct sha512_hash_ctx),
-		.base = {
-			.cra_name               = "sha512",
-			.cra_driver_name        = "sha512_mb",
-			/*
-			 * Low priority, since with few concurrent hash requests
-			 * this is extremely slow due to the flush delay.  Users
-			 * whose workloads would benefit from this can request
-			 * it explicitly by driver name, or can increase its
-			 * priority at runtime using NETLINK_CRYPTO.
-			 */
-			.cra_priority           = 50,
-			.cra_flags              = CRYPTO_ALG_ASYNC,
-			.cra_blocksize          = SHA512_BLOCK_SIZE,
-			.cra_module             = THIS_MODULE,
-			.cra_list               = LIST_HEAD_INIT
-				(sha512_mb_async_alg.halg.base.cra_list),
-			.cra_init               = sha512_mb_async_init_tfm,
-			.cra_exit               = sha512_mb_async_exit_tfm,
-			.cra_ctxsize		= sizeof(struct sha512_mb_ctx),
-			.cra_alignmask		= 0,
-		},
-	},
-};
-
-static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate)
-{
-	struct mcryptd_hash_request_ctx *rctx;
-	unsigned long cur_time;
-	unsigned long next_flush = 0;
-	struct sha512_hash_ctx *sha_ctx;
-
-
-	cur_time = jiffies;
-
-	while (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		if time_before(cur_time, rctx->tag.expire)
-			break;
-		kernel_fpu_begin();
-		sha_ctx = (struct sha512_hash_ctx *)
-					sha512_ctx_mgr_flush(cstate);
-		kernel_fpu_end();
-		if (!sha_ctx) {
-			pr_err("sha512_mb error: nothing got flushed for"
-							" non-empty list\n");
-			break;
-		}
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		sha_finish_walk(&rctx, cstate, true);
-		sha_complete_job(rctx, cstate, 0);
-	}
-
-	if (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		/* get the hash context and then flush time */
-		next_flush = rctx->tag.expire;
-		mcryptd_arm_flusher(cstate, get_delay(next_flush));
-	}
-	return next_flush;
-}
-
-static int __init sha512_mb_mod_init(void)
-{
-
-	int cpu;
-	int err;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	/* check for dependent cpu features */
-	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
-	    !boot_cpu_has(X86_FEATURE_BMI2))
-		return -ENODEV;
-
-	/* initialize multibuffer structures */
-	sha512_mb_alg_state.alg_cstate =
-				alloc_percpu(struct mcryptd_alg_cstate);
-
-	sha512_job_mgr_init = sha512_mb_mgr_init_avx2;
-	sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2;
-	sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2;
-	sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2;
-
-	if (!sha512_mb_alg_state.alg_cstate)
-		return -ENOMEM;
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
-		cpu_state->next_flush = 0;
-		cpu_state->next_seq_num = 0;
-		cpu_state->flusher_engaged = false;
-		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
-		cpu_state->cpu = cpu;
-		cpu_state->alg_state = &sha512_mb_alg_state;
-		cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr),
-								GFP_KERNEL);
-		if (!cpu_state->mgr)
-			goto err2;
-		sha512_ctx_mgr_init(cpu_state->mgr);
-		INIT_LIST_HEAD(&cpu_state->work_list);
-		spin_lock_init(&cpu_state->work_lock);
-	}
-	sha512_mb_alg_state.flusher = &sha512_mb_flusher;
-
-	err = crypto_register_ahash(&sha512_mb_areq_alg);
-	if (err)
-		goto err2;
-	err = crypto_register_ahash(&sha512_mb_async_alg);
-	if (err)
-		goto err1;
-
-
-	return 0;
-err1:
-	crypto_unregister_ahash(&sha512_mb_areq_alg);
-err2:
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha512_mb_alg_state.alg_cstate);
-	return -ENODEV;
-}
-
-static void __exit sha512_mb_mod_fini(void)
-{
-	int cpu;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	crypto_unregister_ahash(&sha512_mb_async_alg);
-	crypto_unregister_ahash(&sha512_mb_areq_alg);
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha512_mb_alg_state.alg_cstate);
-}
-
-module_init(sha512_mb_mod_init);
-module_exit(sha512_mb_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated");
-
-MODULE_ALIAS("sha512");
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
deleted file mode 100644
index e5c465bd821e..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Header file for multi buffer SHA512 context
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SHA_MB_CTX_INTERNAL_H
-#define _SHA_MB_CTX_INTERNAL_H
-
-#include "sha512_mb_mgr.h"
-
-#define HASH_UPDATE          0x00
-#define HASH_LAST            0x01
-#define HASH_DONE            0x02
-#define HASH_FINAL           0x04
-
-#define HASH_CTX_STS_IDLE       0x00
-#define HASH_CTX_STS_PROCESSING 0x01
-#define HASH_CTX_STS_LAST       0x02
-#define HASH_CTX_STS_COMPLETE   0x04
-
-enum hash_ctx_error {
-	HASH_CTX_ERROR_NONE               =  0,
-	HASH_CTX_ERROR_INVALID_FLAGS      = -1,
-	HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
-	HASH_CTX_ERROR_ALREADY_COMPLETED  = -3,
-};
-
-#define hash_ctx_user_data(ctx)  ((ctx)->user_data)
-#define hash_ctx_digest(ctx)     ((ctx)->job.result_digest)
-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
-#define hash_ctx_complete(ctx)   ((ctx)->status == HASH_CTX_STS_COMPLETE)
-#define hash_ctx_status(ctx)     ((ctx)->status)
-#define hash_ctx_error(ctx)      ((ctx)->error)
-#define hash_ctx_init(ctx) \
-	do { \
-		(ctx)->error = HASH_CTX_ERROR_NONE; \
-		(ctx)->status = HASH_CTX_STS_COMPLETE; \
-	} while (0)
-
-/* Hash Constants and Typedefs */
-#define SHA512_DIGEST_LENGTH          8
-#define SHA512_LOG2_BLOCK_SIZE        7
-
-#define SHA512_PADLENGTHFIELD_SIZE    16
-
-#ifdef SHA_MB_DEBUG
-#define assert(expr) \
-do { \
-	if (unlikely(!(expr))) { \
-		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
-		#expr, __FILE__, __func__, __LINE__); \
-	} \
-} while (0)
-#else
-#define assert(expr) do {} while (0)
-#endif
-
-struct sha512_ctx_mgr {
-	struct sha512_mb_mgr mgr;
-};
-
-/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */
-
-struct sha512_hash_ctx {
-	/* Must be at struct offset 0 */
-	struct job_sha512       job;
-	/* status flag */
-	int status;
-	/* error flag */
-	int error;
-
-	uint64_t        total_length;
-	const void      *incoming_buffer;
-	uint32_t        incoming_buffer_length;
-	uint8_t         partial_block_buffer[SHA512_BLOCK_SIZE * 2];
-	uint32_t        partial_block_buffer_length;
-	void            *user_data;
-};
-
-#endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
deleted file mode 100644
index 178f17eef382..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Header file for multi buffer SHA512 algorithm manager
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __SHA_MB_MGR_H
-#define __SHA_MB_MGR_H
-
-#include <linux/types.h>
-
-#define NUM_SHA512_DIGEST_WORDS 8
-
-enum job_sts {STS_UNKNOWN = 0,
-	STS_BEING_PROCESSED = 1,
-	STS_COMPLETED =       2,
-	STS_INTERNAL_ERROR = 3,
-	STS_ERROR = 4
-};
-
-struct job_sha512 {
-	u8  *buffer;
-	u64  len;
-	u64  result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32);
-	enum job_sts status;
-	void   *user_data;
-};
-
-struct sha512_args_x4 {
-	uint64_t        digest[8][4];
-	uint8_t         *data_ptr[4];
-};
-
-struct sha512_lane_data {
-	struct job_sha512 *job_in_lane;
-};
-
-struct sha512_mb_mgr {
-	struct sha512_args_x4 args;
-
-	uint64_t lens[4];
-
-	/* each byte is index (0...7) of unused lanes */
-	uint64_t unused_lanes;
-	/* byte 4 is set to FF as a flag */
-	struct sha512_lane_data ldata[4];
-};
-
-#define SHA512_MB_MGR_NUM_LANES_AVX2 4
-
-void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state);
-struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state,
-						struct job_sha512 *job);
-struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state);
-struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state);
-
-#endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
deleted file mode 100644
index cf2636d4c9ba..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Header file for multi buffer SHA256 algorithm data structure
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# Macros for defining data structures
-
-# Usage example
-
-#START_FIELDS   # JOB_AES
-###     name            size    align
-#FIELD  _plaintext,     8,      8       # pointer to plaintext
-#FIELD  _ciphertext,    8,      8       # pointer to ciphertext
-#FIELD  _IV,            16,     8       # IV
-#FIELD  _keys,          8,      8       # pointer to keys
-#FIELD  _len,           4,      4       # length in bytes
-#FIELD  _status,        4,      4       # status enumeration
-#FIELD  _user_data,     8,      8       # pointer to user data
-#UNION  _union,         size1,  align1, \
-#                       size2,  align2, \
-#                       size3,  align3, \
-#                       ...
-#END_FIELDS
-#%assign _JOB_AES_size  _FIELD_OFFSET
-#%assign _JOB_AES_align _STRUCT_ALIGN
-
-#########################################################################
-
-# Alternate "struc-like" syntax:
-#       STRUCT job_aes2
-#       RES_Q   .plaintext,     1
-#       RES_Q   .ciphertext,    1
-#       RES_DQ  .IV,            1
-#       RES_B   .nested,        _JOB_AES_SIZE, _JOB_AES_ALIGN
-#       RES_U   .union,         size1, align1, \
-#                               size2, align2, \
-#                               ...
-#       ENDSTRUCT
-#       # Following only needed if nesting
-#       %assign job_aes2_size   _FIELD_OFFSET
-#       %assign job_aes2_align  _STRUCT_ALIGN
-#
-# RES_* macros take a name, a count and an optional alignment.
-# The count in in terms of the base size of the macro, and the
-# default alignment is the base size.
-# The macros are:
-# Macro    Base size
-# RES_B     1
-# RES_W     2
-# RES_D     4
-# RES_Q     8
-# RES_DQ   16
-# RES_Y    32
-# RES_Z    64
-#
-# RES_U defines a union. It's arguments are a name and two or more
-# pairs of "size, alignment"
-#
-# The two assigns are only needed if this structure is being nested
-# within another. Even if the assigns are not done, one can still use
-# STRUCT_NAME_size as the size of the structure.
-#
-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
-#
-# The differences between this and using "struc" directly are that each
-# type is implicitly aligned to its natural length (although this can be
-# over-ridden with an explicit third parameter), and that the structure
-# is padded at the end to its overall alignment.
-#
-
-#########################################################################
-
-#ifndef _DATASTRUCT_ASM_
-#define _DATASTRUCT_ASM_
-
-#define PTR_SZ                  8
-#define SHA512_DIGEST_WORD_SIZE 8
-#define SHA512_MB_MGR_NUM_LANES_AVX2 4
-#define NUM_SHA512_DIGEST_WORDS 8
-#define SZ4                     4*SHA512_DIGEST_WORD_SIZE
-#define ROUNDS                  80*SZ4
-#define SHA512_DIGEST_ROW_SIZE  (SHA512_MB_MGR_NUM_LANES_AVX2 * 8)
-
-# START_FIELDS
-.macro START_FIELDS
- _FIELD_OFFSET = 0
- _STRUCT_ALIGN = 0
-.endm
-
-# FIELD name size align
-.macro FIELD name size align
- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
- \name  = _FIELD_OFFSET
- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
-.if (\align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = \align
-.endif
-.endm
-
-# END_FIELDS
-.macro END_FIELDS
- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
-.endm
-
-.macro STRUCT p1
-START_FIELDS
-.struc \p1
-.endm
-
-.macro ENDSTRUCT
- tmp = _FIELD_OFFSET
- END_FIELDS
- tmp = (_FIELD_OFFSET - ##tmp)
-.if (tmp > 0)
-        .lcomm  tmp
-.endm
-
-## RES_int name size align
-.macro RES_int p1 p2 p3
- name = \p1
- size = \p2
- align = .\p3
-
- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
-.align align
-.lcomm name size
- _FIELD_OFFSET = _FIELD_OFFSET + (size)
-.if (align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = align
-.endif
-.endm
-
-# macro RES_B name, size [, align]
-.macro RES_B _name, _size, _align=1
-RES_int _name _size _align
-.endm
-
-# macro RES_W name, size [, align]
-.macro RES_W _name, _size, _align=2
-RES_int _name 2*(_size) _align
-.endm
-
-# macro RES_D name, size [, align]
-.macro RES_D _name, _size, _align=4
-RES_int _name 4*(_size) _align
-.endm
-
-# macro RES_Q name, size [, align]
-.macro RES_Q _name, _size, _align=8
-RES_int _name 8*(_size) _align
-.endm
-
-# macro RES_DQ name, size [, align]
-.macro RES_DQ _name, _size, _align=16
-RES_int _name 16*(_size) _align
-.endm
-
-# macro RES_Y name, size [, align]
-.macro RES_Y _name, _size, _align=32
-RES_int _name 32*(_size) _align
-.endm
-
-# macro RES_Z name, size [, align]
-.macro RES_Z _name, _size, _align=64
-RES_int _name 64*(_size) _align
-.endm
-
-#endif
-
-###################################################################
-### Define SHA512 Out Of Order Data Structures
-###################################################################
-
-START_FIELDS    # LANE_DATA
-###     name            size    align
-FIELD   _job_in_lane,   8,      8       # pointer to job object
-END_FIELDS
-
- _LANE_DATA_size = _FIELD_OFFSET
- _LANE_DATA_align = _STRUCT_ALIGN
-
-####################################################################
-
-START_FIELDS    # SHA512_ARGS_X4
-###     name            size    align
-FIELD   _digest,        8*8*4,  4      # transposed digest
-FIELD   _data_ptr,      8*4,    8       # array of pointers to data
-END_FIELDS
-
- _SHA512_ARGS_X4_size  =  _FIELD_OFFSET
- _SHA512_ARGS_X4_align =  _STRUCT_ALIGN
-
-#####################################################################
-
-START_FIELDS    # MB_MGR
-###     name            size    align
-FIELD   _args,          _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align
-FIELD   _lens,          8*4,    8
-FIELD   _unused_lanes,  8,      8
-FIELD   _ldata,         _LANE_DATA_size*4, _LANE_DATA_align
-END_FIELDS
-
- _MB_MGR_size  =  _FIELD_OFFSET
- _MB_MGR_align =  _STRUCT_ALIGN
-
-_args_digest = _args + _digest
-_args_data_ptr = _args + _data_ptr
-
-#######################################################################
-
-#######################################################################
-#### Define constants
-#######################################################################
-
-#define STS_UNKNOWN             0
-#define STS_BEING_PROCESSED     1
-#define STS_COMPLETED           2
-
-#######################################################################
-#### Define JOB_SHA512 structure
-#######################################################################
-
-START_FIELDS    # JOB_SHA512
-###     name                            size    align
-FIELD   _buffer,                        8,      8       # pointer to buffer
-FIELD   _len,                           8,      8       # length in bytes
-FIELD   _result_digest,                 8*8,    32      # Digest (output)
-FIELD   _status,                        4,      4
-FIELD   _user_data,                     8,      8
-END_FIELDS
-
- _JOB_SHA512_size = _FIELD_OFFSET
- _JOB_SHA512_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
deleted file mode 100644
index 7c629caebc05..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * Flush routine for SHA512 multibuffer
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *     Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-.extern sha512_x4_avx2
-
-# LINUX register definitions
-#define arg1    %rdi
-#define arg2    %rsi
-
-# idx needs to be other than arg1, arg2, rbx, r12
-#define idx     %rdx
-
-# Common definitions
-#define state   arg1
-#define job     arg2
-#define len2    arg2
-
-#define unused_lanes    %rbx
-#define lane_data       %rbx
-#define tmp2            %rbx
-
-#define job_rax         %rax
-#define tmp1            %rax
-#define size_offset     %rax
-#define tmp             %rax
-#define start_offset    %rax
-
-#define tmp3            arg1
-
-#define extra_blocks    arg2
-#define p               arg2
-
-#define tmp4            %r8
-#define lens0           %r8
-
-#define lens1           %r9
-#define lens2           %r10
-#define lens3           %r11
-
-.macro LABEL prefix n
-\prefix\n\():
-.endm
-
-.macro JNE_SKIP i
-jne     skip_\i
-.endm
-
-.altmacro
-.macro SET_OFFSET _offset
-offset = \_offset
-.endm
-.noaltmacro
-
-# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state)
-# arg 1 : rcx : state
-ENTRY(sha512_mb_mgr_flush_avx2)
-	FRAME_BEGIN
-	push	%rbx
-
-	# If bit (32+3) is set, then all lanes are empty
-	mov     _unused_lanes(state), unused_lanes
-        bt      $32+7, unused_lanes
-        jc      return_null
-
-        # find a lane with a non-null job
-	xor     idx, idx
-        offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane)
-        cmpq    $0, offset(state)
-        cmovne  one(%rip), idx
-        offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane)
-        cmpq    $0, offset(state)
-        cmovne  two(%rip), idx
-        offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane)
-        cmpq    $0, offset(state)
-        cmovne  three(%rip), idx
-
-        # copy idx to empty lanes
-copy_lane_data:
-	offset =  (_args + _data_ptr)
-        mov     offset(state,idx,8), tmp
-
-        I = 0
-.rep 4
-	offset =  (_ldata + I * _LANE_DATA_size + _job_in_lane)
-        cmpq    $0, offset(state)
-.altmacro
-        JNE_SKIP %I
-        offset =  (_args + _data_ptr + 8*I)
-        mov     tmp, offset(state)
-        offset =  (_lens + 8*I +4)
-        movl    $0xFFFFFFFF, offset(state)
-LABEL skip_ %I
-        I = (I+1)
-.noaltmacro
-.endr
-
-        # Find min length
-        mov     _lens + 0*8(state),lens0
-        mov     lens0,idx
-        mov     _lens + 1*8(state),lens1
-        cmp     idx,lens1
-        cmovb   lens1,idx
-        mov     _lens + 2*8(state),lens2
-        cmp     idx,lens2
-        cmovb   lens2,idx
-        mov     _lens + 3*8(state),lens3
-        cmp     idx,lens3
-        cmovb   lens3,idx
-        mov     idx,len2
-        and     $0xF,idx
-        and     $~0xFF,len2
-	jz      len_is_0
-
-        sub     len2, lens0
-        sub     len2, lens1
-        sub     len2, lens2
-        sub     len2, lens3
-        shr     $32,len2
-        mov     lens0, _lens + 0*8(state)
-        mov     lens1, _lens + 1*8(state)
-        mov     lens2, _lens + 2*8(state)
-        mov     lens3, _lens + 3*8(state)
-
-        # "state" and "args" are the same address, arg1
-        # len is arg2
-        call    sha512_x4_avx2
-        # state and idx are intact
-
-len_is_0:
-        # process completed job "idx"
-	imul    $_LANE_DATA_size, idx, lane_data
-        lea     _ldata(state, lane_data), lane_data
-
-        mov     _job_in_lane(lane_data), job_rax
-        movq    $0,  _job_in_lane(lane_data)
-        movl    $STS_COMPLETED, _status(job_rax)
-        mov     _unused_lanes(state), unused_lanes
-        shl     $8, unused_lanes
-        or      idx, unused_lanes
-        mov     unused_lanes, _unused_lanes(state)
-
-	movl    $0xFFFFFFFF, _lens+4(state,  idx, 8)
-
-	vmovq _args_digest+0*32(state, idx, 8), %xmm0
-        vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
-	vmovq _args_digest+2*32(state, idx, 8), %xmm1
-        vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
-	vmovq _args_digest+4*32(state, idx, 8), %xmm2
-        vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
-	vmovq _args_digest+6*32(state, idx, 8), %xmm3
-	vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
-
-	vmovdqu %xmm0, _result_digest(job_rax)
-	vmovdqu %xmm1, _result_digest+1*16(job_rax)
-	vmovdqu %xmm2, _result_digest+2*16(job_rax)
-	vmovdqu %xmm3, _result_digest+3*16(job_rax)
-
-return:
-	pop	%rbx
-	FRAME_END
-        ret
-
-return_null:
-        xor     job_rax, job_rax
-        jmp     return
-ENDPROC(sha512_mb_mgr_flush_avx2)
-.align 16
-
-ENTRY(sha512_mb_mgr_get_comp_job_avx2)
-        push    %rbx
-
-	mov     _unused_lanes(state), unused_lanes
-        bt      $(32+7), unused_lanes
-        jc      .return_null
-
-        # Find min length
-        mov     _lens(state),lens0
-        mov     lens0,idx
-        mov     _lens+1*8(state),lens1
-        cmp     idx,lens1
-        cmovb   lens1,idx
-        mov     _lens+2*8(state),lens2
-        cmp     idx,lens2
-        cmovb   lens2,idx
-        mov     _lens+3*8(state),lens3
-        cmp     idx,lens3
-        cmovb   lens3,idx
-        test    $~0xF,idx
-        jnz     .return_null
-        and     $0xF,idx
-
-        #process completed job "idx"
-	imul    $_LANE_DATA_size, idx, lane_data
-        lea     _ldata(state, lane_data), lane_data
-
-        mov     _job_in_lane(lane_data), job_rax
-        movq    $0,  _job_in_lane(lane_data)
-        movl    $STS_COMPLETED, _status(job_rax)
-        mov     _unused_lanes(state), unused_lanes
-        shl     $8, unused_lanes
-        or      idx, unused_lanes
-        mov     unused_lanes, _unused_lanes(state)
-
-        movl    $0xFFFFFFFF, _lens+4(state,  idx, 8)
-
-	vmovq   _args_digest(state, idx, 8), %xmm0
-        vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
-	vmovq    _args_digest+2*32(state, idx, 8), %xmm1
-        vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
-	vmovq    _args_digest+4*32(state, idx, 8), %xmm2
-        vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
-        vmovq    _args_digest+6*32(state, idx, 8), %xmm3
-        vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
-
-	vmovdqu %xmm0, _result_digest+0*16(job_rax)
-	vmovdqu %xmm1, _result_digest+1*16(job_rax)
-	vmovdqu %xmm2, _result_digest+2*16(job_rax)
-	vmovdqu %xmm3, _result_digest+3*16(job_rax)
-
-	pop     %rbx
-
-        ret
-
-.return_null:
-        xor     job_rax, job_rax
-	pop     %rbx
-        ret
-ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
-
-.section	.rodata.cst8.one, "aM", @progbits, 8
-.align 8
-one:
-.quad  1
-
-.section	.rodata.cst8.two, "aM", @progbits, 8
-.align 8
-two:
-.quad  2
-
-.section	.rodata.cst8.three, "aM", @progbits, 8
-.align 8
-three:
-.quad  3
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
deleted file mode 100644
index 4ba709ba78e5..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Buffer submit code for multi buffer SHA512 algorithm
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *     Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-.extern sha512_x4_avx2
-
-#define arg1    %rdi
-#define arg2    %rsi
-
-#define idx             %rdx
-#define last_len        %rdx
-
-#define size_offset     %rcx
-#define tmp2            %rcx
-
-# Common definitions
-#define state   arg1
-#define job     arg2
-#define len2    arg2
-#define p2      arg2
-
-#define p               %r11
-#define start_offset    %r11
-
-#define unused_lanes    %rbx
-
-#define job_rax         %rax
-#define len             %rax
-
-#define lane            %r12
-#define tmp3            %r12
-#define lens3           %r12
-
-#define extra_blocks    %r8
-#define lens0           %r8
-
-#define tmp             %r9
-#define lens1           %r9
-
-#define lane_data       %r10
-#define lens2           %r10
-
-#define DWORD_len %eax
-
-# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job)
-# arg 1 : rcx : state
-# arg 2 : rdx : job
-ENTRY(sha512_mb_mgr_submit_avx2)
-	FRAME_BEGIN
-	push	%rbx
-	push	%r12
-
-        mov     _unused_lanes(state), unused_lanes
-        movzb     %bl,lane
-        shr     $8, unused_lanes
-        imul    $_LANE_DATA_size, lane,lane_data
-        movl    $STS_BEING_PROCESSED, _status(job)
-	lea     _ldata(state, lane_data), lane_data
-        mov     unused_lanes, _unused_lanes(state)
-        movl    _len(job),  DWORD_len
-
-	mov     job, _job_in_lane(lane_data)
-        movl    DWORD_len,_lens+4(state , lane, 8)
-
-	# Load digest words from result_digest
-	vmovdqu	_result_digest+0*16(job), %xmm0
-	vmovdqu _result_digest+1*16(job), %xmm1
-	vmovdqu	_result_digest+2*16(job), %xmm2
-        vmovdqu	_result_digest+3*16(job), %xmm3
-
-	vmovq    %xmm0, _args_digest(state, lane, 8)
-	vpextrq  $1, %xmm0, _args_digest+1*32(state , lane, 8)
-	vmovq    %xmm1, _args_digest+2*32(state , lane, 8)
-	vpextrq  $1, %xmm1, _args_digest+3*32(state , lane, 8)
-	vmovq    %xmm2, _args_digest+4*32(state , lane, 8)
-	vpextrq  $1, %xmm2, _args_digest+5*32(state , lane, 8)
-	vmovq    %xmm3, _args_digest+6*32(state , lane, 8)
-	vpextrq  $1, %xmm3, _args_digest+7*32(state , lane, 8)
-
-	mov     _buffer(job), p
-	mov     p, _args_data_ptr(state, lane, 8)
-
-	cmp     $0xFF, unused_lanes
-	jne     return_null
-
-start_loop:
-
-	# Find min length
-	mov     _lens+0*8(state),lens0
-	mov     lens0,idx
-	mov     _lens+1*8(state),lens1
-	cmp     idx,lens1
-	cmovb   lens1, idx
-	mov     _lens+2*8(state),lens2
-	cmp     idx,lens2
-	cmovb   lens2,idx
-	mov     _lens+3*8(state),lens3
-	cmp     idx,lens3
-	cmovb   lens3,idx
-	mov     idx,len2
-	and     $0xF,idx
-	and     $~0xFF,len2
-	jz      len_is_0
-
-	sub     len2,lens0
-	sub     len2,lens1
-	sub     len2,lens2
-	sub     len2,lens3
-	shr     $32,len2
-	mov     lens0, _lens + 0*8(state)
-	mov     lens1, _lens + 1*8(state)
-	mov     lens2, _lens + 2*8(state)
-	mov     lens3, _lens + 3*8(state)
-
-	# "state" and "args" are the same address, arg1
-	# len is arg2
-	call    sha512_x4_avx2
-	# state and idx are intact
-
-len_is_0:
-
-	# process completed job "idx"
-	imul    $_LANE_DATA_size, idx, lane_data
-	lea     _ldata(state, lane_data), lane_data
-
-	mov     _job_in_lane(lane_data), job_rax
-	mov     _unused_lanes(state), unused_lanes
-	movq    $0, _job_in_lane(lane_data)
-	movl    $STS_COMPLETED, _status(job_rax)
-	shl     $8, unused_lanes
-	or      idx, unused_lanes
-	mov     unused_lanes, _unused_lanes(state)
-
-	movl	$0xFFFFFFFF,_lens+4(state,idx,8)
-	vmovq    _args_digest+0*32(state , idx, 8), %xmm0
-	vpinsrq  $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0
-	vmovq    _args_digest+2*32(state , idx, 8), %xmm1
-	vpinsrq  $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1
-	vmovq    _args_digest+4*32(state , idx, 8), %xmm2
-	vpinsrq  $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2
-	vmovq    _args_digest+6*32(state , idx, 8), %xmm3
-	vpinsrq  $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3
-
-	vmovdqu  %xmm0, _result_digest + 0*16(job_rax)
-	vmovdqu  %xmm1, _result_digest + 1*16(job_rax)
-	vmovdqu  %xmm2, _result_digest + 2*16(job_rax)
-	vmovdqu  %xmm3, _result_digest + 3*16(job_rax)
-
-return:
-	pop	%r12
-	pop	%rbx
-	FRAME_END
-	ret
-
-return_null:
-	xor     job_rax, job_rax
-	jmp     return
-ENDPROC(sha512_mb_mgr_submit_avx2)
-
-/* UNUSED?
-.section	.rodata.cst16, "aM", @progbits, 16
-.align 16
-H0:     .int  0x6a09e667
-H1:     .int  0xbb67ae85
-H2:     .int  0x3c6ef372
-H3:     .int  0xa54ff53a
-H4:     .int  0x510e527f
-H5:     .int  0x9b05688c
-H6:     .int  0x1f83d9ab
-H7:     .int  0x5be0cd19
-*/
diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
deleted file mode 100644
index e22e907643a6..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
+++ /dev/null
@@ -1,531 +0,0 @@
-/*
- * Multi-buffer SHA512 algorithm hash compute routine
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *     Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# code to compute quad SHA512 using AVX2
-# use YMMs to tackle the larger digest size
-# outer calling routine takes care of save and restore of XMM registers
-# Logic designed/laid out by JDG
-
-# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15
-# Stack must be aligned to 32 bytes before call
-# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12
-# Linux preserves: rcx rdx rdi rbp r13 r14 r15
-# clobbers ymm0-15
-
-#include <linux/linkage.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-arg1 = %rdi
-arg2 = %rsi
-
-# Common definitions
-STATE = arg1
-INP_SIZE = arg2
-
-IDX = %rax
-ROUND = %rbx
-TBL = %r8
-
-inp0 = %r9
-inp1 = %r10
-inp2 = %r11
-inp3 = %r12
-
-a = %ymm0
-b = %ymm1
-c = %ymm2
-d = %ymm3
-e = %ymm4
-f = %ymm5
-g = %ymm6
-h = %ymm7
-
-a0 = %ymm8
-a1 = %ymm9
-a2 = %ymm10
-
-TT0 = %ymm14
-TT1 = %ymm13
-TT2 = %ymm12
-TT3 = %ymm11
-TT4 = %ymm10
-TT5 = %ymm9
-
-T1 = %ymm14
-TMP = %ymm15
-
-# Define stack usage
-STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24
-
-#define VMOVPD	vmovupd
-_digest = SZ4*16
-
-# transpose r0, r1, r2, r3, t0, t1
-# "transpose" data in {r0..r3} using temps {t0..t3}
-# Input looks like: {r0 r1 r2 r3}
-# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
-# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
-# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
-# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
-#
-# output looks like: {t0 r1 r0 r3}
-# t0 = {d1 d0 c1 c0 b1 b0 a1 a0}
-# r1 = {d3 d2 c3 c2 b3 b2 a3 a2}
-# r0 = {d5 d4 c5 c4 b5 b4 a5 a4}
-# r3 = {d7 d6 c7 c6 b7 b6 a7 a6}
-
-.macro TRANSPOSE r0 r1 r2 r3 t0 t1
-	vshufps  $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4   b1 b0 a1 a0}
-        vshufps  $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6   b3 b2 a3 a2}
-        vshufps  $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4   d1 d0 c1 c0}
-        vshufps  $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6   d3 d2 c3 c2}
-
-	vperm2f128      $0x20, \r2, \r0, \r1  # h6...a6
-        vperm2f128      $0x31, \r2, \r0, \r3  # h2...a2
-        vperm2f128      $0x31, \t1, \t0, \r0  # h5...a5
-        vperm2f128      $0x20, \t1, \t0, \t0  # h1...a1
-.endm
-
-.macro ROTATE_ARGS
-TMP_ = h
-h = g
-g = f
-f = e
-e = d
-d = c
-c = b
-b = a
-a = TMP_
-.endm
-
-# PRORQ reg, imm, tmp
-# packed-rotate-right-double
-# does a rotate by doing two shifts and an or
-.macro _PRORQ reg imm tmp
-	vpsllq	$(64-\imm),\reg,\tmp
-	vpsrlq	$\imm,\reg, \reg
-	vpor	\tmp,\reg, \reg
-.endm
-
-# non-destructive
-# PRORQ_nd reg, imm, tmp, src
-.macro _PRORQ_nd reg imm tmp src
-	vpsllq	$(64-\imm), \src, \tmp
-	vpsrlq	$\imm, \src, \reg
-	vpor	\tmp, \reg, \reg
-.endm
-
-# PRORQ dst/src, amt
-.macro PRORQ reg imm
-	_PRORQ	\reg, \imm, TMP
-.endm
-
-# PRORQ_nd dst, src, amt
-.macro PRORQ_nd reg tmp imm
-	_PRORQ_nd	\reg, \imm, TMP, \tmp
-.endm
-
-#; arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_00_15 _T1 i
-	PRORQ_nd a0, e, (18-14)	# sig1: a0 = (e >> 4)
-
-	vpxor   g, f, a2        # ch: a2 = f^g
-        vpand   e,a2, a2                # ch: a2 = (f^g)&e
-        vpxor   g, a2, a2               # a2 = ch
-
-        PRORQ_nd        a1,e,41         # sig1: a1 = (e >> 25)
-
-        offset = SZ4*(\i & 0xf)
-        vmovdqu \_T1,offset(%rsp)
-        vpaddq  (TBL,ROUND,1), \_T1, \_T1       # T1 = W + K
-        vpxor   e,a0, a0        # sig1: a0 = e ^ (e >> 5)
-        PRORQ   a0, 14           # sig1: a0 = (e >> 6) ^ (e >> 11)
-        vpaddq  a2, h, h        # h = h + ch
-        PRORQ_nd        a2,a,6  # sig0: a2 = (a >> 11)
-        vpaddq  \_T1,h, h       # h = h + ch + W + K
-        vpxor   a1, a0, a0      # a0 = sigma1
-	vmovdqu a,\_T1
-        PRORQ_nd        a1,a,39 # sig0: a1 = (a >> 22)
-        vpxor   c, \_T1, \_T1      # maj: T1 = a^c
-        add     $SZ4, ROUND     # ROUND++
-        vpand   b, \_T1, \_T1   # maj: T1 = (a^c)&b
-        vpaddq  a0, h, h
-        vpaddq  h, d, d
-        vpxor   a, a2, a2       # sig0: a2 = a ^ (a >> 11)
-        PRORQ   a2,28            # sig0: a2 = (a >> 2) ^ (a >> 13)
-        vpxor   a1, a2, a2      # a2 = sig0
-        vpand   c, a, a1        # maj: a1 = a&c
-        vpor    \_T1, a1, a1    # a1 = maj
-        vpaddq  a1, h, h        # h = h + ch + W + K + maj
-        vpaddq  a2, h, h        # h = h + ch + W + K + maj + sigma0
-        ROTATE_ARGS
-.endm
-
-
-#; arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_16_XX _T1 i
-	vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1
-        vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1
-        vmovdqu \_T1, a0
-        PRORQ   \_T1,7
-        vmovdqu a1, a2
-        PRORQ   a1,42
-        vpxor   a0, \_T1, \_T1
-        PRORQ   \_T1, 1
-        vpxor   a2, a1, a1
-        PRORQ   a1, 19
-        vpsrlq  $7, a0, a0
-        vpxor   a0, \_T1, \_T1
-        vpsrlq  $6, a2, a2
-        vpxor   a2, a1, a1
-        vpaddq  SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1
-        vpaddq  SZ4*((\i-7)&0xf)(%rsp), a1, a1
-        vpaddq  a1, \_T1, \_T1
-
-        ROUND_00_15 \_T1,\i
-.endm
-
-
-# void sha512_x4_avx2(void *STATE, const int INP_SIZE)
-# arg 1 : STATE    : pointer to input data
-# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1)
-ENTRY(sha512_x4_avx2)
-	# general registers preserved in outer calling routine
-	# outer calling routine saves all the XMM registers
-	# save callee-saved clobbered registers to comply with C function ABI
-	push    %r12
-	push    %r13
-	push    %r14
-	push    %r15
-
-	sub     $STACK_SPACE1, %rsp
-
-        # Load the pre-transposed incoming digest.
-        vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a
-        vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b
-        vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c
-        vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d
-        vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e
-        vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f
-        vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g
-        vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h
-
-        lea     K512_4(%rip),TBL
-
-        # load the address of each of the 4 message lanes
-        # getting ready to transpose input onto stack
-        mov     _data_ptr+0*PTR_SZ(STATE),inp0
-        mov     _data_ptr+1*PTR_SZ(STATE),inp1
-        mov     _data_ptr+2*PTR_SZ(STATE),inp2
-        mov     _data_ptr+3*PTR_SZ(STATE),inp3
-
-        xor     IDX, IDX
-lloop:
-        xor     ROUND, ROUND
-
-	# save old digest
-        vmovdqu a, _digest(%rsp)
-        vmovdqu b, _digest+1*SZ4(%rsp)
-        vmovdqu c, _digest+2*SZ4(%rsp)
-        vmovdqu d, _digest+3*SZ4(%rsp)
-        vmovdqu e, _digest+4*SZ4(%rsp)
-        vmovdqu f, _digest+5*SZ4(%rsp)
-        vmovdqu g, _digest+6*SZ4(%rsp)
-        vmovdqu h, _digest+7*SZ4(%rsp)
-        i = 0
-.rep 4
-	vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP
-        VMOVPD  i*32(inp0, IDX), TT2
-        VMOVPD  i*32(inp1, IDX), TT1
-        VMOVPD  i*32(inp2, IDX), TT4
-        VMOVPD  i*32(inp3, IDX), TT3
-	TRANSPOSE	TT2, TT1, TT4, TT3, TT0, TT5
-	vpshufb	TMP, TT0, TT0
-	vpshufb	TMP, TT1, TT1
-	vpshufb	TMP, TT2, TT2
-	vpshufb	TMP, TT3, TT3
-	ROUND_00_15	TT0,(i*4+0)
-	ROUND_00_15	TT1,(i*4+1)
-	ROUND_00_15	TT2,(i*4+2)
-	ROUND_00_15	TT3,(i*4+3)
-	i = (i+1)
-.endr
-        add     $128, IDX
-
-        i = (i*4)
-
-        jmp     Lrounds_16_xx
-.align 16
-Lrounds_16_xx:
-.rep 16
-        ROUND_16_XX     T1, i
-        i = (i+1)
-.endr
-        cmp     $0xa00,ROUND
-        jb      Lrounds_16_xx
-
-	# add old digest
-        vpaddq  _digest(%rsp), a, a
-        vpaddq  _digest+1*SZ4(%rsp), b, b
-        vpaddq  _digest+2*SZ4(%rsp), c, c
-        vpaddq  _digest+3*SZ4(%rsp), d, d
-        vpaddq  _digest+4*SZ4(%rsp), e, e
-        vpaddq  _digest+5*SZ4(%rsp), f, f
-        vpaddq  _digest+6*SZ4(%rsp), g, g
-        vpaddq  _digest+7*SZ4(%rsp), h, h
-
-        sub     $1, INP_SIZE  # unit is blocks
-        jne     lloop
-
-        # write back to memory (state object) the transposed digest
-        vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE)
-
-	# update input data pointers
-	add     IDX, inp0
-        mov     inp0, _data_ptr+0*PTR_SZ(STATE)
-        add     IDX, inp1
-        mov     inp1, _data_ptr+1*PTR_SZ(STATE)
-        add     IDX, inp2
-        mov     inp2, _data_ptr+2*PTR_SZ(STATE)
-        add     IDX, inp3
-        mov     inp3, _data_ptr+3*PTR_SZ(STATE)
-
-	#;;;;;;;;;;;;;;;
-	#; Postamble
-	add $STACK_SPACE1, %rsp
-	# restore callee-saved clobbered registers
-
-	pop     %r15
-	pop     %r14
-	pop     %r13
-	pop     %r12
-
-	# outer calling routine restores XMM and other GP registers
-	ret
-ENDPROC(sha512_x4_avx2)
-
-.section	.rodata.K512_4, "a", @progbits
-.align 64
-K512_4:
-	.octa 0x428a2f98d728ae22428a2f98d728ae22,\
-		0x428a2f98d728ae22428a2f98d728ae22
-	.octa 0x7137449123ef65cd7137449123ef65cd,\
-		0x7137449123ef65cd7137449123ef65cd
-	.octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\
-		0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f
-	.octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\
-		0xe9b5dba58189dbbce9b5dba58189dbbc
-	.octa 0x3956c25bf348b5383956c25bf348b538,\
-		0x3956c25bf348b5383956c25bf348b538
-	.octa 0x59f111f1b605d01959f111f1b605d019,\
-		0x59f111f1b605d01959f111f1b605d019
-	.octa 0x923f82a4af194f9b923f82a4af194f9b,\
-		0x923f82a4af194f9b923f82a4af194f9b
-	.octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\
-		0xab1c5ed5da6d8118ab1c5ed5da6d8118
-	.octa 0xd807aa98a3030242d807aa98a3030242,\
-		0xd807aa98a3030242d807aa98a3030242
-	.octa 0x12835b0145706fbe12835b0145706fbe,\
-		0x12835b0145706fbe12835b0145706fbe
-	.octa 0x243185be4ee4b28c243185be4ee4b28c,\
-		0x243185be4ee4b28c243185be4ee4b28c
-	.octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\
-		0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2
-	.octa 0x72be5d74f27b896f72be5d74f27b896f,\
-		0x72be5d74f27b896f72be5d74f27b896f
-	.octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\
-		0x80deb1fe3b1696b180deb1fe3b1696b1
-	.octa 0x9bdc06a725c712359bdc06a725c71235,\
-		0x9bdc06a725c712359bdc06a725c71235
-	.octa 0xc19bf174cf692694c19bf174cf692694,\
-		0xc19bf174cf692694c19bf174cf692694
-	.octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\
-		0xe49b69c19ef14ad2e49b69c19ef14ad2
-	.octa 0xefbe4786384f25e3efbe4786384f25e3,\
-		0xefbe4786384f25e3efbe4786384f25e3
-	.octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\
-		0x0fc19dc68b8cd5b50fc19dc68b8cd5b5
-	.octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\
-		0x240ca1cc77ac9c65240ca1cc77ac9c65
-	.octa 0x2de92c6f592b02752de92c6f592b0275,\
-		0x2de92c6f592b02752de92c6f592b0275
-	.octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\
-		0x4a7484aa6ea6e4834a7484aa6ea6e483
-	.octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\
-		0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4
-	.octa 0x76f988da831153b576f988da831153b5,\
-		0x76f988da831153b576f988da831153b5
-	.octa 0x983e5152ee66dfab983e5152ee66dfab,\
-		0x983e5152ee66dfab983e5152ee66dfab
-	.octa 0xa831c66d2db43210a831c66d2db43210,\
-		0xa831c66d2db43210a831c66d2db43210
-	.octa 0xb00327c898fb213fb00327c898fb213f,\
-		0xb00327c898fb213fb00327c898fb213f
-	.octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\
-		0xbf597fc7beef0ee4bf597fc7beef0ee4
-	.octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\
-		0xc6e00bf33da88fc2c6e00bf33da88fc2
-	.octa 0xd5a79147930aa725d5a79147930aa725,\
-		0xd5a79147930aa725d5a79147930aa725
-	.octa 0x06ca6351e003826f06ca6351e003826f,\
-		0x06ca6351e003826f06ca6351e003826f
-	.octa 0x142929670a0e6e70142929670a0e6e70,\
-		0x142929670a0e6e70142929670a0e6e70
-	.octa 0x27b70a8546d22ffc27b70a8546d22ffc,\
-		0x27b70a8546d22ffc27b70a8546d22ffc
-	.octa 0x2e1b21385c26c9262e1b21385c26c926,\
-		0x2e1b21385c26c9262e1b21385c26c926
-	.octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\
-		0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed
-	.octa 0x53380d139d95b3df53380d139d95b3df,\
-		0x53380d139d95b3df53380d139d95b3df
-	.octa 0x650a73548baf63de650a73548baf63de,\
-		0x650a73548baf63de650a73548baf63de
-	.octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\
-		0x766a0abb3c77b2a8766a0abb3c77b2a8
-	.octa 0x81c2c92e47edaee681c2c92e47edaee6,\
-		0x81c2c92e47edaee681c2c92e47edaee6
-	.octa 0x92722c851482353b92722c851482353b,\
-		0x92722c851482353b92722c851482353b
-	.octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\
-		0xa2bfe8a14cf10364a2bfe8a14cf10364
-	.octa 0xa81a664bbc423001a81a664bbc423001,\
-		0xa81a664bbc423001a81a664bbc423001
-	.octa 0xc24b8b70d0f89791c24b8b70d0f89791,\
-		0xc24b8b70d0f89791c24b8b70d0f89791
-	.octa 0xc76c51a30654be30c76c51a30654be30,\
-		0xc76c51a30654be30c76c51a30654be30
-	.octa 0xd192e819d6ef5218d192e819d6ef5218,\
-		0xd192e819d6ef5218d192e819d6ef5218
-	.octa 0xd69906245565a910d69906245565a910,\
-		0xd69906245565a910d69906245565a910
-	.octa 0xf40e35855771202af40e35855771202a,\
-		0xf40e35855771202af40e35855771202a
-	.octa 0x106aa07032bbd1b8106aa07032bbd1b8,\
-		0x106aa07032bbd1b8106aa07032bbd1b8
-	.octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\
-		0x19a4c116b8d2d0c819a4c116b8d2d0c8
-	.octa 0x1e376c085141ab531e376c085141ab53,\
-		0x1e376c085141ab531e376c085141ab53
-	.octa 0x2748774cdf8eeb992748774cdf8eeb99,\
-		0x2748774cdf8eeb992748774cdf8eeb99
-	.octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\
-		0x34b0bcb5e19b48a834b0bcb5e19b48a8
-	.octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\
-		0x391c0cb3c5c95a63391c0cb3c5c95a63
-	.octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\
-		0x4ed8aa4ae3418acb4ed8aa4ae3418acb
-	.octa 0x5b9cca4f7763e3735b9cca4f7763e373,\
-		0x5b9cca4f7763e3735b9cca4f7763e373
-	.octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\
-		0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3
-	.octa 0x748f82ee5defb2fc748f82ee5defb2fc,\
-		0x748f82ee5defb2fc748f82ee5defb2fc
-	.octa 0x78a5636f43172f6078a5636f43172f60,\
-		0x78a5636f43172f6078a5636f43172f60
-	.octa 0x84c87814a1f0ab7284c87814a1f0ab72,\
-		0x84c87814a1f0ab7284c87814a1f0ab72
-	.octa 0x8cc702081a6439ec8cc702081a6439ec,\
-		0x8cc702081a6439ec8cc702081a6439ec
-	.octa 0x90befffa23631e2890befffa23631e28,\
-		0x90befffa23631e2890befffa23631e28
-	.octa 0xa4506cebde82bde9a4506cebde82bde9,\
-		0xa4506cebde82bde9a4506cebde82bde9
-	.octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\
-		0xbef9a3f7b2c67915bef9a3f7b2c67915
-	.octa 0xc67178f2e372532bc67178f2e372532b,\
-		0xc67178f2e372532bc67178f2e372532b
-	.octa 0xca273eceea26619cca273eceea26619c,\
-		0xca273eceea26619cca273eceea26619c
-	.octa 0xd186b8c721c0c207d186b8c721c0c207,\
-		0xd186b8c721c0c207d186b8c721c0c207
-	.octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\
-		0xeada7dd6cde0eb1eeada7dd6cde0eb1e
-	.octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\
-		0xf57d4f7fee6ed178f57d4f7fee6ed178
-	.octa 0x06f067aa72176fba06f067aa72176fba,\
-		0x06f067aa72176fba06f067aa72176fba
-	.octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\
-		0x0a637dc5a2c898a60a637dc5a2c898a6
-	.octa 0x113f9804bef90dae113f9804bef90dae,\
-		0x113f9804bef90dae113f9804bef90dae
-	.octa 0x1b710b35131c471b1b710b35131c471b,\
-		0x1b710b35131c471b1b710b35131c471b
-	.octa 0x28db77f523047d8428db77f523047d84,\
-		0x28db77f523047d8428db77f523047d84
-	.octa 0x32caab7b40c7249332caab7b40c72493,\
-		0x32caab7b40c7249332caab7b40c72493
-	.octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\
-		0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc
-	.octa 0x431d67c49c100d4c431d67c49c100d4c,\
-		0x431d67c49c100d4c431d67c49c100d4c
-	.octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\
-		0x4cc5d4becb3e42b64cc5d4becb3e42b6
-	.octa 0x597f299cfc657e2a597f299cfc657e2a,\
-		0x597f299cfc657e2a597f299cfc657e2a
-	.octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\
-		0x5fcb6fab3ad6faec5fcb6fab3ad6faec
-	.octa 0x6c44198c4a4758176c44198c4a475817,\
-		0x6c44198c4a4758176c44198c4a475817
-
-.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
-.align 32
-PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
-                         .octa 0x18191a1b1c1d1e1f1011121314151617
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index a0f46bdd9f24..fab4df16a3c4 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -12,38 +12,23 @@
 #include <asm/user32.h>
 #include <asm/unistd.h>
 
+#include <asm-generic/compat.h>
+
 #define COMPAT_USER_HZ		100
 #define COMPAT_UTS_MACHINE	"i686\0\0"
 
-typedef u32		compat_size_t;
-typedef s32		compat_ssize_t;
-typedef s32		compat_clock_t;
-typedef s32		compat_pid_t;
 typedef u16		__compat_uid_t;
 typedef u16		__compat_gid_t;
 typedef u32		__compat_uid32_t;
 typedef u32		__compat_gid32_t;
 typedef u16		compat_mode_t;
-typedef u32		compat_ino_t;
 typedef u16		compat_dev_t;
-typedef s32		compat_off_t;
-typedef s64		compat_loff_t;
 typedef u16		compat_nlink_t;
 typedef u16		compat_ipc_pid_t;
-typedef s32		compat_daddr_t;
 typedef u32		compat_caddr_t;
 typedef __kernel_fsid_t	compat_fsid_t;
-typedef s32		compat_timer_t;
-typedef s32		compat_key_t;
-
-typedef s32		compat_int_t;
-typedef s32		compat_long_t;
 typedef s64 __attribute__((aligned(4))) compat_s64;
-typedef u32		compat_uint_t;
-typedef u32		compat_ulong_t;
-typedef u32		compat_u32;
 typedef u64 __attribute__((aligned(4))) compat_u64;
-typedef u32		compat_uptr_t;
 
 struct compat_stat {
 	compat_dev_t	st_dev;
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5f26962eff42..67ed72f31cc2 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -45,6 +45,8 @@ struct vcpu_data {
 
 #ifdef CONFIG_IRQ_REMAP
 
+extern raw_spinlock_t irq_2_ir_lock;
+
 extern bool irq_remapping_cap(enum irq_remap_cap cap);
 extern void set_irq_remapping_broken(void);
 extern int irq_remapping_prepare(void);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 09b2e3e2cf1b..55e51ff7e421 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -102,7 +102,15 @@
 #define UNMAPPED_GVA (~(gpa_t)0)
 
 /* KVM Hugepage definitions for x86 */
-#define KVM_NR_PAGE_SIZES	3
+enum {
+	PT_PAGE_TABLE_LEVEL   = 1,
+	PT_DIRECTORY_LEVEL    = 2,
+	PT_PDPE_LEVEL         = 3,
+	/* set max level to the biggest one */
+	PT_MAX_HUGEPAGE_LEVEL = PT_PDPE_LEVEL,
+};
+#define KVM_NR_PAGE_SIZES	(PT_MAX_HUGEPAGE_LEVEL - \
+				 PT_PAGE_TABLE_LEVEL + 1)
 #define KVM_HPAGE_GFN_SHIFT(x)	(((x) - 1) * 9)
 #define KVM_HPAGE_SHIFT(x)	(PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
 #define KVM_HPAGE_SIZE(x)	(1UL << KVM_HPAGE_SHIFT(x))
@@ -177,6 +185,7 @@ enum {
 
 #define DR6_BD		(1 << 13)
 #define DR6_BS		(1 << 14)
+#define DR6_BT		(1 << 15)
 #define DR6_RTM		(1 << 16)
 #define DR6_FIXED_1	0xfffe0ff0
 #define DR6_INIT	0xffff0ff0
@@ -247,7 +256,7 @@ struct kvm_mmu_memory_cache {
  * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
  */
 union kvm_mmu_page_role {
-	unsigned word;
+	u32 word;
 	struct {
 		unsigned level:4;
 		unsigned cr4_pae:1;
@@ -273,6 +282,34 @@ union kvm_mmu_page_role {
 	};
 };
 
+union kvm_mmu_extended_role {
+/*
+ * This structure complements kvm_mmu_page_role caching everything needed for
+ * MMU configuration. If nothing in both these structures changed, MMU
+ * re-configuration can be skipped. @valid bit is set on first usage so we don't
+ * treat all-zero structure as valid data.
+ */
+	u32 word;
+	struct {
+		unsigned int valid:1;
+		unsigned int execonly:1;
+		unsigned int cr0_pg:1;
+		unsigned int cr4_pse:1;
+		unsigned int cr4_pke:1;
+		unsigned int cr4_smap:1;
+		unsigned int cr4_smep:1;
+		unsigned int cr4_la57:1;
+	};
+};
+
+union kvm_mmu_role {
+	u64 as_u64;
+	struct {
+		union kvm_mmu_page_role base;
+		union kvm_mmu_extended_role ext;
+	};
+};
+
 struct kvm_rmap_head {
 	unsigned long val;
 };
@@ -280,18 +317,18 @@ struct kvm_rmap_head {
 struct kvm_mmu_page {
 	struct list_head link;
 	struct hlist_node hash_link;
+	bool unsync;
 
 	/*
 	 * The following two entries are used to key the shadow page in the
 	 * hash table.
 	 */
-	gfn_t gfn;
 	union kvm_mmu_page_role role;
+	gfn_t gfn;
 
 	u64 *spt;
 	/* hold the gfn of each spte inside spt */
 	gfn_t *gfns;
-	bool unsync;
 	int root_count;          /* Currently serving as active root */
 	unsigned int unsync_children;
 	struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
@@ -360,7 +397,7 @@ struct kvm_mmu {
 	void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			   u64 *spte, const void *pte);
 	hpa_t root_hpa;
-	union kvm_mmu_page_role base_role;
+	union kvm_mmu_role mmu_role;
 	u8 root_level;
 	u8 shadow_root_level;
 	u8 ept_ad;
@@ -490,7 +527,7 @@ struct kvm_vcpu_hv {
 	struct kvm_hyperv_exit exit;
 	struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
 	DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
-	cpumask_t tlb_lush;
+	cpumask_t tlb_flush;
 };
 
 struct kvm_vcpu_arch {
@@ -534,7 +571,13 @@ struct kvm_vcpu_arch {
 	 * the paging mode of the l1 guest. This context is always used to
 	 * handle faults.
 	 */
-	struct kvm_mmu mmu;
+	struct kvm_mmu *mmu;
+
+	/* Non-nested MMU for L1 */
+	struct kvm_mmu root_mmu;
+
+	/* L1 MMU when running nested */
+	struct kvm_mmu guest_mmu;
 
 	/*
 	 * Paging state of an L2 guest (used for nested npt)
@@ -585,6 +628,8 @@ struct kvm_vcpu_arch {
 		bool has_error_code;
 		u8 nr;
 		u32 error_code;
+		unsigned long payload;
+		bool has_payload;
 		u8 nested_apf;
 	} exception;
 
@@ -781,6 +826,9 @@ struct kvm_hv {
 	u64 hv_reenlightenment_control;
 	u64 hv_tsc_emulation_control;
 	u64 hv_tsc_emulation_status;
+
+	/* How many vCPUs have VP index != vCPU index */
+	atomic_t num_mismatched_vp_indexes;
 };
 
 enum kvm_irqchip_mode {
@@ -871,6 +919,7 @@ struct kvm_arch {
 	bool x2apic_broadcast_quirk_disabled;
 
 	bool guest_can_read_msr_platform_info;
+	bool exception_payload_enabled;
 };
 
 struct kvm_vm_stat {
@@ -1133,6 +1182,9 @@ struct kvm_x86_ops {
 	int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
 
 	int (*get_msr_feature)(struct kvm_msr_entry *entry);
+
+	int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
+				   uint16_t *vmcs_version);
 };
 
 struct kvm_arch_async_pf {
@@ -1170,7 +1222,6 @@ void kvm_mmu_module_exit(void);
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
-void kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
@@ -1324,7 +1375,8 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free);
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+			ulong roots_to_free);
 gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
 			   struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 51c4eee00732..dc4ed8bc2382 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -24,6 +24,7 @@
 #  include <asm/unistd_64.h>
 #  include <asm/unistd_64_x32.h>
 #  define __ARCH_WANT_COMPAT_SYS_TIME
+#  define __ARCH_WANT_SYS_UTIME32
 #  define __ARCH_WANT_COMPAT_SYS_PREADV64
 #  define __ARCH_WANT_COMPAT_SYS_PWRITEV64
 #  define __ARCH_WANT_COMPAT_SYS_PREADV64V2
@@ -31,13 +32,13 @@
 
 # endif
 
+# define __ARCH_WANT_NEW_STAT
 # define __ARCH_WANT_OLD_READDIR
 # define __ARCH_WANT_OLD_STAT
 # define __ARCH_WANT_SYS_ALARM
 # define __ARCH_WANT_SYS_FADVISE64
 # define __ARCH_WANT_SYS_GETHOSTNAME
 # define __ARCH_WANT_SYS_GETPGRP
-# define __ARCH_WANT_SYS_LLSEEK
 # define __ARCH_WANT_SYS_NICE
 # define __ARCH_WANT_SYS_OLDUMOUNT
 # define __ARCH_WANT_SYS_OLD_GETRLIMIT
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index e05e0d309244..1fc7a0d1e877 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -40,7 +40,7 @@ static inline int cpu_has_vmx(void)
  */
 static inline void cpu_vmxoff(void)
 {
-	asm volatile (ASM_VMX_VMXOFF : : : "cc");
+	asm volatile ("vmxoff");
 	cr4_clear_bits(X86_CR4_VMXE);
 }
 
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 9527ba5d62da..ade0f153947d 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -503,19 +503,6 @@ enum vmcs_field {
 
 #define VMX_EPT_IDENTITY_PAGETABLE_ADDR		0xfffbc000ul
 
-
-#define ASM_VMX_VMCLEAR_RAX       ".byte 0x66, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMLAUNCH          ".byte 0x0f, 0x01, 0xc2"
-#define ASM_VMX_VMRESUME          ".byte 0x0f, 0x01, 0xc3"
-#define ASM_VMX_VMPTRLD_RAX       ".byte 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMREAD_RDX_RAX    ".byte 0x0f, 0x78, 0xd0"
-#define ASM_VMX_VMWRITE_RAX_RDX   ".byte 0x0f, 0x79, 0xd0"
-#define ASM_VMX_VMWRITE_RSP_RDX   ".byte 0x0f, 0x79, 0xd4"
-#define ASM_VMX_VMXOFF            ".byte 0x0f, 0x01, 0xc4"
-#define ASM_VMX_VMXON_RAX         ".byte 0xf3, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_INVEPT		  ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
-#define ASM_VMX_INVVPID		  ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
-
 struct vmx_msr_entry {
 	u32 index;
 	u32 reserved;
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index fd23d5778ea1..dabfcf7c3941 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -288,6 +288,7 @@ struct kvm_reinject_control {
 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR	0x00000002
 #define KVM_VCPUEVENT_VALID_SHADOW	0x00000004
 #define KVM_VCPUEVENT_VALID_SMM		0x00000008
+#define KVM_VCPUEVENT_VALID_PAYLOAD	0x00000010
 
 /* Interrupt shadow states */
 #define KVM_X86_SHADOW_INT_MOV_SS	0x01
@@ -299,7 +300,7 @@ struct kvm_vcpu_events {
 		__u8 injected;
 		__u8 nr;
 		__u8 has_error_code;
-		__u8 pad;
+		__u8 pending;
 		__u32 error_code;
 	} exception;
 	struct {
@@ -322,7 +323,9 @@ struct kvm_vcpu_events {
 		__u8 smm_inside_nmi;
 		__u8 latched_init;
 	} smi;
-	__u32 reserved[9];
+	__u8 reserved[27];
+	__u8 exception_has_payload;
+	__u64 exception_payload;
 };
 
 /* for KVM_GET/SET_DEBUGREGS */
@@ -381,6 +384,7 @@ struct kvm_sync_regs {
 
 #define KVM_STATE_NESTED_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
+#define KVM_STATE_NESTED_EVMCS		0x00000004
 
 #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_SMM_VMXON	0x00000002
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index f39f3a06c26f..7299dcbf8e85 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -140,7 +140,7 @@ static void __init dtb_cpu_setup(void)
 	int ret;
 
 	version = GET_APIC_VERSION(apic_read(APIC_LVR));
-	for_each_node_by_type(dn, "cpu") {
+	for_each_of_cpu_node(dn) {
 		ret = of_property_read_u32(dn, "reg", &apic_id);
 		if (ret < 0) {
 			pr_warn("%pOF: missing local APIC ID\n", dn);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 01d209ab5481..4e80080f277a 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -36,6 +36,8 @@
 
 #include "trace.h"
 
+#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
+
 static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
 {
 	return atomic64_read(&synic->sint[sint]);
@@ -132,8 +134,10 @@ static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
 	struct kvm_vcpu *vcpu = NULL;
 	int i;
 
-	if (vpidx < KVM_MAX_VCPUS)
-		vcpu = kvm_get_vcpu(kvm, vpidx);
+	if (vpidx >= KVM_MAX_VCPUS)
+		return NULL;
+
+	vcpu = kvm_get_vcpu(kvm, vpidx);
 	if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
 		return vcpu;
 	kvm_for_each_vcpu(i, vcpu, kvm)
@@ -689,6 +693,24 @@ void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
 		stimer_cleanup(&hv_vcpu->stimer[i]);
 }
 
+bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
+{
+	if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
+		return false;
+	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
+}
+EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
+
+bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
+			    struct hv_vp_assist_page *assist_page)
+{
+	if (!kvm_hv_assist_page_enabled(vcpu))
+		return false;
+	return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
+				      assist_page, sizeof(*assist_page));
+}
+EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
+
 static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
 {
 	struct hv_message *msg = &stimer->msg;
@@ -1040,21 +1062,41 @@ static u64 current_task_runtime_100ns(void)
 
 static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 {
-	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
 
 	switch (msr) {
-	case HV_X64_MSR_VP_INDEX:
-		if (!host)
+	case HV_X64_MSR_VP_INDEX: {
+		struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+		int vcpu_idx = kvm_vcpu_get_idx(vcpu);
+		u32 new_vp_index = (u32)data;
+
+		if (!host || new_vp_index >= KVM_MAX_VCPUS)
 			return 1;
-		hv->vp_index = (u32)data;
+
+		if (new_vp_index == hv_vcpu->vp_index)
+			return 0;
+
+		/*
+		 * The VP index is initialized to vcpu_index by
+		 * kvm_hv_vcpu_postcreate so they initially match.  Now the
+		 * VP index is changing, adjust num_mismatched_vp_indexes if
+		 * it now matches or no longer matches vcpu_idx.
+		 */
+		if (hv_vcpu->vp_index == vcpu_idx)
+			atomic_inc(&hv->num_mismatched_vp_indexes);
+		else if (new_vp_index == vcpu_idx)
+			atomic_dec(&hv->num_mismatched_vp_indexes);
+
+		hv_vcpu->vp_index = new_vp_index;
 		break;
+	}
 	case HV_X64_MSR_VP_ASSIST_PAGE: {
 		u64 gfn;
 		unsigned long addr;
 
 		if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
-			hv->hv_vapic = data;
-			if (kvm_lapic_enable_pv_eoi(vcpu, 0))
+			hv_vcpu->hv_vapic = data;
+			if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
 				return 1;
 			break;
 		}
@@ -1062,12 +1104,19 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
 		if (kvm_is_error_hva(addr))
 			return 1;
-		if (__clear_user((void __user *)addr, PAGE_SIZE))
+
+		/*
+		 * Clear apic_assist portion of f(struct hv_vp_assist_page
+		 * only, there can be valuable data in the rest which needs
+		 * to be preserved e.g. on migration.
+		 */
+		if (__clear_user((void __user *)addr, sizeof(u32)))
 			return 1;
-		hv->hv_vapic = data;
+		hv_vcpu->hv_vapic = data;
 		kvm_vcpu_mark_page_dirty(vcpu, gfn);
 		if (kvm_lapic_enable_pv_eoi(vcpu,
-					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
+					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
+					    sizeof(struct hv_vp_assist_page)))
 			return 1;
 		break;
 	}
@@ -1080,7 +1129,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 	case HV_X64_MSR_VP_RUNTIME:
 		if (!host)
 			return 1;
-		hv->runtime_offset = data - current_task_runtime_100ns();
+		hv_vcpu->runtime_offset = data - current_task_runtime_100ns();
 		break;
 	case HV_X64_MSR_SCONTROL:
 	case HV_X64_MSR_SVERSION:
@@ -1172,11 +1221,11 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
 			  bool host)
 {
 	u64 data = 0;
-	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
 
 	switch (msr) {
 	case HV_X64_MSR_VP_INDEX:
-		data = hv->vp_index;
+		data = hv_vcpu->vp_index;
 		break;
 	case HV_X64_MSR_EOI:
 		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
@@ -1185,10 +1234,10 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
 	case HV_X64_MSR_TPR:
 		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
 	case HV_X64_MSR_VP_ASSIST_PAGE:
-		data = hv->hv_vapic;
+		data = hv_vcpu->hv_vapic;
 		break;
 	case HV_X64_MSR_VP_RUNTIME:
-		data = current_task_runtime_100ns() + hv->runtime_offset;
+		data = current_task_runtime_100ns() + hv_vcpu->runtime_offset;
 		break;
 	case HV_X64_MSR_SCONTROL:
 	case HV_X64_MSR_SVERSION:
@@ -1255,32 +1304,47 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
 		return kvm_hv_get_msr(vcpu, msr, pdata, host);
 }
 
-static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
+static __always_inline unsigned long *sparse_set_to_vcpu_mask(
+	struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
+	u64 *vp_bitmap, unsigned long *vcpu_bitmap)
 {
-	int i = 0, j;
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+	struct kvm_vcpu *vcpu;
+	int i, bank, sbank = 0;
 
-	if (!(valid_bank_mask & BIT_ULL(bank_no)))
-		return -1;
+	memset(vp_bitmap, 0,
+	       KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
+	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
+			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
+		vp_bitmap[bank] = sparse_banks[sbank++];
 
-	for (j = 0; j < bank_no; j++)
-		if (valid_bank_mask & BIT_ULL(j))
-			i++;
+	if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
+		/* for all vcpus vp_index == vcpu_idx */
+		return (unsigned long *)vp_bitmap;
+	}
 
-	return i;
+	bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index,
+			     (unsigned long *)vp_bitmap))
+			__set_bit(i, vcpu_bitmap);
+	}
+	return vcpu_bitmap;
 }
 
 static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
 			    u16 rep_cnt, bool ex)
 {
 	struct kvm *kvm = current_vcpu->kvm;
-	struct kvm_vcpu_hv *hv_current = &current_vcpu->arch.hyperv;
+	struct kvm_vcpu_hv *hv_vcpu = &current_vcpu->arch.hyperv;
 	struct hv_tlb_flush_ex flush_ex;
 	struct hv_tlb_flush flush;
-	struct kvm_vcpu *vcpu;
-	unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
-	unsigned long valid_bank_mask = 0;
+	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
+	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
+	unsigned long *vcpu_mask;
+	u64 valid_bank_mask;
 	u64 sparse_banks[64];
-	int sparse_banks_len, i;
+	int sparse_banks_len;
 	bool all_cpus;
 
 	if (!ex) {
@@ -1290,6 +1354,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
 		trace_kvm_hv_flush_tlb(flush.processor_mask,
 				       flush.address_space, flush.flags);
 
+		valid_bank_mask = BIT_ULL(0);
 		sparse_banks[0] = flush.processor_mask;
 		all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
 	} else {
@@ -1306,7 +1371,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
 		all_cpus = flush_ex.hv_vp_set.format !=
 			HV_GENERIC_SET_SPARSE_4K;
 
-		sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
+		sparse_banks_len =
+			bitmap_weight((unsigned long *)&valid_bank_mask, 64) *
 			sizeof(sparse_banks[0]);
 
 		if (!sparse_banks_len && !all_cpus)
@@ -1321,48 +1387,19 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
 	}
 
-	cpumask_clear(&hv_current->tlb_lush);
-
-	kvm_for_each_vcpu(i, vcpu, kvm) {
-		struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
-		int bank = hv->vp_index / 64, sbank = 0;
-
-		if (!all_cpus) {
-			/* Banks >64 can't be represented */
-			if (bank >= 64)
-				continue;
-
-			/* Non-ex hypercalls can only address first 64 vCPUs */
-			if (!ex && bank)
-				continue;
-
-			if (ex) {
-				/*
-				 * Check is the bank of this vCPU is in sparse
-				 * set and get the sparse bank number.
-				 */
-				sbank = get_sparse_bank_no(valid_bank_mask,
-							   bank);
-
-				if (sbank < 0)
-					continue;
-			}
-
-			if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
-				continue;
-		}
+	cpumask_clear(&hv_vcpu->tlb_flush);
 
-		/*
-		 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
-		 * can't analyze it here, flush TLB regardless of the specified
-		 * address space.
-		 */
-		__set_bit(i, vcpu_bitmap);
-	}
+	vcpu_mask = all_cpus ? NULL :
+		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
+					vp_bitmap, vcpu_bitmap);
 
+	/*
+	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
+	 * analyze it here, flush TLB regardless of the specified address space.
+	 */
 	kvm_make_vcpus_request_mask(kvm,
 				    KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
-				    vcpu_bitmap, &hv_current->tlb_lush);
+				    vcpu_mask, &hv_vcpu->tlb_flush);
 
 ret_success:
 	/* We always do full TLB flush, set rep_done = rep_cnt. */
@@ -1370,6 +1407,99 @@ ret_success:
 		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
 }
 
+static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
+				 unsigned long *vcpu_bitmap)
+{
+	struct kvm_lapic_irq irq = {
+		.delivery_mode = APIC_DM_FIXED,
+		.vector = vector
+	};
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
+			continue;
+
+		/* We fail only when APIC is disabled */
+		kvm_apic_set_irq(vcpu, &irq, NULL);
+	}
+}
+
+static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
+			   bool ex, bool fast)
+{
+	struct kvm *kvm = current_vcpu->kvm;
+	struct hv_send_ipi_ex send_ipi_ex;
+	struct hv_send_ipi send_ipi;
+	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
+	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
+	unsigned long *vcpu_mask;
+	unsigned long valid_bank_mask;
+	u64 sparse_banks[64];
+	int sparse_banks_len;
+	u32 vector;
+	bool all_cpus;
+
+	if (!ex) {
+		if (!fast) {
+			if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi,
+						    sizeof(send_ipi))))
+				return HV_STATUS_INVALID_HYPERCALL_INPUT;
+			sparse_banks[0] = send_ipi.cpu_mask;
+			vector = send_ipi.vector;
+		} else {
+			/* 'reserved' part of hv_send_ipi should be 0 */
+			if (unlikely(ingpa >> 32 != 0))
+				return HV_STATUS_INVALID_HYPERCALL_INPUT;
+			sparse_banks[0] = outgpa;
+			vector = (u32)ingpa;
+		}
+		all_cpus = false;
+		valid_bank_mask = BIT_ULL(0);
+
+		trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
+	} else {
+		if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex,
+					    sizeof(send_ipi_ex))))
+			return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+		trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
+					 send_ipi_ex.vp_set.format,
+					 send_ipi_ex.vp_set.valid_bank_mask);
+
+		vector = send_ipi_ex.vector;
+		valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
+		sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
+			sizeof(sparse_banks[0]);
+
+		all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
+
+		if (!sparse_banks_len)
+			goto ret_success;
+
+		if (!all_cpus &&
+		    kvm_read_guest(kvm,
+				   ingpa + offsetof(struct hv_send_ipi_ex,
+						    vp_set.bank_contents),
+				   sparse_banks,
+				   sparse_banks_len))
+			return HV_STATUS_INVALID_HYPERCALL_INPUT;
+	}
+
+	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+		return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+	vcpu_mask = all_cpus ? NULL :
+		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
+					vp_bitmap, vcpu_bitmap);
+
+	kvm_send_ipi_to_many(kvm, vector, vcpu_mask);
+
+ret_success:
+	return HV_STATUS_SUCCESS;
+}
+
 bool kvm_hv_hypercall_enabled(struct kvm *kvm)
 {
 	return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
@@ -1539,6 +1669,20 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 		}
 		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
 		break;
+	case HVCALL_SEND_IPI:
+		if (unlikely(rep)) {
+			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+			break;
+		}
+		ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast);
+		break;
+	case HVCALL_SEND_IPI_EX:
+		if (unlikely(fast || rep)) {
+			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+			break;
+		}
+		ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false);
+		break;
 	default:
 		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
 		break;
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index d6aa969e20f1..0e66c12ed2c3 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -62,6 +62,10 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
 
+bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu);
+bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
+			    struct hv_vp_assist_page *assist_page);
+
 static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
 							int timer_index)
 {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index fbb0e6df121b..3cd227ff807f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -70,6 +70,11 @@
 #define APIC_BROADCAST			0xFF
 #define X2APIC_BROADCAST		0xFFFFFFFFul
 
+static bool lapic_timer_advance_adjust_done = false;
+#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
+/* step-by-step approximation to mitigate fluctuation */
+#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
+
 static inline int apic_test_vector(int vec, void *bitmap)
 {
 	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -955,14 +960,14 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 	map = rcu_dereference(kvm->arch.apic_map);
 
 	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
-	if (ret)
+	if (ret) {
+		*r = 0;
 		for_each_set_bit(i, &bitmap, 16) {
 			if (!dst[i])
 				continue;
-			if (*r < 0)
-				*r = 0;
 			*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
 		}
+	}
 
 	rcu_read_unlock();
 	return ret;
@@ -1472,7 +1477,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
 void wait_lapic_expire(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
-	u64 guest_tsc, tsc_deadline;
+	u64 guest_tsc, tsc_deadline, ns;
 
 	if (!lapic_in_kernel(vcpu))
 		return;
@@ -1492,6 +1497,24 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
 	if (guest_tsc < tsc_deadline)
 		__delay(min(tsc_deadline - guest_tsc,
 			nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
+
+	if (!lapic_timer_advance_adjust_done) {
+		/* too early */
+		if (guest_tsc < tsc_deadline) {
+			ns = (tsc_deadline - guest_tsc) * 1000000ULL;
+			do_div(ns, vcpu->arch.virtual_tsc_khz);
+			lapic_timer_advance_ns -= min((unsigned int)ns,
+				lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+		} else {
+		/* too late */
+			ns = (guest_tsc - tsc_deadline) * 1000000ULL;
+			do_div(ns, vcpu->arch.virtual_tsc_khz);
+			lapic_timer_advance_ns += min((unsigned int)ns,
+				lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+		}
+		if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
+			lapic_timer_advance_adjust_done = true;
+	}
 }
 
 static void start_sw_tscdeadline(struct kvm_lapic *apic)
@@ -2621,17 +2644,25 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
 	return 0;
 }
 
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
+int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
 {
 	u64 addr = data & ~KVM_MSR_ENABLED;
+	struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
+	unsigned long new_len;
+
 	if (!IS_ALIGNED(addr, 4))
 		return 1;
 
 	vcpu->arch.pv_eoi.msr_val = data;
 	if (!pv_eoi_enabled(vcpu))
 		return 0;
-	return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
-					 addr, sizeof(u8));
+
+	if (addr == ghc->gpa && len <= ghc->len)
+		new_len = ghc->len;
+	else
+		new_len = len;
+
+	return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
 }
 
 void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index ed0ed39abd36..ff6ef9c3d760 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -120,7 +120,7 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
 	return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
 }
 
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
+int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
 void kvm_lapic_init(void);
 void kvm_lapic_exit(void);
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e843ec46609d..cf5f572f2305 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -932,7 +932,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
 		obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
 		if (!obj)
-			return -ENOMEM;
+			return cache->nobjs >= min ? 0 : -ENOMEM;
 		cache->objects[cache->nobjs++] = obj;
 	}
 	return 0;
@@ -960,7 +960,7 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
 	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
 		page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
 		if (!page)
-			return -ENOMEM;
+			return cache->nobjs >= min ? 0 : -ENOMEM;
 		cache->objects[cache->nobjs++] = page;
 	}
 	return 0;
@@ -1265,24 +1265,24 @@ pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
 	mmu_free_pte_list_desc(desc);
 }
 
-static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
+static void __pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
 {
 	struct pte_list_desc *desc;
 	struct pte_list_desc *prev_desc;
 	int i;
 
 	if (!rmap_head->val) {
-		printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte);
+		pr_err("%s: %p 0->BUG\n", __func__, spte);
 		BUG();
 	} else if (!(rmap_head->val & 1)) {
-		rmap_printk("pte_list_remove:  %p 1->0\n", spte);
+		rmap_printk("%s:  %p 1->0\n", __func__, spte);
 		if ((u64 *)rmap_head->val != spte) {
-			printk(KERN_ERR "pte_list_remove:  %p 1->BUG\n", spte);
+			pr_err("%s:  %p 1->BUG\n", __func__, spte);
 			BUG();
 		}
 		rmap_head->val = 0;
 	} else {
-		rmap_printk("pte_list_remove:  %p many->many\n", spte);
+		rmap_printk("%s:  %p many->many\n", __func__, spte);
 		desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
 		prev_desc = NULL;
 		while (desc) {
@@ -1296,11 +1296,17 @@ static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
 			prev_desc = desc;
 			desc = desc->more;
 		}
-		pr_err("pte_list_remove: %p many->many\n", spte);
+		pr_err("%s: %p many->many\n", __func__, spte);
 		BUG();
 	}
 }
 
+static void pte_list_remove(struct kvm_rmap_head *rmap_head, u64 *sptep)
+{
+	mmu_spte_clear_track_bits(sptep);
+	__pte_list_remove(sptep, rmap_head);
+}
+
 static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level,
 					   struct kvm_memory_slot *slot)
 {
@@ -1349,7 +1355,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	sp = page_header(__pa(spte));
 	gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
 	rmap_head = gfn_to_rmap(kvm, gfn, sp);
-	pte_list_remove(spte, rmap_head);
+	__pte_list_remove(spte, rmap_head);
 }
 
 /*
@@ -1685,7 +1691,7 @@ static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
 	while ((sptep = rmap_get_first(rmap_head, &iter))) {
 		rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep);
 
-		drop_spte(kvm, sptep);
+		pte_list_remove(rmap_head, sptep);
 		flush = true;
 	}
 
@@ -1721,7 +1727,7 @@ restart:
 		need_flush = 1;
 
 		if (pte_write(*ptep)) {
-			drop_spte(kvm, sptep);
+			pte_list_remove(rmap_head, sptep);
 			goto restart;
 		} else {
 			new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
@@ -1988,7 +1994,7 @@ static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
 static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
 				       u64 *parent_pte)
 {
-	pte_list_remove(parent_pte, &sp->parent_ptes);
+	__pte_list_remove(parent_pte, &sp->parent_ptes);
 }
 
 static void drop_parent_pte(struct kvm_mmu_page *sp,
@@ -2181,7 +2187,7 @@ static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			    struct list_head *invalid_list)
 {
 	if (sp->role.cr4_pae != !!is_pae(vcpu)
-	    || vcpu->arch.mmu.sync_page(vcpu, sp) == 0) {
+	    || vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
 		kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
 		return false;
 	}
@@ -2375,14 +2381,14 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	int collisions = 0;
 	LIST_HEAD(invalid_list);
 
-	role = vcpu->arch.mmu.base_role;
+	role = vcpu->arch.mmu->mmu_role.base;
 	role.level = level;
 	role.direct = direct;
 	if (role.direct)
 		role.cr4_pae = 0;
 	role.access = access;
-	if (!vcpu->arch.mmu.direct_map
-	    && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
+	if (!vcpu->arch.mmu->direct_map
+	    && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
 		quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
 		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
 		role.quadrant = quadrant;
@@ -2457,11 +2463,11 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
 {
 	iterator->addr = addr;
 	iterator->shadow_addr = root;
-	iterator->level = vcpu->arch.mmu.shadow_root_level;
+	iterator->level = vcpu->arch.mmu->shadow_root_level;
 
 	if (iterator->level == PT64_ROOT_4LEVEL &&
-	    vcpu->arch.mmu.root_level < PT64_ROOT_4LEVEL &&
-	    !vcpu->arch.mmu.direct_map)
+	    vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL &&
+	    !vcpu->arch.mmu->direct_map)
 		--iterator->level;
 
 	if (iterator->level == PT32E_ROOT_LEVEL) {
@@ -2469,10 +2475,10 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
 		 * prev_root is currently only used for 64-bit hosts. So only
 		 * the active root_hpa is valid here.
 		 */
-		BUG_ON(root != vcpu->arch.mmu.root_hpa);
+		BUG_ON(root != vcpu->arch.mmu->root_hpa);
 
 		iterator->shadow_addr
-			= vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
+			= vcpu->arch.mmu->pae_root[(addr >> 30) & 3];
 		iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
 		--iterator->level;
 		if (!iterator->shadow_addr)
@@ -2483,7 +2489,7 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
 static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
 			     struct kvm_vcpu *vcpu, u64 addr)
 {
-	shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu.root_hpa,
+	shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu->root_hpa,
 				    addr);
 }
 
@@ -3095,7 +3101,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
 	int emulate = 0;
 	gfn_t pseudo_gfn;
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		return 0;
 
 	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
@@ -3301,7 +3307,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 	u64 spte = 0ull;
 	uint retry_count = 0;
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		return false;
 
 	if (!page_fault_can_be_fast(error_code))
@@ -3471,11 +3477,11 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
 }
 
 /* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free)
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+			ulong roots_to_free)
 {
 	int i;
 	LIST_HEAD(invalid_list);
-	struct kvm_mmu *mmu = &vcpu->arch.mmu;
 	bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;
 
 	BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
@@ -3535,20 +3541,20 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 	struct kvm_mmu_page *sp;
 	unsigned i;
 
-	if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) {
+	if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
 		spin_lock(&vcpu->kvm->mmu_lock);
 		if(make_mmu_pages_available(vcpu) < 0) {
 			spin_unlock(&vcpu->kvm->mmu_lock);
 			return -ENOSPC;
 		}
 		sp = kvm_mmu_get_page(vcpu, 0, 0,
-				vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
+				vcpu->arch.mmu->shadow_root_level, 1, ACC_ALL);
 		++sp->root_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
-		vcpu->arch.mmu.root_hpa = __pa(sp->spt);
-	} else if (vcpu->arch.mmu.shadow_root_level == PT32E_ROOT_LEVEL) {
+		vcpu->arch.mmu->root_hpa = __pa(sp->spt);
+	} else if (vcpu->arch.mmu->shadow_root_level == PT32E_ROOT_LEVEL) {
 		for (i = 0; i < 4; ++i) {
-			hpa_t root = vcpu->arch.mmu.pae_root[i];
+			hpa_t root = vcpu->arch.mmu->pae_root[i];
 
 			MMU_WARN_ON(VALID_PAGE(root));
 			spin_lock(&vcpu->kvm->mmu_lock);
@@ -3561,9 +3567,9 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 			root = __pa(sp->spt);
 			++sp->root_count;
 			spin_unlock(&vcpu->kvm->mmu_lock);
-			vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
+			vcpu->arch.mmu->pae_root[i] = root | PT_PRESENT_MASK;
 		}
-		vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+		vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
 	} else
 		BUG();
 
@@ -3577,7 +3583,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	gfn_t root_gfn;
 	int i;
 
-	root_gfn = vcpu->arch.mmu.get_cr3(vcpu) >> PAGE_SHIFT;
+	root_gfn = vcpu->arch.mmu->get_cr3(vcpu) >> PAGE_SHIFT;
 
 	if (mmu_check_root(vcpu, root_gfn))
 		return 1;
@@ -3586,8 +3592,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	 * Do we shadow a long mode page table? If so we need to
 	 * write-protect the guests page table root.
 	 */
-	if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
-		hpa_t root = vcpu->arch.mmu.root_hpa;
+	if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
+		hpa_t root = vcpu->arch.mmu->root_hpa;
 
 		MMU_WARN_ON(VALID_PAGE(root));
 
@@ -3597,11 +3603,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 			return -ENOSPC;
 		}
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
-				vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
+				vcpu->arch.mmu->shadow_root_level, 0, ACC_ALL);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
-		vcpu->arch.mmu.root_hpa = root;
+		vcpu->arch.mmu->root_hpa = root;
 		return 0;
 	}
 
@@ -3611,17 +3617,17 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	 * the shadow page table may be a PAE or a long mode page table.
 	 */
 	pm_mask = PT_PRESENT_MASK;
-	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL)
+	if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL)
 		pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
 
 	for (i = 0; i < 4; ++i) {
-		hpa_t root = vcpu->arch.mmu.pae_root[i];
+		hpa_t root = vcpu->arch.mmu->pae_root[i];
 
 		MMU_WARN_ON(VALID_PAGE(root));
-		if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
-			pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i);
+		if (vcpu->arch.mmu->root_level == PT32E_ROOT_LEVEL) {
+			pdptr = vcpu->arch.mmu->get_pdptr(vcpu, i);
 			if (!(pdptr & PT_PRESENT_MASK)) {
-				vcpu->arch.mmu.pae_root[i] = 0;
+				vcpu->arch.mmu->pae_root[i] = 0;
 				continue;
 			}
 			root_gfn = pdptr >> PAGE_SHIFT;
@@ -3639,16 +3645,16 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		++sp->root_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 
-		vcpu->arch.mmu.pae_root[i] = root | pm_mask;
+		vcpu->arch.mmu->pae_root[i] = root | pm_mask;
 	}
-	vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+	vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
 
 	/*
 	 * If we shadow a 32 bit page table with a long mode page
 	 * table we enter this path.
 	 */
-	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) {
-		if (vcpu->arch.mmu.lm_root == NULL) {
+	if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
+		if (vcpu->arch.mmu->lm_root == NULL) {
 			/*
 			 * The additional page necessary for this is only
 			 * allocated on demand.
@@ -3660,12 +3666,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 			if (lm_root == NULL)
 				return 1;
 
-			lm_root[0] = __pa(vcpu->arch.mmu.pae_root) | pm_mask;
+			lm_root[0] = __pa(vcpu->arch.mmu->pae_root) | pm_mask;
 
-			vcpu->arch.mmu.lm_root = lm_root;
+			vcpu->arch.mmu->lm_root = lm_root;
 		}
 
-		vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.lm_root);
+		vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root);
 	}
 
 	return 0;
@@ -3673,7 +3679,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 
 static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->arch.mmu.direct_map)
+	if (vcpu->arch.mmu->direct_map)
 		return mmu_alloc_direct_roots(vcpu);
 	else
 		return mmu_alloc_shadow_roots(vcpu);
@@ -3684,17 +3690,16 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 	int i;
 	struct kvm_mmu_page *sp;
 
-	if (vcpu->arch.mmu.direct_map)
+	if (vcpu->arch.mmu->direct_map)
 		return;
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		return;
 
 	vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
 
-	if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
-		hpa_t root = vcpu->arch.mmu.root_hpa;
-
+	if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
+		hpa_t root = vcpu->arch.mmu->root_hpa;
 		sp = page_header(root);
 
 		/*
@@ -3725,7 +3730,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 	kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
 
 	for (i = 0; i < 4; ++i) {
-		hpa_t root = vcpu->arch.mmu.pae_root[i];
+		hpa_t root = vcpu->arch.mmu->pae_root[i];
 
 		if (root && VALID_PAGE(root)) {
 			root &= PT64_BASE_ADDR_MASK;
@@ -3799,7 +3804,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	int root, leaf;
 	bool reserved = false;
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		goto exit;
 
 	walk_shadow_page_lockless_begin(vcpu);
@@ -3816,7 +3821,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 		if (!is_shadow_present_pte(spte))
 			break;
 
-		reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
+		reserved |= is_shadow_zero_bits_set(vcpu->arch.mmu, spte,
 						    iterator.level);
 	}
 
@@ -3895,7 +3900,7 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
 	struct kvm_shadow_walk_iterator iterator;
 	u64 spte;
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		return;
 
 	walk_shadow_page_lockless_begin(vcpu);
@@ -3922,7 +3927,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 	if (r)
 		return r;
 
-	MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
 
 
 	return nonpaging_map(vcpu, gva & PAGE_MASK,
@@ -3935,8 +3940,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 
 	arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
 	arch.gfn = gfn;
-	arch.direct_map = vcpu->arch.mmu.direct_map;
-	arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
+	arch.direct_map = vcpu->arch.mmu->direct_map;
+	arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu);
 
 	return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
 }
@@ -4042,7 +4047,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	int write = error_code & PFERR_WRITE_MASK;
 	bool map_writable;
 
-	MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
 
 	if (page_fault_handle_page_track(vcpu, error_code, gfn))
 		return RET_PF_EMULATE;
@@ -4118,7 +4123,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
 {
 	uint i;
 	struct kvm_mmu_root_info root;
-	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+	struct kvm_mmu *mmu = vcpu->arch.mmu;
 
 	root.cr3 = mmu->get_cr3(vcpu);
 	root.hpa = mmu->root_hpa;
@@ -4141,7 +4146,7 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
 			    union kvm_mmu_page_role new_role,
 			    bool skip_tlb_flush)
 {
-	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+	struct kvm_mmu *mmu = vcpu->arch.mmu;
 
 	/*
 	 * For now, limit the fast switch to 64-bit hosts+VMs in order to avoid
@@ -4192,7 +4197,8 @@ static void __kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3,
 			      bool skip_tlb_flush)
 {
 	if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush))
-		kvm_mmu_free_roots(vcpu, KVM_MMU_ROOT_CURRENT);
+		kvm_mmu_free_roots(vcpu, vcpu->arch.mmu,
+				   KVM_MMU_ROOT_CURRENT);
 }
 
 void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush)
@@ -4210,7 +4216,7 @@ static unsigned long get_cr3(struct kvm_vcpu *vcpu)
 static void inject_page_fault(struct kvm_vcpu *vcpu,
 			      struct x86_exception *fault)
 {
-	vcpu->arch.mmu.inject_page_fault(vcpu, fault);
+	vcpu->arch.mmu->inject_page_fault(vcpu, fault);
 }
 
 static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
@@ -4414,7 +4420,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 {
-	bool uses_nx = context->nx || context->base_role.smep_andnot_wp;
+	bool uses_nx = context->nx ||
+		context->mmu_role.base.smep_andnot_wp;
 	struct rsvd_bits_validate *shadow_zero_check;
 	int i;
 
@@ -4553,7 +4560,7 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
 			 * SMAP:kernel-mode data accesses from user-mode
 			 * mappings should fault. A fault is considered
 			 * as a SMAP violation if all of the following
-			 * conditions are ture:
+			 * conditions are true:
 			 *   - X86_CR4_SMAP is set in CR4
 			 *   - A user page is accessed
 			 *   - The access is not a fetch
@@ -4714,27 +4721,65 @@ static void paging32E_init_context(struct kvm_vcpu *vcpu,
 	paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
 }
 
-static union kvm_mmu_page_role
-kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu)
+static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu)
+{
+	union kvm_mmu_extended_role ext = {0};
+
+	ext.cr0_pg = !!is_paging(vcpu);
+	ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+	ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
+	ext.cr4_pse = !!is_pse(vcpu);
+	ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE);
+	ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57);
+
+	ext.valid = 1;
+
+	return ext;
+}
+
+static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
+						   bool base_only)
+{
+	union kvm_mmu_role role = {0};
+
+	role.base.access = ACC_ALL;
+	role.base.nxe = !!is_nx(vcpu);
+	role.base.cr4_pae = !!is_pae(vcpu);
+	role.base.cr0_wp = is_write_protection(vcpu);
+	role.base.smm = is_smm(vcpu);
+	role.base.guest_mode = is_guest_mode(vcpu);
+
+	if (base_only)
+		return role;
+
+	role.ext = kvm_calc_mmu_role_ext(vcpu);
+
+	return role;
+}
+
+static union kvm_mmu_role
+kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
 {
-	union kvm_mmu_page_role role = {0};
+	union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only);
 
-	role.guest_mode = is_guest_mode(vcpu);
-	role.smm = is_smm(vcpu);
-	role.ad_disabled = (shadow_accessed_mask == 0);
-	role.level = kvm_x86_ops->get_tdp_level(vcpu);
-	role.direct = true;
-	role.access = ACC_ALL;
+	role.base.ad_disabled = (shadow_accessed_mask == 0);
+	role.base.level = kvm_x86_ops->get_tdp_level(vcpu);
+	role.base.direct = true;
 
 	return role;
 }
 
 static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 {
-	struct kvm_mmu *context = &vcpu->arch.mmu;
+	struct kvm_mmu *context = vcpu->arch.mmu;
+	union kvm_mmu_role new_role =
+		kvm_calc_tdp_mmu_root_page_role(vcpu, false);
 
-	context->base_role.word = mmu_base_role_mask.word &
-				  kvm_calc_tdp_mmu_root_page_role(vcpu).word;
+	new_role.base.word &= mmu_base_role_mask.word;
+	if (new_role.as_u64 == context->mmu_role.as_u64)
+		return;
+
+	context->mmu_role.as_u64 = new_role.as_u64;
 	context->page_fault = tdp_page_fault;
 	context->sync_page = nonpaging_sync_page;
 	context->invlpg = nonpaging_invlpg;
@@ -4774,36 +4819,36 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	reset_tdp_shadow_zero_bits_mask(vcpu, context);
 }
 
-static union kvm_mmu_page_role
-kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu)
-{
-	union kvm_mmu_page_role role = {0};
-	bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
-	bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
-
-	role.nxe = is_nx(vcpu);
-	role.cr4_pae = !!is_pae(vcpu);
-	role.cr0_wp  = is_write_protection(vcpu);
-	role.smep_andnot_wp = smep && !is_write_protection(vcpu);
-	role.smap_andnot_wp = smap && !is_write_protection(vcpu);
-	role.guest_mode = is_guest_mode(vcpu);
-	role.smm = is_smm(vcpu);
-	role.direct = !is_paging(vcpu);
-	role.access = ACC_ALL;
+static union kvm_mmu_role
+kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
+{
+	union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only);
+
+	role.base.smep_andnot_wp = role.ext.cr4_smep &&
+		!is_write_protection(vcpu);
+	role.base.smap_andnot_wp = role.ext.cr4_smap &&
+		!is_write_protection(vcpu);
+	role.base.direct = !is_paging(vcpu);
 
 	if (!is_long_mode(vcpu))
-		role.level = PT32E_ROOT_LEVEL;
+		role.base.level = PT32E_ROOT_LEVEL;
 	else if (is_la57_mode(vcpu))
-		role.level = PT64_ROOT_5LEVEL;
+		role.base.level = PT64_ROOT_5LEVEL;
 	else
-		role.level = PT64_ROOT_4LEVEL;
+		role.base.level = PT64_ROOT_4LEVEL;
 
 	return role;
 }
 
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 {
-	struct kvm_mmu *context = &vcpu->arch.mmu;
+	struct kvm_mmu *context = vcpu->arch.mmu;
+	union kvm_mmu_role new_role =
+		kvm_calc_shadow_mmu_root_page_role(vcpu, false);
+
+	new_role.base.word &= mmu_base_role_mask.word;
+	if (new_role.as_u64 == context->mmu_role.as_u64)
+		return;
 
 	if (!is_paging(vcpu))
 		nonpaging_init_context(vcpu, context);
@@ -4814,22 +4859,28 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 	else
 		paging32_init_context(vcpu, context);
 
-	context->base_role.word = mmu_base_role_mask.word &
-				  kvm_calc_shadow_mmu_root_page_role(vcpu).word;
+	context->mmu_role.as_u64 = new_role.as_u64;
 	reset_shadow_zero_bits_mask(vcpu, context);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
-static union kvm_mmu_page_role
-kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty)
+static union kvm_mmu_role
+kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
+				   bool execonly)
 {
-	union kvm_mmu_page_role role = vcpu->arch.mmu.base_role;
+	union kvm_mmu_role role;
+
+	/* Base role is inherited from root_mmu */
+	role.base.word = vcpu->arch.root_mmu.mmu_role.base.word;
+	role.ext = kvm_calc_mmu_role_ext(vcpu);
+
+	role.base.level = PT64_ROOT_4LEVEL;
+	role.base.direct = false;
+	role.base.ad_disabled = !accessed_dirty;
+	role.base.guest_mode = true;
+	role.base.access = ACC_ALL;
 
-	role.level = PT64_ROOT_4LEVEL;
-	role.direct = false;
-	role.ad_disabled = !accessed_dirty;
-	role.guest_mode = true;
-	role.access = ACC_ALL;
+	role.ext.execonly = execonly;
 
 	return role;
 }
@@ -4837,11 +4888,17 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty)
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 			     bool accessed_dirty, gpa_t new_eptp)
 {
-	struct kvm_mmu *context = &vcpu->arch.mmu;
-	union kvm_mmu_page_role root_page_role =
-		kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty);
+	struct kvm_mmu *context = vcpu->arch.mmu;
+	union kvm_mmu_role new_role =
+		kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty,
+						   execonly);
+
+	__kvm_mmu_new_cr3(vcpu, new_eptp, new_role.base, false);
+
+	new_role.base.word &= mmu_base_role_mask.word;
+	if (new_role.as_u64 == context->mmu_role.as_u64)
+		return;
 
-	__kvm_mmu_new_cr3(vcpu, new_eptp, root_page_role, false);
 	context->shadow_root_level = PT64_ROOT_4LEVEL;
 
 	context->nx = true;
@@ -4853,7 +4910,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 	context->update_pte = ept_update_pte;
 	context->root_level = PT64_ROOT_4LEVEL;
 	context->direct_map = false;
-	context->base_role.word = root_page_role.word & mmu_base_role_mask.word;
+	context->mmu_role.as_u64 = new_role.as_u64;
+
 	update_permission_bitmask(vcpu, context, true);
 	update_pkru_bitmask(vcpu, context, true);
 	update_last_nonleaf_level(vcpu, context);
@@ -4864,7 +4922,7 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
 
 static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
 {
-	struct kvm_mmu *context = &vcpu->arch.mmu;
+	struct kvm_mmu *context = vcpu->arch.mmu;
 
 	kvm_init_shadow_mmu(vcpu);
 	context->set_cr3           = kvm_x86_ops->set_cr3;
@@ -4875,14 +4933,20 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
 
 static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 {
+	union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false);
 	struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
 
+	new_role.base.word &= mmu_base_role_mask.word;
+	if (new_role.as_u64 == g_context->mmu_role.as_u64)
+		return;
+
+	g_context->mmu_role.as_u64 = new_role.as_u64;
 	g_context->get_cr3           = get_cr3;
 	g_context->get_pdptr         = kvm_pdptr_read;
 	g_context->inject_page_fault = kvm_inject_page_fault;
 
 	/*
-	 * Note that arch.mmu.gva_to_gpa translates l2_gpa to l1_gpa using
+	 * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using
 	 * L1's nested page tables (e.g. EPT12). The nested translation
 	 * of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using
 	 * L2's page tables as the first level of translation and L1's
@@ -4921,10 +4985,10 @@ void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots)
 	if (reset_roots) {
 		uint i;
 
-		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+		vcpu->arch.mmu->root_hpa = INVALID_PAGE;
 
 		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-			vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
+			vcpu->arch.mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
 	}
 
 	if (mmu_is_nested(vcpu))
@@ -4939,10 +5003,14 @@ EXPORT_SYMBOL_GPL(kvm_init_mmu);
 static union kvm_mmu_page_role
 kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu)
 {
+	union kvm_mmu_role role;
+
 	if (tdp_enabled)
-		return kvm_calc_tdp_mmu_root_page_role(vcpu);
+		role = kvm_calc_tdp_mmu_root_page_role(vcpu, true);
 	else
-		return kvm_calc_shadow_mmu_root_page_role(vcpu);
+		role = kvm_calc_shadow_mmu_root_page_role(vcpu, true);
+
+	return role.base;
 }
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
@@ -4972,8 +5040,10 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
 
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
-	kvm_mmu_free_roots(vcpu, KVM_MMU_ROOTS_ALL);
-	WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL);
+	WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root_hpa));
+	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
+	WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root_hpa));
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unload);
 
@@ -4987,7 +5057,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
         }
 
 	++vcpu->kvm->stat.mmu_pte_updated;
-	vcpu->arch.mmu.update_pte(vcpu, sp, spte, new);
+	vcpu->arch.mmu->update_pte(vcpu, sp, spte, new);
 }
 
 static bool need_remote_flush(u64 old, u64 new)
@@ -5164,10 +5234,12 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 		local_flush = true;
 		while (npte--) {
+			u32 base_role = vcpu->arch.mmu->mmu_role.base.word;
+
 			entry = *spte;
 			mmu_page_zap_pte(vcpu->kvm, sp, spte);
 			if (gentry &&
-			      !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
+			      !((sp->role.word ^ base_role)
 			      & mmu_base_role_mask.word) && rmap_can_add(vcpu))
 				mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
 			if (need_remote_flush(entry, *spte))
@@ -5185,7 +5257,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 	gpa_t gpa;
 	int r;
 
-	if (vcpu->arch.mmu.direct_map)
+	if (vcpu->arch.mmu->direct_map)
 		return 0;
 
 	gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
@@ -5221,10 +5293,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 {
 	int r, emulation_type = 0;
 	enum emulation_result er;
-	bool direct = vcpu->arch.mmu.direct_map;
+	bool direct = vcpu->arch.mmu->direct_map;
 
 	/* With shadow page tables, fault_address contains a GVA or nGPA.  */
-	if (vcpu->arch.mmu.direct_map) {
+	if (vcpu->arch.mmu->direct_map) {
 		vcpu->arch.gpa_available = true;
 		vcpu->arch.gpa_val = cr2;
 	}
@@ -5237,8 +5309,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 	}
 
 	if (r == RET_PF_INVALID) {
-		r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
-					      false);
+		r = vcpu->arch.mmu->page_fault(vcpu, cr2,
+					       lower_32_bits(error_code),
+					       false);
 		WARN_ON(r == RET_PF_INVALID);
 	}
 
@@ -5254,7 +5327,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 	 * paging in both guests. If true, we simply unprotect the page
 	 * and resume the guest.
 	 */
-	if (vcpu->arch.mmu.direct_map &&
+	if (vcpu->arch.mmu->direct_map &&
 	    (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
 		kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
 		return 1;
@@ -5302,7 +5375,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
-	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+	struct kvm_mmu *mmu = vcpu->arch.mmu;
 	int i;
 
 	/* INVLPG on a * non-canonical address is a NOP according to the SDM.  */
@@ -5333,7 +5406,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
 
 void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
 {
-	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+	struct kvm_mmu *mmu = vcpu->arch.mmu;
 	bool tlb_flush = false;
 	uint i;
 
@@ -5377,8 +5450,8 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp);
 
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
-	free_page((unsigned long)vcpu->arch.mmu.pae_root);
-	free_page((unsigned long)vcpu->arch.mmu.lm_root);
+	free_page((unsigned long)vcpu->arch.mmu->pae_root);
+	free_page((unsigned long)vcpu->arch.mmu->lm_root);
 }
 
 static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
@@ -5398,9 +5471,9 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 	if (!page)
 		return -ENOMEM;
 
-	vcpu->arch.mmu.pae_root = page_address(page);
+	vcpu->arch.mmu->pae_root = page_address(page);
 	for (i = 0; i < 4; ++i)
-		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
+		vcpu->arch.mmu->pae_root[i] = INVALID_PAGE;
 
 	return 0;
 }
@@ -5409,27 +5482,21 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
 	uint i;
 
-	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
-	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-	vcpu->arch.mmu.translate_gpa = translate_gpa;
-	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
+	vcpu->arch.mmu = &vcpu->arch.root_mmu;
+	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
 
+	vcpu->arch.root_mmu.root_hpa = INVALID_PAGE;
+	vcpu->arch.root_mmu.translate_gpa = translate_gpa;
 	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-		vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
-
-	return alloc_mmu_pages(vcpu);
-}
+		vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
 
-void kvm_mmu_setup(struct kvm_vcpu *vcpu)
-{
-	MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE;
+	vcpu->arch.guest_mmu.translate_gpa = translate_gpa;
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+		vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
 
-	/*
-	 * kvm_mmu_setup() is called only on vCPU initialization.  
-	 * Therefore, no need to reset mmu roots as they are not yet
-	 * initialized.
-	 */
-	kvm_init_mmu(vcpu, false);
+	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
+	return alloc_mmu_pages(vcpu);
 }
 
 static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
@@ -5612,7 +5679,7 @@ restart:
 		if (sp->role.direct &&
 			!kvm_is_reserved_pfn(pfn) &&
 			PageTransCompoundMap(pfn_to_page(pfn))) {
-			drop_spte(kvm, sptep);
+			pte_list_remove(rmap_head, sptep);
 			need_tlb_flush = 1;
 			goto restart;
 		}
@@ -5869,6 +5936,16 @@ int kvm_mmu_module_init(void)
 {
 	int ret = -ENOMEM;
 
+	/*
+	 * MMU roles use union aliasing which is, generally speaking, an
+	 * undefined behavior. However, we supposedly know how compilers behave
+	 * and the current status quo is unlikely to change. Guardians below are
+	 * supposed to let us know if the assumption becomes false.
+	 */
+	BUILD_BUG_ON(sizeof(union kvm_mmu_page_role) != sizeof(u32));
+	BUILD_BUG_ON(sizeof(union kvm_mmu_extended_role) != sizeof(u32));
+	BUILD_BUG_ON(sizeof(union kvm_mmu_role) != sizeof(u64));
+
 	kvm_mmu_reset_all_pte_masks();
 
 	pte_list_desc_cache = kmem_cache_create("pte_list_desc",
@@ -5898,7 +5975,7 @@ out:
 }
 
 /*
- * Caculate mmu pages needed for kvm.
+ * Calculate mmu pages needed for kvm.
  */
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 1fab69c0b2f3..c7b333147c4a 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -43,11 +43,6 @@
 #define PT32_ROOT_LEVEL 2
 #define PT32E_ROOT_LEVEL 3
 
-#define PT_PDPE_LEVEL 3
-#define PT_DIRECTORY_LEVEL 2
-#define PT_PAGE_TABLE_LEVEL 1
-#define PT_MAX_HUGEPAGE_LEVEL (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES - 1)
-
 static inline u64 rsvd_bits(int s, int e)
 {
 	if (e < s)
@@ -80,7 +75,7 @@ static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 
 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 {
-	if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
+	if (likely(vcpu->arch.mmu->root_hpa != INVALID_PAGE))
 		return 0;
 
 	return kvm_mmu_load(vcpu);
@@ -102,9 +97,9 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
 
 static inline void kvm_mmu_load_cr3(struct kvm_vcpu *vcpu)
 {
-	if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
-		vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa |
-					     kvm_get_active_pcid(vcpu));
+	if (VALID_PAGE(vcpu->arch.mmu->root_hpa))
+		vcpu->arch.mmu->set_cr3(vcpu, vcpu->arch.mmu->root_hpa |
+					      kvm_get_active_pcid(vcpu));
 }
 
 /*
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 1272861e77b9..abac7e208853 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -59,19 +59,19 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
 	int i;
 	struct kvm_mmu_page *sp;
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		return;
 
-	if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
-		hpa_t root = vcpu->arch.mmu.root_hpa;
+	if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
+		hpa_t root = vcpu->arch.mmu->root_hpa;
 
 		sp = page_header(root);
-		__mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu.root_level);
+		__mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu->root_level);
 		return;
 	}
 
 	for (i = 0; i < 4; ++i) {
-		hpa_t root = vcpu->arch.mmu.pae_root[i];
+		hpa_t root = vcpu->arch.mmu->pae_root[i];
 
 		if (root && VALID_PAGE(root)) {
 			root &= PT64_BASE_ADDR_MASK;
@@ -122,7 +122,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
 	hpa =  pfn << PAGE_SHIFT;
 	if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
 		audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx "
-			     "ent %llxn", vcpu->arch.mmu.root_level, pfn,
+			     "ent %llxn", vcpu->arch.mmu->root_level, pfn,
 			     hpa, *sptep);
 }
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 14ffd973df54..7cf2185b7eb5 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -158,14 +158,15 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
 				  struct kvm_mmu_page *sp, u64 *spte,
 				  u64 gpte)
 {
-	if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
+	if (is_rsvd_bits_set(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
 		goto no_present;
 
 	if (!FNAME(is_present_gpte)(gpte))
 		goto no_present;
 
 	/* if accessed bit is not supported prefetch non accessed gpte */
-	if (PT_HAVE_ACCESSED_DIRTY(&vcpu->arch.mmu) && !(gpte & PT_GUEST_ACCESSED_MASK))
+	if (PT_HAVE_ACCESSED_DIRTY(vcpu->arch.mmu) &&
+	    !(gpte & PT_GUEST_ACCESSED_MASK))
 		goto no_present;
 
 	return false;
@@ -480,7 +481,7 @@ error:
 static int FNAME(walk_addr)(struct guest_walker *walker,
 			    struct kvm_vcpu *vcpu, gva_t addr, u32 access)
 {
-	return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.mmu, addr,
+	return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr,
 					access);
 }
 
@@ -509,7 +510,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 
 	gfn = gpte_to_gfn(gpte);
 	pte_access = sp->role.access & FNAME(gpte_access)(gpte);
-	FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
+	FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
 	pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
 			no_dirty_log && (pte_access & ACC_WRITE_MASK));
 	if (is_error_pfn(pfn))
@@ -604,7 +605,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 
 	direct_access = gw->pte_access;
 
-	top_level = vcpu->arch.mmu.root_level;
+	top_level = vcpu->arch.mmu->root_level;
 	if (top_level == PT32E_ROOT_LEVEL)
 		top_level = PT32_ROOT_LEVEL;
 	/*
@@ -616,7 +617,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	if (FNAME(gpte_changed)(vcpu, gw, top_level))
 		goto out_gpte_changed;
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 		goto out_gpte_changed;
 
 	for (shadow_walk_init(&it, vcpu, addr);
@@ -1004,7 +1005,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		gfn = gpte_to_gfn(gpte);
 		pte_access = sp->role.access;
 		pte_access &= FNAME(gpte_access)(gpte);
-		FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
+		FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
 
 		if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
 		      &nr_present))
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 61ccfb13899e..0e21ccc46792 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -809,6 +809,8 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
 	    nested_svm_check_exception(svm, nr, has_error_code, error_code))
 		return;
 
+	kvm_deliver_exception_payload(&svm->vcpu);
+
 	if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
 		unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
 
@@ -2922,18 +2924,18 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
 {
 	WARN_ON(mmu_is_nested(vcpu));
 	kvm_init_shadow_mmu(vcpu);
-	vcpu->arch.mmu.set_cr3           = nested_svm_set_tdp_cr3;
-	vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
-	vcpu->arch.mmu.get_pdptr         = nested_svm_get_tdp_pdptr;
-	vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
-	vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu);
-	reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
+	vcpu->arch.mmu->set_cr3           = nested_svm_set_tdp_cr3;
+	vcpu->arch.mmu->get_cr3           = nested_svm_get_tdp_cr3;
+	vcpu->arch.mmu->get_pdptr         = nested_svm_get_tdp_pdptr;
+	vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
+	vcpu->arch.mmu->shadow_root_level = get_npt_level(vcpu);
+	reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
 	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
 }
 
 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
+	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
 }
 
 static int nested_svm_check_permissions(struct vcpu_svm *svm)
@@ -2969,16 +2971,13 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 	svm->vmcb->control.exit_info_1 = error_code;
 
 	/*
-	 * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
-	 * The fix is to add the ancillary datum (CR2 or DR6) to structs
-	 * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
-	 * written only when inject_pending_event runs (DR6 would written here
-	 * too).  This should be conditional on a new capability---if the
-	 * capability is disabled, kvm_multiple_exception would write the
-	 * ancillary information to CR2 or DR6, for backwards ABI-compatibility.
+	 * EXITINFO2 is undefined for all exception intercepts other
+	 * than #PF.
 	 */
 	if (svm->vcpu.arch.exception.nested_apf)
 		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+	else if (svm->vcpu.arch.exception.has_payload)
+		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
 	else
 		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
 
@@ -5642,26 +5641,24 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %%r13, %c[r13](%[svm]) \n\t"
 		"mov %%r14, %c[r14](%[svm]) \n\t"
 		"mov %%r15, %c[r15](%[svm]) \n\t"
-#endif
 		/*
 		* Clear host registers marked as clobbered to prevent
 		* speculative use.
 		*/
-		"xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
-		"xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
-		"xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
-		"xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
-		"xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
-#ifdef CONFIG_X86_64
-		"xor %%r8, %%r8 \n\t"
-		"xor %%r9, %%r9 \n\t"
-		"xor %%r10, %%r10 \n\t"
-		"xor %%r11, %%r11 \n\t"
-		"xor %%r12, %%r12 \n\t"
-		"xor %%r13, %%r13 \n\t"
-		"xor %%r14, %%r14 \n\t"
-		"xor %%r15, %%r15 \n\t"
+		"xor %%r8d, %%r8d \n\t"
+		"xor %%r9d, %%r9d \n\t"
+		"xor %%r10d, %%r10d \n\t"
+		"xor %%r11d, %%r11d \n\t"
+		"xor %%r12d, %%r12d \n\t"
+		"xor %%r13d, %%r13d \n\t"
+		"xor %%r14d, %%r14d \n\t"
+		"xor %%r15d, %%r15d \n\t"
 #endif
+		"xor %%ebx, %%ebx \n\t"
+		"xor %%ecx, %%ecx \n\t"
+		"xor %%edx, %%edx \n\t"
+		"xor %%esi, %%esi \n\t"
+		"xor %%edi, %%edi \n\t"
 		"pop %%" _ASM_BP
 		:
 		: [svm]"a"(svm),
@@ -7040,6 +7037,13 @@ failed:
 	return ret;
 }
 
+static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
+				   uint16_t *vmcs_version)
+{
+	/* Intel-only feature */
+	return -ENODEV;
+}
+
 static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -7169,6 +7173,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.mem_enc_op = svm_mem_enc_op,
 	.mem_enc_reg_region = svm_register_enc_region,
 	.mem_enc_unreg_region = svm_unregister_enc_region,
+
+	.nested_enable_evmcs = nested_enable_evmcs,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 0f997683404f..0659465a745c 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1418,6 +1418,48 @@ TRACE_EVENT(kvm_hv_flush_tlb_ex,
 		  __entry->valid_bank_mask, __entry->format,
 		  __entry->address_space, __entry->flags)
 );
+
+/*
+ * Tracepoints for kvm_hv_send_ipi.
+ */
+TRACE_EVENT(kvm_hv_send_ipi,
+	TP_PROTO(u32 vector, u64 processor_mask),
+	TP_ARGS(vector, processor_mask),
+
+	TP_STRUCT__entry(
+		__field(u32, vector)
+		__field(u64, processor_mask)
+	),
+
+	TP_fast_assign(
+		__entry->vector = vector;
+		__entry->processor_mask = processor_mask;
+	),
+
+	TP_printk("vector %x processor_mask 0x%llx",
+		  __entry->vector, __entry->processor_mask)
+);
+
+TRACE_EVENT(kvm_hv_send_ipi_ex,
+	TP_PROTO(u32 vector, u64 format, u64 valid_bank_mask),
+	TP_ARGS(vector, format, valid_bank_mask),
+
+	TP_STRUCT__entry(
+		__field(u32, vector)
+		__field(u64, format)
+		__field(u64, valid_bank_mask)
+	),
+
+	TP_fast_assign(
+		__entry->vector = vector;
+		__entry->format = format;
+		__entry->valid_bank_mask = valid_bank_mask;
+	),
+
+	TP_printk("vector %x format %llx valid_bank_mask 0x%llx",
+		  __entry->vector, __entry->format,
+		  __entry->valid_bank_mask)
+);
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e665aa7167cf..4555077d69ce 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -20,6 +20,7 @@
 #include "mmu.h"
 #include "cpuid.h"
 #include "lapic.h"
+#include "hyperv.h"
 
 #include <linux/kvm_host.h>
 #include <linux/module.h>
@@ -61,7 +62,7 @@
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 #define __ex_clear(x, reg) \
-	____kvm_handle_fault_on_reboot(x, "xor " reg " , " reg)
+	____kvm_handle_fault_on_reboot(x, "xor " reg ", " reg)
 
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
@@ -107,9 +108,12 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
  * VMX and be a hypervisor for its own guests. If nested=0, guests may not
  * use VMX instructions.
  */
-static bool __read_mostly nested = 0;
+static bool __read_mostly nested = 1;
 module_param(nested, bool, S_IRUGO);
 
+static bool __read_mostly nested_early_check = 0;
+module_param(nested_early_check, bool, S_IRUGO);
+
 static u64 __read_mostly host_xss;
 
 static bool __read_mostly enable_pml = 1;
@@ -131,7 +135,7 @@ static bool __read_mostly enable_preemption_timer = 1;
 module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
 #endif
 
-#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
+#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
 #define KVM_VM_CR0_ALWAYS_ON				\
 	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | 	\
@@ -187,6 +191,7 @@ static unsigned int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
 module_param(ple_window_max, uint, 0444);
 
 extern const ulong vmx_return;
+extern const ulong vmx_early_consistency_check_return;
 
 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
@@ -827,14 +832,28 @@ struct nested_vmx {
 	 */
 	struct vmcs12 *cached_shadow_vmcs12;
 	/*
-	 * Indicates if the shadow vmcs must be updated with the
-	 * data hold by vmcs12
+	 * Indicates if the shadow vmcs or enlightened vmcs must be updated
+	 * with the data held by struct vmcs12.
 	 */
-	bool sync_shadow_vmcs;
+	bool need_vmcs12_sync;
 	bool dirty_vmcs12;
 
+	/*
+	 * vmcs02 has been initialized, i.e. state that is constant for
+	 * vmcs02 has been written to the backing VMCS.  Initialization
+	 * is delayed until L1 actually attempts to run a nested VM.
+	 */
+	bool vmcs02_initialized;
+
 	bool change_vmcs01_virtual_apic_mode;
 
+	/*
+	 * Enlightened VMCS has been enabled. It does not mean that L1 has to
+	 * use it. However, VMX features available to L1 will be limited based
+	 * on what the enlightened VMCS supports.
+	 */
+	bool enlightened_vmcs_enabled;
+
 	/* L2 must run next, and mustn't decide to exit to L1. */
 	bool nested_run_pending;
 
@@ -870,6 +889,10 @@ struct nested_vmx {
 		/* in guest mode on SMM entry? */
 		bool guest_mode;
 	} smm;
+
+	gpa_t hv_evmcs_vmptr;
+	struct page *hv_evmcs_page;
+	struct hv_enlightened_vmcs *hv_evmcs;
 };
 
 #define POSTED_INTR_ON  0
@@ -1381,6 +1404,49 @@ DEFINE_STATIC_KEY_FALSE(enable_evmcs);
 
 #define KVM_EVMCS_VERSION 1
 
+/*
+ * Enlightened VMCSv1 doesn't support these:
+ *
+ *	POSTED_INTR_NV                  = 0x00000002,
+ *	GUEST_INTR_STATUS               = 0x00000810,
+ *	APIC_ACCESS_ADDR		= 0x00002014,
+ *	POSTED_INTR_DESC_ADDR           = 0x00002016,
+ *	EOI_EXIT_BITMAP0                = 0x0000201c,
+ *	EOI_EXIT_BITMAP1                = 0x0000201e,
+ *	EOI_EXIT_BITMAP2                = 0x00002020,
+ *	EOI_EXIT_BITMAP3                = 0x00002022,
+ *	GUEST_PML_INDEX			= 0x00000812,
+ *	PML_ADDRESS			= 0x0000200e,
+ *	VM_FUNCTION_CONTROL             = 0x00002018,
+ *	EPTP_LIST_ADDRESS               = 0x00002024,
+ *	VMREAD_BITMAP                   = 0x00002026,
+ *	VMWRITE_BITMAP                  = 0x00002028,
+ *
+ *	TSC_MULTIPLIER                  = 0x00002032,
+ *	PLE_GAP                         = 0x00004020,
+ *	PLE_WINDOW                      = 0x00004022,
+ *	VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
+ *      GUEST_IA32_PERF_GLOBAL_CTRL     = 0x00002808,
+ *      HOST_IA32_PERF_GLOBAL_CTRL      = 0x00002c04,
+ *
+ * Currently unsupported in KVM:
+ *	GUEST_IA32_RTIT_CTL		= 0x00002814,
+ */
+#define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
+				    PIN_BASED_VMX_PREEMPTION_TIMER)
+#define EVMCS1_UNSUPPORTED_2NDEXEC					\
+	(SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |				\
+	 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |			\
+	 SECONDARY_EXEC_APIC_REGISTER_VIRT |				\
+	 SECONDARY_EXEC_ENABLE_PML |					\
+	 SECONDARY_EXEC_ENABLE_VMFUNC |					\
+	 SECONDARY_EXEC_SHADOW_VMCS |					\
+	 SECONDARY_EXEC_TSC_SCALING |					\
+	 SECONDARY_EXEC_PAUSE_LOOP_EXITING)
+#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
+
 #if IS_ENABLED(CONFIG_HYPERV)
 static bool __read_mostly enlightened_vmcs = true;
 module_param(enlightened_vmcs, bool, 0444);
@@ -1473,69 +1539,12 @@ static void evmcs_load(u64 phys_addr)
 
 static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
 {
-	/*
-	 * Enlightened VMCSv1 doesn't support these:
-	 *
-	 *	POSTED_INTR_NV                  = 0x00000002,
-	 *	GUEST_INTR_STATUS               = 0x00000810,
-	 *	APIC_ACCESS_ADDR		= 0x00002014,
-	 *	POSTED_INTR_DESC_ADDR           = 0x00002016,
-	 *	EOI_EXIT_BITMAP0                = 0x0000201c,
-	 *	EOI_EXIT_BITMAP1                = 0x0000201e,
-	 *	EOI_EXIT_BITMAP2                = 0x00002020,
-	 *	EOI_EXIT_BITMAP3                = 0x00002022,
-	 */
-	vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
-	vmcs_conf->cpu_based_2nd_exec_ctrl &=
-		~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
-	vmcs_conf->cpu_based_2nd_exec_ctrl &=
-		~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
-	vmcs_conf->cpu_based_2nd_exec_ctrl &=
-		~SECONDARY_EXEC_APIC_REGISTER_VIRT;
-
-	/*
-	 *	GUEST_PML_INDEX			= 0x00000812,
-	 *	PML_ADDRESS			= 0x0000200e,
-	 */
-	vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_PML;
-
-	/*	VM_FUNCTION_CONTROL             = 0x00002018, */
-	vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_VMFUNC;
-
-	/*
-	 *	EPTP_LIST_ADDRESS               = 0x00002024,
-	 *	VMREAD_BITMAP                   = 0x00002026,
-	 *	VMWRITE_BITMAP                  = 0x00002028,
-	 */
-	vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_SHADOW_VMCS;
-
-	/*
-	 *	TSC_MULTIPLIER                  = 0x00002032,
-	 */
-	vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING;
+	vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
+	vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
 
-	/*
-	 *	PLE_GAP                         = 0x00004020,
-	 *	PLE_WINDOW                      = 0x00004022,
-	 */
-	vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
-
-	/*
-	 *	VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
-	 */
-	vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
-
-	/*
-	 *      GUEST_IA32_PERF_GLOBAL_CTRL     = 0x00002808,
-	 *      HOST_IA32_PERF_GLOBAL_CTRL      = 0x00002c04,
-	 */
-	vmcs_conf->vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
-	vmcs_conf->vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+	vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
+	vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
 
-	/*
-	 * Currently unsupported in KVM:
-	 *	GUEST_IA32_RTIT_CTL		= 0x00002814,
-	 */
 }
 
 /* check_ept_pointer() should be under protection of ept_pointer_lock. */
@@ -1560,26 +1569,27 @@ static void check_ept_pointer_match(struct kvm *kvm)
 
 static int vmx_hv_remote_flush_tlb(struct kvm *kvm)
 {
-	int ret;
+	struct kvm_vcpu *vcpu;
+	int ret = -ENOTSUPP, i;
 
 	spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
 
 	if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
 		check_ept_pointer_match(kvm);
 
-	if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
-		ret = -ENOTSUPP;
-		goto out;
-	}
-
 	/*
 	 * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the address of the
 	 * base of EPT PML4 table, strip off EPT configuration information.
 	 */
-	ret = hyperv_flush_guest_mapping(
-			to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK);
+	if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
+		kvm_for_each_vcpu(i, vcpu, kvm)
+			ret |= hyperv_flush_guest_mapping(
+				to_vmx(kvm_get_vcpu(kvm, i))->ept_pointer & PAGE_MASK);
+	} else {
+		ret = hyperv_flush_guest_mapping(
+				to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK);
+	}
 
-out:
 	spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
 	return ret;
 }
@@ -1595,6 +1605,35 @@ static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
 static inline void evmcs_touch_msr_bitmap(void) {}
 #endif /* IS_ENABLED(CONFIG_HYPERV) */
 
+static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
+			       uint16_t *vmcs_version)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	/* We don't support disabling the feature for simplicity. */
+	if (vmx->nested.enlightened_vmcs_enabled)
+		return 0;
+
+	vmx->nested.enlightened_vmcs_enabled = true;
+
+	/*
+	 * vmcs_version represents the range of supported Enlightened VMCS
+	 * versions: lower 8 bits is the minimal version, higher 8 bits is the
+	 * maximum supported version. KVM supports versions from 1 to
+	 * KVM_EVMCS_VERSION.
+	 */
+	if (vmcs_version)
+		*vmcs_version = (KVM_EVMCS_VERSION << 8) | 1;
+
+	vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
+	vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
+	vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
+	vmx->nested.msrs.secondary_ctls_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
+	vmx->nested.msrs.vmfunc_controls &= ~EVMCS1_UNSUPPORTED_VMFUNC;
+
+	return 0;
+}
+
 static inline bool is_exception_n(u32 intr_info, u8 vector)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -1617,11 +1656,6 @@ static inline bool is_page_fault(u32 intr_info)
 	return is_exception_n(intr_info, PF_VECTOR);
 }
 
-static inline bool is_no_device(u32 intr_info)
-{
-	return is_exception_n(intr_info, NM_VECTOR);
-}
-
 static inline bool is_invalid_opcode(u32 intr_info)
 {
 	return is_exception_n(intr_info, UD_VECTOR);
@@ -1632,12 +1666,6 @@ static inline bool is_gp_fault(u32 intr_info)
 	return is_exception_n(intr_info, GP_VECTOR);
 }
 
-static inline bool is_external_interrupt(u32 intr_info)
-{
-	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
-		== (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
-}
-
 static inline bool is_machine_check(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -2063,9 +2091,6 @@ static inline bool is_nmi(u32 intr_info)
 static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 			      u32 exit_intr_info,
 			      unsigned long exit_qualification);
-static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
-			struct vmcs12 *vmcs12,
-			u32 reason, unsigned long qualification);
 
 static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
 {
@@ -2077,7 +2102,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
 	return -1;
 }
 
-static inline void __invvpid(int ext, u16 vpid, gva_t gva)
+static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva)
 {
     struct {
 	u64 vpid : 16;
@@ -2086,22 +2111,20 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
     } operand = { vpid, 0, gva };
     bool error;
 
-    asm volatile (__ex(ASM_VMX_INVVPID) CC_SET(na)
-		  : CC_OUT(na) (error) : "a"(&operand), "c"(ext)
-		  : "memory");
+    asm volatile (__ex("invvpid %2, %1") CC_SET(na)
+		  : CC_OUT(na) (error) : "r"(ext), "m"(operand));
     BUG_ON(error);
 }
 
-static inline void __invept(int ext, u64 eptp, gpa_t gpa)
+static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
 {
 	struct {
 		u64 eptp, gpa;
 	} operand = {eptp, gpa};
 	bool error;
 
-	asm volatile (__ex(ASM_VMX_INVEPT) CC_SET(na)
-		      : CC_OUT(na) (error) : "a" (&operand), "c" (ext)
-		      : "memory");
+	asm volatile (__ex("invept %2, %1") CC_SET(na)
+		      : CC_OUT(na) (error) : "r"(ext), "m"(operand));
 	BUG_ON(error);
 }
 
@@ -2120,9 +2143,8 @@ static void vmcs_clear(struct vmcs *vmcs)
 	u64 phys_addr = __pa(vmcs);
 	bool error;
 
-	asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) CC_SET(na)
-		      : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr)
-		      : "memory");
+	asm volatile (__ex("vmclear %1") CC_SET(na)
+		      : CC_OUT(na) (error) : "m"(phys_addr));
 	if (unlikely(error))
 		printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
 		       vmcs, phys_addr);
@@ -2145,9 +2167,8 @@ static void vmcs_load(struct vmcs *vmcs)
 	if (static_branch_unlikely(&enable_evmcs))
 		return evmcs_load(phys_addr);
 
-	asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) CC_SET(na)
-		      : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr)
-		      : "memory");
+	asm volatile (__ex("vmptrld %1") CC_SET(na)
+		      : CC_OUT(na) (error) : "m"(phys_addr));
 	if (unlikely(error))
 		printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
 		       vmcs, phys_addr);
@@ -2323,8 +2344,8 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
 {
 	unsigned long value;
 
-	asm volatile (__ex_clear(ASM_VMX_VMREAD_RDX_RAX, "%0")
-		      : "=a"(value) : "d"(field) : "cc");
+	asm volatile (__ex_clear("vmread %1, %0", "%k0")
+		      : "=r"(value) : "r"(field));
 	return value;
 }
 
@@ -2375,8 +2396,8 @@ static __always_inline void __vmcs_writel(unsigned long field, unsigned long val
 {
 	bool error;
 
-	asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) CC_SET(na)
-		      : CC_OUT(na) (error) : "a"(value), "d"(field));
+	asm volatile (__ex("vmwrite %2, %1") CC_SET(na)
+		      : CC_OUT(na) (error) : "r"(field), "rm"(value));
 	if (unlikely(error))
 		vmwrite_error(field, value);
 }
@@ -2707,7 +2728,8 @@ static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
 		u64 guest_val, u64 host_val)
 {
 	vmcs_write64(guest_val_vmcs, guest_val);
-	vmcs_write64(host_val_vmcs, host_val);
+	if (host_val_vmcs != HOST_IA32_EFER)
+		vmcs_write64(host_val_vmcs, host_val);
 	vm_entry_controls_setbit(vmx, entry);
 	vm_exit_controls_setbit(vmx, exit);
 }
@@ -2805,8 +2827,6 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 		ignore_bits &= ~(u64)EFER_SCE;
 #endif
 
-	clear_atomic_switch_msr(vmx, MSR_EFER);
-
 	/*
 	 * On EPT, we can't emulate NX, so we must switch EFER atomically.
 	 * On CPUs that support "load IA32_EFER", always switch EFER
@@ -2819,8 +2839,12 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 		if (guest_efer != host_efer)
 			add_atomic_switch_msr(vmx, MSR_EFER,
 					      guest_efer, host_efer, false);
+		else
+			clear_atomic_switch_msr(vmx, MSR_EFER);
 		return false;
 	} else {
+		clear_atomic_switch_msr(vmx, MSR_EFER);
+
 		guest_efer &= ~ignore_bits;
 		guest_efer |= host_efer & ignore_bits;
 
@@ -3272,34 +3296,30 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 	unsigned int nr = vcpu->arch.exception.nr;
+	bool has_payload = vcpu->arch.exception.has_payload;
+	unsigned long payload = vcpu->arch.exception.payload;
 
 	if (nr == PF_VECTOR) {
 		if (vcpu->arch.exception.nested_apf) {
 			*exit_qual = vcpu->arch.apf.nested_apf_token;
 			return 1;
 		}
-		/*
-		 * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception.
-		 * The fix is to add the ancillary datum (CR2 or DR6) to structs
-		 * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
-		 * can be written only when inject_pending_event runs.  This should be
-		 * conditional on a new capability---if the capability is disabled,
-		 * kvm_multiple_exception would write the ancillary information to
-		 * CR2 or DR6, for backwards ABI-compatibility.
-		 */
 		if (nested_vmx_is_page_fault_vmexit(vmcs12,
 						    vcpu->arch.exception.error_code)) {
-			*exit_qual = vcpu->arch.cr2;
-			return 1;
-		}
-	} else {
-		if (vmcs12->exception_bitmap & (1u << nr)) {
-			if (nr == DB_VECTOR)
-				*exit_qual = vcpu->arch.dr6;
-			else
-				*exit_qual = 0;
+			*exit_qual = has_payload ? payload : vcpu->arch.cr2;
 			return 1;
 		}
+	} else if (vmcs12->exception_bitmap & (1u << nr)) {
+		if (nr == DB_VECTOR) {
+			if (!has_payload) {
+				payload = vcpu->arch.dr6;
+				payload &= ~(DR6_FIXED_1 | DR6_BT);
+				payload ^= DR6_RTM;
+			}
+			*exit_qual = payload;
+		} else
+			*exit_qual = 0;
+		return 1;
 	}
 
 	return 0;
@@ -3326,6 +3346,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 	u32 error_code = vcpu->arch.exception.error_code;
 	u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
+	kvm_deliver_exception_payload(vcpu);
+
 	if (has_error_code) {
 		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
 		intr_info |= INTR_INFO_DELIVER_CODE_MASK;
@@ -4397,9 +4419,7 @@ static void kvm_cpu_vmxon(u64 addr)
 	cr4_set_bits(X86_CR4_VMXE);
 	intel_pt_handle_vmx(1);
 
-	asm volatile (ASM_VMX_VMXON_RAX
-			: : "a"(&addr), "m"(addr)
-			: "memory", "cc");
+	asm volatile ("vmxon %0" : : "m"(addr));
 }
 
 static int hardware_enable(void)
@@ -4468,7 +4488,7 @@ static void vmclear_local_loaded_vmcss(void)
  */
 static void kvm_cpu_vmxoff(void)
 {
-	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+	asm volatile (__ex("vmxoff"));
 
 	intel_pt_handle_vmx(0);
 	cr4_clear_bits(X86_CR4_VMXE);
@@ -5112,9 +5132,10 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
 				bool invalidate_gpa)
 {
 	if (enable_ept && (invalidate_gpa || !enable_vpid)) {
-		if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+		if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
 			return;
-		ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa));
+		ept_sync_context(construct_eptp(vcpu,
+						vcpu->arch.mmu->root_hpa));
 	} else {
 		vpid_sync_context(vpid);
 	}
@@ -5264,7 +5285,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long hw_cr0;
 
-	hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK);
+	hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
 	if (enable_unrestricted_guest)
 		hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
 	else {
@@ -6339,6 +6360,9 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 		rdmsr(MSR_IA32_CR_PAT, low32, high32);
 		vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
 	}
+
+	if (cpu_has_load_ia32_efer)
+		vmcs_write64(HOST_IA32_EFER, host_efer);
 }
 
 static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
@@ -6666,7 +6690,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
 
 	if (enable_pml) {
-		ASSERT(vmx->pml_pg);
 		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
 		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
 	}
@@ -8067,35 +8090,39 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
 
 /*
  * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
- * set the success or error code of an emulated VMX instruction, as specified
- * by Vol 2B, VMX Instruction Reference, "Conventions".
+ * set the success or error code of an emulated VMX instruction (as specified
+ * by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated
+ * instruction.
  */
-static void nested_vmx_succeed(struct kvm_vcpu *vcpu)
+static int nested_vmx_succeed(struct kvm_vcpu *vcpu)
 {
 	vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
 			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
 			    X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
+	return kvm_skip_emulated_instruction(vcpu);
 }
 
-static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
+static int nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
 {
 	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
 			& ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
 			    X86_EFLAGS_SF | X86_EFLAGS_OF))
 			| X86_EFLAGS_CF);
+	return kvm_skip_emulated_instruction(vcpu);
 }
 
-static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
-					u32 vm_instruction_error)
+static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
+				u32 vm_instruction_error)
 {
-	if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
-		/*
-		 * failValid writes the error number to the current VMCS, which
-		 * can't be done there isn't a current VMCS.
-		 */
-		nested_vmx_failInvalid(vcpu);
-		return;
-	}
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	/*
+	 * failValid writes the error number to the current VMCS, which
+	 * can't be done if there isn't a current VMCS.
+	 */
+	if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs)
+		return nested_vmx_failInvalid(vcpu);
+
 	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
 			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
 			    X86_EFLAGS_SF | X86_EFLAGS_OF))
@@ -8105,6 +8132,7 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
 	 * We don't need to force a shadow sync because
 	 * VM_INSTRUCTION_ERROR is not shadowed
 	 */
+	return kvm_skip_emulated_instruction(vcpu);
 }
 
 static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
@@ -8292,6 +8320,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 
 	vmx->nested.vpid02 = allocate_vpid();
 
+	vmx->nested.vmcs02_initialized = false;
 	vmx->nested.vmxon = true;
 	return 0;
 
@@ -8345,10 +8374,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	if (vmx->nested.vmxon) {
-		nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
-		return kvm_skip_emulated_instruction(vcpu);
-	}
+	if (vmx->nested.vmxon)
+		return nested_vmx_failValid(vcpu,
+			VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
 
 	if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
 			!= VMXON_NEEDED_FEATURES) {
@@ -8367,21 +8395,17 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 	 * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
 	 * which replaces physical address width with 32
 	 */
-	if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
-		nested_vmx_failInvalid(vcpu);
-		return kvm_skip_emulated_instruction(vcpu);
-	}
+	if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
+		return nested_vmx_failInvalid(vcpu);
 
 	page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
-	if (is_error_page(page)) {
-		nested_vmx_failInvalid(vcpu);
-		return kvm_skip_emulated_instruction(vcpu);
-	}
+	if (is_error_page(page))
+		return nested_vmx_failInvalid(vcpu);
+
 	if (*(u32 *)kmap(page) != VMCS12_REVISION) {
 		kunmap(page);
 		kvm_release_page_clean(page);
-		nested_vmx_failInvalid(vcpu);
-		return kvm_skip_emulated_instruction(vcpu);
+		return nested_vmx_failInvalid(vcpu);
 	}
 	kunmap(page);
 	kvm_release_page_clean(page);
@@ -8391,8 +8415,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 	if (ret)
 		return ret;
 
-	nested_vmx_succeed(vcpu);
-	return kvm_skip_emulated_instruction(vcpu);
+	return nested_vmx_succeed(vcpu);
 }
 
 /*
@@ -8423,8 +8446,24 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
 	vmcs_write64(VMCS_LINK_POINTER, -1ull);
 }
 
-static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
+static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (!vmx->nested.hv_evmcs)
+		return;
+
+	kunmap(vmx->nested.hv_evmcs_page);
+	kvm_release_page_dirty(vmx->nested.hv_evmcs_page);
+	vmx->nested.hv_evmcs_vmptr = -1ull;
+	vmx->nested.hv_evmcs_page = NULL;
+	vmx->nested.hv_evmcs = NULL;
+}
+
+static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
 	if (vmx->nested.current_vmptr == -1ull)
 		return;
 
@@ -8432,16 +8471,18 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
 		/* copy to memory all shadowed fields in case
 		   they were modified */
 		copy_shadow_to_vmcs12(vmx);
-		vmx->nested.sync_shadow_vmcs = false;
+		vmx->nested.need_vmcs12_sync = false;
 		vmx_disable_shadow_vmcs(vmx);
 	}
 	vmx->nested.posted_intr_nv = -1;
 
 	/* Flush VMCS12 to guest memory */
-	kvm_vcpu_write_guest_page(&vmx->vcpu,
+	kvm_vcpu_write_guest_page(vcpu,
 				  vmx->nested.current_vmptr >> PAGE_SHIFT,
 				  vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
 
+	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
+
 	vmx->nested.current_vmptr = -1ull;
 }
 
@@ -8449,8 +8490,10 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
  * Free whatever needs to be freed from vmx->nested when L1 goes down, or
  * just stops using VMX.
  */
-static void free_nested(struct vcpu_vmx *vmx)
+static void free_nested(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
 	if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
 		return;
 
@@ -8483,6 +8526,10 @@ static void free_nested(struct vcpu_vmx *vmx)
 		vmx->nested.pi_desc = NULL;
 	}
 
+	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
+
+	nested_release_evmcs(vcpu);
+
 	free_loaded_vmcs(&vmx->nested.vmcs02);
 }
 
@@ -8491,9 +8538,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
 {
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
-	free_nested(to_vmx(vcpu));
-	nested_vmx_succeed(vcpu);
-	return kvm_skip_emulated_instruction(vcpu);
+	free_nested(vcpu);
+	return nested_vmx_succeed(vcpu);
 }
 
 /* Emulate the VMCLEAR instruction */
@@ -8509,25 +8555,28 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 	if (nested_vmx_get_vmptr(vcpu, &vmptr))
 		return 1;
 
-	if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
-		nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
-		return kvm_skip_emulated_instruction(vcpu);
-	}
+	if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
+		return nested_vmx_failValid(vcpu,
+			VMXERR_VMCLEAR_INVALID_ADDRESS);
 
-	if (vmptr == vmx->nested.vmxon_ptr) {
-		nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
-		return kvm_skip_emulated_instruction(vcpu);
-	}
+	if (vmptr == vmx->nested.vmxon_ptr)
+		return nested_vmx_failValid(vcpu,
+			VMXERR_VMCLEAR_VMXON_POINTER);
 
-	if (vmptr == vmx->nested.current_vmptr)
-		nested_release_vmcs12(vmx);
+	if (vmx->nested.hv_evmcs_page) {
+		if (vmptr == vmx->nested.hv_evmcs_vmptr)
+			nested_release_evmcs(vcpu);
+	} else {
+		if (vmptr == vmx->nested.current_vmptr)
+			nested_release_vmcs12(vcpu);
 
-	kvm_vcpu_write_guest(vcpu,
-			vmptr + offsetof(struct vmcs12, launch_state),
-			&zero, sizeof(zero));
+		kvm_vcpu_write_guest(vcpu,
+				     vmptr + offsetof(struct vmcs12,
+						      launch_state),
+				     &zero, sizeof(zero));
+	}
 
-	nested_vmx_succeed(vcpu);
-	return kvm_skip_emulated_instruction(vcpu);
+	return nested_vmx_succeed(vcpu);
 }
 
 static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
@@ -8610,6 +8659,395 @@ static inline int vmcs12_write_any(struct vmcs12 *vmcs12,
 
 }
 
+static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
+{
+	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
+	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
+
+	vmcs12->hdr.revision_id = evmcs->revision_id;
+
+	/* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
+	vmcs12->tpr_threshold = evmcs->tpr_threshold;
+	vmcs12->guest_rip = evmcs->guest_rip;
+
+	if (unlikely(!(evmcs->hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
+		vmcs12->guest_rsp = evmcs->guest_rsp;
+		vmcs12->guest_rflags = evmcs->guest_rflags;
+		vmcs12->guest_interruptibility_info =
+			evmcs->guest_interruptibility_info;
+	}
+
+	if (unlikely(!(evmcs->hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
+		vmcs12->cpu_based_vm_exec_control =
+			evmcs->cpu_based_vm_exec_control;
+	}
+
+	if (unlikely(!(evmcs->hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
+		vmcs12->exception_bitmap = evmcs->exception_bitmap;
+	}
+
+	if (unlikely(!(evmcs->hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
+		vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
+	}
+
+	if (unlikely(!(evmcs->hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
+		vmcs12->vm_entry_intr_info_field =
+			evmcs->vm_entry_intr_info_field;
+		vmcs12->vm_entry_exception_error_code =
+			evmcs->vm_entry_exception_error_code;
+		vmcs12->vm_entry_instruction_len =
+			evmcs->vm_entry_instruction_len;
+	}
+
+	if (unlikely(!(evmcs->hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
+		vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
+		vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
+		vmcs12->host_cr0 = evmcs->host_cr0;
+		vmcs12->host_cr3 = evmcs->host_cr3;
+		vmcs12->host_cr4 = evmcs->host_cr4;
+		vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
+		vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
+		vmcs12->host_rip = evmcs->host_rip;
+		vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
+		vmcs12->host_es_selector = evmcs->host_es_selector;
+		vmcs12->host_cs_selector = evmcs->host_cs_selector;
+		vmcs12->host_ss_selector = evmcs->host_ss_selector;
+		vmcs12->host_ds_selector = evmcs->host_ds_selector;
+		vmcs12->host_fs_selector = evmcs->host_fs_selector;
+		vmcs12->host_gs_selector = evmcs->host_gs_selector;
+		vmcs12->host_tr_selector = evmcs->host_tr_selector;
+	}
+
+	if (unlikely(!(evmcs->hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
+		vmcs12->pin_based_vm_exec_control =
+			evmcs->pin_based_vm_exec_control;
+		vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
+		vmcs12->secondary_vm_exec_control =
+			evmcs->secondary_vm_exec_control;
+	}
+
+	if (unlikely(!(evmcs->hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
+		vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
+		vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
+	}
+
+	if (unlikely(!(evmcs->hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
+		vmcs12->msr_bitmap = evmcs->msr_bitmap;
+	}
+
+	if (unlikely(!(evmcs->hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
+		vmcs12->guest_es_base = evmcs->guest_es_base;
+		vmcs12->guest_cs_base = evmcs->guest_cs_base;
+		vmcs12->guest_ss_base = evmcs->guest_ss_base;
+		vmcs12->guest_ds_base = evmcs->guest_ds_base;
+		vmcs12->guest_fs_base = evmcs->guest_fs_base;
+		vmcs12->guest_gs_base = evmcs->guest_gs_base;
+		vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
+		vmcs12->guest_tr_base = evmcs->guest_tr_base;
+		vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
+		vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
+		vmcs12->guest_es_limit = evmcs->guest_es_limit;
+		vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
+		vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
+		vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
+		vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
+		vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
+		vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
+		vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
+		vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
+		vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
+		vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
+		vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
+		vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
+		vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
+		vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
+		vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
+		vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
+		vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
+		vmcs12->guest_es_selector = evmcs->guest_es_selector;
+		vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
+		vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
+		vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
+		vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
+		vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
+		vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
+		vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
+	}
+
+	if (unlikely(!(evmcs->hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
+		vmcs12->tsc_offset = evmcs->tsc_offset;
+		vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
+		vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
+	}
+
+	if (unlikely(!(evmcs->hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
+		vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
+		vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
+		vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
+		vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
+		vmcs12->guest_cr0 = evmcs->guest_cr0;
+		vmcs12->guest_cr3 = evmcs->guest_cr3;
+		vmcs12->guest_cr4 = evmcs->guest_cr4;
+		vmcs12->guest_dr7 = evmcs->guest_dr7;
+	}
+
+	if (unlikely(!(evmcs->hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
+		vmcs12->host_fs_base = evmcs->host_fs_base;
+		vmcs12->host_gs_base = evmcs->host_gs_base;
+		vmcs12->host_tr_base = evmcs->host_tr_base;
+		vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
+		vmcs12->host_idtr_base = evmcs->host_idtr_base;
+		vmcs12->host_rsp = evmcs->host_rsp;
+	}
+
+	if (unlikely(!(evmcs->hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
+		vmcs12->ept_pointer = evmcs->ept_pointer;
+		vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
+	}
+
+	if (unlikely(!(evmcs->hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
+		vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
+		vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
+		vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
+		vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
+		vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
+		vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
+		vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
+		vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
+		vmcs12->guest_pending_dbg_exceptions =
+			evmcs->guest_pending_dbg_exceptions;
+		vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
+		vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
+		vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
+		vmcs12->guest_activity_state = evmcs->guest_activity_state;
+		vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
+	}
+
+	/*
+	 * Not used?
+	 * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr;
+	 * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr;
+	 * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr;
+	 * vmcs12->cr3_target_value0 = evmcs->cr3_target_value0;
+	 * vmcs12->cr3_target_value1 = evmcs->cr3_target_value1;
+	 * vmcs12->cr3_target_value2 = evmcs->cr3_target_value2;
+	 * vmcs12->cr3_target_value3 = evmcs->cr3_target_value3;
+	 * vmcs12->page_fault_error_code_mask =
+	 *		evmcs->page_fault_error_code_mask;
+	 * vmcs12->page_fault_error_code_match =
+	 *		evmcs->page_fault_error_code_match;
+	 * vmcs12->cr3_target_count = evmcs->cr3_target_count;
+	 * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count;
+	 * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count;
+	 * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count;
+	 */
+
+	/*
+	 * Read only fields:
+	 * vmcs12->guest_physical_address = evmcs->guest_physical_address;
+	 * vmcs12->vm_instruction_error = evmcs->vm_instruction_error;
+	 * vmcs12->vm_exit_reason = evmcs->vm_exit_reason;
+	 * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info;
+	 * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code;
+	 * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field;
+	 * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code;
+	 * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len;
+	 * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info;
+	 * vmcs12->exit_qualification = evmcs->exit_qualification;
+	 * vmcs12->guest_linear_address = evmcs->guest_linear_address;
+	 *
+	 * Not present in struct vmcs12:
+	 * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx;
+	 * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi;
+	 * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi;
+	 * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip;
+	 */
+
+	return 0;
+}
+
+static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
+{
+	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
+	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
+
+	/*
+	 * Should not be changed by KVM:
+	 *
+	 * evmcs->host_es_selector = vmcs12->host_es_selector;
+	 * evmcs->host_cs_selector = vmcs12->host_cs_selector;
+	 * evmcs->host_ss_selector = vmcs12->host_ss_selector;
+	 * evmcs->host_ds_selector = vmcs12->host_ds_selector;
+	 * evmcs->host_fs_selector = vmcs12->host_fs_selector;
+	 * evmcs->host_gs_selector = vmcs12->host_gs_selector;
+	 * evmcs->host_tr_selector = vmcs12->host_tr_selector;
+	 * evmcs->host_ia32_pat = vmcs12->host_ia32_pat;
+	 * evmcs->host_ia32_efer = vmcs12->host_ia32_efer;
+	 * evmcs->host_cr0 = vmcs12->host_cr0;
+	 * evmcs->host_cr3 = vmcs12->host_cr3;
+	 * evmcs->host_cr4 = vmcs12->host_cr4;
+	 * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp;
+	 * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip;
+	 * evmcs->host_rip = vmcs12->host_rip;
+	 * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs;
+	 * evmcs->host_fs_base = vmcs12->host_fs_base;
+	 * evmcs->host_gs_base = vmcs12->host_gs_base;
+	 * evmcs->host_tr_base = vmcs12->host_tr_base;
+	 * evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
+	 * evmcs->host_idtr_base = vmcs12->host_idtr_base;
+	 * evmcs->host_rsp = vmcs12->host_rsp;
+	 * sync_vmcs12() doesn't read these:
+	 * evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
+	 * evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
+	 * evmcs->msr_bitmap = vmcs12->msr_bitmap;
+	 * evmcs->ept_pointer = vmcs12->ept_pointer;
+	 * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap;
+	 * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
+	 * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
+	 * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
+	 * evmcs->cr3_target_value0 = vmcs12->cr3_target_value0;
+	 * evmcs->cr3_target_value1 = vmcs12->cr3_target_value1;
+	 * evmcs->cr3_target_value2 = vmcs12->cr3_target_value2;
+	 * evmcs->cr3_target_value3 = vmcs12->cr3_target_value3;
+	 * evmcs->tpr_threshold = vmcs12->tpr_threshold;
+	 * evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
+	 * evmcs->exception_bitmap = vmcs12->exception_bitmap;
+	 * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer;
+	 * evmcs->pin_based_vm_exec_control = vmcs12->pin_based_vm_exec_control;
+	 * evmcs->vm_exit_controls = vmcs12->vm_exit_controls;
+	 * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control;
+	 * evmcs->page_fault_error_code_mask =
+	 *		vmcs12->page_fault_error_code_mask;
+	 * evmcs->page_fault_error_code_match =
+	 *		vmcs12->page_fault_error_code_match;
+	 * evmcs->cr3_target_count = vmcs12->cr3_target_count;
+	 * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr;
+	 * evmcs->tsc_offset = vmcs12->tsc_offset;
+	 * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl;
+	 * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask;
+	 * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask;
+	 * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow;
+	 * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow;
+	 * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count;
+	 * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count;
+	 * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count;
+	 *
+	 * Not present in struct vmcs12:
+	 * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx;
+	 * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi;
+	 * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi;
+	 * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip;
+	 */
+
+	evmcs->guest_es_selector = vmcs12->guest_es_selector;
+	evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
+	evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
+	evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
+	evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
+	evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
+	evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
+	evmcs->guest_tr_selector = vmcs12->guest_tr_selector;
+
+	evmcs->guest_es_limit = vmcs12->guest_es_limit;
+	evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
+	evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
+	evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
+	evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
+	evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
+	evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
+	evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
+	evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
+	evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;
+
+	evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
+	evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
+	evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
+	evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
+	evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
+	evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
+	evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
+	evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;
+
+	evmcs->guest_es_base = vmcs12->guest_es_base;
+	evmcs->guest_cs_base = vmcs12->guest_cs_base;
+	evmcs->guest_ss_base = vmcs12->guest_ss_base;
+	evmcs->guest_ds_base = vmcs12->guest_ds_base;
+	evmcs->guest_fs_base = vmcs12->guest_fs_base;
+	evmcs->guest_gs_base = vmcs12->guest_gs_base;
+	evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
+	evmcs->guest_tr_base = vmcs12->guest_tr_base;
+	evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
+	evmcs->guest_idtr_base = vmcs12->guest_idtr_base;
+
+	evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
+	evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;
+
+	evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
+	evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
+	evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
+	evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;
+
+	evmcs->guest_pending_dbg_exceptions =
+		vmcs12->guest_pending_dbg_exceptions;
+	evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
+	evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;
+
+	evmcs->guest_activity_state = vmcs12->guest_activity_state;
+	evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;
+
+	evmcs->guest_cr0 = vmcs12->guest_cr0;
+	evmcs->guest_cr3 = vmcs12->guest_cr3;
+	evmcs->guest_cr4 = vmcs12->guest_cr4;
+	evmcs->guest_dr7 = vmcs12->guest_dr7;
+
+	evmcs->guest_physical_address = vmcs12->guest_physical_address;
+
+	evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
+	evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
+	evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
+	evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
+	evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
+	evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
+	evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
+	evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;
+
+	evmcs->exit_qualification = vmcs12->exit_qualification;
+
+	evmcs->guest_linear_address = vmcs12->guest_linear_address;
+	evmcs->guest_rsp = vmcs12->guest_rsp;
+	evmcs->guest_rflags = vmcs12->guest_rflags;
+
+	evmcs->guest_interruptibility_info =
+		vmcs12->guest_interruptibility_info;
+	evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
+	evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
+	evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
+	evmcs->vm_entry_exception_error_code =
+		vmcs12->vm_entry_exception_error_code;
+	evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;
+
+	evmcs->guest_rip = vmcs12->guest_rip;
+
+	evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
+
+	return 0;
+}
+
 /*
  * Copy the writable VMCS shadow fields back to the VMCS12, in case
  * they have been modified by the L1 guest. Note that the "read-only"
@@ -8683,20 +9121,6 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
 	vmcs_load(vmx->loaded_vmcs->vmcs);
 }
 
-/*
- * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was
- * used before) all generate the same failure when it is missing.
- */
-static int nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	if (vmx->nested.current_vmptr == -1ull) {
-		nested_vmx_failInvalid(vcpu);
-		return 0;
-	}
-	return 1;
-}
-
 static int handle_vmread(struct kvm_vcpu *vcpu)
 {
 	unsigned long field;
@@ -8709,8 +9133,8 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 
-	if (!nested_vmx_check_vmcs12(vcpu))
-		return kvm_skip_emulated_instruction(vcpu);
+	if (to_vmx(vcpu)->nested.current_vmptr == -1ull)
+		return nested_vmx_failInvalid(vcpu);
 
 	if (!is_guest_mode(vcpu))
 		vmcs12 = get_vmcs12(vcpu);
@@ -8719,20 +9143,18 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 		 * When vmcs->vmcs_link_pointer is -1ull, any VMREAD
 		 * to shadowed-field sets the ALU flags for VMfailInvalid.
 		 */
-		if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) {
-			nested_vmx_failInvalid(vcpu);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
+		if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)
+			return nested_vmx_failInvalid(vcpu);
 		vmcs12 = get_shadow_vmcs12(vcpu);
 	}
 
 	/* Decode instruction info and find the field to read */
 	field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
 	/* Read the field, zero-extended to a u64 field_value */
-	if (vmcs12_read_any(vmcs12, field, &field_value) < 0) {
-		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
-		return kvm_skip_emulated_instruction(vcpu);
-	}
+	if (vmcs12_read_any(vmcs12, field, &field_value) < 0)
+		return nested_vmx_failValid(vcpu,
+			VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+
 	/*
 	 * Now copy part of this value to register or memory, as requested.
 	 * Note that the number of bits actually copied is 32 or 64 depending
@@ -8750,8 +9172,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 					    (is_long_mode(vcpu) ? 8 : 4), NULL);
 	}
 
-	nested_vmx_succeed(vcpu);
-	return kvm_skip_emulated_instruction(vcpu);
+	return nested_vmx_succeed(vcpu);
 }
 
 
@@ -8776,8 +9197,8 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 
-	if (!nested_vmx_check_vmcs12(vcpu))
-		return kvm_skip_emulated_instruction(vcpu);
+	if (vmx->nested.current_vmptr == -1ull)
+		return nested_vmx_failInvalid(vcpu);
 
 	if (vmx_instruction_info & (1u << 10))
 		field_value = kvm_register_readl(vcpu,
@@ -8800,11 +9221,9 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 	 * VMCS," then the "read-only" fields are actually read/write.
 	 */
 	if (vmcs_field_readonly(field) &&
-	    !nested_cpu_has_vmwrite_any_field(vcpu)) {
-		nested_vmx_failValid(vcpu,
+	    !nested_cpu_has_vmwrite_any_field(vcpu))
+		return nested_vmx_failValid(vcpu,
 			VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
-		return kvm_skip_emulated_instruction(vcpu);
-	}
 
 	if (!is_guest_mode(vcpu))
 		vmcs12 = get_vmcs12(vcpu);
@@ -8813,18 +9232,14 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 		 * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE
 		 * to shadowed-field sets the ALU flags for VMfailInvalid.
 		 */
-		if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) {
-			nested_vmx_failInvalid(vcpu);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
+		if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)
+			return nested_vmx_failInvalid(vcpu);
 		vmcs12 = get_shadow_vmcs12(vcpu);
-
 	}
 
-	if (vmcs12_write_any(vmcs12, field, field_value) < 0) {
-		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
-		return kvm_skip_emulated_instruction(vcpu);
-	}
+	if (vmcs12_write_any(vmcs12, field, field_value) < 0)
+		return nested_vmx_failValid(vcpu,
+			VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 
 	/*
 	 * Do not track vmcs12 dirty-state if in guest-mode
@@ -8846,8 +9261,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	nested_vmx_succeed(vcpu);
-	return kvm_skip_emulated_instruction(vcpu);
+	return nested_vmx_succeed(vcpu);
 }
 
 static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
@@ -8858,7 +9272,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
 			      SECONDARY_EXEC_SHADOW_VMCS);
 		vmcs_write64(VMCS_LINK_POINTER,
 			     __pa(vmx->vmcs01.shadow_vmcs));
-		vmx->nested.sync_shadow_vmcs = true;
+		vmx->nested.need_vmcs12_sync = true;
 	}
 	vmx->nested.dirty_vmcs12 = true;
 }
@@ -8875,36 +9289,37 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 	if (nested_vmx_get_vmptr(vcpu, &vmptr))
 		return 1;
 
-	if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
-		nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
-		return kvm_skip_emulated_instruction(vcpu);
-	}
+	if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
+		return nested_vmx_failValid(vcpu,
+			VMXERR_VMPTRLD_INVALID_ADDRESS);
 
-	if (vmptr == vmx->nested.vmxon_ptr) {
-		nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
-		return kvm_skip_emulated_instruction(vcpu);
-	}
+	if (vmptr == vmx->nested.vmxon_ptr)
+		return nested_vmx_failValid(vcpu,
+			VMXERR_VMPTRLD_VMXON_POINTER);
+
+	/* Forbid normal VMPTRLD if Enlightened version was used */
+	if (vmx->nested.hv_evmcs)
+		return 1;
 
 	if (vmx->nested.current_vmptr != vmptr) {
 		struct vmcs12 *new_vmcs12;
 		struct page *page;
 		page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
-		if (is_error_page(page)) {
-			nested_vmx_failInvalid(vcpu);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
+		if (is_error_page(page))
+			return nested_vmx_failInvalid(vcpu);
+
 		new_vmcs12 = kmap(page);
 		if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
 		    (new_vmcs12->hdr.shadow_vmcs &&
 		     !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
 			kunmap(page);
 			kvm_release_page_clean(page);
-			nested_vmx_failValid(vcpu,
+			return nested_vmx_failValid(vcpu,
 				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
-			return kvm_skip_emulated_instruction(vcpu);
 		}
 
-		nested_release_vmcs12(vmx);
+		nested_release_vmcs12(vcpu);
+
 		/*
 		 * Load VMCS12 from guest memory since it is not already
 		 * cached.
@@ -8916,8 +9331,71 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 		set_current_vmptr(vmx, vmptr);
 	}
 
-	nested_vmx_succeed(vcpu);
-	return kvm_skip_emulated_instruction(vcpu);
+	return nested_vmx_succeed(vcpu);
+}
+
+/*
+ * This is an equivalent of the nested hypervisor executing the vmptrld
+ * instruction.
+ */
+static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
+						 bool from_launch)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct hv_vp_assist_page assist_page;
+
+	if (likely(!vmx->nested.enlightened_vmcs_enabled))
+		return 1;
+
+	if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page)))
+		return 1;
+
+	if (unlikely(!assist_page.enlighten_vmentry))
+		return 1;
+
+	if (unlikely(assist_page.current_nested_vmcs !=
+		     vmx->nested.hv_evmcs_vmptr)) {
+
+		if (!vmx->nested.hv_evmcs)
+			vmx->nested.current_vmptr = -1ull;
+
+		nested_release_evmcs(vcpu);
+
+		vmx->nested.hv_evmcs_page = kvm_vcpu_gpa_to_page(
+			vcpu, assist_page.current_nested_vmcs);
+
+		if (unlikely(is_error_page(vmx->nested.hv_evmcs_page)))
+			return 0;
+
+		vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page);
+
+		if (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION) {
+			nested_release_evmcs(vcpu);
+			return 0;
+		}
+
+		vmx->nested.dirty_vmcs12 = true;
+		/*
+		 * As we keep L2 state for one guest only 'hv_clean_fields' mask
+		 * can't be used when we switch between them. Reset it here for
+		 * simplicity.
+		 */
+		vmx->nested.hv_evmcs->hv_clean_fields &=
+			~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+		vmx->nested.hv_evmcs_vmptr = assist_page.current_nested_vmcs;
+
+		/*
+		 * Unlike normal vmcs12, enlightened vmcs12 is not fully
+		 * reloaded from guest's memory (read only fields, fields not
+		 * present in struct hv_enlightened_vmcs, ...). Make sure there
+		 * are no leftovers.
+		 */
+		if (from_launch)
+			memset(vmx->nested.cached_vmcs12, 0,
+			       sizeof(*vmx->nested.cached_vmcs12));
+
+	}
+	return 1;
 }
 
 /* Emulate the VMPTRST instruction */
@@ -8932,6 +9410,9 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 
+	if (unlikely(to_vmx(vcpu)->nested.hv_evmcs))
+		return 1;
+
 	if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva))
 		return 1;
 	/* *_system ok, nested_vmx_check_permission has verified cpl=0 */
@@ -8940,8 +9421,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
 		kvm_inject_page_fault(vcpu, &e);
 		return 1;
 	}
-	nested_vmx_succeed(vcpu);
-	return kvm_skip_emulated_instruction(vcpu);
+	return nested_vmx_succeed(vcpu);
 }
 
 /* Emulate the INVEPT instruction */
@@ -8971,11 +9451,9 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 
 	types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
 
-	if (type >= 32 || !(types & (1 << type))) {
-		nested_vmx_failValid(vcpu,
+	if (type >= 32 || !(types & (1 << type)))
+		return nested_vmx_failValid(vcpu,
 				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
-		return kvm_skip_emulated_instruction(vcpu);
-	}
 
 	/* According to the Intel VMX instruction reference, the memory
 	 * operand is read even if it isn't needed (e.g., for type==global)
@@ -8997,14 +9475,20 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	case VMX_EPT_EXTENT_CONTEXT:
 		kvm_mmu_sync_roots(vcpu);
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
-		nested_vmx_succeed(vcpu);
 		break;
 	default:
 		BUG_ON(1);
 		break;
 	}
 
-	return kvm_skip_emulated_instruction(vcpu);
+	return nested_vmx_succeed(vcpu);
+}
+
+static u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid;
 }
 
 static int handle_invvpid(struct kvm_vcpu *vcpu)
@@ -9018,6 +9502,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 		u64 vpid;
 		u64 gla;
 	} operand;
+	u16 vpid02;
 
 	if (!(vmx->nested.msrs.secondary_ctls_high &
 	      SECONDARY_EXEC_ENABLE_VPID) ||
@@ -9035,11 +9520,9 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 	types = (vmx->nested.msrs.vpid_caps &
 			VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
 
-	if (type >= 32 || !(types & (1 << type))) {
-		nested_vmx_failValid(vcpu,
+	if (type >= 32 || !(types & (1 << type)))
+		return nested_vmx_failValid(vcpu,
 			VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
-		return kvm_skip_emulated_instruction(vcpu);
-	}
 
 	/* according to the intel vmx instruction reference, the memory
 	 * operand is read even if it isn't needed (e.g., for type==global)
@@ -9051,47 +9534,39 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 		kvm_inject_page_fault(vcpu, &e);
 		return 1;
 	}
-	if (operand.vpid >> 16) {
-		nested_vmx_failValid(vcpu,
+	if (operand.vpid >> 16)
+		return nested_vmx_failValid(vcpu,
 			VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
-		return kvm_skip_emulated_instruction(vcpu);
-	}
 
+	vpid02 = nested_get_vpid02(vcpu);
 	switch (type) {
 	case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
 		if (!operand.vpid ||
-		    is_noncanonical_address(operand.gla, vcpu)) {
-			nested_vmx_failValid(vcpu,
+		    is_noncanonical_address(operand.gla, vcpu))
+			return nested_vmx_failValid(vcpu,
 				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-		if (cpu_has_vmx_invvpid_individual_addr() &&
-		    vmx->nested.vpid02) {
+		if (cpu_has_vmx_invvpid_individual_addr()) {
 			__invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR,
-				vmx->nested.vpid02, operand.gla);
+				vpid02, operand.gla);
 		} else
-			__vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
+			__vmx_flush_tlb(vcpu, vpid02, false);
 		break;
 	case VMX_VPID_EXTENT_SINGLE_CONTEXT:
 	case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
-		if (!operand.vpid) {
-			nested_vmx_failValid(vcpu,
+		if (!operand.vpid)
+			return nested_vmx_failValid(vcpu,
 				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-		__vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
+		__vmx_flush_tlb(vcpu, vpid02, false);
 		break;
 	case VMX_VPID_EXTENT_ALL_CONTEXT:
-		__vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
+		__vmx_flush_tlb(vcpu, vpid02, false);
 		break;
 	default:
 		WARN_ON_ONCE(1);
 		return kvm_skip_emulated_instruction(vcpu);
 	}
 
-	nested_vmx_succeed(vcpu);
-
-	return kvm_skip_emulated_instruction(vcpu);
+	return nested_vmx_succeed(vcpu);
 }
 
 static int handle_invpcid(struct kvm_vcpu *vcpu)
@@ -9162,11 +9637,11 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
 		}
 
 		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-			if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_roots[i].cr3)
+			if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3)
 			    == operand.pcid)
 				roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
 
-		kvm_mmu_free_roots(vcpu, roots_to_free);
+		kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
 		/*
 		 * If neither the current cr3 nor any of the prev_roots use the
 		 * given PCID, then nothing needs to be done here because a
@@ -9293,7 +9768,7 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
 
 		kvm_mmu_unload(vcpu);
 		mmu->ept_ad = accessed_dirty;
-		mmu->base_role.ad_disabled = !accessed_dirty;
+		mmu->mmu_role.base.ad_disabled = !accessed_dirty;
 		vmcs12->ept_pointer = address;
 		/*
 		 * TODO: Check what's the correct approach in case
@@ -9652,9 +10127,6 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
 			return false;
 		else if (is_page_fault(intr_info))
 			return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
-		else if (is_no_device(intr_info) &&
-			 !(vmcs12->guest_cr0 & X86_CR0_TS))
-			return false;
 		else if (is_debug(intr_info) &&
 			 vcpu->guest_debug &
 			 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
@@ -10676,9 +11148,25 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		vmcs_write32(PLE_WINDOW, vmx->ple_window);
 	}
 
-	if (vmx->nested.sync_shadow_vmcs) {
-		copy_vmcs12_to_shadow(vmx);
-		vmx->nested.sync_shadow_vmcs = false;
+	if (vmx->nested.need_vmcs12_sync) {
+		/*
+		 * hv_evmcs may end up being not mapped after migration (when
+		 * L2 was running), map it here to make sure vmcs12 changes are
+		 * properly reflected.
+		 */
+		if (vmx->nested.enlightened_vmcs_enabled &&
+		    !vmx->nested.hv_evmcs)
+			nested_vmx_handle_enlightened_vmptrld(vcpu, false);
+
+		if (vmx->nested.hv_evmcs) {
+			copy_vmcs12_to_enlightened(vmx);
+			/* All fields are clean */
+			vmx->nested.hv_evmcs->hv_clean_fields |=
+				HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+		} else {
+			copy_vmcs12_to_shadow(vmx);
+		}
+		vmx->nested.need_vmcs12_sync = false;
 	}
 
 	if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
@@ -10745,7 +11233,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t"
 		"jmp 1f \n\t"
 		"2: \n\t"
-		__ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
+		__ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t"
 		"1: \n\t"
 		/* Reload cr2 if changed */
 		"mov %c[cr2](%0), %%" _ASM_AX " \n\t"
@@ -10777,9 +11265,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 		/* Enter guest mode */
 		"jne 1f \n\t"
-		__ex(ASM_VMX_VMLAUNCH) "\n\t"
+		__ex("vmlaunch") "\n\t"
 		"jmp 2f \n\t"
-		"1: " __ex(ASM_VMX_VMRESUME) "\n\t"
+		"1: " __ex("vmresume") "\n\t"
 		"2: "
 		/* Save guest registers, load host registers, keep flags */
 		"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
@@ -10801,6 +11289,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %%r13, %c[r13](%0) \n\t"
 		"mov %%r14, %c[r14](%0) \n\t"
 		"mov %%r15, %c[r15](%0) \n\t"
+		/*
+		* Clear host registers marked as clobbered to prevent
+		* speculative use.
+		*/
 		"xor %%r8d,  %%r8d \n\t"
 		"xor %%r9d,  %%r9d \n\t"
 		"xor %%r10d, %%r10d \n\t"
@@ -10958,6 +11450,10 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 	vmx->loaded_vmcs = vmcs;
 	vmx_vcpu_load(vcpu, cpu);
 	put_cpu();
+
+	vm_entry_controls_reset_shadow(vmx);
+	vm_exit_controls_reset_shadow(vmx);
+	vmx_segment_cache_clear(vmx);
 }
 
 /*
@@ -10966,12 +11462,10 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
  */
 static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
 {
-       struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-       vcpu_load(vcpu);
-       vmx_switch_vmcs(vcpu, &vmx->vmcs01);
-       free_nested(vmx);
-       vcpu_put(vcpu);
+	vcpu_load(vcpu);
+	vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01);
+	free_nested(vcpu);
+	vcpu_put(vcpu);
 }
 
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
@@ -11334,28 +11828,28 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
 	return get_vmcs12(vcpu)->ept_pointer;
 }
 
-static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
+static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 {
 	WARN_ON(mmu_is_nested(vcpu));
-	if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu)))
-		return 1;
 
+	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
 	kvm_init_shadow_ept_mmu(vcpu,
 			to_vmx(vcpu)->nested.msrs.ept_caps &
 			VMX_EPT_EXECUTE_ONLY_BIT,
 			nested_ept_ad_enabled(vcpu),
 			nested_ept_get_cr3(vcpu));
-	vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
-	vcpu->arch.mmu.get_cr3           = nested_ept_get_cr3;
-	vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
+	vcpu->arch.mmu->set_cr3           = vmx_set_cr3;
+	vcpu->arch.mmu->get_cr3           = nested_ept_get_cr3;
+	vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
+	vcpu->arch.mmu->get_pdptr         = kvm_pdptr_read;
 
 	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
-	return 0;
 }
 
 static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
+	vcpu->arch.mmu = &vcpu->arch.root_mmu;
+	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
 }
 
 static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
@@ -11716,7 +12210,7 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
 	    !nested_exit_intr_ack_set(vcpu) ||
 	    (vmcs12->posted_intr_nv & 0xff00) ||
 	    (vmcs12->posted_intr_desc_addr & 0x3f) ||
-	    (!page_address_valid(vcpu, vmcs12->posted_intr_desc_addr))))
+	    (vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu))))
 		return -EINVAL;
 
 	/* tpr shadow is needed by all apicv features. */
@@ -11772,15 +12266,12 @@ static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
 static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
 					 struct vmcs12 *vmcs12)
 {
-	u64 address = vmcs12->pml_address;
-	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	if (!nested_cpu_has_pml(vmcs12))
+		return 0;
 
-	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) {
-		if (!nested_cpu_has_ept(vmcs12) ||
-		    !IS_ALIGNED(address, 4096)  ||
-		    address >> maxphyaddr)
-			return -EINVAL;
-	}
+	if (!nested_cpu_has_ept(vmcs12) ||
+	    !page_address_valid(vcpu, vmcs12->pml_address))
+		return -EINVAL;
 
 	return 0;
 }
@@ -11960,112 +12451,87 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
 	return 0;
 }
 
-static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+/*
+ * Returns if KVM is able to config CPU to tag TLB entries
+ * populated by L2 differently than TLB entries populated
+ * by L1.
+ *
+ * If L1 uses EPT, then TLB entries are tagged with different EPTP.
+ *
+ * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
+ * with different VPID (L1 entries are tagged with vmx->vpid
+ * while L2 entries are tagged with vmx->nested.vpid02).
+ */
+static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
 {
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
-	vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
-	vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
-	vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
-	vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
-	vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
-	vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
-	vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
-	vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
-	vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
-	vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
-	vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
-	vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
-	vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
-	vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
-	vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
-	vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
-	vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
-	vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
-	vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
-	vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
-	vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
-	vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
-	vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
-	vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
-	vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
-	vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
-	vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
-	vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
-	vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
-	vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
-	vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
-
-	vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
-	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
-		vmcs12->guest_pending_dbg_exceptions);
-	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
-	vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
+	return nested_cpu_has_ept(vmcs12) ||
+	       (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
+}
 
-	if (nested_cpu_has_xsaves(vmcs12))
-		vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
-	vmcs_write64(VMCS_LINK_POINTER, -1ull);
+static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+{
+	if (vmx->nested.nested_run_pending &&
+	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
+		return vmcs12->guest_ia32_efer;
+	else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
+		return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME);
+	else
+		return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME);
+}
 
-	if (cpu_has_vmx_posted_intr())
-		vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
+static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
+{
+	/*
+	 * If vmcs02 hasn't been initialized, set the constant vmcs02 state
+	 * according to L0's settings (vmcs12 is irrelevant here).  Host
+	 * fields that come from L0 and are not constant, e.g. HOST_CR3,
+	 * will be set as needed prior to VMLAUNCH/VMRESUME.
+	 */
+	if (vmx->nested.vmcs02_initialized)
+		return;
+	vmx->nested.vmcs02_initialized = true;
 
 	/*
-	 * Whether page-faults are trapped is determined by a combination of
-	 * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
-	 * If enable_ept, L0 doesn't care about page faults and we should
-	 * set all of these to L1's desires. However, if !enable_ept, L0 does
-	 * care about (at least some) page faults, and because it is not easy
-	 * (if at all possible?) to merge L0 and L1's desires, we simply ask
-	 * to exit on each and every L2 page fault. This is done by setting
-	 * MASK=MATCH=0 and (see below) EB.PF=1.
-	 * Note that below we don't need special code to set EB.PF beyond the
-	 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
-	 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
-	 * !enable_ept, EB.PF is 1, so the "or" will always be 1.
+	 * We don't care what the EPTP value is we just need to guarantee
+	 * it's valid so we don't get a false positive when doing early
+	 * consistency checks.
 	 */
-	vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
-		enable_ept ? vmcs12->page_fault_error_code_mask : 0);
-	vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
-		enable_ept ? vmcs12->page_fault_error_code_match : 0);
+	if (enable_ept && nested_early_check)
+		vmcs_write64(EPT_POINTER, construct_eptp(&vmx->vcpu, 0));
 
 	/* All VMFUNCs are currently emulated through L0 vmexits.  */
 	if (cpu_has_vmx_vmfunc())
 		vmcs_write64(VM_FUNCTION_CONTROL, 0);
 
-	if (cpu_has_vmx_apicv()) {
-		vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
-		vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
-		vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
-		vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
-	}
+	if (cpu_has_vmx_posted_intr())
+		vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
 
-	/*
-	 * Set host-state according to L0's settings (vmcs12 is irrelevant here)
-	 * Some constant fields are set here by vmx_set_constant_host_state().
-	 * Other fields are different per CPU, and will be set later when
-	 * vmx_vcpu_load() is called, and when vmx_prepare_switch_to_guest()
-	 * is called.
-	 */
-	vmx_set_constant_host_state(vmx);
+	if (cpu_has_vmx_msr_bitmap())
+		vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
+
+	if (enable_pml)
+		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
 
 	/*
-	 * Set the MSR load/store lists to match L0's settings.
+	 * Set the MSR load/store lists to match L0's settings.  Only the
+	 * addresses are constant (for vmcs02), the counts can change based
+	 * on L2's behavior, e.g. switching to/from long mode.
 	 */
 	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
-	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
 	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
-	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
 	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
 
-	set_cr4_guest_host_mask(vmx);
+	vmx_set_constant_host_state(vmx);
+}
 
-	if (kvm_mpx_supported()) {
-		if (vmx->nested.nested_run_pending &&
-			(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
-			vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
-		else
-			vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
-	}
+static void prepare_vmcs02_early_full(struct vcpu_vmx *vmx,
+				      struct vmcs12 *vmcs12)
+{
+	prepare_vmcs02_constant_state(vmx);
+
+	vmcs_write64(VMCS_LINK_POINTER, -1ull);
 
 	if (enable_vpid) {
 		if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
@@ -12073,78 +12539,30 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		else
 			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
 	}
-
-	/*
-	 * L1 may access the L2's PDPTR, so save them to construct vmcs12
-	 */
-	if (enable_ept) {
-		vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
-		vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
-		vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
-		vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
-	}
-
-	if (cpu_has_vmx_msr_bitmap())
-		vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
 }
 
-/*
- * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
- * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
- * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
- * guest in a way that will both be appropriate to L1's requests, and our
- * needs. In addition to modifying the active vmcs (which is vmcs02), this
- * function also has additional necessary side-effects, like setting various
- * vcpu->arch fields.
- * Returns 0 on success, 1 on failure. Invalid state exit qualification code
- * is assigned to entry_failure_code on failure.
- */
-static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
-			  u32 *entry_failure_code)
+static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 {
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 exec_control, vmcs12_exec_ctrl;
+	u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
 
-	if (vmx->nested.dirty_vmcs12) {
-		prepare_vmcs02_full(vcpu, vmcs12);
-		vmx->nested.dirty_vmcs12 = false;
-	}
+	if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
+		prepare_vmcs02_early_full(vmx, vmcs12);
 
 	/*
-	 * First, the fields that are shadowed.  This must be kept in sync
-	 * with vmx_shadow_fields.h.
+	 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
+	 * entry, but only if the current (host) sp changed from the value
+	 * we wrote last (vmx->host_rsp).  This cache is no longer relevant
+	 * if we switch vmcs, and rather than hold a separate cache per vmcs,
+	 * here we just force the write to happen on entry.  host_rsp will
+	 * also be written unconditionally by nested_vmx_check_vmentry_hw()
+	 * if we are doing early consistency checks via hardware.
 	 */
+	vmx->host_rsp = 0;
 
-	vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
-	vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
-	vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
-	vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
-	vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
-
-	if (vmx->nested.nested_run_pending &&
-	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
-		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
-		vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
-	} else {
-		kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
-		vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
-	}
-	if (vmx->nested.nested_run_pending) {
-		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-			     vmcs12->vm_entry_intr_info_field);
-		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
-			     vmcs12->vm_entry_exception_error_code);
-		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
-			     vmcs12->vm_entry_instruction_len);
-		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
-			     vmcs12->guest_interruptibility_info);
-		vmx->loaded_vmcs->nmi_known_unmasked =
-			!(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
-	} else {
-		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
-	}
-	vmx_set_rflags(vcpu, vmcs12->guest_rflags);
-
+	/*
+	 * PIN CONTROLS
+	 */
 	exec_control = vmcs12->pin_based_vm_exec_control;
 
 	/* Preemption timer setting is computed directly in vmx_vcpu_run.  */
@@ -12159,13 +12577,43 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	} else {
 		exec_control &= ~PIN_BASED_POSTED_INTR;
 	}
-
 	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
 
-	vmx->nested.preemption_timer_expired = false;
-	if (nested_cpu_has_preemption_timer(vmcs12))
-		vmx_start_preemption_timer(vcpu);
+	/*
+	 * EXEC CONTROLS
+	 */
+	exec_control = vmx_exec_control(vmx); /* L0's desires */
+	exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
+	exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+	exec_control &= ~CPU_BASED_TPR_SHADOW;
+	exec_control |= vmcs12->cpu_based_vm_exec_control;
 
+	/*
+	 * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if
+	 * nested_get_vmcs12_pages can't fix it up, the illegal value
+	 * will result in a VM entry failure.
+	 */
+	if (exec_control & CPU_BASED_TPR_SHADOW) {
+		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
+		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
+	} else {
+#ifdef CONFIG_X86_64
+		exec_control |= CPU_BASED_CR8_LOAD_EXITING |
+				CPU_BASED_CR8_STORE_EXITING;
+#endif
+	}
+
+	/*
+	 * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
+	 * for I/O port accesses.
+	 */
+	exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
+	exec_control |= CPU_BASED_UNCOND_IO_EXITING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
+
+	/*
+	 * SECONDARY EXEC CONTROLS
+	 */
 	if (cpu_has_secondary_exec_ctrls()) {
 		exec_control = vmx->secondary_exec_control;
 
@@ -12206,43 +12654,214 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	}
 
 	/*
-	 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
-	 * entry, but only if the current (host) sp changed from the value
-	 * we wrote last (vmx->host_rsp). This cache is no longer relevant
-	 * if we switch vmcs, and rather than hold a separate cache per vmcs,
-	 * here we just force the write to happen on entry.
+	 * ENTRY CONTROLS
+	 *
+	 * vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
+	 * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate
+	 * on the related bits (if supported by the CPU) in the hope that
+	 * we can avoid VMWrites during vmx_set_efer().
+	 */
+	exec_control = (vmcs12->vm_entry_controls | vmcs_config.vmentry_ctrl) &
+			~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER;
+	if (cpu_has_load_ia32_efer) {
+		if (guest_efer & EFER_LMA)
+			exec_control |= VM_ENTRY_IA32E_MODE;
+		if (guest_efer != host_efer)
+			exec_control |= VM_ENTRY_LOAD_IA32_EFER;
+	}
+	vm_entry_controls_init(vmx, exec_control);
+
+	/*
+	 * EXIT CONTROLS
+	 *
+	 * L2->L1 exit controls are emulated - the hardware exit is to L0 so
+	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
+	 * bits may be modified by vmx_set_efer() in prepare_vmcs02().
 	 */
-	vmx->host_rsp = 0;
+	exec_control = vmcs_config.vmexit_ctrl;
+	if (cpu_has_load_ia32_efer && guest_efer != host_efer)
+		exec_control |= VM_EXIT_LOAD_IA32_EFER;
+	vm_exit_controls_init(vmx, exec_control);
 
-	exec_control = vmx_exec_control(vmx); /* L0's desires */
-	exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
-	exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
-	exec_control &= ~CPU_BASED_TPR_SHADOW;
-	exec_control |= vmcs12->cpu_based_vm_exec_control;
+	/*
+	 * Conceptually we want to copy the PML address and index from
+	 * vmcs01 here, and then back to vmcs01 on nested vmexit. But,
+	 * since we always flush the log on each vmexit and never change
+	 * the PML address (once set), this happens to be equivalent to
+	 * simply resetting the index in vmcs02.
+	 */
+	if (enable_pml)
+		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
 
 	/*
-	 * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if
-	 * nested_get_vmcs12_pages can't fix it up, the illegal value
-	 * will result in a VM entry failure.
+	 * Interrupt/Exception Fields
 	 */
-	if (exec_control & CPU_BASED_TPR_SHADOW) {
-		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
-		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
+	if (vmx->nested.nested_run_pending) {
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+			     vmcs12->vm_entry_intr_info_field);
+		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
+			     vmcs12->vm_entry_exception_error_code);
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+			     vmcs12->vm_entry_instruction_len);
+		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
+			     vmcs12->guest_interruptibility_info);
+		vmx->loaded_vmcs->nmi_known_unmasked =
+			!(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
 	} else {
-#ifdef CONFIG_X86_64
-		exec_control |= CPU_BASED_CR8_LOAD_EXITING |
-				CPU_BASED_CR8_STORE_EXITING;
-#endif
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
 	}
+}
+
+static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+{
+	struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
+
+	if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
+			   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
+		vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
+		vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
+		vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
+		vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
+		vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
+		vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
+		vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
+		vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
+		vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
+		vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
+		vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
+		vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
+		vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
+		vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
+		vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
+		vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
+		vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
+		vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
+		vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
+		vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
+		vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
+		vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
+		vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
+		vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
+		vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
+		vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
+		vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
+		vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
+		vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
+		vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
+		vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
+		vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
+		vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
+		vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
+	}
+
+	if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
+			   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {
+		vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
+		vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+			    vmcs12->guest_pending_dbg_exceptions);
+		vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
+		vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
+
+		/*
+		 * L1 may access the L2's PDPTR, so save them to construct
+		 * vmcs12
+		 */
+		if (enable_ept) {
+			vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
+			vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
+			vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
+			vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
+		}
+	}
+
+	if (nested_cpu_has_xsaves(vmcs12))
+		vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
 
 	/*
-	 * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
-	 * for I/O port accesses.
+	 * Whether page-faults are trapped is determined by a combination of
+	 * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
+	 * If enable_ept, L0 doesn't care about page faults and we should
+	 * set all of these to L1's desires. However, if !enable_ept, L0 does
+	 * care about (at least some) page faults, and because it is not easy
+	 * (if at all possible?) to merge L0 and L1's desires, we simply ask
+	 * to exit on each and every L2 page fault. This is done by setting
+	 * MASK=MATCH=0 and (see below) EB.PF=1.
+	 * Note that below we don't need special code to set EB.PF beyond the
+	 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
+	 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
+	 * !enable_ept, EB.PF is 1, so the "or" will always be 1.
 	 */
-	exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
-	exec_control |= CPU_BASED_UNCOND_IO_EXITING;
+	vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
+		enable_ept ? vmcs12->page_fault_error_code_mask : 0);
+	vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
+		enable_ept ? vmcs12->page_fault_error_code_match : 0);
 
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
+	if (cpu_has_vmx_apicv()) {
+		vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
+		vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
+		vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
+		vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
+	}
+
+	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
+
+	set_cr4_guest_host_mask(vmx);
+
+	if (kvm_mpx_supported()) {
+		if (vmx->nested.nested_run_pending &&
+			(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
+			vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+		else
+			vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
+	}
+}
+
+/*
+ * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
+ * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
+ * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
+ * guest in a way that will both be appropriate to L1's requests, and our
+ * needs. In addition to modifying the active vmcs (which is vmcs02), this
+ * function also has additional necessary side-effects, like setting various
+ * vcpu->arch fields.
+ * Returns 0 on success, 1 on failure. Invalid state exit qualification code
+ * is assigned to entry_failure_code on failure.
+ */
+static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+			  u32 *entry_failure_code)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
+
+	if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {
+		prepare_vmcs02_full(vmx, vmcs12);
+		vmx->nested.dirty_vmcs12 = false;
+	}
+
+	/*
+	 * First, the fields that are shadowed.  This must be kept in sync
+	 * with vmx_shadow_fields.h.
+	 */
+	if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
+			   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
+		vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
+		vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
+	}
+
+	if (vmx->nested.nested_run_pending &&
+	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
+		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
+		vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
+	} else {
+		kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
+		vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
+	}
+	vmx_set_rflags(vcpu, vmcs12->guest_rflags);
+
+	vmx->nested.preemption_timer_expired = false;
+	if (nested_cpu_has_preemption_timer(vmcs12))
+		vmx_start_preemption_timer(vcpu);
 
 	/* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
 	 * bitwise-or of what L1 wants to trap for L2, and what we want to
@@ -12252,20 +12871,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
 
-	/* L2->L1 exit controls are emulated - the hardware exit is to L0 so
-	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
-	 * bits are further modified by vmx_set_efer() below.
-	 */
-	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
-
-	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
-	 * emulated by vmx_set_efer(), below.
-	 */
-	vm_entry_controls_init(vmx, 
-		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
-			~VM_ENTRY_IA32E_MODE) |
-		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
-
 	if (vmx->nested.nested_run_pending &&
 	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
 		vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
@@ -12288,37 +12893,29 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		 * influence global bitmap(for vpid01 and vpid02 allocation)
 		 * even if spawn a lot of nested vCPUs.
 		 */
-		if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
+		if (nested_cpu_has_vpid(vmcs12) && nested_has_guest_tlb_tag(vcpu)) {
 			if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
 				vmx->nested.last_vpid = vmcs12->virtual_processor_id;
-				__vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
+				__vmx_flush_tlb(vcpu, nested_get_vpid02(vcpu), false);
 			}
 		} else {
-			vmx_flush_tlb(vcpu, true);
+			/*
+			 * If L1 use EPT, then L0 needs to execute INVEPT on
+			 * EPTP02 instead of EPTP01. Therefore, delay TLB
+			 * flush until vmcs02->eptp is fully updated by
+			 * KVM_REQ_LOAD_CR3. Note that this assumes
+			 * KVM_REQ_TLB_FLUSH is evaluated after
+			 * KVM_REQ_LOAD_CR3 in vcpu_enter_guest().
+			 */
+			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 		}
 	}
 
-	if (enable_pml) {
-		/*
-		 * Conceptually we want to copy the PML address and index from
-		 * vmcs01 here, and then back to vmcs01 on nested vmexit. But,
-		 * since we always flush the log on each vmexit, this happens
-		 * to be equivalent to simply resetting the fields in vmcs02.
-		 */
-		ASSERT(vmx->pml_pg);
-		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
-		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
-	}
-
-	if (nested_cpu_has_ept(vmcs12)) {
-		if (nested_ept_init_mmu_context(vcpu)) {
-			*entry_failure_code = ENTRY_FAIL_DEFAULT;
-			return 1;
-		}
-	} else if (nested_cpu_has2(vmcs12,
-				   SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+	if (nested_cpu_has_ept(vmcs12))
+		nested_ept_init_mmu_context(vcpu);
+	else if (nested_cpu_has2(vmcs12,
+				 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
 		vmx_flush_tlb(vcpu, true);
-	}
 
 	/*
 	 * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
@@ -12334,14 +12931,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	vmx_set_cr4(vcpu, vmcs12->guest_cr4);
 	vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
 
-	if (vmx->nested.nested_run_pending &&
-	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
-		vcpu->arch.efer = vmcs12->guest_ia32_efer;
-	else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
-		vcpu->arch.efer |= (EFER_LMA | EFER_LME);
-	else
-		vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
-	/* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
+	vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12);
+	/* Note: may modify VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
 	vmx_set_efer(vcpu, vcpu->arch.efer);
 
 	/*
@@ -12383,6 +12974,7 @@ static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
 static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	bool ia32e;
 
 	if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
 	    vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
@@ -12457,6 +13049,21 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
 
 	/*
+	 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
+	 * IA32_EFER MSR must be 0 in the field for that register. In addition,
+	 * the values of the LMA and LME bits in the field must each be that of
+	 * the host address-space size VM-exit control.
+	 */
+	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
+		ia32e = (vmcs12->vm_exit_controls &
+			 VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
+		if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
+		    ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
+		    ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))
+			return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
+	}
+
+	/*
 	 * From the Intel SDM, volume 3:
 	 * Fields relevant to VM-entry event injection must be set properly.
 	 * These fields are the VM-entry interruption-information field, the
@@ -12512,6 +13119,10 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		}
 	}
 
+	if (nested_cpu_has_ept(vmcs12) &&
+	    !valid_ept_address(vcpu, vmcs12->ept_pointer))
+		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
 	return 0;
 }
 
@@ -12577,21 +13188,6 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 			return 1;
 	}
 
-	/*
-	 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
-	 * IA32_EFER MSR must be 0 in the field for that register. In addition,
-	 * the values of the LMA and LME bits in the field must each be that of
-	 * the host address-space size VM-exit control.
-	 */
-	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
-		ia32e = (vmcs12->vm_exit_controls &
-			 VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
-		if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
-		    ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
-		    ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))
-			return 1;
-	}
-
 	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
 		(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
 		(vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
@@ -12600,26 +13196,139 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	return 0;
 }
 
+static int __noclone nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned long cr3, cr4;
+
+	if (!nested_early_check)
+		return 0;
+
+	if (vmx->msr_autoload.host.nr)
+		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
+	if (vmx->msr_autoload.guest.nr)
+		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
+
+	preempt_disable();
+
+	vmx_prepare_switch_to_guest(vcpu);
+
+	/*
+	 * Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS,
+	 * which is reserved to '1' by hardware.  GUEST_RFLAGS is guaranteed to
+	 * be written (by preparve_vmcs02()) before the "real" VMEnter, i.e.
+	 * there is no need to preserve other bits or save/restore the field.
+	 */
+	vmcs_writel(GUEST_RFLAGS, 0);
+
+	vmcs_writel(HOST_RIP, vmx_early_consistency_check_return);
+
+	cr3 = __get_current_cr3_fast();
+	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
+		vmcs_writel(HOST_CR3, cr3);
+		vmx->loaded_vmcs->host_state.cr3 = cr3;
+	}
+
+	cr4 = cr4_read_shadow();
+	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
+		vmcs_writel(HOST_CR4, cr4);
+		vmx->loaded_vmcs->host_state.cr4 = cr4;
+	}
+
+	vmx->__launched = vmx->loaded_vmcs->launched;
+
+	asm(
+		/* Set HOST_RSP */
+		__ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t"
+		"mov %%" _ASM_SP ", %c[host_rsp](%0)\n\t"
+
+		/* Check if vmlaunch of vmresume is needed */
+		"cmpl $0, %c[launched](%0)\n\t"
+		"je 1f\n\t"
+		__ex("vmresume") "\n\t"
+		"jmp 2f\n\t"
+		"1: " __ex("vmlaunch") "\n\t"
+		"jmp 2f\n\t"
+		"2: "
+
+		/* Set vmx->fail accordingly */
+		"setbe %c[fail](%0)\n\t"
+
+		".pushsection .rodata\n\t"
+		".global vmx_early_consistency_check_return\n\t"
+		"vmx_early_consistency_check_return: " _ASM_PTR " 2b\n\t"
+		".popsection"
+	      :
+	      : "c"(vmx), "d"((unsigned long)HOST_RSP),
+		[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
+		[fail]"i"(offsetof(struct vcpu_vmx, fail)),
+		[host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp))
+	      : "rax", "cc", "memory"
+	);
+
+	vmcs_writel(HOST_RIP, vmx_return);
+
+	preempt_enable();
+
+	if (vmx->msr_autoload.host.nr)
+		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+	if (vmx->msr_autoload.guest.nr)
+		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
+
+	if (vmx->fail) {
+		WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
+			     VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+		vmx->fail = 0;
+		return 1;
+	}
+
+	/*
+	 * VMExit clears RFLAGS.IF and DR7, even on a consistency check.
+	 */
+	local_irq_enable();
+	if (hw_breakpoint_active())
+		set_debugreg(__this_cpu_read(cpu_dr7), 7);
+
+	/*
+	 * A non-failing VMEntry means we somehow entered guest mode with
+	 * an illegal RIP, and that's just the tip of the iceberg.  There
+	 * is no telling what memory has been modified or what state has
+	 * been exposed to unknown code.  Hitting this all but guarantees
+	 * a (very critical) hardware issue.
+	 */
+	WARN_ON(!(vmcs_read32(VM_EXIT_REASON) &
+		VMX_EXIT_REASONS_FAILED_VMENTRY));
+
+	return 0;
+}
+STACK_FRAME_NON_STANDARD(nested_vmx_check_vmentry_hw);
+
+static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
+				   struct vmcs12 *vmcs12);
+
 /*
- * If exit_qual is NULL, this is being called from state restore (either RSM
+ * If from_vmentry is false, this is being called from state restore (either RSM
  * or KVM_SET_NESTED_STATE).  Otherwise it's called from vmlaunch/vmresume.
++ *
++ * Returns:
++ *   0 - success, i.e. proceed with actual VMEnter
++ *   1 - consistency check VMExit
++ *  -1 - consistency check VMFail
  */
-static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
+static int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+					  bool from_vmentry)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-	bool from_vmentry = !!exit_qual;
-	u32 dummy_exit_qual;
 	bool evaluate_pending_interrupts;
-	int r = 0;
+	u32 exit_reason = EXIT_REASON_INVALID_STATE;
+	u32 exit_qual;
 
 	evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
 		(CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
 	if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
 		evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
 
-	enter_guest_mode(vcpu);
-
 	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
 		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
 	if (kvm_mpx_supported() &&
@@ -12627,24 +13336,35 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
 		vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
 
 	vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
-	vmx_segment_cache_clear(vmx);
 
+	prepare_vmcs02_early(vmx, vmcs12);
+
+	if (from_vmentry) {
+		nested_get_vmcs12_pages(vcpu);
+
+		if (nested_vmx_check_vmentry_hw(vcpu)) {
+			vmx_switch_vmcs(vcpu, &vmx->vmcs01);
+			return -1;
+		}
+
+		if (check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
+			goto vmentry_fail_vmexit;
+	}
+
+	enter_guest_mode(vcpu);
 	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
 		vcpu->arch.tsc_offset += vmcs12->tsc_offset;
 
-	r = EXIT_REASON_INVALID_STATE;
-	if (prepare_vmcs02(vcpu, vmcs12, from_vmentry ? exit_qual : &dummy_exit_qual))
-		goto fail;
+	if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
+		goto vmentry_fail_vmexit_guest_mode;
 
 	if (from_vmentry) {
-		nested_get_vmcs12_pages(vcpu);
-
-		r = EXIT_REASON_MSR_LOAD_FAIL;
-		*exit_qual = nested_vmx_load_msr(vcpu,
-	     					 vmcs12->vm_entry_msr_load_addr,
-					      	 vmcs12->vm_entry_msr_load_count);
-		if (*exit_qual)
-			goto fail;
+		exit_reason = EXIT_REASON_MSR_LOAD_FAIL;
+		exit_qual = nested_vmx_load_msr(vcpu,
+						vmcs12->vm_entry_msr_load_addr,
+						vmcs12->vm_entry_msr_load_count);
+		if (exit_qual)
+			goto vmentry_fail_vmexit_guest_mode;
 	} else {
 		/*
 		 * The MMU is not initialized to point at the right entities yet and
@@ -12681,12 +13401,28 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
 	 */
 	return 0;
 
-fail:
+	/*
+	 * A failed consistency check that leads to a VMExit during L1's
+	 * VMEnter to L2 is a variation of a normal VMexit, as explained in
+	 * 26.7 "VM-entry failures during or after loading guest state".
+	 */
+vmentry_fail_vmexit_guest_mode:
 	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
 		vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
 	leave_guest_mode(vcpu);
+
+vmentry_fail_vmexit:
 	vmx_switch_vmcs(vcpu, &vmx->vmcs01);
-	return r;
+
+	if (!from_vmentry)
+		return 1;
+
+	load_vmcs12_host_state(vcpu, vmcs12);
+	vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
+	vmcs12->exit_qualification = exit_qual;
+	if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
+		vmx->nested.need_vmcs12_sync = true;
+	return 1;
 }
 
 /*
@@ -12698,14 +13434,16 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	struct vmcs12 *vmcs12;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
-	u32 exit_qual;
 	int ret;
 
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 
-	if (!nested_vmx_check_vmcs12(vcpu))
-		goto out;
+	if (!nested_vmx_handle_enlightened_vmptrld(vcpu, true))
+		return 1;
+
+	if (!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull)
+		return nested_vmx_failInvalid(vcpu);
 
 	vmcs12 = get_vmcs12(vcpu);
 
@@ -12715,13 +13453,16 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	 * rather than RFLAGS.ZF, and no error number is stored to the
 	 * VM-instruction error field.
 	 */
-	if (vmcs12->hdr.shadow_vmcs) {
-		nested_vmx_failInvalid(vcpu);
-		goto out;
-	}
+	if (vmcs12->hdr.shadow_vmcs)
+		return nested_vmx_failInvalid(vcpu);
 
-	if (enable_shadow_vmcs)
+	if (vmx->nested.hv_evmcs) {
+		copy_enlightened_to_vmcs12(vmx);
+		/* Enlightened VMCS doesn't have launch state */
+		vmcs12->launch_state = !launch;
+	} else if (enable_shadow_vmcs) {
 		copy_shadow_to_vmcs12(vmx);
+	}
 
 	/*
 	 * The nested entry process starts with enforcing various prerequisites
@@ -12733,59 +13474,37 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	 * for misconfigurations which will anyway be caught by the processor
 	 * when using the merged vmcs02.
 	 */
-	if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS) {
-		nested_vmx_failValid(vcpu,
-				     VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
-		goto out;
-	}
+	if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS)
+		return nested_vmx_failValid(vcpu,
+			VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
 
-	if (vmcs12->launch_state == launch) {
-		nested_vmx_failValid(vcpu,
+	if (vmcs12->launch_state == launch)
+		return nested_vmx_failValid(vcpu,
 			launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
 			       : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
-		goto out;
-	}
 
 	ret = check_vmentry_prereqs(vcpu, vmcs12);
-	if (ret) {
-		nested_vmx_failValid(vcpu, ret);
-		goto out;
-	}
-
-	/*
-	 * After this point, the trap flag no longer triggers a singlestep trap
-	 * on the vm entry instructions; don't call kvm_skip_emulated_instruction.
-	 * This is not 100% correct; for performance reasons, we delegate most
-	 * of the checks on host state to the processor.  If those fail,
-	 * the singlestep trap is missed.
-	 */
-	skip_emulated_instruction(vcpu);
-
-	ret = check_vmentry_postreqs(vcpu, vmcs12, &exit_qual);
-	if (ret) {
-		nested_vmx_entry_failure(vcpu, vmcs12,
-					 EXIT_REASON_INVALID_STATE, exit_qual);
-		return 1;
-	}
+	if (ret)
+		return nested_vmx_failValid(vcpu, ret);
 
 	/*
 	 * We're finally done with prerequisite checking, and can start with
 	 * the nested entry.
 	 */
-
 	vmx->nested.nested_run_pending = 1;
-	ret = enter_vmx_non_root_mode(vcpu, &exit_qual);
-	if (ret) {
-		nested_vmx_entry_failure(vcpu, vmcs12, ret, exit_qual);
-		vmx->nested.nested_run_pending = 0;
+	ret = nested_vmx_enter_non_root_mode(vcpu, true);
+	vmx->nested.nested_run_pending = !ret;
+	if (ret > 0)
 		return 1;
-	}
+	else if (ret)
+		return nested_vmx_failValid(vcpu,
+			VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 
 	/* Hide L1D cache contents from the nested guest.  */
 	vmx->vcpu.arch.l1tf_flush_l1d = true;
 
 	/*
-	 * Must happen outside of enter_vmx_non_root_mode() as it will
+	 * Must happen outside of nested_vmx_enter_non_root_mode() as it will
 	 * also be used as part of restoring nVMX state for
 	 * snapshot restore (migration).
 	 *
@@ -12806,9 +13525,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		return kvm_vcpu_halt(vcpu);
 	}
 	return 1;
-
-out:
-	return kvm_skip_emulated_instruction(vcpu);
 }
 
 /*
@@ -13122,24 +13838,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	kvm_clear_interrupt_queue(vcpu);
 }
 
-static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu,
-			struct vmcs12 *vmcs12)
-{
-	u32 entry_failure_code;
-
-	nested_ept_uninit_mmu_context(vcpu);
-
-	/*
-	 * Only PDPTE load can fail as the value of cr3 was checked on entry and
-	 * couldn't have changed.
-	 */
-	if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
-		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
-
-	if (!enable_ept)
-		vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
-}
-
 /*
  * A part of what we need to when the nested L2 guest exits and we want to
  * run its L1 parent, is to reset L1's guest state to the host state specified
@@ -13153,6 +13851,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 				   struct vmcs12 *vmcs12)
 {
 	struct kvm_segment seg;
+	u32 entry_failure_code;
 
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
 		vcpu->arch.efer = vmcs12->host_ia32_efer;
@@ -13165,6 +13864,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
 	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
 	vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
+	vmx_set_interrupt_shadow(vcpu, 0);
+
 	/*
 	 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
 	 * actually changed, because vmx_set_cr0 refers to efer set above.
@@ -13179,23 +13880,35 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
 	vmx_set_cr4(vcpu, vmcs12->host_cr4);
 
-	load_vmcs12_mmu_host_state(vcpu, vmcs12);
+	nested_ept_uninit_mmu_context(vcpu);
 
 	/*
-	 * If vmcs01 don't use VPID, CPU flushes TLB on every
+	 * Only PDPTE load can fail as the value of cr3 was checked on entry and
+	 * couldn't have changed.
+	 */
+	if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
+		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
+
+	if (!enable_ept)
+		vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+
+	/*
+	 * If vmcs01 doesn't use VPID, CPU flushes TLB on every
 	 * VMEntry/VMExit. Thus, no need to flush TLB.
 	 *
-	 * If vmcs12 uses VPID, TLB entries populated by L2 are
-	 * tagged with vmx->nested.vpid02 while L1 entries are tagged
-	 * with vmx->vpid. Thus, no need to flush TLB.
+	 * If vmcs12 doesn't use VPID, L1 expects TLB to be
+	 * flushed on every VMEntry/VMExit.
+	 *
+	 * Otherwise, we can preserve TLB entries as long as we are
+	 * able to tag L1 TLB entries differently than L2 TLB entries.
 	 *
-	 * Therefore, flush TLB only in case vmcs01 uses VPID and
-	 * vmcs12 don't use VPID as in this case L1 & L2 TLB entries
-	 * are both tagged with vmx->vpid.
+	 * If vmcs12 uses EPT, we need to execute this flush on EPTP01
+	 * and therefore we request the TLB flush to happen only after VMCS EPTP
+	 * has been set by KVM_REQ_LOAD_CR3.
 	 */
 	if (enable_vpid &&
-	    !(nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02)) {
-		vmx_flush_tlb(vcpu, true);
+	    (!nested_cpu_has_vpid(vmcs12) || !nested_has_guest_tlb_tag(vcpu))) {
+		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	}
 
 	vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
@@ -13275,6 +13988,140 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
 }
 
+static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
+{
+	struct shared_msr_entry *efer_msr;
+	unsigned int i;
+
+	if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER)
+		return vmcs_read64(GUEST_IA32_EFER);
+
+	if (cpu_has_load_ia32_efer)
+		return host_efer;
+
+	for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
+		if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
+			return vmx->msr_autoload.guest.val[i].value;
+	}
+
+	efer_msr = find_msr_entry(vmx, MSR_EFER);
+	if (efer_msr)
+		return efer_msr->data;
+
+	return host_efer;
+}
+
+static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
+{
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmx_msr_entry g, h;
+	struct msr_data msr;
+	gpa_t gpa;
+	u32 i, j;
+
+	vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT);
+
+	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
+		/*
+		 * L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set
+		 * as vmcs01.GUEST_DR7 contains a userspace defined value
+		 * and vcpu->arch.dr7 is not squirreled away before the
+		 * nested VMENTER (not worth adding a variable in nested_vmx).
+		 */
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+			kvm_set_dr(vcpu, 7, DR7_FIXED_1);
+		else
+			WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
+	}
+
+	/*
+	 * Note that calling vmx_set_{efer,cr0,cr4} is important as they
+	 * handle a variety of side effects to KVM's software model.
+	 */
+	vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
+
+	vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
+	vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
+
+	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
+	vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
+
+	nested_ept_uninit_mmu_context(vcpu);
+	vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+
+	/*
+	 * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
+	 * from vmcs01 (if necessary).  The PDPTRs are not loaded on
+	 * VMFail, like everything else we just need to ensure our
+	 * software model is up-to-date.
+	 */
+	ept_save_pdptrs(vcpu);
+
+	kvm_mmu_reset_context(vcpu);
+
+	if (cpu_has_vmx_msr_bitmap())
+		vmx_update_msr_bitmap(vcpu);
+
+	/*
+	 * This nasty bit of open coding is a compromise between blindly
+	 * loading L1's MSRs using the exit load lists (incorrect emulation
+	 * of VMFail), leaving the nested VM's MSRs in the software model
+	 * (incorrect behavior) and snapshotting the modified MSRs (too
+	 * expensive since the lists are unbound by hardware).  For each
+	 * MSR that was (prematurely) loaded from the nested VMEntry load
+	 * list, reload it from the exit load list if it exists and differs
+	 * from the guest value.  The intent is to stuff host state as
+	 * silently as possible, not to fully process the exit load list.
+	 */
+	msr.host_initiated = false;
+	for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
+		gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
+		if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
+			pr_debug_ratelimited(
+				"%s read MSR index failed (%u, 0x%08llx)\n",
+				__func__, i, gpa);
+			goto vmabort;
+		}
+
+		for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) {
+			gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h));
+			if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) {
+				pr_debug_ratelimited(
+					"%s read MSR failed (%u, 0x%08llx)\n",
+					__func__, j, gpa);
+				goto vmabort;
+			}
+			if (h.index != g.index)
+				continue;
+			if (h.value == g.value)
+				break;
+
+			if (nested_vmx_load_msr_check(vcpu, &h)) {
+				pr_debug_ratelimited(
+					"%s check failed (%u, 0x%x, 0x%x)\n",
+					__func__, j, h.index, h.reserved);
+				goto vmabort;
+			}
+
+			msr.index = h.index;
+			msr.data = h.value;
+			if (kvm_set_msr(vcpu, &msr)) {
+				pr_debug_ratelimited(
+					"%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
+					__func__, j, h.index, h.value);
+				goto vmabort;
+			}
+		}
+	}
+
+	return;
+
+vmabort:
+	nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
+}
+
 /*
  * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
  * and modify vmcs12 to make it see what it would expect to see there if
@@ -13290,14 +14137,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	/* trying to cancel vmlaunch/vmresume is a bug */
 	WARN_ON_ONCE(vmx->nested.nested_run_pending);
 
-	/*
-	 * The only expected VM-instruction error is "VM entry with
-	 * invalid control field(s)." Anything else indicates a
-	 * problem with L0.
-	 */
-	WARN_ON_ONCE(vmx->fail && (vmcs_read32(VM_INSTRUCTION_ERROR) !=
-				   VMXERR_ENTRY_INVALID_CONTROL_FIELD));
-
 	leave_guest_mode(vcpu);
 
 	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
@@ -13324,12 +14163,19 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 		if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
 					 vmcs12->vm_exit_msr_store_count))
 			nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+	} else {
+		/*
+		 * The only expected VM-instruction error is "VM entry with
+		 * invalid control field(s)." Anything else indicates a
+		 * problem with L0.  And we should never get here with a
+		 * VMFail of any type if early consistency checks are enabled.
+		 */
+		WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
+			     VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+		WARN_ON_ONCE(nested_early_check);
 	}
 
 	vmx_switch_vmcs(vcpu, &vmx->vmcs01);
-	vm_entry_controls_reset_shadow(vmx);
-	vm_exit_controls_reset_shadow(vmx);
-	vmx_segment_cache_clear(vmx);
 
 	/* Update any VMCS fields that might have changed while L2 ran */
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
@@ -13373,8 +14219,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	 */
 	kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 
-	if (enable_shadow_vmcs && exit_reason != -1)
-		vmx->nested.sync_shadow_vmcs = true;
+	if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs))
+		vmx->nested.need_vmcs12_sync = true;
 
 	/* in case we halted in L2 */
 	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -13409,24 +14255,24 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 
 		return;
 	}
-	
+
 	/*
 	 * After an early L2 VM-entry failure, we're now back
 	 * in L1 which thinks it just finished a VMLAUNCH or
 	 * VMRESUME instruction, so we need to set the failure
 	 * flag and the VM-instruction error field of the VMCS
-	 * accordingly.
+	 * accordingly, and skip the emulated instruction.
 	 */
-	nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-
-	load_vmcs12_mmu_host_state(vcpu, vmcs12);
+	(void)nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 
 	/*
-	 * The emulated instruction was already skipped in
-	 * nested_vmx_run, but the updated RIP was never
-	 * written back to the vmcs01.
+	 * Restore L1's host state to KVM's software model.  We're here
+	 * because a consistency check was caught by hardware, which
+	 * means some amount of guest state has been propagated to KVM's
+	 * model and needs to be unwound to the host's state.
 	 */
-	skip_emulated_instruction(vcpu);
+	nested_vmx_restore_host_state(vcpu);
+
 	vmx->fail = 0;
 }
 
@@ -13439,26 +14285,7 @@ static void vmx_leave_nested(struct kvm_vcpu *vcpu)
 		to_vmx(vcpu)->nested.nested_run_pending = 0;
 		nested_vmx_vmexit(vcpu, -1, 0, 0);
 	}
-	free_nested(to_vmx(vcpu));
-}
-
-/*
- * L1's failure to enter L2 is a subset of a normal exit, as explained in
- * 23.7 "VM-entry failures during or after loading guest state" (this also
- * lists the acceptable exit-reason and exit-qualification parameters).
- * It should only be called before L2 actually succeeded to run, and when
- * vmcs01 is current (it doesn't leave_guest_mode() or switch vmcss).
- */
-static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
-			struct vmcs12 *vmcs12,
-			u32 reason, unsigned long qualification)
-{
-	load_vmcs12_host_state(vcpu, vmcs12);
-	vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
-	vmcs12->exit_qualification = qualification;
-	nested_vmx_succeed(vcpu);
-	if (enable_shadow_vmcs)
-		to_vmx(vcpu)->nested.sync_shadow_vmcs = true;
+	free_nested(vcpu);
 }
 
 static int vmx_check_intercept(struct kvm_vcpu *vcpu,
@@ -13884,7 +14711,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
 
 	if (vmx->nested.smm.guest_mode) {
 		vcpu->arch.hflags &= ~HF_SMM_MASK;
-		ret = enter_vmx_non_root_mode(vcpu, NULL);
+		ret = nested_vmx_enter_non_root_mode(vcpu, false);
 		vcpu->arch.hflags |= HF_SMM_MASK;
 		if (ret)
 			return ret;
@@ -13899,6 +14726,20 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static inline int vmx_has_valid_vmcs12(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	/*
+	 * In case we do two consecutive get/set_nested_state()s while L2 was
+	 * running hv_evmcs may end up not being mapped (we map it from
+	 * nested_vmx_run()/vmx_vcpu_run()). Check is_guest_mode() as we always
+	 * have vmcs12 if it is true.
+	 */
+	return is_guest_mode(vcpu) || vmx->nested.current_vmptr != -1ull ||
+		vmx->nested.hv_evmcs;
+}
+
 static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
 				struct kvm_nested_state __user *user_kvm_nested_state,
 				u32 user_data_size)
@@ -13918,12 +14759,16 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
 
 	vmx = to_vmx(vcpu);
 	vmcs12 = get_vmcs12(vcpu);
+
+	if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled)
+		kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
+
 	if (nested_vmx_allowed(vcpu) &&
 	    (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
 		kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
 		kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr;
 
-		if (vmx->nested.current_vmptr != -1ull) {
+		if (vmx_has_valid_vmcs12(vcpu)) {
 			kvm_state.size += VMCS12_SIZE;
 
 			if (is_guest_mode(vcpu) &&
@@ -13952,20 +14797,24 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
 	if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
 		return -EFAULT;
 
-	if (vmx->nested.current_vmptr == -1ull)
+	if (!vmx_has_valid_vmcs12(vcpu))
 		goto out;
 
 	/*
 	 * When running L2, the authoritative vmcs12 state is in the
 	 * vmcs02. When running L1, the authoritative vmcs12 state is
-	 * in the shadow vmcs linked to vmcs01, unless
-	 * sync_shadow_vmcs is set, in which case, the authoritative
+	 * in the shadow or enlightened vmcs linked to vmcs01, unless
+	 * need_vmcs12_sync is set, in which case, the authoritative
 	 * vmcs12 state is in the vmcs12 already.
 	 */
-	if (is_guest_mode(vcpu))
+	if (is_guest_mode(vcpu)) {
 		sync_vmcs12(vcpu, vmcs12);
-	else if (enable_shadow_vmcs && !vmx->nested.sync_shadow_vmcs)
-		copy_shadow_to_vmcs12(vmx);
+	} else if (!vmx->nested.need_vmcs12_sync) {
+		if (vmx->nested.hv_evmcs)
+			copy_enlightened_to_vmcs12(vmx);
+		else if (enable_shadow_vmcs)
+			copy_shadow_to_vmcs12(vmx);
+	}
 
 	if (copy_to_user(user_kvm_nested_state->data, vmcs12, sizeof(*vmcs12)))
 		return -EFAULT;
@@ -13993,6 +14842,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 	if (kvm_state->format != 0)
 		return -EINVAL;
 
+	if (kvm_state->flags & KVM_STATE_NESTED_EVMCS)
+		nested_enable_evmcs(vcpu, NULL);
+
 	if (!nested_vmx_allowed(vcpu))
 		return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL;
 
@@ -14010,13 +14862,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 	if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa))
 		return -EINVAL;
 
-	if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12))
-		return -EINVAL;
-
-	if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa ||
-	    !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa))
-		return -EINVAL;
-
 	if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
 	    (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
 		return -EINVAL;
@@ -14046,7 +14891,25 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 	if (ret)
 		return ret;
 
-	set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa);
+	/* Empty 'VMXON' state is permitted */
+	if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12))
+		return 0;
+
+	if (kvm_state->vmx.vmcs_pa != -1ull) {
+		if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa ||
+		    !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa))
+			return -EINVAL;
+
+		set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa);
+	} else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
+		/*
+		 * Sync eVMCS upon entry as we may not have
+		 * HV_X64_MSR_VP_ASSIST_PAGE set up yet.
+		 */
+		vmx->nested.need_vmcs12_sync = true;
+	} else {
+		return -EINVAL;
+	}
 
 	if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
 		vmx->nested.smm.vmxon = true;
@@ -14090,7 +14953,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 
 	vmx->nested.dirty_vmcs12 = true;
-	ret = enter_vmx_non_root_mode(vcpu, NULL);
+	ret = nested_vmx_enter_non_root_mode(vcpu, false);
 	if (ret)
 		return -EINVAL;
 
@@ -14242,6 +15105,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.pre_enter_smm = vmx_pre_enter_smm,
 	.pre_leave_smm = vmx_pre_leave_smm,
 	.enable_smi_window = enable_smi_window,
+
+	.nested_enable_evmcs = nested_enable_evmcs,
 };
 
 static void vmx_cleanup_l1d_flush(void)
diff --git a/arch/x86/kvm/vmx_shadow_fields.h b/arch/x86/kvm/vmx_shadow_fields.h
index cd0c75f6d037..132432f375c2 100644
--- a/arch/x86/kvm/vmx_shadow_fields.h
+++ b/arch/x86/kvm/vmx_shadow_fields.h
@@ -28,7 +28,6 @@
  */
 
 /* 16-bits */
-SHADOW_FIELD_RW(GUEST_CS_SELECTOR)
 SHADOW_FIELD_RW(GUEST_INTR_STATUS)
 SHADOW_FIELD_RW(GUEST_PML_INDEX)
 SHADOW_FIELD_RW(HOST_FS_SELECTOR)
@@ -47,8 +46,8 @@ SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE)
 SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD)
 SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN)
 SHADOW_FIELD_RW(TPR_THRESHOLD)
-SHADOW_FIELD_RW(GUEST_CS_LIMIT)
 SHADOW_FIELD_RW(GUEST_CS_AR_BYTES)
+SHADOW_FIELD_RW(GUEST_SS_AR_BYTES)
 SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO)
 SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE)
 
@@ -61,8 +60,6 @@ SHADOW_FIELD_RW(GUEST_CR0)
 SHADOW_FIELD_RW(GUEST_CR3)
 SHADOW_FIELD_RW(GUEST_CR4)
 SHADOW_FIELD_RW(GUEST_RFLAGS)
-SHADOW_FIELD_RW(GUEST_CS_BASE)
-SHADOW_FIELD_RW(GUEST_ES_BASE)
 SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK)
 SHADOW_FIELD_RW(CR0_READ_SHADOW)
 SHADOW_FIELD_RW(CR4_READ_SHADOW)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ca717737347e..66d66d77caee 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -136,7 +136,7 @@ static u32 __read_mostly tsc_tolerance_ppm = 250;
 module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
 
 /* lapic timer advance (tscdeadline mode only) in nanoseconds */
-unsigned int __read_mostly lapic_timer_advance_ns = 0;
+unsigned int __read_mostly lapic_timer_advance_ns = 1000;
 module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
 EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
 
@@ -400,9 +400,51 @@ static int exception_type(int vector)
 	return EXCPT_FAULT;
 }
 
+void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
+{
+	unsigned nr = vcpu->arch.exception.nr;
+	bool has_payload = vcpu->arch.exception.has_payload;
+	unsigned long payload = vcpu->arch.exception.payload;
+
+	if (!has_payload)
+		return;
+
+	switch (nr) {
+	case DB_VECTOR:
+		/*
+		 * "Certain debug exceptions may clear bit 0-3.  The
+		 * remaining contents of the DR6 register are never
+		 * cleared by the processor".
+		 */
+		vcpu->arch.dr6 &= ~DR_TRAP_BITS;
+		/*
+		 * DR6.RTM is set by all #DB exceptions that don't clear it.
+		 */
+		vcpu->arch.dr6 |= DR6_RTM;
+		vcpu->arch.dr6 |= payload;
+		/*
+		 * Bit 16 should be set in the payload whenever the #DB
+		 * exception should clear DR6.RTM. This makes the payload
+		 * compatible with the pending debug exceptions under VMX.
+		 * Though not currently documented in the SDM, this also
+		 * makes the payload compatible with the exit qualification
+		 * for #DB exceptions under VMX.
+		 */
+		vcpu->arch.dr6 ^= payload & DR6_RTM;
+		break;
+	case PF_VECTOR:
+		vcpu->arch.cr2 = payload;
+		break;
+	}
+
+	vcpu->arch.exception.has_payload = false;
+	vcpu->arch.exception.payload = 0;
+}
+EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
+
 static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 		unsigned nr, bool has_error, u32 error_code,
-		bool reinject)
+	        bool has_payload, unsigned long payload, bool reinject)
 {
 	u32 prev_nr;
 	int class1, class2;
@@ -424,6 +466,14 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 			 */
 			WARN_ON_ONCE(vcpu->arch.exception.pending);
 			vcpu->arch.exception.injected = true;
+			if (WARN_ON_ONCE(has_payload)) {
+				/*
+				 * A reinjected event has already
+				 * delivered its payload.
+				 */
+				has_payload = false;
+				payload = 0;
+			}
 		} else {
 			vcpu->arch.exception.pending = true;
 			vcpu->arch.exception.injected = false;
@@ -431,6 +481,22 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 		vcpu->arch.exception.has_error_code = has_error;
 		vcpu->arch.exception.nr = nr;
 		vcpu->arch.exception.error_code = error_code;
+		vcpu->arch.exception.has_payload = has_payload;
+		vcpu->arch.exception.payload = payload;
+		/*
+		 * In guest mode, payload delivery should be deferred,
+		 * so that the L1 hypervisor can intercept #PF before
+		 * CR2 is modified (or intercept #DB before DR6 is
+		 * modified under nVMX).  However, for ABI
+		 * compatibility with KVM_GET_VCPU_EVENTS and
+		 * KVM_SET_VCPU_EVENTS, we can't delay payload
+		 * delivery unless userspace has enabled this
+		 * functionality via the per-VM capability,
+		 * KVM_CAP_EXCEPTION_PAYLOAD.
+		 */
+		if (!vcpu->kvm->arch.exception_payload_enabled ||
+		    !is_guest_mode(vcpu))
+			kvm_deliver_exception_payload(vcpu);
 		return;
 	}
 
@@ -455,6 +521,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 		vcpu->arch.exception.has_error_code = true;
 		vcpu->arch.exception.nr = DF_VECTOR;
 		vcpu->arch.exception.error_code = 0;
+		vcpu->arch.exception.has_payload = false;
+		vcpu->arch.exception.payload = 0;
 	} else
 		/* replace previous exception with a new one in a hope
 		   that instruction re-execution will regenerate lost
@@ -464,16 +532,29 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 
 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
 {
-	kvm_multiple_exception(vcpu, nr, false, 0, false);
+	kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
 }
 EXPORT_SYMBOL_GPL(kvm_queue_exception);
 
 void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
 {
-	kvm_multiple_exception(vcpu, nr, false, 0, true);
+	kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
 }
 EXPORT_SYMBOL_GPL(kvm_requeue_exception);
 
+static void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
+				  unsigned long payload)
+{
+	kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
+}
+
+static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
+				    u32 error_code, unsigned long payload)
+{
+	kvm_multiple_exception(vcpu, nr, true, error_code,
+			       true, payload, false);
+}
+
 int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
 {
 	if (err)
@@ -490,11 +571,13 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 	++vcpu->stat.pf_guest;
 	vcpu->arch.exception.nested_apf =
 		is_guest_mode(vcpu) && fault->async_page_fault;
-	if (vcpu->arch.exception.nested_apf)
+	if (vcpu->arch.exception.nested_apf) {
 		vcpu->arch.apf.nested_apf_token = fault->address;
-	else
-		vcpu->arch.cr2 = fault->address;
-	kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
+		kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
+	} else {
+		kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
+					fault->address);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
 
@@ -503,7 +586,7 @@ static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fau
 	if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
 		vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
 	else
-		vcpu->arch.mmu.inject_page_fault(vcpu, fault);
+		vcpu->arch.mmu->inject_page_fault(vcpu, fault);
 
 	return fault->nested_page_fault;
 }
@@ -517,13 +600,13 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
 
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
 {
-	kvm_multiple_exception(vcpu, nr, true, error_code, false);
+	kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
 }
 EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
 
 void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
 {
-	kvm_multiple_exception(vcpu, nr, true, error_code, true);
+	kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
 }
 EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
 
@@ -602,7 +685,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
 		if ((pdpte[i] & PT_PRESENT_MASK) &&
 		    (pdpte[i] &
-		     vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) {
+		     vcpu->arch.mmu->guest_rsvd_check.rsvd_bits_mask[0][2])) {
 			ret = 0;
 			goto out;
 		}
@@ -2477,7 +2560,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 		break;
 	case MSR_KVM_PV_EOI_EN:
-		if (kvm_lapic_enable_pv_eoi(vcpu, data))
+		if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
 			return 1;
 		break;
 
@@ -2912,6 +2995,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_HYPERV_VP_INDEX:
 	case KVM_CAP_HYPERV_EVENTFD:
 	case KVM_CAP_HYPERV_TLBFLUSH:
+	case KVM_CAP_HYPERV_SEND_IPI:
+	case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
 	case KVM_CAP_PCI_SEGMENT:
 	case KVM_CAP_DEBUGREGS:
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -2930,6 +3015,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_IMMEDIATE_EXIT:
 	case KVM_CAP_GET_MSR_FEATURES:
 	case KVM_CAP_MSR_PLATFORM_INFO:
+	case KVM_CAP_EXCEPTION_PAYLOAD:
 		r = 1;
 		break;
 	case KVM_CAP_SYNC_REGS:
@@ -3362,19 +3448,33 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 					       struct kvm_vcpu_events *events)
 {
 	process_nmi(vcpu);
+
 	/*
-	 * FIXME: pass injected and pending separately.  This is only
-	 * needed for nested virtualization, whose state cannot be
-	 * migrated yet.  For now we can combine them.
+	 * The API doesn't provide the instruction length for software
+	 * exceptions, so don't report them. As long as the guest RIP
+	 * isn't advanced, we should expect to encounter the exception
+	 * again.
 	 */
-	events->exception.injected =
-		(vcpu->arch.exception.pending ||
-		 vcpu->arch.exception.injected) &&
-		!kvm_exception_is_soft(vcpu->arch.exception.nr);
+	if (kvm_exception_is_soft(vcpu->arch.exception.nr)) {
+		events->exception.injected = 0;
+		events->exception.pending = 0;
+	} else {
+		events->exception.injected = vcpu->arch.exception.injected;
+		events->exception.pending = vcpu->arch.exception.pending;
+		/*
+		 * For ABI compatibility, deliberately conflate
+		 * pending and injected exceptions when
+		 * KVM_CAP_EXCEPTION_PAYLOAD isn't enabled.
+		 */
+		if (!vcpu->kvm->arch.exception_payload_enabled)
+			events->exception.injected |=
+				vcpu->arch.exception.pending;
+	}
 	events->exception.nr = vcpu->arch.exception.nr;
 	events->exception.has_error_code = vcpu->arch.exception.has_error_code;
-	events->exception.pad = 0;
 	events->exception.error_code = vcpu->arch.exception.error_code;
+	events->exception_has_payload = vcpu->arch.exception.has_payload;
+	events->exception_payload = vcpu->arch.exception.payload;
 
 	events->interrupt.injected =
 		vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
@@ -3398,6 +3498,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 	events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
 			 | KVM_VCPUEVENT_VALID_SHADOW
 			 | KVM_VCPUEVENT_VALID_SMM);
+	if (vcpu->kvm->arch.exception_payload_enabled)
+		events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
+
 	memset(&events->reserved, 0, sizeof(events->reserved));
 }
 
@@ -3409,12 +3512,24 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 	if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
 			      | KVM_VCPUEVENT_VALID_SIPI_VECTOR
 			      | KVM_VCPUEVENT_VALID_SHADOW
-			      | KVM_VCPUEVENT_VALID_SMM))
+			      | KVM_VCPUEVENT_VALID_SMM
+			      | KVM_VCPUEVENT_VALID_PAYLOAD))
 		return -EINVAL;
 
-	if (events->exception.injected &&
-	    (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR ||
-	     is_guest_mode(vcpu)))
+	if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
+		if (!vcpu->kvm->arch.exception_payload_enabled)
+			return -EINVAL;
+		if (events->exception.pending)
+			events->exception.injected = 0;
+		else
+			events->exception_has_payload = 0;
+	} else {
+		events->exception.pending = 0;
+		events->exception_has_payload = 0;
+	}
+
+	if ((events->exception.injected || events->exception.pending) &&
+	    (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
 		return -EINVAL;
 
 	/* INITs are latched while in SMM */
@@ -3424,11 +3539,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 
 	process_nmi(vcpu);
-	vcpu->arch.exception.injected = false;
-	vcpu->arch.exception.pending = events->exception.injected;
+	vcpu->arch.exception.injected = events->exception.injected;
+	vcpu->arch.exception.pending = events->exception.pending;
 	vcpu->arch.exception.nr = events->exception.nr;
 	vcpu->arch.exception.has_error_code = events->exception.has_error_code;
 	vcpu->arch.exception.error_code = events->exception.error_code;
+	vcpu->arch.exception.has_payload = events->exception_has_payload;
+	vcpu->arch.exception.payload = events->exception_payload;
 
 	vcpu->arch.interrupt.injected = events->interrupt.injected;
 	vcpu->arch.interrupt.nr = events->interrupt.nr;
@@ -3694,6 +3811,10 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 				     struct kvm_enable_cap *cap)
 {
+	int r;
+	uint16_t vmcs_version;
+	void __user *user_ptr;
+
 	if (cap->flags)
 		return -EINVAL;
 
@@ -3706,6 +3827,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 			return -EINVAL;
 		return kvm_hv_activate_synic(vcpu, cap->cap ==
 					     KVM_CAP_HYPERV_SYNIC2);
+	case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
+		r = kvm_x86_ops->nested_enable_evmcs(vcpu, &vmcs_version);
+		if (!r) {
+			user_ptr = (void __user *)(uintptr_t)cap->args[0];
+			if (copy_to_user(user_ptr, &vmcs_version,
+					 sizeof(vmcs_version)))
+				r = -EFAULT;
+		}
+		return r;
+
 	default:
 		return -EINVAL;
 	}
@@ -4047,11 +4178,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 			break;
 
 		if (kvm_state.flags &
-		    ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE))
+		    ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
+		      | KVM_STATE_NESTED_EVMCS))
 			break;
 
 		/* nested_run_pending implies guest_mode.  */
-		if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING)
+		if ((kvm_state.flags & KVM_STATE_NESTED_RUN_PENDING)
+		    && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE))
 			break;
 
 		r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
@@ -4363,6 +4496,10 @@ split_irqchip_unlock:
 		kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
 		r = 0;
 		break;
+	case KVM_CAP_EXCEPTION_PAYLOAD:
+		kvm->arch.exception_payload_enabled = cap->args[0];
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -4803,7 +4940,7 @@ gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
 
 	/* NPT walks are always user-walks */
 	access |= PFERR_USER_MASK;
-	t_gpa  = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception);
+	t_gpa  = vcpu->arch.mmu->gva_to_gpa(vcpu, gpa, access, exception);
 
 	return t_gpa;
 }
@@ -5889,7 +6026,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
 	if (WARN_ON_ONCE(is_guest_mode(vcpu)))
 		return false;
 
-	if (!vcpu->arch.mmu.direct_map) {
+	if (!vcpu->arch.mmu->direct_map) {
 		/*
 		 * Write permission should be allowed since only
 		 * write access need to be emulated.
@@ -5922,7 +6059,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
 	kvm_release_pfn_clean(pfn);
 
 	/* The instructions are well-emulated on direct mmu. */
-	if (vcpu->arch.mmu.direct_map) {
+	if (vcpu->arch.mmu->direct_map) {
 		unsigned int indirect_shadow_pages;
 
 		spin_lock(&vcpu->kvm->mmu_lock);
@@ -5989,7 +6126,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 	vcpu->arch.last_retry_eip = ctxt->eip;
 	vcpu->arch.last_retry_addr = cr2;
 
-	if (!vcpu->arch.mmu.direct_map)
+	if (!vcpu->arch.mmu->direct_map)
 		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
 
 	kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
@@ -6049,14 +6186,7 @@ static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
 		*r = EMULATE_USER_EXIT;
 	} else {
-		/*
-		 * "Certain debug exceptions may clear bit 0-3.  The
-		 * remaining contents of the DR6 register are never
-		 * cleared by the processor".
-		 */
-		vcpu->arch.dr6 &= ~15;
-		vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
-		kvm_queue_exception(vcpu, DB_VECTOR);
+		kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
 	}
 }
 
@@ -6995,10 +7125,22 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 			__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
 					     X86_EFLAGS_RF);
 
-		if (vcpu->arch.exception.nr == DB_VECTOR &&
-		    (vcpu->arch.dr7 & DR7_GD)) {
-			vcpu->arch.dr7 &= ~DR7_GD;
-			kvm_update_dr7(vcpu);
+		if (vcpu->arch.exception.nr == DB_VECTOR) {
+			/*
+			 * This code assumes that nSVM doesn't use
+			 * check_nested_events(). If it does, the
+			 * DR6/DR7 changes should happen before L1
+			 * gets a #VMEXIT for an intercepted #DB in
+			 * L2.  (Under VMX, on the other hand, the
+			 * DR6/DR7 changes should not happen in the
+			 * event of a VM-exit to L1 for an intercepted
+			 * #DB in L2.)
+			 */
+			kvm_deliver_exception_payload(vcpu);
+			if (vcpu->arch.dr7 & DR7_GD) {
+				vcpu->arch.dr7 &= ~DR7_GD;
+				kvm_update_dr7(vcpu);
+			}
 		}
 
 		kvm_x86_ops->queue_exception(vcpu);
@@ -8478,7 +8620,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	kvm_vcpu_mtrr_init(vcpu);
 	vcpu_load(vcpu);
 	kvm_vcpu_reset(vcpu, false);
-	kvm_mmu_setup(vcpu);
+	kvm_init_mmu(vcpu, false);
 	vcpu_put(vcpu);
 	return 0;
 }
@@ -9327,7 +9469,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 {
 	int r;
 
-	if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
+	if ((vcpu->arch.mmu->direct_map != work->arch.direct_map) ||
 	      work->wakeup_all)
 		return;
 
@@ -9335,11 +9477,11 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 	if (unlikely(r))
 		return;
 
-	if (!vcpu->arch.mmu.direct_map &&
-	      work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
+	if (!vcpu->arch.mmu->direct_map &&
+	      work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu))
 		return;
 
-	vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
+	vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true);
 }
 
 static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
@@ -9463,6 +9605,8 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 			vcpu->arch.exception.nr = 0;
 			vcpu->arch.exception.has_error_code = false;
 			vcpu->arch.exception.error_code = 0;
+			vcpu->arch.exception.has_payload = false;
+			vcpu->arch.exception.payload = 0;
 		} else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
 			fault.vector = PF_VECTOR;
 			fault.error_code_valid = true;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 67b9568613f3..224cd0a47568 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -266,6 +266,8 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu,
 
 int handle_ud(struct kvm_vcpu *vcpu);
 
+void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu);
+
 void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
 u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile
index d67e30faff9c..be060dfb1cc3 100644
--- a/arch/xtensa/Makefile
+++ b/arch/xtensa/Makefile
@@ -80,28 +80,18 @@ LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
 head-y		:= arch/xtensa/kernel/head.o
 core-y		+= arch/xtensa/kernel/ arch/xtensa/mm/
 core-y		+= $(buildvar) $(buildplf)
+core-y 		+= arch/xtensa/boot/dts/
 
 libs-y		+= arch/xtensa/lib/ $(LIBGCC)
 drivers-$(CONFIG_OPROFILE)	+= arch/xtensa/oprofile/
 
-ifneq ($(CONFIG_BUILTIN_DTB),"")
-core-$(CONFIG_OF) += arch/xtensa/boot/dts/
-endif
-
 boot		:= arch/xtensa/boot
 
 all Image zImage uImage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $@
 
-%.dtb:
-	$(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
-
-dtbs: scripts
-	$(Q)$(MAKE) $(build)=$(boot)/dts
-
 define archhelp
   @echo '* Image       - Kernel ELF image with reset vector'
   @echo '* zImage      - Compressed kernel image (arch/xtensa/boot/images/zImage.*)'
   @echo '* uImage      - U-Boot wrapped image'
-  @echo '  dtbs        - Build device tree blobs for enabled boards'
 endef
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index ed66db3bc9bb..574e5520968c 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -5,9 +5,9 @@
 #define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
 
+#define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_UTIME
-#define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_GETPGRP
 
 /* 
diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c
index 42285f35d313..820e8738af11 100644
--- a/arch/xtensa/platforms/xtfpga/setup.c
+++ b/arch/xtensa/platforms/xtfpga/setup.c
@@ -94,7 +94,7 @@ static void __init xtfpga_clk_setup(struct device_node *np)
 	u32 freq;
 
 	if (!base) {
-		pr_err("%s: invalid address\n", np->name);
+		pr_err("%pOFn: invalid address\n", np);
 		return;
 	}
 
@@ -103,12 +103,12 @@ static void __init xtfpga_clk_setup(struct device_node *np)
 	clk = clk_register_fixed_rate(NULL, np->name, NULL, 0, freq);
 
 	if (IS_ERR(clk)) {
-		pr_err("%s: clk registration failed\n", np->name);
+		pr_err("%pOFn: clk registration failed\n", np);
 		return;
 	}
 
 	if (of_clk_add_provider(np, of_clk_src_simple_get, clk)) {
-		pr_err("%s: clk provider registration failed\n", np->name);
+		pr_err("%pOFn: clk provider registration failed\n", np);
 		return;
 	}
 }
diff --git a/crypto/Kconfig b/crypto/Kconfig
index f3e40ac56d93..f7a235db56aa 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -213,20 +213,6 @@ config CRYPTO_CRYPTD
 	  converts an arbitrary synchronous software crypto algorithm
 	  into an asynchronous algorithm that executes in a kernel thread.
 
-config CRYPTO_MCRYPTD
-	tristate "Software async multi-buffer crypto daemon"
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_HASH
-	select CRYPTO_MANAGER
-	select CRYPTO_WORKQUEUE
-	help
-	  This is a generic software asynchronous crypto daemon that
-	  provides the kernel thread to assist multi-buffer crypto
-	  algorithms for submitting jobs and flushing jobs in multi-buffer
-	  crypto algorithms.  Multi-buffer crypto algorithms are executed
-	  in the context of this kernel thread and drivers can post
-	  their crypto request asynchronously to be processed by this daemon.
-
 config CRYPTO_AUTHENC
 	tristate "Authenc support"
 	select CRYPTO_AEAD
@@ -470,6 +456,18 @@ config CRYPTO_LRW
 	  The first 128, 192 or 256 bits in the key are used for AES and the
 	  rest is used to tie each cipher block to its logical position.
 
+config CRYPTO_OFB
+	tristate "OFB support"
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_MANAGER
+	help
+	  OFB: the Output Feedback mode makes a block cipher into a synchronous
+	  stream cipher. It generates keystream blocks, which are then XORed
+	  with the plaintext blocks to get the ciphertext. Flipping a bit in the
+	  ciphertext produces a flipped bit in the plaintext at the same
+	  location. This property allows many error correcting codes to function
+	  normally even when applied before encryption.
+
 config CRYPTO_PCBC
 	tristate "PCBC support"
 	select CRYPTO_BLKCIPHER
@@ -848,54 +846,6 @@ config CRYPTO_SHA1_PPC_SPE
 	  SHA-1 secure hash standard (DFIPS 180-4) implemented
 	  using powerpc SPE SIMD instruction set.
 
-config CRYPTO_SHA1_MB
-	tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)"
-	depends on X86 && 64BIT
-	select CRYPTO_SHA1
-	select CRYPTO_HASH
-	select CRYPTO_MCRYPTD
-	help
-	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
-	  using multi-buffer technique.  This algorithm computes on
-	  multiple data lanes concurrently with SIMD instructions for
-	  better throughput.  It should not be enabled by default but
-	  used when there is significant amount of work to keep the keep
-	  the data lanes filled to get performance benefit.  If the data
-	  lanes remain unfilled, a flush operation will be initiated to
-	  process the crypto jobs, adding a slight latency.
-
-config CRYPTO_SHA256_MB
-	tristate "SHA256 digest algorithm (x86_64 Multi-Buffer, Experimental)"
-	depends on X86 && 64BIT
-	select CRYPTO_SHA256
-	select CRYPTO_HASH
-	select CRYPTO_MCRYPTD
-	help
-	  SHA-256 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
-	  using multi-buffer technique.  This algorithm computes on
-	  multiple data lanes concurrently with SIMD instructions for
-	  better throughput.  It should not be enabled by default but
-	  used when there is significant amount of work to keep the keep
-	  the data lanes filled to get performance benefit.  If the data
-	  lanes remain unfilled, a flush operation will be initiated to
-	  process the crypto jobs, adding a slight latency.
-
-config CRYPTO_SHA512_MB
-        tristate "SHA512 digest algorithm (x86_64 Multi-Buffer, Experimental)"
-        depends on X86 && 64BIT
-        select CRYPTO_SHA512
-        select CRYPTO_HASH
-        select CRYPTO_MCRYPTD
-        help
-          SHA-512 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
-          using multi-buffer technique.  This algorithm computes on
-          multiple data lanes concurrently with SIMD instructions for
-          better throughput.  It should not be enabled by default but
-          used when there is significant amount of work to keep the keep
-          the data lanes filled to get performance benefit.  If the data
-          lanes remain unfilled, a flush operation will be initiated to
-          process the crypto jobs, adding a slight latency.
-
 config CRYPTO_SHA256
 	tristate "SHA224 and SHA256 digest algorithm"
 	select CRYPTO_HASH
@@ -1133,7 +1083,7 @@ config CRYPTO_AES_NI_INTEL
 
 	  In addition to AES cipher algorithm support, the acceleration
 	  for some popular block cipher mode is supported too, including
-	  ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional
+	  ECB, CBC, LRW, XTS. The 64 bit version has additional
 	  acceleration for CTR.
 
 config CRYPTO_AES_SPARC64
@@ -1590,20 +1540,6 @@ config CRYPTO_SM4
 
 	  If unsure, say N.
 
-config CRYPTO_SPECK
-	tristate "Speck cipher algorithm"
-	select CRYPTO_ALGAPI
-	help
-	  Speck is a lightweight block cipher that is tuned for optimal
-	  performance in software (rather than hardware).
-
-	  Speck may not be as secure as AES, and should only be used on systems
-	  where AES is not fast enough.
-
-	  See also: <https://eprint.iacr.org/2013/404.pdf>
-
-	  If unsure, say N.
-
 config CRYPTO_TEA
 	tristate "TEA, XTEA and XETA cipher algorithms"
 	select CRYPTO_ALGAPI
@@ -1875,6 +1811,17 @@ config CRYPTO_USER_API_AEAD
 	  This option enables the user-spaces interface for AEAD
 	  cipher algorithms.
 
+config CRYPTO_STATS
+	bool "Crypto usage statistics for User-space"
+	help
+	  This option enables the gathering of crypto stats.
+	  This will collect:
+	  - encrypt/decrypt size and numbers of symmeric operations
+	  - compress/decompress size and numbers of compress operations
+	  - size and numbers of hash operations
+	  - encrypt/decrypt/sign/verify numbers for asymmetric operations
+	  - generate/seed numbers for rng operations
+
 config CRYPTO_HASH_INFO
 	bool
 
diff --git a/crypto/Makefile b/crypto/Makefile
index 6d1d40eeb964..5c207c76abf7 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -54,6 +54,7 @@ cryptomgr-y := algboss.o testmgr.o
 
 obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o
 obj-$(CONFIG_CRYPTO_USER) += crypto_user.o
+crypto_user-y := crypto_user_base.o crypto_user_stat.o
 obj-$(CONFIG_CRYPTO_CMAC) += cmac.o
 obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
 obj-$(CONFIG_CRYPTO_VMAC) += vmac.o
@@ -93,7 +94,6 @@ obj-$(CONFIG_CRYPTO_MORUS640) += morus640.o
 obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o
 obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
 obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
-obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o
 obj-$(CONFIG_CRYPTO_DES) += des_generic.o
 obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o
 obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o
@@ -115,7 +115,6 @@ obj-$(CONFIG_CRYPTO_TEA) += tea.o
 obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
 obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
 obj-$(CONFIG_CRYPTO_SEED) += seed.o
-obj-$(CONFIG_CRYPTO_SPECK) += speck.o
 obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
 obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
 obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
@@ -143,6 +142,7 @@ obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
 obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
 obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
 obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
+obj-$(CONFIG_CRYPTO_OFB) += ofb.o
 
 ecdh_generic-y := ecc.o
 ecdh_generic-y += ecdh.o
diff --git a/crypto/aegis.h b/crypto/aegis.h
index f1c6900ddb80..405e025fc906 100644
--- a/crypto/aegis.h
+++ b/crypto/aegis.h
@@ -21,7 +21,7 @@
 
 union aegis_block {
 	__le64 words64[AEGIS_BLOCK_SIZE / sizeof(__le64)];
-	u32 words32[AEGIS_BLOCK_SIZE / sizeof(u32)];
+	__le32 words32[AEGIS_BLOCK_SIZE / sizeof(__le32)];
 	u8 bytes[AEGIS_BLOCK_SIZE];
 };
 
@@ -57,24 +57,22 @@ static void crypto_aegis_aesenc(union aegis_block *dst,
 				const union aegis_block *src,
 				const union aegis_block *key)
 {
-	u32 *d = dst->words32;
 	const u8  *s  = src->bytes;
-	const u32 *k  = key->words32;
 	const u32 *t0 = crypto_ft_tab[0];
 	const u32 *t1 = crypto_ft_tab[1];
 	const u32 *t2 = crypto_ft_tab[2];
 	const u32 *t3 = crypto_ft_tab[3];
 	u32 d0, d1, d2, d3;
 
-	d0 = t0[s[ 0]] ^ t1[s[ 5]] ^ t2[s[10]] ^ t3[s[15]] ^ k[0];
-	d1 = t0[s[ 4]] ^ t1[s[ 9]] ^ t2[s[14]] ^ t3[s[ 3]] ^ k[1];
-	d2 = t0[s[ 8]] ^ t1[s[13]] ^ t2[s[ 2]] ^ t3[s[ 7]] ^ k[2];
-	d3 = t0[s[12]] ^ t1[s[ 1]] ^ t2[s[ 6]] ^ t3[s[11]] ^ k[3];
+	d0 = t0[s[ 0]] ^ t1[s[ 5]] ^ t2[s[10]] ^ t3[s[15]];
+	d1 = t0[s[ 4]] ^ t1[s[ 9]] ^ t2[s[14]] ^ t3[s[ 3]];
+	d2 = t0[s[ 8]] ^ t1[s[13]] ^ t2[s[ 2]] ^ t3[s[ 7]];
+	d3 = t0[s[12]] ^ t1[s[ 1]] ^ t2[s[ 6]] ^ t3[s[11]];
 
-	d[0] = d0;
-	d[1] = d1;
-	d[2] = d2;
-	d[3] = d3;
+	dst->words32[0] = cpu_to_le32(d0) ^ key->words32[0];
+	dst->words32[1] = cpu_to_le32(d1) ^ key->words32[1];
+	dst->words32[2] = cpu_to_le32(d2) ^ key->words32[2];
+	dst->words32[3] = cpu_to_le32(d3) ^ key->words32[3];
 }
 
 #endif /* _CRYPTO_AEGIS_H */
diff --git a/crypto/ahash.c b/crypto/ahash.c
index a64c143165b1..e21667b4e10a 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -364,24 +364,35 @@ static int crypto_ahash_op(struct ahash_request *req,
 
 int crypto_ahash_final(struct ahash_request *req)
 {
-	return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final);
+	int ret;
+
+	ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final);
+	crypto_stat_ahash_final(req, ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(crypto_ahash_final);
 
 int crypto_ahash_finup(struct ahash_request *req)
 {
-	return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup);
+	int ret;
+
+	ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup);
+	crypto_stat_ahash_final(req, ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(crypto_ahash_finup);
 
 int crypto_ahash_digest(struct ahash_request *req)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	int ret;
 
 	if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
-		return -ENOKEY;
-
-	return crypto_ahash_op(req, tfm->digest);
+		ret = -ENOKEY;
+	else
+		ret = crypto_ahash_op(req, tfm->digest);
+	crypto_stat_ahash_final(req, ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(crypto_ahash_digest);
 
@@ -550,8 +561,8 @@ static int ahash_prepare_alg(struct ahash_alg *alg)
 {
 	struct crypto_alg *base = &alg->halg.base;
 
-	if (alg->halg.digestsize > PAGE_SIZE / 8 ||
-	    alg->halg.statesize > PAGE_SIZE / 8 ||
+	if (alg->halg.digestsize > HASH_MAX_DIGESTSIZE ||
+	    alg->halg.statesize > HASH_MAX_STATESIZE ||
 	    alg->halg.statesize == 0)
 		return -EINVAL;
 
diff --git a/crypto/algapi.c b/crypto/algapi.c
index c0755cf4f53f..2545c5f89c4c 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -57,9 +57,14 @@ static int crypto_check_alg(struct crypto_alg *alg)
 	if (alg->cra_alignmask & (alg->cra_alignmask + 1))
 		return -EINVAL;
 
-	if (alg->cra_blocksize > PAGE_SIZE / 8)
+	/* General maximums for all algs. */
+	if (alg->cra_alignmask > MAX_ALGAPI_ALIGNMASK)
 		return -EINVAL;
 
+	if (alg->cra_blocksize > MAX_ALGAPI_BLOCKSIZE)
+		return -EINVAL;
+
+	/* Lower maximums for specific alg types. */
 	if (!alg->cra_type && (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
 			       CRYPTO_ALG_TYPE_CIPHER) {
 		if (alg->cra_alignmask > MAX_CIPHER_ALIGNMASK)
@@ -253,6 +258,14 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
 	list_add(&alg->cra_list, &crypto_alg_list);
 	list_add(&larval->alg.cra_list, &crypto_alg_list);
 
+	atomic_set(&alg->encrypt_cnt, 0);
+	atomic_set(&alg->decrypt_cnt, 0);
+	atomic64_set(&alg->encrypt_tlen, 0);
+	atomic64_set(&alg->decrypt_tlen, 0);
+	atomic_set(&alg->verify_cnt, 0);
+	atomic_set(&alg->cipher_err_cnt, 0);
+	atomic_set(&alg->sign_cnt, 0);
+
 out:
 	return larval;
 
@@ -367,6 +380,8 @@ static void crypto_wait_for_test(struct crypto_larval *larval)
 
 	err = wait_for_completion_killable(&larval->completion);
 	WARN_ON(err);
+	if (!err)
+		crypto_probing_notify(CRYPTO_MSG_ALG_LOADED, larval);
 
 out:
 	crypto_larval_kill(&larval->alg);
diff --git a/crypto/algboss.c b/crypto/algboss.c
index 5e6df2a087fa..527b44d0af21 100644
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -274,6 +274,8 @@ static int cryptomgr_notify(struct notifier_block *this, unsigned long msg,
 		return cryptomgr_schedule_probe(data);
 	case CRYPTO_MSG_ALG_REGISTER:
 		return cryptomgr_schedule_test(data);
+	case CRYPTO_MSG_ALG_LOADED:
+		break;
 	}
 
 	return NOTIFY_DONE;
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index c40a8c7ee8ae..eb100a04ce9f 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -42,7 +42,7 @@
 
 struct aead_tfm {
 	struct crypto_aead *aead;
-	struct crypto_skcipher *null_tfm;
+	struct crypto_sync_skcipher *null_tfm;
 };
 
 static inline bool aead_sufficient_data(struct sock *sk)
@@ -75,13 +75,13 @@ static int aead_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 	return af_alg_sendmsg(sock, msg, size, ivsize);
 }
 
-static int crypto_aead_copy_sgl(struct crypto_skcipher *null_tfm,
+static int crypto_aead_copy_sgl(struct crypto_sync_skcipher *null_tfm,
 				struct scatterlist *src,
 				struct scatterlist *dst, unsigned int len)
 {
-	SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm);
 
-	skcipher_request_set_tfm(skreq, null_tfm);
+	skcipher_request_set_sync_tfm(skreq, null_tfm);
 	skcipher_request_set_callback(skreq, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				      NULL, NULL);
 	skcipher_request_set_crypt(skreq, src, dst, len, NULL);
@@ -99,7 +99,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 	struct af_alg_ctx *ctx = ask->private;
 	struct aead_tfm *aeadc = pask->private;
 	struct crypto_aead *tfm = aeadc->aead;
-	struct crypto_skcipher *null_tfm = aeadc->null_tfm;
+	struct crypto_sync_skcipher *null_tfm = aeadc->null_tfm;
 	unsigned int i, as = crypto_aead_authsize(tfm);
 	struct af_alg_async_req *areq;
 	struct af_alg_tsgl *tsgl, *tmp;
@@ -478,7 +478,7 @@ static void *aead_bind(const char *name, u32 type, u32 mask)
 {
 	struct aead_tfm *tfm;
 	struct crypto_aead *aead;
-	struct crypto_skcipher *null_tfm;
+	struct crypto_sync_skcipher *null_tfm;
 
 	tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
 	if (!tfm)
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index bfcf595fd8f9..d0cde541beb6 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -239,7 +239,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags,
 	struct alg_sock *ask = alg_sk(sk);
 	struct hash_ctx *ctx = ask->private;
 	struct ahash_request *req = &ctx->req;
-	char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req)) ? : 1];
+	char state[HASH_MAX_STATESIZE];
 	struct sock *sk2;
 	struct alg_sock *ask2;
 	struct hash_ctx *ctx2;
diff --git a/crypto/authenc.c b/crypto/authenc.c
index 4fa8d40d947b..37f54d1b2f66 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -33,7 +33,7 @@ struct authenc_instance_ctx {
 struct crypto_authenc_ctx {
 	struct crypto_ahash *auth;
 	struct crypto_skcipher *enc;
-	struct crypto_skcipher *null;
+	struct crypto_sync_skcipher *null;
 };
 
 struct authenc_request_ctx {
@@ -185,9 +185,9 @@ static int crypto_authenc_copy_assoc(struct aead_request *req)
 {
 	struct crypto_aead *authenc = crypto_aead_reqtfm(req);
 	struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc);
-	SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
 
-	skcipher_request_set_tfm(skreq, ctx->null);
+	skcipher_request_set_sync_tfm(skreq, ctx->null);
 	skcipher_request_set_callback(skreq, aead_request_flags(req),
 				      NULL, NULL);
 	skcipher_request_set_crypt(skreq, req->src, req->dst, req->assoclen,
@@ -318,7 +318,7 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm)
 	struct crypto_authenc_ctx *ctx = crypto_aead_ctx(tfm);
 	struct crypto_ahash *auth;
 	struct crypto_skcipher *enc;
-	struct crypto_skcipher *null;
+	struct crypto_sync_skcipher *null;
 	int err;
 
 	auth = crypto_spawn_ahash(&ictx->auth);
diff --git a/crypto/authencesn.c b/crypto/authencesn.c
index 50b804747e20..80a25cc04aec 100644
--- a/crypto/authencesn.c
+++ b/crypto/authencesn.c
@@ -36,7 +36,7 @@ struct crypto_authenc_esn_ctx {
 	unsigned int reqoff;
 	struct crypto_ahash *auth;
 	struct crypto_skcipher *enc;
-	struct crypto_skcipher *null;
+	struct crypto_sync_skcipher *null;
 };
 
 struct authenc_esn_request_ctx {
@@ -183,9 +183,9 @@ static int crypto_authenc_esn_copy(struct aead_request *req, unsigned int len)
 {
 	struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req);
 	struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn);
-	SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
 
-	skcipher_request_set_tfm(skreq, ctx->null);
+	skcipher_request_set_sync_tfm(skreq, ctx->null);
 	skcipher_request_set_callback(skreq, aead_request_flags(req),
 				      NULL, NULL);
 	skcipher_request_set_crypt(skreq, req->src, req->dst, len, NULL);
@@ -341,7 +341,7 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm)
 	struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(tfm);
 	struct crypto_ahash *auth;
 	struct crypto_skcipher *enc;
-	struct crypto_skcipher *null;
+	struct crypto_sync_skcipher *null;
 	int err;
 
 	auth = crypto_spawn_ahash(&ictx->auth);
diff --git a/crypto/ccm.c b/crypto/ccm.c
index 0a083342ec8c..b242fd0d3262 100644
--- a/crypto/ccm.c
+++ b/crypto/ccm.c
@@ -50,7 +50,10 @@ struct crypto_ccm_req_priv_ctx {
 	u32 flags;
 	struct scatterlist src[3];
 	struct scatterlist dst[3];
-	struct skcipher_request skreq;
+	union {
+		struct ahash_request ahreq;
+		struct skcipher_request skreq;
+	};
 };
 
 struct cbcmac_tfm_ctx {
@@ -181,7 +184,7 @@ static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
 	struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
-	AHASH_REQUEST_ON_STACK(ahreq, ctx->mac);
+	struct ahash_request *ahreq = &pctx->ahreq;
 	unsigned int assoclen = req->assoclen;
 	struct scatterlist sg[3];
 	u8 *odata = pctx->odata;
@@ -427,7 +430,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
 	crypto_aead_set_reqsize(
 		tfm,
 		align + sizeof(struct crypto_ccm_req_priv_ctx) +
-		crypto_skcipher_reqsize(ctr));
+		max(crypto_ahash_reqsize(mac), crypto_skcipher_reqsize(ctr)));
 
 	return 0;
 
diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
index e451c3cb6a56..3ae96587caf9 100644
--- a/crypto/chacha20_generic.c
+++ b/crypto/chacha20_generic.c
@@ -18,20 +18,21 @@
 static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
 			     unsigned int bytes)
 {
-	u32 stream[CHACHA20_BLOCK_WORDS];
+	/* aligned to potentially speed up crypto_xor() */
+	u8 stream[CHACHA20_BLOCK_SIZE] __aligned(sizeof(long));
 
 	if (dst != src)
 		memcpy(dst, src, bytes);
 
 	while (bytes >= CHACHA20_BLOCK_SIZE) {
 		chacha20_block(state, stream);
-		crypto_xor(dst, (const u8 *)stream, CHACHA20_BLOCK_SIZE);
+		crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE);
 		bytes -= CHACHA20_BLOCK_SIZE;
 		dst += CHACHA20_BLOCK_SIZE;
 	}
 	if (bytes) {
 		chacha20_block(state, stream);
-		crypto_xor(dst, (const u8 *)stream, bytes);
+		crypto_xor(dst, stream, bytes);
 	}
 }
 
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index addca7bae33f..7118fb5efbaa 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -76,7 +76,7 @@ struct cryptd_blkcipher_request_ctx {
 
 struct cryptd_skcipher_ctx {
 	atomic_t refcnt;
-	struct crypto_skcipher *child;
+	struct crypto_sync_skcipher *child;
 };
 
 struct cryptd_skcipher_request_ctx {
@@ -449,14 +449,16 @@ static int cryptd_skcipher_setkey(struct crypto_skcipher *parent,
 				  const u8 *key, unsigned int keylen)
 {
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent);
-	struct crypto_skcipher *child = ctx->child;
+	struct crypto_sync_skcipher *child = ctx->child;
 	int err;
 
-	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+	crypto_sync_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_sync_skcipher_set_flags(child,
+				       crypto_skcipher_get_flags(parent) &
 					 CRYPTO_TFM_REQ_MASK);
-	err = crypto_skcipher_setkey(child, key, keylen);
-	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+	err = crypto_sync_skcipher_setkey(child, key, keylen);
+	crypto_skcipher_set_flags(parent,
+				  crypto_sync_skcipher_get_flags(child) &
 					  CRYPTO_TFM_RES_MASK);
 	return err;
 }
@@ -483,13 +485,13 @@ static void cryptd_skcipher_encrypt(struct crypto_async_request *base,
 	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct crypto_skcipher *child = ctx->child;
-	SKCIPHER_REQUEST_ON_STACK(subreq, child);
+	struct crypto_sync_skcipher *child = ctx->child;
+	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
 
 	if (unlikely(err == -EINPROGRESS))
 		goto out;
 
-	skcipher_request_set_tfm(subreq, child);
+	skcipher_request_set_sync_tfm(subreq, child);
 	skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
 				      NULL, NULL);
 	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
@@ -511,13 +513,13 @@ static void cryptd_skcipher_decrypt(struct crypto_async_request *base,
 	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct crypto_skcipher *child = ctx->child;
-	SKCIPHER_REQUEST_ON_STACK(subreq, child);
+	struct crypto_sync_skcipher *child = ctx->child;
+	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
 
 	if (unlikely(err == -EINPROGRESS))
 		goto out;
 
-	skcipher_request_set_tfm(subreq, child);
+	skcipher_request_set_sync_tfm(subreq, child);
 	skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
 				      NULL, NULL);
 	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
@@ -568,7 +570,7 @@ static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm)
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
-	ctx->child = cipher;
+	ctx->child = (struct crypto_sync_skcipher *)cipher;
 	crypto_skcipher_set_reqsize(
 		tfm, sizeof(struct cryptd_skcipher_request_ctx));
 	return 0;
@@ -578,7 +580,7 @@ static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm)
 {
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	crypto_free_skcipher(ctx->child);
+	crypto_free_sync_skcipher(ctx->child);
 }
 
 static void cryptd_skcipher_free(struct skcipher_instance *inst)
@@ -1243,7 +1245,7 @@ struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm)
 {
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
 
-	return ctx->child;
+	return &ctx->child->base;
 }
 EXPORT_SYMBOL_GPL(cryptd_skcipher_child);
 
diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c
index 0959b268966c..0bae59922a80 100644
--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
@@ -26,7 +26,7 @@
 #include <linux/string.h>
 
 static DEFINE_MUTEX(crypto_default_null_skcipher_lock);
-static struct crypto_skcipher *crypto_default_null_skcipher;
+static struct crypto_sync_skcipher *crypto_default_null_skcipher;
 static int crypto_default_null_skcipher_refcnt;
 
 static int null_compress(struct crypto_tfm *tfm, const u8 *src,
@@ -152,16 +152,15 @@ MODULE_ALIAS_CRYPTO("compress_null");
 MODULE_ALIAS_CRYPTO("digest_null");
 MODULE_ALIAS_CRYPTO("cipher_null");
 
-struct crypto_skcipher *crypto_get_default_null_skcipher(void)
+struct crypto_sync_skcipher *crypto_get_default_null_skcipher(void)
 {
-	struct crypto_skcipher *tfm;
+	struct crypto_sync_skcipher *tfm;
 
 	mutex_lock(&crypto_default_null_skcipher_lock);
 	tfm = crypto_default_null_skcipher;
 
 	if (!tfm) {
-		tfm = crypto_alloc_skcipher("ecb(cipher_null)",
-					    0, CRYPTO_ALG_ASYNC);
+		tfm = crypto_alloc_sync_skcipher("ecb(cipher_null)", 0, 0);
 		if (IS_ERR(tfm))
 			goto unlock;
 
@@ -181,7 +180,7 @@ void crypto_put_default_null_skcipher(void)
 {
 	mutex_lock(&crypto_default_null_skcipher_lock);
 	if (!--crypto_default_null_skcipher_refcnt) {
-		crypto_free_skcipher(crypto_default_null_skcipher);
+		crypto_free_sync_skcipher(crypto_default_null_skcipher);
 		crypto_default_null_skcipher = NULL;
 	}
 	mutex_unlock(&crypto_default_null_skcipher_lock);
diff --git a/crypto/crypto_user.c b/crypto/crypto_user_base.c
index 0e89b5457cab..e41f6cc33fff 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user_base.c
@@ -29,6 +29,7 @@
 #include <crypto/internal/rng.h>
 #include <crypto/akcipher.h>
 #include <crypto/kpp.h>
+#include <crypto/internal/cryptouser.h>
 
 #include "internal.h"
 
@@ -37,7 +38,7 @@
 static DEFINE_MUTEX(crypto_cfg_mutex);
 
 /* The crypto netlink socket */
-static struct sock *crypto_nlsk;
+struct sock *crypto_nlsk;
 
 struct crypto_dump_info {
 	struct sk_buff *in_skb;
@@ -46,7 +47,7 @@ struct crypto_dump_info {
 	u16 nlmsg_flags;
 };
 
-static struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
+struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
 {
 	struct crypto_alg *q, *alg = NULL;
 
@@ -461,6 +462,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
 	[CRYPTO_MSG_UPDATEALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 	[CRYPTO_MSG_GETALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = 0,
+	[CRYPTO_MSG_GETSTAT	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 };
 
 static const struct nla_policy crypto_policy[CRYPTOCFGA_MAX+1] = {
@@ -481,6 +483,9 @@ static const struct crypto_link {
 						       .dump = crypto_dump_report,
 						       .done = crypto_dump_report_done},
 	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = { .doit = crypto_del_rng },
+	[CRYPTO_MSG_GETSTAT	- CRYPTO_MSG_BASE] = { .doit = crypto_reportstat,
+						       .dump = crypto_dump_reportstat,
+						       .done = crypto_dump_reportstat_done},
 };
 
 static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c
new file mode 100644
index 000000000000..021ad06bbb62
--- /dev/null
+++ b/crypto/crypto_user_stat.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Crypto user configuration API.
+ *
+ * Copyright (C) 2017-2018 Corentin Labbe <clabbe@baylibre.com>
+ *
+ */
+
+#include <linux/crypto.h>
+#include <linux/cryptouser.h>
+#include <linux/sched.h>
+#include <net/netlink.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/internal/rng.h>
+#include <crypto/akcipher.h>
+#include <crypto/kpp.h>
+#include <crypto/internal/cryptouser.h>
+
+#include "internal.h"
+
+#define null_terminated(x)	(strnlen(x, sizeof(x)) < sizeof(x))
+
+static DEFINE_MUTEX(crypto_cfg_mutex);
+
+extern struct sock *crypto_nlsk;
+
+struct crypto_dump_info {
+	struct sk_buff *in_skb;
+	struct sk_buff *out_skb;
+	u32 nlmsg_seq;
+	u16 nlmsg_flags;
+};
+
+static int crypto_report_aead(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat raead;
+	u64 v64;
+	u32 v32;
+
+	strncpy(raead.type, "aead", sizeof(raead.type));
+
+	v32 = atomic_read(&alg->encrypt_cnt);
+	raead.stat_encrypt_cnt = v32;
+	v64 = atomic64_read(&alg->encrypt_tlen);
+	raead.stat_encrypt_tlen = v64;
+	v32 = atomic_read(&alg->decrypt_cnt);
+	raead.stat_decrypt_cnt = v32;
+	v64 = atomic64_read(&alg->decrypt_tlen);
+	raead.stat_decrypt_tlen = v64;
+	v32 = atomic_read(&alg->aead_err_cnt);
+	raead.stat_aead_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_AEAD,
+		    sizeof(struct crypto_stat), &raead))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rcipher;
+	u64 v64;
+	u32 v32;
+
+	strlcpy(rcipher.type, "cipher", sizeof(rcipher.type));
+
+	v32 = atomic_read(&alg->encrypt_cnt);
+	rcipher.stat_encrypt_cnt = v32;
+	v64 = atomic64_read(&alg->encrypt_tlen);
+	rcipher.stat_encrypt_tlen = v64;
+	v32 = atomic_read(&alg->decrypt_cnt);
+	rcipher.stat_decrypt_cnt = v32;
+	v64 = atomic64_read(&alg->decrypt_tlen);
+	rcipher.stat_decrypt_tlen = v64;
+	v32 = atomic_read(&alg->cipher_err_cnt);
+	rcipher.stat_cipher_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_CIPHER,
+		    sizeof(struct crypto_stat), &rcipher))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rcomp;
+	u64 v64;
+	u32 v32;
+
+	strlcpy(rcomp.type, "compression", sizeof(rcomp.type));
+	v32 = atomic_read(&alg->compress_cnt);
+	rcomp.stat_compress_cnt = v32;
+	v64 = atomic64_read(&alg->compress_tlen);
+	rcomp.stat_compress_tlen = v64;
+	v32 = atomic_read(&alg->decompress_cnt);
+	rcomp.stat_decompress_cnt = v32;
+	v64 = atomic64_read(&alg->decompress_tlen);
+	rcomp.stat_decompress_tlen = v64;
+	v32 = atomic_read(&alg->cipher_err_cnt);
+	rcomp.stat_compress_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_COMPRESS,
+		    sizeof(struct crypto_stat), &rcomp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat racomp;
+	u64 v64;
+	u32 v32;
+
+	strlcpy(racomp.type, "acomp", sizeof(racomp.type));
+	v32 = atomic_read(&alg->compress_cnt);
+	racomp.stat_compress_cnt = v32;
+	v64 = atomic64_read(&alg->compress_tlen);
+	racomp.stat_compress_tlen = v64;
+	v32 = atomic_read(&alg->decompress_cnt);
+	racomp.stat_decompress_cnt = v32;
+	v64 = atomic64_read(&alg->decompress_tlen);
+	racomp.stat_decompress_tlen = v64;
+	v32 = atomic_read(&alg->cipher_err_cnt);
+	racomp.stat_compress_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_ACOMP,
+		    sizeof(struct crypto_stat), &racomp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rakcipher;
+	u64 v64;
+	u32 v32;
+
+	strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
+	v32 = atomic_read(&alg->encrypt_cnt);
+	rakcipher.stat_encrypt_cnt = v32;
+	v64 = atomic64_read(&alg->encrypt_tlen);
+	rakcipher.stat_encrypt_tlen = v64;
+	v32 = atomic_read(&alg->decrypt_cnt);
+	rakcipher.stat_decrypt_cnt = v32;
+	v64 = atomic64_read(&alg->decrypt_tlen);
+	rakcipher.stat_decrypt_tlen = v64;
+	v32 = atomic_read(&alg->sign_cnt);
+	rakcipher.stat_sign_cnt = v32;
+	v32 = atomic_read(&alg->verify_cnt);
+	rakcipher.stat_verify_cnt = v32;
+	v32 = atomic_read(&alg->akcipher_err_cnt);
+	rakcipher.stat_akcipher_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER,
+		    sizeof(struct crypto_stat), &rakcipher))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rkpp;
+	u32 v;
+
+	strlcpy(rkpp.type, "kpp", sizeof(rkpp.type));
+
+	v = atomic_read(&alg->setsecret_cnt);
+	rkpp.stat_setsecret_cnt = v;
+	v = atomic_read(&alg->generate_public_key_cnt);
+	rkpp.stat_generate_public_key_cnt = v;
+	v = atomic_read(&alg->compute_shared_secret_cnt);
+	rkpp.stat_compute_shared_secret_cnt = v;
+	v = atomic_read(&alg->kpp_err_cnt);
+	rkpp.stat_kpp_err_cnt = v;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_KPP,
+		    sizeof(struct crypto_stat), &rkpp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_ahash(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rhash;
+	u64 v64;
+	u32 v32;
+
+	strncpy(rhash.type, "ahash", sizeof(rhash.type));
+
+	v32 = atomic_read(&alg->hash_cnt);
+	rhash.stat_hash_cnt = v32;
+	v64 = atomic64_read(&alg->hash_tlen);
+	rhash.stat_hash_tlen = v64;
+	v32 = atomic_read(&alg->hash_err_cnt);
+	rhash.stat_hash_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_HASH,
+		    sizeof(struct crypto_stat), &rhash))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_shash(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rhash;
+	u64 v64;
+	u32 v32;
+
+	strncpy(rhash.type, "shash", sizeof(rhash.type));
+
+	v32 = atomic_read(&alg->hash_cnt);
+	rhash.stat_hash_cnt = v32;
+	v64 = atomic64_read(&alg->hash_tlen);
+	rhash.stat_hash_tlen = v64;
+	v32 = atomic_read(&alg->hash_err_cnt);
+	rhash.stat_hash_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_HASH,
+		    sizeof(struct crypto_stat), &rhash))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_rng(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rrng;
+	u64 v64;
+	u32 v32;
+
+	strncpy(rrng.type, "rng", sizeof(rrng.type));
+
+	v32 = atomic_read(&alg->generate_cnt);
+	rrng.stat_generate_cnt = v32;
+	v64 = atomic64_read(&alg->generate_tlen);
+	rrng.stat_generate_tlen = v64;
+	v32 = atomic_read(&alg->seed_cnt);
+	rrng.stat_seed_cnt = v32;
+	v32 = atomic_read(&alg->hash_err_cnt);
+	rrng.stat_rng_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_RNG,
+		    sizeof(struct crypto_stat), &rrng))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_reportstat_one(struct crypto_alg *alg,
+				 struct crypto_user_alg *ualg,
+				 struct sk_buff *skb)
+{
+	strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
+	strlcpy(ualg->cru_driver_name, alg->cra_driver_name,
+		sizeof(ualg->cru_driver_name));
+	strlcpy(ualg->cru_module_name, module_name(alg->cra_module),
+		sizeof(ualg->cru_module_name));
+
+	ualg->cru_type = 0;
+	ualg->cru_mask = 0;
+	ualg->cru_flags = alg->cra_flags;
+	ualg->cru_refcnt = refcount_read(&alg->cra_refcnt);
+
+	if (nla_put_u32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority))
+		goto nla_put_failure;
+	if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
+		struct crypto_stat rl;
+
+		strlcpy(rl.type, "larval", sizeof(rl.type));
+		if (nla_put(skb, CRYPTOCFGA_STAT_LARVAL,
+			    sizeof(struct crypto_stat), &rl))
+			goto nla_put_failure;
+		goto out;
+	}
+
+	switch (alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL)) {
+	case CRYPTO_ALG_TYPE_AEAD:
+		if (crypto_report_aead(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_SKCIPHER:
+		if (crypto_report_cipher(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_BLKCIPHER:
+		if (crypto_report_cipher(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_CIPHER:
+		if (crypto_report_cipher(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_COMPRESS:
+		if (crypto_report_comp(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_ACOMPRESS:
+		if (crypto_report_acomp(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_SCOMPRESS:
+		if (crypto_report_acomp(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_AKCIPHER:
+		if (crypto_report_akcipher(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_KPP:
+		if (crypto_report_kpp(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_AHASH:
+		if (crypto_report_ahash(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_HASH:
+		if (crypto_report_shash(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_RNG:
+		if (crypto_report_rng(skb, alg))
+			goto nla_put_failure;
+		break;
+	default:
+		pr_err("ERROR: Unhandled alg %d in %s\n",
+		       alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL),
+		       __func__);
+	}
+
+out:
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_reportstat_alg(struct crypto_alg *alg,
+				 struct crypto_dump_info *info)
+{
+	struct sk_buff *in_skb = info->in_skb;
+	struct sk_buff *skb = info->out_skb;
+	struct nlmsghdr *nlh;
+	struct crypto_user_alg *ualg;
+	int err = 0;
+
+	nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, info->nlmsg_seq,
+			CRYPTO_MSG_GETSTAT, sizeof(*ualg), info->nlmsg_flags);
+	if (!nlh) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	ualg = nlmsg_data(nlh);
+
+	err = crypto_reportstat_one(alg, ualg, skb);
+	if (err) {
+		nlmsg_cancel(skb, nlh);
+		goto out;
+	}
+
+	nlmsg_end(skb, nlh);
+
+out:
+	return err;
+}
+
+int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
+		      struct nlattr **attrs)
+{
+	struct crypto_user_alg *p = nlmsg_data(in_nlh);
+	struct crypto_alg *alg;
+	struct sk_buff *skb;
+	struct crypto_dump_info info;
+	int err;
+
+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+		return -EINVAL;
+
+	alg = crypto_alg_match(p, 0);
+	if (!alg)
+		return -ENOENT;
+
+	err = -ENOMEM;
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!skb)
+		goto drop_alg;
+
+	info.in_skb = in_skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = in_nlh->nlmsg_seq;
+	info.nlmsg_flags = 0;
+
+	err = crypto_reportstat_alg(alg, &info);
+
+drop_alg:
+	crypto_mod_put(alg);
+
+	if (err)
+		return err;
+
+	return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
+}
+
+int crypto_dump_reportstat(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct crypto_alg *alg;
+	struct crypto_dump_info info;
+	int err;
+
+	if (cb->args[0])
+		goto out;
+
+	cb->args[0] = 1;
+
+	info.in_skb = cb->skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = cb->nlh->nlmsg_seq;
+	info.nlmsg_flags = NLM_F_MULTI;
+
+	list_for_each_entry(alg, &crypto_alg_list, cra_list) {
+		err = crypto_reportstat_alg(alg, &info);
+		if (err)
+			goto out_err;
+	}
+
+out:
+	return skb->len;
+out_err:
+	return err;
+}
+
+int crypto_dump_reportstat_done(struct netlink_callback *cb)
+{
+	return 0;
+}
+
+MODULE_LICENSE("GPL");
diff --git a/crypto/echainiv.c b/crypto/echainiv.c
index 45819e6015bf..77e607fdbfb7 100644
--- a/crypto/echainiv.c
+++ b/crypto/echainiv.c
@@ -47,9 +47,9 @@ static int echainiv_encrypt(struct aead_request *req)
 	info = req->iv;
 
 	if (req->src != req->dst) {
-		SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
 
-		skcipher_request_set_tfm(nreq, ctx->sknull);
+		skcipher_request_set_sync_tfm(nreq, ctx->sknull);
 		skcipher_request_set_callback(nreq, req->base.flags,
 					      NULL, NULL);
 		skcipher_request_set_crypt(nreq, req->src, req->dst,
diff --git a/crypto/gcm.c b/crypto/gcm.c
index 0ad879e1f9b2..e438492db2ca 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -50,7 +50,7 @@ struct crypto_rfc4543_instance_ctx {
 
 struct crypto_rfc4543_ctx {
 	struct crypto_aead *child;
-	struct crypto_skcipher *null;
+	struct crypto_sync_skcipher *null;
 	u8 nonce[4];
 };
 
@@ -1067,9 +1067,9 @@ static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc)
 	unsigned int authsize = crypto_aead_authsize(aead);
 	unsigned int nbytes = req->assoclen + req->cryptlen -
 			      (enc ? 0 : authsize);
-	SKCIPHER_REQUEST_ON_STACK(nreq, ctx->null);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->null);
 
-	skcipher_request_set_tfm(nreq, ctx->null);
+	skcipher_request_set_sync_tfm(nreq, ctx->null);
 	skcipher_request_set_callback(nreq, req->base.flags, NULL, NULL);
 	skcipher_request_set_crypt(nreq, req->src, req->dst, nbytes, NULL);
 
@@ -1093,7 +1093,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm)
 	struct crypto_aead_spawn *spawn = &ictx->aead;
 	struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm);
 	struct crypto_aead *aead;
-	struct crypto_skcipher *null;
+	struct crypto_sync_skcipher *null;
 	unsigned long align;
 	int err = 0;
 
diff --git a/crypto/internal.h b/crypto/internal.h
index 9a3f39939fba..ef769b5e8ad3 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -26,12 +26,6 @@
 #include <linux/rwsem.h>
 #include <linux/slab.h>
 
-/* Crypto notification events. */
-enum {
-	CRYPTO_MSG_ALG_REQUEST,
-	CRYPTO_MSG_ALG_REGISTER,
-};
-
 struct crypto_instance;
 struct crypto_template;
 
@@ -90,8 +84,6 @@ struct crypto_alg *crypto_find_alg(const char *alg_name,
 void *crypto_alloc_tfm(const char *alg_name,
 		       const struct crypto_type *frontend, u32 type, u32 mask);
 
-int crypto_register_notifier(struct notifier_block *nb);
-int crypto_unregister_notifier(struct notifier_block *nb);
 int crypto_probing_notify(unsigned long val, void *v);
 
 unsigned int crypto_alg_extsize(struct crypto_alg *alg);
diff --git a/crypto/lrw.c b/crypto/lrw.c
index 393a782679c7..0430ccd08728 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -29,8 +29,6 @@
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
 
-#define LRW_BUFFER_SIZE 128u
-
 #define LRW_BLOCK_SIZE 16
 
 struct priv {
@@ -56,19 +54,7 @@ struct priv {
 };
 
 struct rctx {
-	be128 buf[LRW_BUFFER_SIZE / sizeof(be128)];
-
 	be128 t;
-
-	be128 *ext;
-
-	struct scatterlist srcbuf[2];
-	struct scatterlist dstbuf[2];
-	struct scatterlist *src;
-	struct scatterlist *dst;
-
-	unsigned int left;
-
 	struct skcipher_request subreq;
 };
 
@@ -120,112 +106,68 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
 	return 0;
 }
 
-static inline void inc(be128 *iv)
-{
-	be64_add_cpu(&iv->b, 1);
-	if (!iv->b)
-		be64_add_cpu(&iv->a, 1);
-}
-
-/* this returns the number of consequative 1 bits starting
- * from the right, get_index128(00 00 00 00 00 00 ... 00 00 10 FB) = 2 */
-static inline int get_index128(be128 *block)
+/*
+ * Returns the number of trailing '1' bits in the words of the counter, which is
+ * represented by 4 32-bit words, arranged from least to most significant.
+ * At the same time, increments the counter by one.
+ *
+ * For example:
+ *
+ * u32 counter[4] = { 0xFFFFFFFF, 0x1, 0x0, 0x0 };
+ * int i = next_index(&counter);
+ * // i == 33, counter == { 0x0, 0x2, 0x0, 0x0 }
+ */
+static int next_index(u32 *counter)
 {
-	int x;
-	__be32 *p = (__be32 *) block;
+	int i, res = 0;
 
-	for (p += 3, x = 0; x < 128; p--, x += 32) {
-		u32 val = be32_to_cpup(p);
+	for (i = 0; i < 4; i++) {
+		if (counter[i] + 1 != 0)
+			return res + ffz(counter[i]++);
 
-		if (!~val)
-			continue;
-
-		return x + ffz(val);
+		counter[i] = 0;
+		res += 32;
 	}
 
-	return x;
+	/*
+	 * If we get here, then x == 128 and we are incrementing the counter
+	 * from all ones to all zeros. This means we must return index 127, i.e.
+	 * the one corresponding to key2*{ 1,...,1 }.
+	 */
+	return 127;
 }
 
-static int post_crypt(struct skcipher_request *req)
+/*
+ * We compute the tweak masks twice (both before and after the ECB encryption or
+ * decryption) to avoid having to allocate a temporary buffer and/or make
+ * mutliple calls to the 'ecb(..)' instance, which usually would be slower than
+ * just doing the next_index() calls again.
+ */
+static int xor_tweak(struct skcipher_request *req, bool second_pass)
 {
-	struct rctx *rctx = skcipher_request_ctx(req);
-	be128 *buf = rctx->ext ?: rctx->buf;
-	struct skcipher_request *subreq;
 	const int bs = LRW_BLOCK_SIZE;
-	struct skcipher_walk w;
-	struct scatterlist *sg;
-	unsigned offset;
-	int err;
-
-	subreq = &rctx->subreq;
-	err = skcipher_walk_virt(&w, subreq, false);
-
-	while (w.nbytes) {
-		unsigned int avail = w.nbytes;
-		be128 *wdst;
-
-		wdst = w.dst.virt.addr;
-
-		do {
-			be128_xor(wdst, buf++, wdst);
-			wdst++;
-		} while ((avail -= bs) >= bs);
-
-		err = skcipher_walk_done(&w, avail);
-	}
-
-	rctx->left -= subreq->cryptlen;
-
-	if (err || !rctx->left)
-		goto out;
-
-	rctx->dst = rctx->dstbuf;
-
-	scatterwalk_done(&w.out, 0, 1);
-	sg = w.out.sg;
-	offset = w.out.offset;
-
-	if (rctx->dst != sg) {
-		rctx->dst[0] = *sg;
-		sg_unmark_end(rctx->dst);
-		scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 2);
-	}
-	rctx->dst[0].length -= offset - sg->offset;
-	rctx->dst[0].offset = offset;
-
-out:
-	return err;
-}
-
-static int pre_crypt(struct skcipher_request *req)
-{
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rctx *rctx = skcipher_request_ctx(req);
 	struct priv *ctx = crypto_skcipher_ctx(tfm);
-	be128 *buf = rctx->ext ?: rctx->buf;
-	struct skcipher_request *subreq;
-	const int bs = LRW_BLOCK_SIZE;
+	struct rctx *rctx = skcipher_request_ctx(req);
+	be128 t = rctx->t;
 	struct skcipher_walk w;
-	struct scatterlist *sg;
-	unsigned cryptlen;
-	unsigned offset;
-	be128 *iv;
-	bool more;
+	__be32 *iv;
+	u32 counter[4];
 	int err;
 
-	subreq = &rctx->subreq;
-	skcipher_request_set_tfm(subreq, tfm);
-
-	cryptlen = subreq->cryptlen;
-	more = rctx->left > cryptlen;
-	if (!more)
-		cryptlen = rctx->left;
+	if (second_pass) {
+		req = &rctx->subreq;
+		/* set to our TFM to enforce correct alignment: */
+		skcipher_request_set_tfm(req, tfm);
+	}
 
-	skcipher_request_set_crypt(subreq, rctx->src, rctx->dst,
-				   cryptlen, req->iv);
+	err = skcipher_walk_virt(&w, req, false);
+	iv = (__be32 *)w.iv;
 
-	err = skcipher_walk_virt(&w, subreq, false);
-	iv = w.iv;
+	counter[0] = be32_to_cpu(iv[3]);
+	counter[1] = be32_to_cpu(iv[2]);
+	counter[2] = be32_to_cpu(iv[1]);
+	counter[3] = be32_to_cpu(iv[0]);
 
 	while (w.nbytes) {
 		unsigned int avail = w.nbytes;
@@ -236,188 +178,85 @@ static int pre_crypt(struct skcipher_request *req)
 		wdst = w.dst.virt.addr;
 
 		do {
-			*buf++ = rctx->t;
-			be128_xor(wdst++, &rctx->t, wsrc++);
+			be128_xor(wdst++, &t, wsrc++);
 
 			/* T <- I*Key2, using the optimization
 			 * discussed in the specification */
-			be128_xor(&rctx->t, &rctx->t,
-				  &ctx->mulinc[get_index128(iv)]);
-			inc(iv);
+			be128_xor(&t, &t, &ctx->mulinc[next_index(counter)]);
 		} while ((avail -= bs) >= bs);
 
-		err = skcipher_walk_done(&w, avail);
-	}
-
-	skcipher_request_set_tfm(subreq, ctx->child);
-	skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst,
-				   cryptlen, NULL);
-
-	if (err || !more)
-		goto out;
-
-	rctx->src = rctx->srcbuf;
-
-	scatterwalk_done(&w.in, 0, 1);
-	sg = w.in.sg;
-	offset = w.in.offset;
+		if (second_pass && w.nbytes == w.total) {
+			iv[0] = cpu_to_be32(counter[3]);
+			iv[1] = cpu_to_be32(counter[2]);
+			iv[2] = cpu_to_be32(counter[1]);
+			iv[3] = cpu_to_be32(counter[0]);
+		}
 
-	if (rctx->src != sg) {
-		rctx->src[0] = *sg;
-		sg_unmark_end(rctx->src);
-		scatterwalk_crypto_chain(rctx->src, sg_next(sg), 2);
+		err = skcipher_walk_done(&w, avail);
 	}
-	rctx->src[0].length -= offset - sg->offset;
-	rctx->src[0].offset = offset;
 
-out:
 	return err;
 }
 
-static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
+static int xor_tweak_pre(struct skcipher_request *req)
 {
-	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
-	struct rctx *rctx = skcipher_request_ctx(req);
-	struct skcipher_request *subreq;
-	gfp_t gfp;
-
-	subreq = &rctx->subreq;
-	skcipher_request_set_callback(subreq, req->base.flags, done, req);
-
-	gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
-							   GFP_ATOMIC;
-	rctx->ext = NULL;
-
-	subreq->cryptlen = LRW_BUFFER_SIZE;
-	if (req->cryptlen > LRW_BUFFER_SIZE) {
-		unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE);
-
-		rctx->ext = kmalloc(n, gfp);
-		if (rctx->ext)
-			subreq->cryptlen = n;
-	}
-
-	rctx->src = req->src;
-	rctx->dst = req->dst;
-	rctx->left = req->cryptlen;
-
-	/* calculate first value of T */
-	memcpy(&rctx->t, req->iv, sizeof(rctx->t));
-
-	/* T <- I*Key2 */
-	gf128mul_64k_bbe(&rctx->t, ctx->table);
-
-	return 0;
+	return xor_tweak(req, false);
 }
 
-static void exit_crypt(struct skcipher_request *req)
+static int xor_tweak_post(struct skcipher_request *req)
 {
-	struct rctx *rctx = skcipher_request_ctx(req);
-
-	rctx->left = 0;
-
-	if (rctx->ext)
-		kzfree(rctx->ext);
+	return xor_tweak(req, true);
 }
 
-static int do_encrypt(struct skcipher_request *req, int err)
-{
-	struct rctx *rctx = skcipher_request_ctx(req);
-	struct skcipher_request *subreq;
-
-	subreq = &rctx->subreq;
-
-	while (!err && rctx->left) {
-		err = pre_crypt(req) ?:
-		      crypto_skcipher_encrypt(subreq) ?:
-		      post_crypt(req);
-
-		if (err == -EINPROGRESS || err == -EBUSY)
-			return err;
-	}
-
-	exit_crypt(req);
-	return err;
-}
-
-static void encrypt_done(struct crypto_async_request *areq, int err)
+static void crypt_done(struct crypto_async_request *areq, int err)
 {
 	struct skcipher_request *req = areq->data;
-	struct skcipher_request *subreq;
-	struct rctx *rctx;
 
-	rctx = skcipher_request_ctx(req);
+	if (!err)
+		err = xor_tweak_post(req);
 
-	if (err == -EINPROGRESS) {
-		if (rctx->left != req->cryptlen)
-			return;
-		goto out;
-	}
-
-	subreq = &rctx->subreq;
-	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
-
-	err = do_encrypt(req, err ?: post_crypt(req));
-	if (rctx->left)
-		return;
-
-out:
 	skcipher_request_complete(req, err);
 }
 
-static int encrypt(struct skcipher_request *req)
-{
-	return do_encrypt(req, init_crypt(req, encrypt_done));
-}
-
-static int do_decrypt(struct skcipher_request *req, int err)
+static void init_crypt(struct skcipher_request *req)
 {
+	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	struct rctx *rctx = skcipher_request_ctx(req);
-	struct skcipher_request *subreq;
-
-	subreq = &rctx->subreq;
+	struct skcipher_request *subreq = &rctx->subreq;
 
-	while (!err && rctx->left) {
-		err = pre_crypt(req) ?:
-		      crypto_skcipher_decrypt(subreq) ?:
-		      post_crypt(req);
+	skcipher_request_set_tfm(subreq, ctx->child);
+	skcipher_request_set_callback(subreq, req->base.flags, crypt_done, req);
+	/* pass req->iv as IV (will be used by xor_tweak, ECB will ignore it) */
+	skcipher_request_set_crypt(subreq, req->dst, req->dst,
+				   req->cryptlen, req->iv);
 
-		if (err == -EINPROGRESS || err == -EBUSY)
-			return err;
-	}
+	/* calculate first value of T */
+	memcpy(&rctx->t, req->iv, sizeof(rctx->t));
 
-	exit_crypt(req);
-	return err;
+	/* T <- I*Key2 */
+	gf128mul_64k_bbe(&rctx->t, ctx->table);
 }
 
-static void decrypt_done(struct crypto_async_request *areq, int err)
+static int encrypt(struct skcipher_request *req)
 {
-	struct skcipher_request *req = areq->data;
-	struct skcipher_request *subreq;
-	struct rctx *rctx;
-
-	rctx = skcipher_request_ctx(req);
-
-	if (err == -EINPROGRESS) {
-		if (rctx->left != req->cryptlen)
-			return;
-		goto out;
-	}
-
-	subreq = &rctx->subreq;
-	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
-
-	err = do_decrypt(req, err ?: post_crypt(req));
-	if (rctx->left)
-		return;
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq = &rctx->subreq;
 
-out:
-	skcipher_request_complete(req, err);
+	init_crypt(req);
+	return xor_tweak_pre(req) ?:
+		crypto_skcipher_encrypt(subreq) ?:
+		xor_tweak_post(req);
 }
 
 static int decrypt(struct skcipher_request *req)
 {
-	return do_decrypt(req, init_crypt(req, decrypt_done));
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq = &rctx->subreq;
+
+	init_crypt(req);
+	return xor_tweak_pre(req) ?:
+		crypto_skcipher_decrypt(subreq) ?:
+		xor_tweak_post(req);
 }
 
 static int init_tfm(struct crypto_skcipher *tfm)
@@ -543,7 +382,7 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.base.cra_priority = alg->base.cra_priority;
 	inst->alg.base.cra_blocksize = LRW_BLOCK_SIZE;
 	inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
-				       (__alignof__(u64) - 1);
+				       (__alignof__(__be32) - 1);
 
 	inst->alg.ivsize = LRW_BLOCK_SIZE;
 	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) +
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
deleted file mode 100644
index f14152147ce8..000000000000
--- a/crypto/mcryptd.c
+++ /dev/null
@@ -1,675 +0,0 @@
-/*
- * Software multibuffer async crypto daemon.
- *
- * Copyright (c) 2014 Tim Chen <tim.c.chen@linux.intel.com>
- *
- * Adapted from crypto daemon.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/aead.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-#include <linux/sched.h>
-#include <linux/sched/stat.h>
-#include <linux/slab.h>
-
-#define MCRYPTD_MAX_CPU_QLEN 100
-#define MCRYPTD_BATCH 9
-
-static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
-				   unsigned int tail);
-
-struct mcryptd_flush_list {
-	struct list_head list;
-	struct mutex lock;
-};
-
-static struct mcryptd_flush_list __percpu *mcryptd_flist;
-
-struct hashd_instance_ctx {
-	struct crypto_ahash_spawn spawn;
-	struct mcryptd_queue *queue;
-};
-
-static void mcryptd_queue_worker(struct work_struct *work);
-
-void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay)
-{
-	struct mcryptd_flush_list *flist;
-
-	if (!cstate->flusher_engaged) {
-		/* put the flusher on the flush list */
-		flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
-		mutex_lock(&flist->lock);
-		list_add_tail(&cstate->flush_list, &flist->list);
-		cstate->flusher_engaged = true;
-		cstate->next_flush = jiffies + delay;
-		queue_delayed_work_on(smp_processor_id(), kcrypto_wq,
-			&cstate->flush, delay);
-		mutex_unlock(&flist->lock);
-	}
-}
-EXPORT_SYMBOL(mcryptd_arm_flusher);
-
-static int mcryptd_init_queue(struct mcryptd_queue *queue,
-			     unsigned int max_cpu_qlen)
-{
-	int cpu;
-	struct mcryptd_cpu_queue *cpu_queue;
-
-	queue->cpu_queue = alloc_percpu(struct mcryptd_cpu_queue);
-	pr_debug("mqueue:%p mcryptd_cpu_queue %p\n", queue, queue->cpu_queue);
-	if (!queue->cpu_queue)
-		return -ENOMEM;
-	for_each_possible_cpu(cpu) {
-		cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
-		pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
-		crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
-		INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
-		spin_lock_init(&cpu_queue->q_lock);
-	}
-	return 0;
-}
-
-static void mcryptd_fini_queue(struct mcryptd_queue *queue)
-{
-	int cpu;
-	struct mcryptd_cpu_queue *cpu_queue;
-
-	for_each_possible_cpu(cpu) {
-		cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
-		BUG_ON(cpu_queue->queue.qlen);
-	}
-	free_percpu(queue->cpu_queue);
-}
-
-static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
-				  struct crypto_async_request *request,
-				  struct mcryptd_hash_request_ctx *rctx)
-{
-	int cpu, err;
-	struct mcryptd_cpu_queue *cpu_queue;
-
-	cpu_queue = raw_cpu_ptr(queue->cpu_queue);
-	spin_lock(&cpu_queue->q_lock);
-	cpu = smp_processor_id();
-	rctx->tag.cpu = smp_processor_id();
-
-	err = crypto_enqueue_request(&cpu_queue->queue, request);
-	pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
-		 cpu, cpu_queue, request);
-	spin_unlock(&cpu_queue->q_lock);
-	queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
-
-	return err;
-}
-
-/*
- * Try to opportunisticlly flush the partially completed jobs if
- * crypto daemon is the only task running.
- */
-static void mcryptd_opportunistic_flush(void)
-{
-	struct mcryptd_flush_list *flist;
-	struct mcryptd_alg_cstate *cstate;
-
-	flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
-	while (single_task_running()) {
-		mutex_lock(&flist->lock);
-		cstate = list_first_entry_or_null(&flist->list,
-				struct mcryptd_alg_cstate, flush_list);
-		if (!cstate || !cstate->flusher_engaged) {
-			mutex_unlock(&flist->lock);
-			return;
-		}
-		list_del(&cstate->flush_list);
-		cstate->flusher_engaged = false;
-		mutex_unlock(&flist->lock);
-		cstate->alg_state->flusher(cstate);
-	}
-}
-
-/*
- * Called in workqueue context, do one real cryption work (via
- * req->complete) and reschedule itself if there are more work to
- * do.
- */
-static void mcryptd_queue_worker(struct work_struct *work)
-{
-	struct mcryptd_cpu_queue *cpu_queue;
-	struct crypto_async_request *req, *backlog;
-	int i;
-
-	/*
-	 * Need to loop through more than once for multi-buffer to
-	 * be effective.
-	 */
-
-	cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
-	i = 0;
-	while (i < MCRYPTD_BATCH || single_task_running()) {
-
-		spin_lock_bh(&cpu_queue->q_lock);
-		backlog = crypto_get_backlog(&cpu_queue->queue);
-		req = crypto_dequeue_request(&cpu_queue->queue);
-		spin_unlock_bh(&cpu_queue->q_lock);
-
-		if (!req) {
-			mcryptd_opportunistic_flush();
-			return;
-		}
-
-		if (backlog)
-			backlog->complete(backlog, -EINPROGRESS);
-		req->complete(req, 0);
-		if (!cpu_queue->queue.qlen)
-			return;
-		++i;
-	}
-	if (cpu_queue->queue.qlen)
-		queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
-}
-
-void mcryptd_flusher(struct work_struct *__work)
-{
-	struct	mcryptd_alg_cstate	*alg_cpu_state;
-	struct	mcryptd_alg_state	*alg_state;
-	struct	mcryptd_flush_list	*flist;
-	int	cpu;
-
-	cpu = smp_processor_id();
-	alg_cpu_state = container_of(to_delayed_work(__work),
-				     struct mcryptd_alg_cstate, flush);
-	alg_state = alg_cpu_state->alg_state;
-	if (alg_cpu_state->cpu != cpu)
-		pr_debug("mcryptd error: work on cpu %d, should be cpu %d\n",
-				cpu, alg_cpu_state->cpu);
-
-	if (alg_cpu_state->flusher_engaged) {
-		flist = per_cpu_ptr(mcryptd_flist, cpu);
-		mutex_lock(&flist->lock);
-		list_del(&alg_cpu_state->flush_list);
-		alg_cpu_state->flusher_engaged = false;
-		mutex_unlock(&flist->lock);
-		alg_state->flusher(alg_cpu_state);
-	}
-}
-EXPORT_SYMBOL_GPL(mcryptd_flusher);
-
-static inline struct mcryptd_queue *mcryptd_get_queue(struct crypto_tfm *tfm)
-{
-	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
-	struct mcryptd_instance_ctx *ictx = crypto_instance_ctx(inst);
-
-	return ictx->queue;
-}
-
-static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
-				   unsigned int tail)
-{
-	char *p;
-	struct crypto_instance *inst;
-	int err;
-
-	p = kzalloc(head + sizeof(*inst) + tail, GFP_KERNEL);
-	if (!p)
-		return ERR_PTR(-ENOMEM);
-
-	inst = (void *)(p + head);
-
-	err = -ENAMETOOLONG;
-	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
-		    "mcryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
-		goto out_free_inst;
-
-	memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
-
-	inst->alg.cra_priority = alg->cra_priority + 50;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-
-out:
-	return p;
-
-out_free_inst:
-	kfree(p);
-	p = ERR_PTR(err);
-	goto out;
-}
-
-static inline bool mcryptd_check_internal(struct rtattr **tb, u32 *type,
-					  u32 *mask)
-{
-	struct crypto_attr_type *algt;
-
-	algt = crypto_get_attr_type(tb);
-	if (IS_ERR(algt))
-		return false;
-
-	*type |= algt->type & CRYPTO_ALG_INTERNAL;
-	*mask |= algt->mask & CRYPTO_ALG_INTERNAL;
-
-	if (*type & *mask & CRYPTO_ALG_INTERNAL)
-		return true;
-	else
-		return false;
-}
-
-static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm)
-{
-	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
-	struct hashd_instance_ctx *ictx = crypto_instance_ctx(inst);
-	struct crypto_ahash_spawn *spawn = &ictx->spawn;
-	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct crypto_ahash *hash;
-
-	hash = crypto_spawn_ahash(spawn);
-	if (IS_ERR(hash))
-		return PTR_ERR(hash);
-
-	ctx->child = hash;
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				 sizeof(struct mcryptd_hash_request_ctx) +
-				 crypto_ahash_reqsize(hash));
-	return 0;
-}
-
-static void mcryptd_hash_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	crypto_free_ahash(ctx->child);
-}
-
-static int mcryptd_hash_setkey(struct crypto_ahash *parent,
-				   const u8 *key, unsigned int keylen)
-{
-	struct mcryptd_hash_ctx *ctx   = crypto_ahash_ctx(parent);
-	struct crypto_ahash *child = ctx->child;
-	int err;
-
-	crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_ahash_set_flags(child, crypto_ahash_get_flags(parent) &
-				      CRYPTO_TFM_REQ_MASK);
-	err = crypto_ahash_setkey(child, key, keylen);
-	crypto_ahash_set_flags(parent, crypto_ahash_get_flags(child) &
-				       CRYPTO_TFM_RES_MASK);
-	return err;
-}
-
-static int mcryptd_hash_enqueue(struct ahash_request *req,
-				crypto_completion_t complete)
-{
-	int ret;
-
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct mcryptd_queue *queue =
-		mcryptd_get_queue(crypto_ahash_tfm(tfm));
-
-	rctx->complete = req->base.complete;
-	req->base.complete = complete;
-
-	ret = mcryptd_enqueue_request(queue, &req->base, rctx);
-
-	return ret;
-}
-
-static void mcryptd_hash_init(struct crypto_async_request *req_async, int err)
-{
-	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
-	struct crypto_ahash *child = ctx->child;
-	struct ahash_request *req = ahash_request_cast(req_async);
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-	struct ahash_request *desc = &rctx->areq;
-
-	if (unlikely(err == -EINPROGRESS))
-		goto out;
-
-	ahash_request_set_tfm(desc, child);
-	ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP,
-						rctx->complete, req_async);
-
-	rctx->out = req->result;
-	err = crypto_ahash_init(desc);
-
-out:
-	local_bh_disable();
-	rctx->complete(&req->base, err);
-	local_bh_enable();
-}
-
-static int mcryptd_hash_init_enqueue(struct ahash_request *req)
-{
-	return mcryptd_hash_enqueue(req, mcryptd_hash_init);
-}
-
-static void mcryptd_hash_update(struct crypto_async_request *req_async, int err)
-{
-	struct ahash_request *req = ahash_request_cast(req_async);
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-
-	if (unlikely(err == -EINPROGRESS))
-		goto out;
-
-	rctx->out = req->result;
-	err = crypto_ahash_update(&rctx->areq);
-	if (err) {
-		req->base.complete = rctx->complete;
-		goto out;
-	}
-
-	return;
-out:
-	local_bh_disable();
-	rctx->complete(&req->base, err);
-	local_bh_enable();
-}
-
-static int mcryptd_hash_update_enqueue(struct ahash_request *req)
-{
-	return mcryptd_hash_enqueue(req, mcryptd_hash_update);
-}
-
-static void mcryptd_hash_final(struct crypto_async_request *req_async, int err)
-{
-	struct ahash_request *req = ahash_request_cast(req_async);
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-
-	if (unlikely(err == -EINPROGRESS))
-		goto out;
-
-	rctx->out = req->result;
-	err = crypto_ahash_final(&rctx->areq);
-	if (err) {
-		req->base.complete = rctx->complete;
-		goto out;
-	}
-
-	return;
-out:
-	local_bh_disable();
-	rctx->complete(&req->base, err);
-	local_bh_enable();
-}
-
-static int mcryptd_hash_final_enqueue(struct ahash_request *req)
-{
-	return mcryptd_hash_enqueue(req, mcryptd_hash_final);
-}
-
-static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err)
-{
-	struct ahash_request *req = ahash_request_cast(req_async);
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-
-	if (unlikely(err == -EINPROGRESS))
-		goto out;
-	rctx->out = req->result;
-	err = crypto_ahash_finup(&rctx->areq);
-
-	if (err) {
-		req->base.complete = rctx->complete;
-		goto out;
-	}
-
-	return;
-out:
-	local_bh_disable();
-	rctx->complete(&req->base, err);
-	local_bh_enable();
-}
-
-static int mcryptd_hash_finup_enqueue(struct ahash_request *req)
-{
-	return mcryptd_hash_enqueue(req, mcryptd_hash_finup);
-}
-
-static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err)
-{
-	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
-	struct crypto_ahash *child = ctx->child;
-	struct ahash_request *req = ahash_request_cast(req_async);
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-	struct ahash_request *desc = &rctx->areq;
-
-	if (unlikely(err == -EINPROGRESS))
-		goto out;
-
-	ahash_request_set_tfm(desc, child);
-	ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP,
-						rctx->complete, req_async);
-
-	rctx->out = req->result;
-	err = crypto_ahash_init(desc) ?: crypto_ahash_finup(desc);
-
-out:
-	local_bh_disable();
-	rctx->complete(&req->base, err);
-	local_bh_enable();
-}
-
-static int mcryptd_hash_digest_enqueue(struct ahash_request *req)
-{
-	return mcryptd_hash_enqueue(req, mcryptd_hash_digest);
-}
-
-static int mcryptd_hash_export(struct ahash_request *req, void *out)
-{
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-
-	return crypto_ahash_export(&rctx->areq, out);
-}
-
-static int mcryptd_hash_import(struct ahash_request *req, const void *in)
-{
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-
-	return crypto_ahash_import(&rctx->areq, in);
-}
-
-static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
-			      struct mcryptd_queue *queue)
-{
-	struct hashd_instance_ctx *ctx;
-	struct ahash_instance *inst;
-	struct hash_alg_common *halg;
-	struct crypto_alg *alg;
-	u32 type = 0;
-	u32 mask = 0;
-	int err;
-
-	if (!mcryptd_check_internal(tb, &type, &mask))
-		return -EINVAL;
-
-	halg = ahash_attr_alg(tb[1], type, mask);
-	if (IS_ERR(halg))
-		return PTR_ERR(halg);
-
-	alg = &halg->base;
-	pr_debug("crypto: mcryptd hash alg: %s\n", alg->cra_name);
-	inst = mcryptd_alloc_instance(alg, ahash_instance_headroom(),
-					sizeof(*ctx));
-	err = PTR_ERR(inst);
-	if (IS_ERR(inst))
-		goto out_put_alg;
-
-	ctx = ahash_instance_ctx(inst);
-	ctx->queue = queue;
-
-	err = crypto_init_ahash_spawn(&ctx->spawn, halg,
-				      ahash_crypto_instance(inst));
-	if (err)
-		goto out_free_inst;
-
-	inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC |
-		(alg->cra_flags & (CRYPTO_ALG_INTERNAL |
-				   CRYPTO_ALG_OPTIONAL_KEY));
-
-	inst->alg.halg.digestsize = halg->digestsize;
-	inst->alg.halg.statesize = halg->statesize;
-	inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx);
-
-	inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm;
-	inst->alg.halg.base.cra_exit = mcryptd_hash_exit_tfm;
-
-	inst->alg.init   = mcryptd_hash_init_enqueue;
-	inst->alg.update = mcryptd_hash_update_enqueue;
-	inst->alg.final  = mcryptd_hash_final_enqueue;
-	inst->alg.finup  = mcryptd_hash_finup_enqueue;
-	inst->alg.export = mcryptd_hash_export;
-	inst->alg.import = mcryptd_hash_import;
-	if (crypto_hash_alg_has_setkey(halg))
-		inst->alg.setkey = mcryptd_hash_setkey;
-	inst->alg.digest = mcryptd_hash_digest_enqueue;
-
-	err = ahash_register_instance(tmpl, inst);
-	if (err) {
-		crypto_drop_ahash(&ctx->spawn);
-out_free_inst:
-		kfree(inst);
-	}
-
-out_put_alg:
-	crypto_mod_put(alg);
-	return err;
-}
-
-static struct mcryptd_queue mqueue;
-
-static int mcryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
-{
-	struct crypto_attr_type *algt;
-
-	algt = crypto_get_attr_type(tb);
-	if (IS_ERR(algt))
-		return PTR_ERR(algt);
-
-	switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
-	case CRYPTO_ALG_TYPE_DIGEST:
-		return mcryptd_create_hash(tmpl, tb, &mqueue);
-	break;
-	}
-
-	return -EINVAL;
-}
-
-static void mcryptd_free(struct crypto_instance *inst)
-{
-	struct mcryptd_instance_ctx *ctx = crypto_instance_ctx(inst);
-	struct hashd_instance_ctx *hctx = crypto_instance_ctx(inst);
-
-	switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) {
-	case CRYPTO_ALG_TYPE_AHASH:
-		crypto_drop_ahash(&hctx->spawn);
-		kfree(ahash_instance(inst));
-		return;
-	default:
-		crypto_drop_spawn(&ctx->spawn);
-		kfree(inst);
-	}
-}
-
-static struct crypto_template mcryptd_tmpl = {
-	.name = "mcryptd",
-	.create = mcryptd_create,
-	.free = mcryptd_free,
-	.module = THIS_MODULE,
-};
-
-struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name,
-					u32 type, u32 mask)
-{
-	char mcryptd_alg_name[CRYPTO_MAX_ALG_NAME];
-	struct crypto_ahash *tfm;
-
-	if (snprintf(mcryptd_alg_name, CRYPTO_MAX_ALG_NAME,
-		     "mcryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
-		return ERR_PTR(-EINVAL);
-	tfm = crypto_alloc_ahash(mcryptd_alg_name, type, mask);
-	if (IS_ERR(tfm))
-		return ERR_CAST(tfm);
-	if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
-		crypto_free_ahash(tfm);
-		return ERR_PTR(-EINVAL);
-	}
-
-	return __mcryptd_ahash_cast(tfm);
-}
-EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash);
-
-struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm)
-{
-	struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);
-
-	return ctx->child;
-}
-EXPORT_SYMBOL_GPL(mcryptd_ahash_child);
-
-struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req)
-{
-	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-	return &rctx->areq;
-}
-EXPORT_SYMBOL_GPL(mcryptd_ahash_desc);
-
-void mcryptd_free_ahash(struct mcryptd_ahash *tfm)
-{
-	crypto_free_ahash(&tfm->base);
-}
-EXPORT_SYMBOL_GPL(mcryptd_free_ahash);
-
-static int __init mcryptd_init(void)
-{
-	int err, cpu;
-	struct mcryptd_flush_list *flist;
-
-	mcryptd_flist = alloc_percpu(struct mcryptd_flush_list);
-	for_each_possible_cpu(cpu) {
-		flist = per_cpu_ptr(mcryptd_flist, cpu);
-		INIT_LIST_HEAD(&flist->list);
-		mutex_init(&flist->lock);
-	}
-
-	err = mcryptd_init_queue(&mqueue, MCRYPTD_MAX_CPU_QLEN);
-	if (err) {
-		free_percpu(mcryptd_flist);
-		return err;
-	}
-
-	err = crypto_register_template(&mcryptd_tmpl);
-	if (err) {
-		mcryptd_fini_queue(&mqueue);
-		free_percpu(mcryptd_flist);
-	}
-
-	return err;
-}
-
-static void __exit mcryptd_exit(void)
-{
-	mcryptd_fini_queue(&mqueue);
-	crypto_unregister_template(&mcryptd_tmpl);
-	free_percpu(mcryptd_flist);
-}
-
-subsys_initcall(mcryptd_init);
-module_exit(mcryptd_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Software async multibuffer crypto daemon");
-MODULE_ALIAS_CRYPTO("mcryptd");
diff --git a/crypto/morus1280.c b/crypto/morus1280.c
index d057cf5ac4a8..3889c188f266 100644
--- a/crypto/morus1280.c
+++ b/crypto/morus1280.c
@@ -385,14 +385,11 @@ static void crypto_morus1280_final(struct morus1280_state *state,
 				   struct morus1280_block *tag_xor,
 				   u64 assoclen, u64 cryptlen)
 {
-	u64 assocbits = assoclen * 8;
-	u64 cryptbits = cryptlen * 8;
-
 	struct morus1280_block tmp;
 	unsigned int i;
 
-	tmp.words[0] = cpu_to_le64(assocbits);
-	tmp.words[1] = cpu_to_le64(cryptbits);
+	tmp.words[0] = assoclen * 8;
+	tmp.words[1] = cryptlen * 8;
 	tmp.words[2] = 0;
 	tmp.words[3] = 0;
 
diff --git a/crypto/morus640.c b/crypto/morus640.c
index 1ca76e54281b..da06ec2f6a80 100644
--- a/crypto/morus640.c
+++ b/crypto/morus640.c
@@ -384,21 +384,13 @@ static void crypto_morus640_final(struct morus640_state *state,
 				  struct morus640_block *tag_xor,
 				  u64 assoclen, u64 cryptlen)
 {
-	u64 assocbits = assoclen * 8;
-	u64 cryptbits = cryptlen * 8;
-
-	u32 assocbits_lo = (u32)assocbits;
-	u32 assocbits_hi = (u32)(assocbits >> 32);
-	u32 cryptbits_lo = (u32)cryptbits;
-	u32 cryptbits_hi = (u32)(cryptbits >> 32);
-
 	struct morus640_block tmp;
 	unsigned int i;
 
-	tmp.words[0] = cpu_to_le32(assocbits_lo);
-	tmp.words[1] = cpu_to_le32(assocbits_hi);
-	tmp.words[2] = cpu_to_le32(cryptbits_lo);
-	tmp.words[3] = cpu_to_le32(cryptbits_hi);
+	tmp.words[0] = lower_32_bits(assoclen * 8);
+	tmp.words[1] = upper_32_bits(assoclen * 8);
+	tmp.words[2] = lower_32_bits(cryptlen * 8);
+	tmp.words[3] = upper_32_bits(cryptlen * 8);
 
 	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
 		state->s[4].words[i] ^= state->s[0].words[i];
diff --git a/crypto/ofb.c b/crypto/ofb.c
new file mode 100644
index 000000000000..886631708c5e
--- /dev/null
+++ b/crypto/ofb.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * OFB: Output FeedBack mode
+ *
+ * Copyright (C) 2018 ARM Limited or its affiliates.
+ * All rights reserved.
+ *
+ * Based loosely on public domain code gleaned from libtomcrypt
+ * (https://github.com/libtom/libtomcrypt).
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+struct crypto_ofb_ctx {
+	struct crypto_cipher *child;
+	int cnt;
+};
+
+
+static int crypto_ofb_setkey(struct crypto_skcipher *parent, const u8 *key,
+			     unsigned int keylen)
+{
+	struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_cipher *child = ctx->child;
+	int err;
+
+	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+				       CRYPTO_TFM_REQ_MASK);
+	err = crypto_cipher_setkey(child, key, keylen);
+	crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) &
+				  CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static int crypto_ofb_encrypt_segment(struct crypto_ofb_ctx *ctx,
+				      struct skcipher_walk *walk,
+				      struct crypto_cipher *tfm)
+{
+	int bsize = crypto_cipher_blocksize(tfm);
+	int nbytes = walk->nbytes;
+
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		if (ctx->cnt == bsize) {
+			if (nbytes < bsize)
+				break;
+			crypto_cipher_encrypt_one(tfm, iv, iv);
+			ctx->cnt = 0;
+		}
+		*dst = *src ^ iv[ctx->cnt];
+		src++;
+		dst++;
+		ctx->cnt++;
+	} while (--nbytes);
+	return nbytes;
+}
+
+static int crypto_ofb_encrypt(struct skcipher_request *req)
+{
+	struct skcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	unsigned int bsize;
+	struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_cipher *child = ctx->child;
+	int ret = 0;
+
+	bsize =  crypto_cipher_blocksize(child);
+	ctx->cnt = bsize;
+
+	ret = skcipher_walk_virt(&walk, req, false);
+
+	while (walk.nbytes) {
+		ret = crypto_ofb_encrypt_segment(ctx, &walk, child);
+		ret = skcipher_walk_done(&walk, ret);
+	}
+
+	return ret;
+}
+
+/* OFB encrypt and decrypt are identical */
+static int crypto_ofb_decrypt(struct skcipher_request *req)
+{
+	return crypto_ofb_encrypt(req);
+}
+
+static int crypto_ofb_init_tfm(struct crypto_skcipher *tfm)
+{
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
+	struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_cipher *cipher;
+
+	cipher = crypto_spawn_cipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	ctx->child = cipher;
+	return 0;
+}
+
+static void crypto_ofb_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_cipher(ctx->child);
+}
+
+static void crypto_ofb_free(struct skcipher_instance *inst)
+{
+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
+	kfree(inst);
+}
+
+static int crypto_ofb_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct skcipher_instance *inst;
+	struct crypto_attr_type *algt;
+	struct crypto_spawn *spawn;
+	struct crypto_alg *alg;
+	u32 mask;
+	int err;
+
+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER);
+	if (err)
+		return err;
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	algt = crypto_get_attr_type(tb);
+	err = PTR_ERR(algt);
+	if (IS_ERR(algt))
+		goto err_free_inst;
+
+	mask = CRYPTO_ALG_TYPE_MASK |
+		crypto_requires_off(algt->type, algt->mask,
+				    CRYPTO_ALG_NEED_FALLBACK);
+
+	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, mask);
+	err = PTR_ERR(alg);
+	if (IS_ERR(alg))
+		goto err_free_inst;
+
+	spawn = skcipher_instance_ctx(inst);
+	err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
+				CRYPTO_ALG_TYPE_MASK);
+	crypto_mod_put(alg);
+	if (err)
+		goto err_free_inst;
+
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "ofb", alg);
+	if (err)
+		goto err_drop_spawn;
+
+	inst->alg.base.cra_priority = alg->cra_priority;
+	inst->alg.base.cra_blocksize = alg->cra_blocksize;
+	inst->alg.base.cra_alignmask = alg->cra_alignmask;
+
+	/* We access the data as u32s when xoring. */
+	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
+
+	inst->alg.ivsize = alg->cra_blocksize;
+	inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
+	inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
+
+	inst->alg.base.cra_ctxsize = sizeof(struct crypto_ofb_ctx);
+
+	inst->alg.init = crypto_ofb_init_tfm;
+	inst->alg.exit = crypto_ofb_exit_tfm;
+
+	inst->alg.setkey = crypto_ofb_setkey;
+	inst->alg.encrypt = crypto_ofb_encrypt;
+	inst->alg.decrypt = crypto_ofb_decrypt;
+
+	inst->free = crypto_ofb_free;
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto err_drop_spawn;
+
+out:
+	return err;
+
+err_drop_spawn:
+	crypto_drop_spawn(spawn);
+err_free_inst:
+	kfree(inst);
+	goto out;
+}
+
+static struct crypto_template crypto_ofb_tmpl = {
+	.name = "ofb",
+	.create = crypto_ofb_create,
+	.module = THIS_MODULE,
+};
+
+static int __init crypto_ofb_module_init(void)
+{
+	return crypto_register_template(&crypto_ofb_tmpl);
+}
+
+static void __exit crypto_ofb_module_exit(void)
+{
+	crypto_unregister_template(&crypto_ofb_tmpl);
+}
+
+module_init(crypto_ofb_module_init);
+module_exit(crypto_ofb_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OFB block cipher algorithm");
+MODULE_ALIAS_CRYPTO("ofb");
diff --git a/crypto/rng.c b/crypto/rng.c
index b4a618668161..547f16ecbfb0 100644
--- a/crypto/rng.c
+++ b/crypto/rng.c
@@ -50,6 +50,7 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
 	}
 
 	err = crypto_rng_alg(tfm)->seed(tfm, seed, slen);
+	crypto_stat_rng_seed(tfm, err);
 out:
 	kzfree(buf);
 	return err;
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 9893dbfc1af4..812476e46821 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -261,15 +261,6 @@ static int pkcs1pad_encrypt(struct akcipher_request *req)
 	pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf,
 			ctx->key_size - 1 - req->src_len, req->src);
 
-	req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL);
-	if (!req_ctx->out_buf) {
-		kfree(req_ctx->in_buf);
-		return -ENOMEM;
-	}
-
-	pkcs1pad_sg_set_buf(req_ctx->out_sg, req_ctx->out_buf,
-			ctx->key_size, NULL);
-
 	akcipher_request_set_tfm(&req_ctx->child_req, ctx->child);
 	akcipher_request_set_callback(&req_ctx->child_req, req->base.flags,
 			pkcs1pad_encrypt_sign_complete_cb, req);
diff --git a/crypto/seqiv.c b/crypto/seqiv.c
index 39dbf2f7e5f5..64a412be255e 100644
--- a/crypto/seqiv.c
+++ b/crypto/seqiv.c
@@ -73,9 +73,9 @@ static int seqiv_aead_encrypt(struct aead_request *req)
 	info = req->iv;
 
 	if (req->src != req->dst) {
-		SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
 
-		skcipher_request_set_tfm(nreq, ctx->sknull);
+		skcipher_request_set_sync_tfm(nreq, ctx->sknull);
 		skcipher_request_set_callback(nreq, req->base.flags,
 					      NULL, NULL);
 		skcipher_request_set_crypt(nreq, req->src, req->dst,
diff --git a/crypto/shash.c b/crypto/shash.c
index 5d732c6bb4b2..d21f04d70dce 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -73,13 +73,6 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
 }
 EXPORT_SYMBOL_GPL(crypto_shash_setkey);
 
-static inline unsigned int shash_align_buffer_size(unsigned len,
-						   unsigned long mask)
-{
-	typedef u8 __aligned_largest u8_aligned;
-	return len + (mask & ~(__alignof__(u8_aligned) - 1));
-}
-
 static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
 				  unsigned int len)
 {
@@ -88,11 +81,17 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
 	unsigned long alignmask = crypto_shash_alignmask(tfm);
 	unsigned int unaligned_len = alignmask + 1 -
 				     ((unsigned long)data & alignmask);
-	u8 ubuf[shash_align_buffer_size(unaligned_len, alignmask)]
-		__aligned_largest;
+	/*
+	 * We cannot count on __aligned() working for large values:
+	 * https://patchwork.kernel.org/patch/9507697/
+	 */
+	u8 ubuf[MAX_ALGAPI_ALIGNMASK * 2];
 	u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
 	int err;
 
+	if (WARN_ON(buf + unaligned_len > ubuf + sizeof(ubuf)))
+		return -EINVAL;
+
 	if (unaligned_len > len)
 		unaligned_len = len;
 
@@ -124,11 +123,17 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out)
 	unsigned long alignmask = crypto_shash_alignmask(tfm);
 	struct shash_alg *shash = crypto_shash_alg(tfm);
 	unsigned int ds = crypto_shash_digestsize(tfm);
-	u8 ubuf[shash_align_buffer_size(ds, alignmask)]
-		__aligned_largest;
+	/*
+	 * We cannot count on __aligned() working for large values:
+	 * https://patchwork.kernel.org/patch/9507697/
+	 */
+	u8 ubuf[MAX_ALGAPI_ALIGNMASK + HASH_MAX_DIGESTSIZE];
 	u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
 	int err;
 
+	if (WARN_ON(buf + ds > ubuf + sizeof(ubuf)))
+		return -EINVAL;
+
 	err = shash->final(desc, buf);
 	if (err)
 		goto out;
@@ -458,9 +463,9 @@ static int shash_prepare_alg(struct shash_alg *alg)
 {
 	struct crypto_alg *base = &alg->base;
 
-	if (alg->digestsize > PAGE_SIZE / 8 ||
-	    alg->descsize > PAGE_SIZE / 8 ||
-	    alg->statesize > PAGE_SIZE / 8)
+	if (alg->digestsize > HASH_MAX_DIGESTSIZE ||
+	    alg->descsize > HASH_MAX_DESCSIZE ||
+	    alg->statesize > HASH_MAX_STATESIZE)
 		return -EINVAL;
 
 	base->cra_type = &crypto_shash_type;
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 0bd8c6caa498..4caab81d2d02 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -949,6 +949,30 @@ struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_skcipher);
 
+struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(
+				const char *alg_name, u32 type, u32 mask)
+{
+	struct crypto_skcipher *tfm;
+
+	/* Only sync algorithms allowed. */
+	mask |= CRYPTO_ALG_ASYNC;
+
+	tfm = crypto_alloc_tfm(alg_name, &crypto_skcipher_type2, type, mask);
+
+	/*
+	 * Make sure we do not allocate something that might get used with
+	 * an on-stack request: check the request size.
+	 */
+	if (!IS_ERR(tfm) && WARN_ON(crypto_skcipher_reqsize(tfm) >
+				    MAX_SYNC_SKCIPHER_REQSIZE)) {
+		crypto_free_skcipher(tfm);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return (struct crypto_sync_skcipher *)tfm;
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_sync_skcipher);
+
 int crypto_has_skcipher2(const char *alg_name, u32 type, u32 mask)
 {
 	return crypto_type_has_alg(alg_name, &crypto_skcipher_type2,
diff --git a/crypto/speck.c b/crypto/speck.c
deleted file mode 100644
index 58aa9f7f91f7..000000000000
--- a/crypto/speck.c
+++ /dev/null
@@ -1,307 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Speck: a lightweight block cipher
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Speck has 10 variants, including 5 block sizes.  For now we only implement
- * the variants Speck128/128, Speck128/192, Speck128/256, Speck64/96, and
- * Speck64/128.   Speck${B}/${K} denotes the variant with a block size of B bits
- * and a key size of K bits.  The Speck128 variants are believed to be the most
- * secure variants, and they use the same block size and key sizes as AES.  The
- * Speck64 variants are less secure, but on 32-bit processors are usually
- * faster.  The remaining variants (Speck32, Speck48, and Speck96) are even less
- * secure and/or not as well suited for implementation on either 32-bit or
- * 64-bit processors, so are omitted.
- *
- * Reference: "The Simon and Speck Families of Lightweight Block Ciphers"
- * https://eprint.iacr.org/2013/404.pdf
- *
- * In a correspondence, the Speck designers have also clarified that the words
- * should be interpreted in little-endian format, and the words should be
- * ordered such that the first word of each block is 'y' rather than 'x', and
- * the first key word (rather than the last) becomes the first round key.
- */
-
-#include <asm/unaligned.h>
-#include <crypto/speck.h>
-#include <linux/bitops.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/module.h>
-
-/* Speck128 */
-
-static __always_inline void speck128_round(u64 *x, u64 *y, u64 k)
-{
-	*x = ror64(*x, 8);
-	*x += *y;
-	*x ^= k;
-	*y = rol64(*y, 3);
-	*y ^= *x;
-}
-
-static __always_inline void speck128_unround(u64 *x, u64 *y, u64 k)
-{
-	*y ^= *x;
-	*y = ror64(*y, 3);
-	*x ^= k;
-	*x -= *y;
-	*x = rol64(*x, 8);
-}
-
-void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx,
-			     u8 *out, const u8 *in)
-{
-	u64 y = get_unaligned_le64(in);
-	u64 x = get_unaligned_le64(in + 8);
-	int i;
-
-	for (i = 0; i < ctx->nrounds; i++)
-		speck128_round(&x, &y, ctx->round_keys[i]);
-
-	put_unaligned_le64(y, out);
-	put_unaligned_le64(x, out + 8);
-}
-EXPORT_SYMBOL_GPL(crypto_speck128_encrypt);
-
-static void speck128_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-	crypto_speck128_encrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx,
-			     u8 *out, const u8 *in)
-{
-	u64 y = get_unaligned_le64(in);
-	u64 x = get_unaligned_le64(in + 8);
-	int i;
-
-	for (i = ctx->nrounds - 1; i >= 0; i--)
-		speck128_unround(&x, &y, ctx->round_keys[i]);
-
-	put_unaligned_le64(y, out);
-	put_unaligned_le64(x, out + 8);
-}
-EXPORT_SYMBOL_GPL(crypto_speck128_decrypt);
-
-static void speck128_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-	crypto_speck128_decrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key,
-			   unsigned int keylen)
-{
-	u64 l[3];
-	u64 k;
-	int i;
-
-	switch (keylen) {
-	case SPECK128_128_KEY_SIZE:
-		k = get_unaligned_le64(key);
-		l[0] = get_unaligned_le64(key + 8);
-		ctx->nrounds = SPECK128_128_NROUNDS;
-		for (i = 0; i < ctx->nrounds; i++) {
-			ctx->round_keys[i] = k;
-			speck128_round(&l[0], &k, i);
-		}
-		break;
-	case SPECK128_192_KEY_SIZE:
-		k = get_unaligned_le64(key);
-		l[0] = get_unaligned_le64(key + 8);
-		l[1] = get_unaligned_le64(key + 16);
-		ctx->nrounds = SPECK128_192_NROUNDS;
-		for (i = 0; i < ctx->nrounds; i++) {
-			ctx->round_keys[i] = k;
-			speck128_round(&l[i % 2], &k, i);
-		}
-		break;
-	case SPECK128_256_KEY_SIZE:
-		k = get_unaligned_le64(key);
-		l[0] = get_unaligned_le64(key + 8);
-		l[1] = get_unaligned_le64(key + 16);
-		l[2] = get_unaligned_le64(key + 24);
-		ctx->nrounds = SPECK128_256_NROUNDS;
-		for (i = 0; i < ctx->nrounds; i++) {
-			ctx->round_keys[i] = k;
-			speck128_round(&l[i % 3], &k, i);
-		}
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_speck128_setkey);
-
-static int speck128_setkey(struct crypto_tfm *tfm, const u8 *key,
-			   unsigned int keylen)
-{
-	return crypto_speck128_setkey(crypto_tfm_ctx(tfm), key, keylen);
-}
-
-/* Speck64 */
-
-static __always_inline void speck64_round(u32 *x, u32 *y, u32 k)
-{
-	*x = ror32(*x, 8);
-	*x += *y;
-	*x ^= k;
-	*y = rol32(*y, 3);
-	*y ^= *x;
-}
-
-static __always_inline void speck64_unround(u32 *x, u32 *y, u32 k)
-{
-	*y ^= *x;
-	*y = ror32(*y, 3);
-	*x ^= k;
-	*x -= *y;
-	*x = rol32(*x, 8);
-}
-
-void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx,
-			    u8 *out, const u8 *in)
-{
-	u32 y = get_unaligned_le32(in);
-	u32 x = get_unaligned_le32(in + 4);
-	int i;
-
-	for (i = 0; i < ctx->nrounds; i++)
-		speck64_round(&x, &y, ctx->round_keys[i]);
-
-	put_unaligned_le32(y, out);
-	put_unaligned_le32(x, out + 4);
-}
-EXPORT_SYMBOL_GPL(crypto_speck64_encrypt);
-
-static void speck64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-	crypto_speck64_encrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx,
-			    u8 *out, const u8 *in)
-{
-	u32 y = get_unaligned_le32(in);
-	u32 x = get_unaligned_le32(in + 4);
-	int i;
-
-	for (i = ctx->nrounds - 1; i >= 0; i--)
-		speck64_unround(&x, &y, ctx->round_keys[i]);
-
-	put_unaligned_le32(y, out);
-	put_unaligned_le32(x, out + 4);
-}
-EXPORT_SYMBOL_GPL(crypto_speck64_decrypt);
-
-static void speck64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
-	crypto_speck64_decrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key,
-			  unsigned int keylen)
-{
-	u32 l[3];
-	u32 k;
-	int i;
-
-	switch (keylen) {
-	case SPECK64_96_KEY_SIZE:
-		k = get_unaligned_le32(key);
-		l[0] = get_unaligned_le32(key + 4);
-		l[1] = get_unaligned_le32(key + 8);
-		ctx->nrounds = SPECK64_96_NROUNDS;
-		for (i = 0; i < ctx->nrounds; i++) {
-			ctx->round_keys[i] = k;
-			speck64_round(&l[i % 2], &k, i);
-		}
-		break;
-	case SPECK64_128_KEY_SIZE:
-		k = get_unaligned_le32(key);
-		l[0] = get_unaligned_le32(key + 4);
-		l[1] = get_unaligned_le32(key + 8);
-		l[2] = get_unaligned_le32(key + 12);
-		ctx->nrounds = SPECK64_128_NROUNDS;
-		for (i = 0; i < ctx->nrounds; i++) {
-			ctx->round_keys[i] = k;
-			speck64_round(&l[i % 3], &k, i);
-		}
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_speck64_setkey);
-
-static int speck64_setkey(struct crypto_tfm *tfm, const u8 *key,
-			  unsigned int keylen)
-{
-	return crypto_speck64_setkey(crypto_tfm_ctx(tfm), key, keylen);
-}
-
-/* Algorithm definitions */
-
-static struct crypto_alg speck_algs[] = {
-	{
-		.cra_name		= "speck128",
-		.cra_driver_name	= "speck128-generic",
-		.cra_priority		= 100,
-		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
-		.cra_blocksize		= SPECK128_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct speck128_tfm_ctx),
-		.cra_module		= THIS_MODULE,
-		.cra_u			= {
-			.cipher = {
-				.cia_min_keysize	= SPECK128_128_KEY_SIZE,
-				.cia_max_keysize	= SPECK128_256_KEY_SIZE,
-				.cia_setkey		= speck128_setkey,
-				.cia_encrypt		= speck128_encrypt,
-				.cia_decrypt		= speck128_decrypt
-			}
-		}
-	}, {
-		.cra_name		= "speck64",
-		.cra_driver_name	= "speck64-generic",
-		.cra_priority		= 100,
-		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
-		.cra_blocksize		= SPECK64_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct speck64_tfm_ctx),
-		.cra_module		= THIS_MODULE,
-		.cra_u			= {
-			.cipher = {
-				.cia_min_keysize	= SPECK64_96_KEY_SIZE,
-				.cia_max_keysize	= SPECK64_128_KEY_SIZE,
-				.cia_setkey		= speck64_setkey,
-				.cia_encrypt		= speck64_encrypt,
-				.cia_decrypt		= speck64_decrypt
-			}
-		}
-	}
-};
-
-static int __init speck_module_init(void)
-{
-	return crypto_register_algs(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-static void __exit speck_module_exit(void)
-{
-	crypto_unregister_algs(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-module_init(speck_module_init);
-module_exit(speck_module_exit);
-
-MODULE_DESCRIPTION("Speck block cipher (generic)");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("speck128");
-MODULE_ALIAS_CRYPTO("speck128-generic");
-MODULE_ALIAS_CRYPTO("speck64");
-MODULE_ALIAS_CRYPTO("speck64-generic");
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index bdde95e8d369..c20c9f5c18f2 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -76,8 +76,7 @@ static char *check[] = {
 	"cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea",
 	"khazad", "wp512", "wp384", "wp256", "tnepres", "xeta",  "fcrypt",
 	"camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320",
-	"lzo", "cts", "zlib", "sha3-224", "sha3-256", "sha3-384", "sha3-512",
-	NULL
+	"lzo", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512", NULL
 };
 
 static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 };
@@ -1103,6 +1102,9 @@ static void test_ahash_speed_common(const char *algo, unsigned int secs,
 			break;
 		}
 
+		if (speed[i].klen)
+			crypto_ahash_setkey(tfm, tvmem[0], speed[i].klen);
+
 		pr_info("test%3u "
 			"(%5u byte blocks,%5u bytes per update,%4u updates): ",
 			i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen);
@@ -1733,6 +1735,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret += tcrypt_test("xts(aes)");
 		ret += tcrypt_test("ctr(aes)");
 		ret += tcrypt_test("rfc3686(ctr(aes))");
+		ret += tcrypt_test("ofb(aes)");
 		break;
 
 	case 11:
@@ -1878,10 +1881,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret += tcrypt_test("ecb(seed)");
 		break;
 
-	case 44:
-		ret += tcrypt_test("zlib");
-		break;
-
 	case 45:
 		ret += tcrypt_test("rfc4309(ccm(aes))");
 		break;
@@ -2033,6 +2032,8 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		break;
 	case 191:
 		ret += tcrypt_test("ecb(sm4)");
+		ret += tcrypt_test("cbc(sm4)");
+		ret += tcrypt_test("ctr(sm4)");
 		break;
 	case 200:
 		test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
@@ -2282,6 +2283,20 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 				   num_mb);
 		break;
 
+	case 218:
+		test_cipher_speed("ecb(sm4)", ENCRYPT, sec, NULL, 0,
+				speed_template_16);
+		test_cipher_speed("ecb(sm4)", DECRYPT, sec, NULL, 0,
+				speed_template_16);
+		test_cipher_speed("cbc(sm4)", ENCRYPT, sec, NULL, 0,
+				speed_template_16);
+		test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
+				speed_template_16);
+		test_cipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
+				speed_template_16);
+		test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
+				speed_template_16);
+		break;
 	case 300:
 		if (alg) {
 			test_hash_speed(alg, sec, generic_hash_speed_template);
diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h
index f0bfee1bb293..d09ea8b10b4f 100644
--- a/crypto/tcrypt.h
+++ b/crypto/tcrypt.h
@@ -51,6 +51,7 @@ static struct cipher_speed_template des3_speed_template[] = {
  * Cipher speed tests
  */
 static u8 speed_template_8[] = {8, 0};
+static u8 speed_template_16[] = {16, 0};
 static u8 speed_template_24[] = {24, 0};
 static u8 speed_template_8_16[] = {8, 16, 0};
 static u8 speed_template_8_32[] = {8, 32, 0};
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index a1d42245082a..b1f79c6bf409 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1400,8 +1400,8 @@ static int test_comp(struct crypto_comp *tfm,
 		int ilen;
 		unsigned int dlen = COMP_BUF_SIZE;
 
-		memset(output, 0, sizeof(COMP_BUF_SIZE));
-		memset(decomp_output, 0, sizeof(COMP_BUF_SIZE));
+		memset(output, 0, COMP_BUF_SIZE);
+		memset(decomp_output, 0, COMP_BUF_SIZE);
 
 		ilen = ctemplate[i].inlen;
 		ret = crypto_comp_compress(tfm, ctemplate[i].input,
@@ -1445,7 +1445,7 @@ static int test_comp(struct crypto_comp *tfm,
 		int ilen;
 		unsigned int dlen = COMP_BUF_SIZE;
 
-		memset(decomp_output, 0, sizeof(COMP_BUF_SIZE));
+		memset(decomp_output, 0, COMP_BUF_SIZE);
 
 		ilen = dtemplate[i].inlen;
 		ret = crypto_comp_decompress(tfm, dtemplate[i].input,
@@ -2662,6 +2662,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 			.cipher = __VECS(serpent_cbc_tv_template)
 		},
 	}, {
+		.alg = "cbc(sm4)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(sm4_cbc_tv_template)
+		}
+	}, {
 		.alg = "cbc(twofish)",
 		.test = alg_test_skcipher,
 		.suite = {
@@ -2785,6 +2791,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 			.cipher = __VECS(serpent_ctr_tv_template)
 		}
 	}, {
+		.alg = "ctr(sm4)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(sm4_ctr_tv_template)
+		}
+	}, {
 		.alg = "ctr(twofish)",
 		.test = alg_test_skcipher,
 		.suite = {
@@ -3038,18 +3050,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 			.cipher = __VECS(sm4_tv_template)
 		}
 	}, {
-		.alg = "ecb(speck128)",
-		.test = alg_test_skcipher,
-		.suite = {
-			.cipher = __VECS(speck128_tv_template)
-		}
-	}, {
-		.alg = "ecb(speck64)",
-		.test = alg_test_skcipher,
-		.suite = {
-			.cipher = __VECS(speck64_tv_template)
-		}
-	}, {
 		.alg = "ecb(tea)",
 		.test = alg_test_skcipher,
 		.suite = {
@@ -3577,18 +3577,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 			.cipher = __VECS(serpent_xts_tv_template)
 		}
 	}, {
-		.alg = "xts(speck128)",
-		.test = alg_test_skcipher,
-		.suite = {
-			.cipher = __VECS(speck128_xts_tv_template)
-		}
-	}, {
-		.alg = "xts(speck64)",
-		.test = alg_test_skcipher,
-		.suite = {
-			.cipher = __VECS(speck64_xts_tv_template)
-		}
-	}, {
 		.alg = "xts(twofish)",
 		.test = alg_test_skcipher,
 		.suite = {
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 173111c70746..1fe7b97ba03f 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -24,8 +24,6 @@
 #ifndef _CRYPTO_TESTMGR_H
 #define _CRYPTO_TESTMGR_H
 
-#include <linux/netlink.h>
-
 #define MAX_DIGEST_SIZE		64
 #define MAX_TAP			8
 
@@ -10133,12 +10131,13 @@ static const struct cipher_testvec serpent_xts_tv_template[] = {
 };
 
 /*
- * SM4 test vector taken from the draft RFC
- * https://tools.ietf.org/html/draft-crypto-sm4-00#ref-GBT.32907-2016
+ * SM4 test vectors taken from the "The SM4 Blockcipher Algorithm And Its
+ * Modes Of Operations" draft RFC
+ * https://datatracker.ietf.org/doc/draft-ribose-cfrg-sm4
  */
 
 static const struct cipher_testvec sm4_tv_template[] = {
-	{ /* SM4 Appendix A: Example Calculations. Example 1. */
+	{ /* GB/T 32907-2016 Example 1. */
 		.key	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
 			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
 		.klen	= 16,
@@ -10147,10 +10146,7 @@ static const struct cipher_testvec sm4_tv_template[] = {
 		.ctext	= "\x68\x1E\xDF\x34\xD2\x06\x96\x5E"
 			  "\x86\xB3\xE9\x4F\x53\x6E\x42\x46",
 		.len	= 16,
-	}, { /*
-	      *  SM4 Appendix A: Example Calculations.
-	      *  Last 10 iterations of Example 2.
-	      */
+	}, { /* Last 10 iterations of GB/T 32907-2016 Example 2. */
 		.key    = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
 			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
 		.klen	= 16,
@@ -10195,744 +10191,116 @@ static const struct cipher_testvec sm4_tv_template[] = {
 			  "\x59\x52\x98\xc7\xc6\xfd\x27\x1f"
 			  "\x4\x2\xf8\x4\xc3\x3d\x3f\x66",
 		.len	= 160
+	}, { /* A.2.1.1 SM4-ECB Example 1 */
+		.key	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.ptext	= "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb"
+			  "\xcc\xcc\xcc\xcc\xdd\xdd\xdd\xdd"
+			  "\xee\xee\xee\xee\xff\xff\xff\xff"
+			  "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb",
+		.ctext	= "\x5e\xc8\x14\x3d\xe5\x09\xcf\xf7"
+			  "\xb5\x17\x9f\x8f\x47\x4b\x86\x19"
+			  "\x2f\x1d\x30\x5a\x7f\xb1\x7d\xf9"
+			  "\x85\xf8\x1c\x84\x82\x19\x23\x04",
+		.len	= 32,
+	}, { /* A.2.1.2 SM4-ECB Example 2 */
+		.key	= "\xFE\xDC\xBA\x98\x76\x54\x32\x10"
+			  "\x01\x23\x45\x67\x89\xAB\xCD\xEF",
+		.klen	= 16,
+		.ptext	= "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb"
+			  "\xcc\xcc\xcc\xcc\xdd\xdd\xdd\xdd"
+			  "\xee\xee\xee\xee\xff\xff\xff\xff"
+			  "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb",
+		.ctext	= "\xC5\x87\x68\x97\xE4\xA5\x9B\xBB"
+			  "\xA7\x2A\x10\xC8\x38\x72\x24\x5B"
+			  "\x12\xDD\x90\xBC\x2D\x20\x06\x92"
+			  "\xB5\x29\xA4\x15\x5A\xC9\xE6\x00",
+		.len	= 32,
 	}
 };
 
-/*
- * Speck test vectors taken from the original paper:
- * "The Simon and Speck Families of Lightweight Block Ciphers"
- * https://eprint.iacr.org/2013/404.pdf
- *
- * Note that the paper does not make byte and word order clear.  But it was
- * confirmed with the authors that the intended orders are little endian byte
- * order and (y, x) word order.  Equivalently, the printed test vectors, when
- * looking at only the bytes (ignoring the whitespace that divides them into
- * words), are backwards: the left-most byte is actually the one with the
- * highest memory address, while the right-most byte is actually the one with
- * the lowest memory address.
- */
-
-static const struct cipher_testvec speck128_tv_template[] = {
-	{ /* Speck128/128 */
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+static const struct cipher_testvec sm4_cbc_tv_template[] = {
+	{ /* A.2.2.1 SM4-CBC Example 1 */
+		.key	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
 		.klen	= 16,
-		.ptext	= "\x20\x6d\x61\x64\x65\x20\x69\x74"
-			  "\x20\x65\x71\x75\x69\x76\x61\x6c",
-		.ctext	= "\x18\x0d\x57\x5c\xdf\xfe\x60\x78"
-			  "\x65\x32\x78\x79\x51\x98\x5d\xa6",
-		.len	= 16,
-	}, { /* Speck128/192 */
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17",
-		.klen	= 24,
-		.ptext	= "\x65\x6e\x74\x20\x74\x6f\x20\x43"
-			  "\x68\x69\x65\x66\x20\x48\x61\x72",
-		.ctext	= "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9"
-			  "\x66\x55\x13\x13\x3a\xcf\xe4\x1b",
-		.len	= 16,
-	}, { /* Speck128/256 */
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.klen	= 32,
-		.ptext	= "\x70\x6f\x6f\x6e\x65\x72\x2e\x20"
-			  "\x49\x6e\x20\x74\x68\x6f\x73\x65",
-		.ctext	= "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e"
-			  "\x3e\xf5\xc0\x05\x04\x01\x09\x41",
-		.len	= 16,
-	},
-};
-
-/*
- * Speck128-XTS test vectors, taken from the AES-XTS test vectors with the
- * ciphertext recomputed with Speck128 as the cipher
- */
-static const struct cipher_testvec speck128_xts_tv_template[] = {
-	{
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ctext	= "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62"
-			  "\x3b\x99\x4a\x64\x74\x77\xac\xed"
-			  "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42"
-			  "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54",
-		.len	= 32,
-	}, {
-		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ctext	= "\xfb\x53\x81\x75\x6f\x9f\x34\xad"
-			  "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a"
-			  "\xd4\x84\xa4\x53\xd5\x88\x73\x1b"
-			  "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6",
+		.ptext	= "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb"
+			  "\xcc\xcc\xcc\xcc\xdd\xdd\xdd\xdd"
+			  "\xee\xee\xee\xee\xff\xff\xff\xff"
+			  "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb",
+		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
+		.ctext	= "\x78\xEB\xB1\x1C\xC4\x0B\x0A\x48"
+			  "\x31\x2A\xAE\xB2\x04\x02\x44\xCB"
+			  "\x4C\xB7\x01\x69\x51\x90\x92\x26"
+			  "\x97\x9B\x0D\x15\xDC\x6A\x8F\x6D",
 		.len	= 32,
-	}, {
-		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
-			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ctext	= "\x21\x52\x84\x15\xd1\xf7\x21\x55"
-			  "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d"
-			  "\xda\x63\xb2\xf1\x82\xb0\x89\x59"
-			  "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92",
+	}, { /* A.2.2.2 SM4-CBC Example 2 */
+		.key	= "\xFE\xDC\xBA\x98\x76\x54\x32\x10"
+			  "\x01\x23\x45\x67\x89\xAB\xCD\xEF",
+		.klen	= 16,
+		.ptext	= "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb"
+			  "\xcc\xcc\xcc\xcc\xdd\xdd\xdd\xdd"
+			  "\xee\xee\xee\xee\xff\xff\xff\xff"
+			  "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb",
+		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
+		.ctext	= "\x0d\x3a\x6d\xdc\x2d\x21\xc6\x98"
+			  "\x85\x72\x15\x58\x7b\x7b\xb5\x9a"
+			  "\x91\xf2\xc1\x47\x91\x1a\x41\x44"
+			  "\x66\x5e\x1f\xa1\xd4\x0b\xae\x38",
 		.len	= 32,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ctext	= "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82"
-			  "\x53\xd0\xed\x2d\x30\xc1\x20\xef"
-			  "\x70\x67\x5e\xff\x09\x70\xbb\xc1"
-			  "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48"
-			  "\xbe\xce\xb1\xc7\xb2\x67\xc4\xa7"
-			  "\x76\xf8\x36\x30\xb7\xb4\x9a\xd9"
-			  "\xf5\x9d\xd0\x7b\xc1\x06\x96\x44"
-			  "\x19\xc5\x58\x84\x63\xb9\x12\x68"
-			  "\x68\xc7\xaa\x18\x98\xf2\x1f\x5c"
-			  "\x39\xa6\xd8\x32\x2b\xc3\x51\xfd"
-			  "\x74\x79\x2e\xb4\x44\xd7\x69\xc4"
-			  "\xfc\x29\xe6\xed\x26\x1e\xa6\x9d"
-			  "\x1c\xbe\x00\x0e\x7f\x3a\xca\xfb"
-			  "\x6d\x13\x65\xa0\xf9\x31\x12\xe2"
-			  "\x26\xd1\xec\x2b\x0a\x8b\x59\x99"
-			  "\xa7\x49\xa0\x0e\x09\x33\x85\x50"
-			  "\xc3\x23\xca\x7a\xdd\x13\x45\x5f"
-			  "\xde\x4c\xa7\xcb\x00\x8a\x66\x6f"
-			  "\xa2\xb6\xb1\x2e\xe1\xa0\x18\xf6"
-			  "\xad\xf3\xbd\xeb\xc7\xef\x55\x4f"
-			  "\x79\x91\x8d\x36\x13\x7b\xd0\x4a"
-			  "\x6c\x39\xfb\x53\xb8\x6f\x02\x51"
-			  "\xa5\x20\xac\x24\x1c\x73\x59\x73"
-			  "\x58\x61\x3a\x87\x58\xb3\x20\x56"
-			  "\x39\x06\x2b\x4d\xd3\x20\x2b\x89"
-			  "\x3f\xa2\xf0\x96\xeb\x7f\xa4\xcd"
-			  "\x11\xae\xbd\xcb\x3a\xb4\xd9\x91"
-			  "\x09\x35\x71\x50\x65\xac\x92\xe3"
-			  "\x7b\x32\xc0\x7a\xdd\xd4\xc3\x92"
-			  "\x6f\xeb\x79\xde\x6f\xd3\x25\xc9"
-			  "\xcd\x63\xf5\x1e\x7a\x3b\x26\x9d"
-			  "\x77\x04\x80\xa9\xbf\x38\xb5\xbd"
-			  "\xb8\x05\x07\xbd\xfd\xab\x7b\xf8"
-			  "\x2a\x26\xcc\x49\x14\x6d\x55\x01"
-			  "\x06\x94\xd8\xb2\x2d\x53\x83\x1b"
-			  "\x8f\xd4\xdd\x57\x12\x7e\x18\xba"
-			  "\x8e\xe2\x4d\x80\xef\x7e\x6b\x9d"
-			  "\x24\xa9\x60\xa4\x97\x85\x86\x2a"
-			  "\x01\x00\x09\xf1\xcb\x4a\x24\x1c"
-			  "\xd8\xf6\xe6\x5b\xe7\x5d\xf2\xc4"
-			  "\x97\x1c\x10\xc6\x4d\x66\x4f\x98"
-			  "\x87\x30\xac\xd5\xea\x73\x49\x10"
-			  "\x80\xea\xe5\x5f\x4d\x5f\x03\x33"
-			  "\x66\x02\x35\x3d\x60\x06\x36\x4f"
-			  "\x14\x1c\xd8\x07\x1f\x78\xd0\xf8"
-			  "\x4f\x6c\x62\x7c\x15\xa5\x7c\x28"
-			  "\x7c\xcc\xeb\x1f\xd1\x07\x90\x93"
-			  "\x7e\xc2\xa8\x3a\x80\xc0\xf5\x30"
-			  "\xcc\x75\xcf\x16\x26\xa9\x26\x3b"
-			  "\xe7\x68\x2f\x15\x21\x5b\xe4\x00"
-			  "\xbd\x48\x50\xcd\x75\x70\xc4\x62"
-			  "\xbb\x41\xfb\x89\x4a\x88\x3b\x3b"
-			  "\x51\x66\x02\x69\x04\x97\x36\xd4"
-			  "\x75\xae\x0b\xa3\x42\xf8\xca\x79"
-			  "\x8f\x93\xe9\xcc\x38\xbd\xd6\xd2"
-			  "\xf9\x70\x4e\xc3\x6a\x8e\x25\xbd"
-			  "\xea\x15\x5a\xa0\x85\x7e\x81\x0d"
-			  "\x03\xe7\x05\x39\xf5\x05\x26\xee"
-			  "\xec\xaa\x1f\x3d\xc9\x98\x76\x01"
-			  "\x2c\xf4\xfc\xa3\x88\x77\x38\xc4"
-			  "\x50\x65\x50\x6d\x04\x1f\xdf\x5a"
-			  "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca"
-			  "\x47\x26\xef\x39\xb8\xb4\xf2\xd1"
-			  "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf",
-		.len	= 512,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95"
-			  "\x02\x88\x41\x97\x16\x93\x99\x37"
-			  "\x51\x05\x82\x09\x74\x94\x45\x92",
-		.klen	= 64,
-		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ctext	= "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1"
-			  "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb"
-			  "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73"
-			  "\x92\x99\xde\xd3\x76\xed\xcd\x63"
-			  "\x64\x3a\x22\x57\xc1\x43\x49\xd4"
-			  "\x79\x36\x31\x19\x62\xae\x10\x7e"
-			  "\x7d\xcf\x7a\xe2\x6b\xce\x27\xfa"
-			  "\xdc\x3d\xd9\x83\xd3\x42\x4c\xe0"
-			  "\x1b\xd6\x1d\x1a\x6f\xd2\x03\x00"
-			  "\xfc\x81\x99\x8a\x14\x62\xf5\x7e"
-			  "\x0d\xe7\x12\xe8\x17\x9d\x0b\xec"
-			  "\xe2\xf7\xc9\xa7\x63\xd1\x79\xb6"
-			  "\x62\x62\x37\xfe\x0a\x4c\x4a\x37"
-			  "\x70\xc7\x5e\x96\x5f\xbc\x8e\x9e"
-			  "\x85\x3c\x4f\x26\x64\x85\xbc\x68"
-			  "\xb0\xe0\x86\x5e\x26\x41\xce\x11"
-			  "\x50\xda\x97\x14\xe9\x9e\xc7\x6d"
-			  "\x3b\xdc\x43\xde\x2b\x27\x69\x7d"
-			  "\xfc\xb0\x28\xbd\x8f\xb1\xc6\x31"
-			  "\x14\x4d\xf0\x74\x37\xfd\x07\x25"
-			  "\x96\x55\xe5\xfc\x9e\x27\x2a\x74"
-			  "\x1b\x83\x4d\x15\x83\xac\x57\xa0"
-			  "\xac\xa5\xd0\x38\xef\x19\x56\x53"
-			  "\x25\x4b\xfc\xce\x04\x23\xe5\x6b"
-			  "\xf6\xc6\x6c\x32\x0b\xb3\x12\xc5"
-			  "\xed\x22\x34\x1c\x5d\xed\x17\x06"
-			  "\x36\xa3\xe6\x77\xb9\x97\x46\xb8"
-			  "\xe9\x3f\x7e\xc7\xbc\x13\x5c\xdc"
-			  "\x6e\x3f\x04\x5e\xd1\x59\xa5\x82"
-			  "\x35\x91\x3d\x1b\xe4\x97\x9f\x92"
-			  "\x1c\x5e\x5f\x6f\x41\xd4\x62\xa1"
-			  "\x8d\x39\xfc\x42\xfb\x38\x80\xb9"
-			  "\x0a\xe3\xcc\x6a\x93\xd9\x7a\xb1"
-			  "\xe9\x69\xaf\x0a\x6b\x75\x38\xa7"
-			  "\xa1\xbf\xf7\xda\x95\x93\x4b\x78"
-			  "\x19\xf5\x94\xf9\xd2\x00\x33\x37"
-			  "\xcf\xf5\x9e\x9c\xf3\xcc\xa6\xee"
-			  "\x42\xb2\x9e\x2c\x5f\x48\x23\x26"
-			  "\x15\x25\x17\x03\x3d\xfe\x2c\xfc"
-			  "\xeb\xba\xda\xe0\x00\x05\xb6\xa6"
-			  "\x07\xb3\xe8\x36\x5b\xec\x5b\xbf"
-			  "\xd6\x5b\x00\x74\xc6\x97\xf1\x6a"
-			  "\x49\xa1\xc3\xfa\x10\x52\xb9\x14"
-			  "\xad\xb7\x73\xf8\x78\x12\xc8\x59"
-			  "\x17\x80\x4c\x57\x39\xf1\x6d\x80"
-			  "\x25\x77\x0f\x5e\x7d\xf0\xaf\x21"
-			  "\xec\xce\xb7\xc8\x02\x8a\xed\x53"
-			  "\x2c\x25\x68\x2e\x1f\x85\x5e\x67"
-			  "\xd1\x07\x7a\x3a\x89\x08\xe0\x34"
-			  "\xdc\xdb\x26\xb4\x6b\x77\xfc\x40"
-			  "\x31\x15\x72\xa0\xf0\x73\xd9\x3b"
-			  "\xd5\xdb\xfe\xfc\x8f\xa9\x44\xa2"
-			  "\x09\x9f\xc6\x33\xe5\xe2\x88\xe8"
-			  "\xf3\xf0\x1a\xf4\xce\x12\x0f\xd6"
-			  "\xf7\x36\xe6\xa4\xf4\x7a\x10\x58"
-			  "\xcc\x1f\x48\x49\x65\x47\x75\xe9"
-			  "\x28\xe1\x65\x7b\xf2\xc4\xb5\x07"
-			  "\xf2\xec\x76\xd8\x8f\x09\xf3\x16"
-			  "\xa1\x51\x89\x3b\xeb\x96\x42\xac"
-			  "\x65\xe0\x67\x63\x29\xdc\xb4\x7d"
-			  "\xf2\x41\x51\x6a\xcb\xde\x3c\xfb"
-			  "\x66\x8d\x13\xca\xe0\x59\x2a\x00"
-			  "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5"
-			  "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c",
-		.len	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
 	}
 };
 
-static const struct cipher_testvec speck64_tv_template[] = {
-	{ /* Speck64/96 */
-		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
-			  "\x10\x11\x12\x13",
-		.klen	= 12,
-		.ptext	= "\x65\x61\x6e\x73\x20\x46\x61\x74",
-		.ctext	= "\x6c\x94\x75\x41\xec\x52\x79\x9f",
-		.len	= 8,
-	}, { /* Speck64/128 */
-		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
-			  "\x10\x11\x12\x13\x18\x19\x1a\x1b",
+static const struct cipher_testvec sm4_ctr_tv_template[] = {
+	{ /* A.2.5.1 SM4-CTR Example 1 */
+		.key	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
 		.klen	= 16,
-		.ptext	= "\x2d\x43\x75\x74\x74\x65\x72\x3b",
-		.ctext	= "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c",
-		.len	= 8,
-	},
-};
-
-/*
- * Speck64-XTS test vectors, taken from the AES-XTS test vectors with the
- * ciphertext recomputed with Speck64 as the cipher, and key lengths adjusted
- */
-static const struct cipher_testvec speck64_xts_tv_template[] = {
-	{
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 24,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ctext	= "\x84\xaf\x54\x07\x19\xd4\x7c\xa6"
-			  "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2"
-			  "\x80\xf5\x72\xe7\xcd\xf0\x99\x22"
-			  "\x35\xa7\x2f\x06\xef\xdc\x51\xaa",
-		.len	= 32,
-	}, {
-		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 24,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ctext	= "\x12\x56\x73\xcd\x15\x87\xa8\x59"
-			  "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f"
-			  "\xb3\x12\x69\x7e\x36\xeb\x52\xff"
-			  "\x62\xdd\xba\x90\xb3\xe1\xee\x99",
-		.len	= 32,
-	}, {
-		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
-			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 24,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ctext	= "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c"
-			  "\x27\x36\xc0\xbf\x5d\xea\x36\x37"
-			  "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b"
-			  "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34",
-		.len	= 32,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93",
-		.klen	= 24,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ctext	= "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e"
-			  "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09"
-			  "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3"
-			  "\x11\xc7\x39\x96\xd0\x95\xf4\x56"
-			  "\xf4\xdd\x03\x38\x01\x44\x2c\xcf"
-			  "\x88\xae\x8e\x3c\xcd\xe7\xaa\x66"
-			  "\xfe\x3d\xc6\xfb\x01\x23\x51\x43"
-			  "\xd5\xd2\x13\x86\x94\x34\xe9\x62"
-			  "\xf9\x89\xe3\xd1\x7b\xbe\xf8\xef"
-			  "\x76\x35\x04\x3f\xdb\x23\x9d\x0b"
-			  "\x85\x42\xb9\x02\xd6\xcc\xdb\x96"
-			  "\xa7\x6b\x27\xb6\xd4\x45\x8f\x7d"
-			  "\xae\xd2\x04\xd5\xda\xc1\x7e\x24"
-			  "\x8c\x73\xbe\x48\x7e\xcf\x65\x28"
-			  "\x29\xe5\xbe\x54\x30\xcb\x46\x95"
-			  "\x4f\x2e\x8a\x36\xc8\x27\xc5\xbe"
-			  "\xd0\x1a\xaf\xab\x26\xcd\x9e\x69"
-			  "\xa1\x09\x95\x71\x26\xe9\xc4\xdf"
-			  "\xe6\x31\xc3\x46\xda\xaf\x0b\x41"
-			  "\x1f\xab\xb1\x8e\xd6\xfc\x0b\xb3"
-			  "\x82\xc0\x37\x27\xfc\x91\xa7\x05"
-			  "\xfb\xc5\xdc\x2b\x74\x96\x48\x43"
-			  "\x5d\x9c\x19\x0f\x60\x63\x3a\x1f"
-			  "\x6f\xf0\x03\xbe\x4d\xfd\xc8\x4a"
-			  "\xc6\xa4\x81\x6d\xc3\x12\x2a\x5c"
-			  "\x07\xff\xf3\x72\x74\x48\xb5\x40"
-			  "\x50\xb5\xdd\x90\x43\x31\x18\x15"
-			  "\x7b\xf2\xa6\xdb\x83\xc8\x4b\x4a"
-			  "\x29\x93\x90\x8b\xda\x07\xf0\x35"
-			  "\x6d\x90\x88\x09\x4e\x83\xf5\x5b"
-			  "\x94\x12\xbb\x33\x27\x1d\x3f\x23"
-			  "\x51\xa8\x7c\x07\xa2\xae\x77\xa6"
-			  "\x50\xfd\xcc\xc0\x4f\x80\x7a\x9f"
-			  "\x66\xdd\xcd\x75\x24\x8b\x33\xf7"
-			  "\x20\xdb\x83\x9b\x4f\x11\x63\x6e"
-			  "\xcf\x37\xef\xc9\x11\x01\x5c\x45"
-			  "\x32\x99\x7c\x3c\x9e\x42\x89\xe3"
-			  "\x70\x6d\x15\x9f\xb1\xe6\xb6\x05"
-			  "\xfe\x0c\xb9\x49\x2d\x90\x6d\xcc"
-			  "\x5d\x3f\xc1\xfe\x89\x0a\x2e\x2d"
-			  "\xa0\xa8\x89\x3b\x73\x39\xa5\x94"
-			  "\x4c\xa4\xa6\xbb\xa7\x14\x46\x89"
-			  "\x10\xff\xaf\xef\xca\xdd\x4f\x80"
-			  "\xb3\xdf\x3b\xab\xd4\xe5\x5a\xc7"
-			  "\x33\xca\x00\x8b\x8b\x3f\xea\xec"
-			  "\x68\x8a\xc2\x6d\xfd\xd4\x67\x0f"
-			  "\x22\x31\xe1\x0e\xfe\x5a\x04\xd5"
-			  "\x64\xa3\xf1\x1a\x76\x28\xcc\x35"
-			  "\x36\xa7\x0a\x74\xf7\x1c\x44\x9b"
-			  "\xc7\x1b\x53\x17\x02\xea\xd1\xad"
-			  "\x13\x51\x73\xc0\xa0\xb2\x05\x32"
-			  "\xa8\xa2\x37\x2e\xe1\x7a\x3a\x19"
-			  "\x26\xb4\x6c\x62\x5d\xb3\x1a\x1d"
-			  "\x59\xda\xee\x1a\x22\x18\xda\x0d"
-			  "\x88\x0f\x55\x8b\x72\x62\xfd\xc1"
-			  "\x69\x13\xcd\x0d\x5f\xc1\x09\x52"
-			  "\xee\xd6\xe3\x84\x4d\xee\xf6\x88"
-			  "\xaf\x83\xdc\x76\xf4\xc0\x93\x3f"
-			  "\x4a\x75\x2f\xb0\x0b\x3e\xc4\x54"
-			  "\x7d\x69\x8d\x00\x62\x77\x0d\x14"
-			  "\xbe\x7c\xa6\x7d\xc5\x24\x4f\xf3"
-			  "\x50\xf7\x5f\xf4\xc2\xca\x41\x97"
-			  "\x37\xbe\x75\x74\xcd\xf0\x75\x6e"
-			  "\x25\x23\x94\xbd\xda\x8d\xb0\xd4",
-		.len	= 512,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27",
-		.klen	= 32,
-		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ctext	= "\x55\xed\x71\xd3\x02\x8e\x15\x3b"
-			  "\xc6\x71\x29\x2d\x3e\x89\x9f\x59"
-			  "\x68\x6a\xcc\x8a\x56\x97\xf3\x95"
-			  "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c"
-			  "\x78\x16\xea\x80\xdb\x33\x75\x94"
-			  "\xf9\x29\xc4\x2b\x76\x75\x97\xc7"
-			  "\xf2\x98\x2c\xf9\xff\xc8\xd5\x2b"
-			  "\x18\xf1\xaf\xcf\x7c\xc5\x0b\xee"
-			  "\xad\x3c\x76\x7c\xe6\x27\xa2\x2a"
-			  "\xe4\x66\xe1\xab\xa2\x39\xfc\x7c"
-			  "\xf5\xec\x32\x74\xa3\xb8\x03\x88"
-			  "\x52\xfc\x2e\x56\x3f\xa1\xf0\x9f"
-			  "\x84\x5e\x46\xed\x20\x89\xb6\x44"
-			  "\x8d\xd0\xed\x54\x47\x16\xbe\x95"
-			  "\x8a\xb3\x6b\x72\xc4\x32\x52\x13"
-			  "\x1b\xb0\x82\xbe\xac\xf9\x70\xa6"
-			  "\x44\x18\xdd\x8c\x6e\xca\x6e\x45"
-			  "\x8f\x1e\x10\x07\x57\x25\x98\x7b"
-			  "\x17\x8c\x78\xdd\x80\xa7\xd9\xd8"
-			  "\x63\xaf\xb9\x67\x57\xfd\xbc\xdb"
-			  "\x44\xe9\xc5\x65\xd1\xc7\x3b\xff"
-			  "\x20\xa0\x80\x1a\xc3\x9a\xad\x5e"
-			  "\x5d\x3b\xd3\x07\xd9\xf5\xfd\x3d"
-			  "\x4a\x8b\xa8\xd2\x6e\x7a\x51\x65"
-			  "\x6c\x8e\x95\xe0\x45\xc9\x5f\x4a"
-			  "\x09\x3c\x3d\x71\x7f\x0c\x84\x2a"
-			  "\xc8\x48\x52\x1a\xc2\xd5\xd6\x78"
-			  "\x92\x1e\xa0\x90\x2e\xea\xf0\xf3"
-			  "\xdc\x0f\xb1\xaf\x0d\x9b\x06\x2e"
-			  "\x35\x10\x30\x82\x0d\xe7\xc5\x9b"
-			  "\xde\x44\x18\xbd\x9f\xd1\x45\xa9"
-			  "\x7b\x7a\x4a\xad\x35\x65\x27\xca"
-			  "\xb2\xc3\xd4\x9b\x71\x86\x70\xee"
-			  "\xf1\x89\x3b\x85\x4b\x5b\xaa\xaf"
-			  "\xfc\x42\xc8\x31\x59\xbe\x16\x60"
-			  "\x4f\xf9\xfa\x12\xea\xd0\xa7\x14"
-			  "\xf0\x7a\xf3\xd5\x8d\xbd\x81\xef"
-			  "\x52\x7f\x29\x51\x94\x20\x67\x3c"
-			  "\xd1\xaf\x77\x9f\x22\x5a\x4e\x63"
-			  "\xe7\xff\x73\x25\xd1\xdd\x96\x8a"
-			  "\x98\x52\x6d\xf3\xac\x3e\xf2\x18"
-			  "\x6d\xf6\x0a\x29\xa6\x34\x3d\xed"
-			  "\xe3\x27\x0d\x9d\x0a\x02\x44\x7e"
-			  "\x5a\x7e\x67\x0f\x0a\x9e\xd6\xad"
-			  "\x91\xe6\x4d\x81\x8c\x5c\x59\xaa"
-			  "\xfb\xeb\x56\x53\xd2\x7d\x4c\x81"
-			  "\x65\x53\x0f\x41\x11\xbd\x98\x99"
-			  "\xf9\xc6\xfa\x51\x2e\xa3\xdd\x8d"
-			  "\x84\x98\xf9\x34\xed\x33\x2a\x1f"
-			  "\x82\xed\xc1\x73\x98\xd3\x02\xdc"
-			  "\xe6\xc2\x33\x1d\xa2\xb4\xca\x76"
-			  "\x63\x51\x34\x9d\x96\x12\xae\xce"
-			  "\x83\xc9\x76\x5e\xa4\x1b\x53\x37"
-			  "\x17\xd5\xc0\x80\x1d\x62\xf8\x3d"
-			  "\x54\x27\x74\xbb\x10\x86\x57\x46"
-			  "\x68\xe1\xed\x14\xe7\x9d\xfc\x84"
-			  "\x47\xbc\xc2\xf8\x19\x4b\x99\xcf"
-			  "\x7a\xe9\xc4\xb8\x8c\x82\x72\x4d"
-			  "\x7b\x4f\x38\x55\x36\x71\x64\xc1"
-			  "\xfc\x5c\x75\x52\x33\x02\x18\xf8"
-			  "\x17\xe1\x2b\xc2\x43\x39\xbd\x76"
-			  "\x9b\x63\x76\x32\x2f\x19\x72\x10"
-			  "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5"
-			  "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c",
-		.len	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
+		.ptext	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+			  "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
+			  "\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
+			  "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+			  "\xee\xee\xee\xee\xee\xee\xee\xee"
+			  "\xff\xff\xff\xff\xff\xff\xff\xff"
+			  "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+			  "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb",
+		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
+		.ctext	= "\xac\x32\x36\xcb\x97\x0c\xc2\x07"
+			  "\x91\x36\x4c\x39\x5a\x13\x42\xd1"
+			  "\xa3\xcb\xc1\x87\x8c\x6f\x30\xcd"
+			  "\x07\x4c\xce\x38\x5c\xdd\x70\xc7"
+			  "\xf2\x34\xbc\x0e\x24\xc1\x19\x80"
+			  "\xfd\x12\x86\x31\x0c\xe3\x7b\x92"
+			  "\x6e\x02\xfc\xd0\xfa\xa0\xba\xf3"
+			  "\x8b\x29\x33\x85\x1d\x82\x45\x14",
+		.len	= 64,
+	}, { /* A.2.5.2 SM4-CTR Example 2 */
+		.key	= "\xFE\xDC\xBA\x98\x76\x54\x32\x10"
+			  "\x01\x23\x45\x67\x89\xAB\xCD\xEF",
+		.klen	= 16,
+		.ptext	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+			  "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
+			  "\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
+			  "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+			  "\xee\xee\xee\xee\xee\xee\xee\xee"
+			  "\xff\xff\xff\xff\xff\xff\xff\xff"
+			  "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+			  "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb",
+		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
+		.ctext	= "\x5d\xcc\xcd\x25\xb9\x5a\xb0\x74"
+			  "\x17\xa0\x85\x12\xee\x16\x0e\x2f"
+			  "\x8f\x66\x15\x21\xcb\xba\xb4\x4c"
+			  "\xc8\x71\x38\x44\x5b\xc2\x9e\x5c"
+			  "\x0a\xe0\x29\x72\x05\xd6\x27\x04"
+			  "\x17\x3b\x21\x23\x9b\x88\x7f\x6c"
+			  "\x8c\xb5\xb8\x00\x91\x7a\x24\x88"
+			  "\x28\x4b\xde\x9e\x16\xea\x29\x06",
+		.len	= 64,
 	}
 };
 
@@ -13883,6 +13251,27 @@ static const struct cipher_testvec aes_lrw_tv_template[] = {
 		.ctext	= "\x5b\x90\x8e\xc1\xab\xdd\x67\x5f"
 			  "\x3d\x69\x8a\x95\x53\xc8\x9c\xe5",
 		.len	= 16,
+	}, { /* Test counter wrap-around, modified from LRW-32-AES 1 */
+		.key    = "\x45\x62\xac\x25\xf8\x28\x17\x6d"
+			  "\x4c\x26\x84\x14\xb5\x68\x01\x85"
+			  "\x25\x8e\x2a\x05\xe7\x3e\x9d\x03"
+			  "\xee\x5a\x83\x0c\xcc\x09\x4c\x87",
+		.klen   = 32,
+		.iv     = "\xff\xff\xff\xff\xff\xff\xff\xff"
+			  "\xff\xff\xff\xff\xff\xff\xff\xff",
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.ctext	= "\x47\x90\x50\xf6\xf4\x8d\x5c\x7f"
+			  "\x84\xc7\x83\x95\x2d\xa2\x02\xc0"
+			  "\xda\x7f\xa3\xc0\x88\x2a\x0a\x50"
+			  "\xfb\xc1\x78\x03\x39\xfe\x1d\xe5"
+			  "\xf1\xb2\x73\xcd\x65\xa3\xdf\x5f"
+			  "\xe9\x5d\x48\x92\x54\x63\x4e\xb8",
+		.len	= 48,
 	}, {
 /* http://www.mail-archive.com/stds-p1619@listserv.ieee.org/msg00173.html */
 		.key    = "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
diff --git a/crypto/xcbc.c b/crypto/xcbc.c
index 25c75af50d3f..c055f57fab11 100644
--- a/crypto/xcbc.c
+++ b/crypto/xcbc.c
@@ -57,15 +57,17 @@ struct xcbc_desc_ctx {
 	u8 ctx[];
 };
 
+#define XCBC_BLOCKSIZE	16
+
 static int crypto_xcbc_digest_setkey(struct crypto_shash *parent,
 				     const u8 *inkey, unsigned int keylen)
 {
 	unsigned long alignmask = crypto_shash_alignmask(parent);
 	struct xcbc_tfm_ctx *ctx = crypto_shash_ctx(parent);
-	int bs = crypto_shash_blocksize(parent);
 	u8 *consts = PTR_ALIGN(&ctx->ctx[0], alignmask + 1);
 	int err = 0;
-	u8 key1[bs];
+	u8 key1[XCBC_BLOCKSIZE];
+	int bs = sizeof(key1);
 
 	if ((err = crypto_cipher_setkey(ctx->child, inkey, keylen)))
 		return err;
@@ -212,7 +214,7 @@ static int xcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 		return PTR_ERR(alg);
 
 	switch(alg->cra_blocksize) {
-	case 16:
+	case XCBC_BLOCKSIZE:
 		break;
 	default:
 		goto out_put_alg;
diff --git a/crypto/xts.c b/crypto/xts.c
index ccf55fbb8bc2..847f54f76789 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -26,8 +26,6 @@
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
 
-#define XTS_BUFFER_SIZE 128u
-
 struct priv {
 	struct crypto_skcipher *child;
 	struct crypto_cipher *tweak;
@@ -39,19 +37,7 @@ struct xts_instance_ctx {
 };
 
 struct rctx {
-	le128 buf[XTS_BUFFER_SIZE / sizeof(le128)];
-
 	le128 t;
-
-	le128 *ext;
-
-	struct scatterlist srcbuf[2];
-	struct scatterlist dstbuf[2];
-	struct scatterlist *src;
-	struct scatterlist *dst;
-
-	unsigned int left;
-
 	struct skcipher_request subreq;
 };
 
@@ -96,81 +82,27 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
 	return err;
 }
 
-static int post_crypt(struct skcipher_request *req)
+/*
+ * We compute the tweak masks twice (both before and after the ECB encryption or
+ * decryption) to avoid having to allocate a temporary buffer and/or make
+ * mutliple calls to the 'ecb(..)' instance, which usually would be slower than
+ * just doing the gf128mul_x_ble() calls again.
+ */
+static int xor_tweak(struct skcipher_request *req, bool second_pass)
 {
 	struct rctx *rctx = skcipher_request_ctx(req);
-	le128 *buf = rctx->ext ?: rctx->buf;
-	struct skcipher_request *subreq;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	const int bs = XTS_BLOCK_SIZE;
 	struct skcipher_walk w;
-	struct scatterlist *sg;
-	unsigned offset;
+	le128 t = rctx->t;
 	int err;
 
-	subreq = &rctx->subreq;
-	err = skcipher_walk_virt(&w, subreq, false);
-
-	while (w.nbytes) {
-		unsigned int avail = w.nbytes;
-		le128 *wdst;
-
-		wdst = w.dst.virt.addr;
-
-		do {
-			le128_xor(wdst, buf++, wdst);
-			wdst++;
-		} while ((avail -= bs) >= bs);
-
-		err = skcipher_walk_done(&w, avail);
+	if (second_pass) {
+		req = &rctx->subreq;
+		/* set to our TFM to enforce correct alignment: */
+		skcipher_request_set_tfm(req, tfm);
 	}
-
-	rctx->left -= subreq->cryptlen;
-
-	if (err || !rctx->left)
-		goto out;
-
-	rctx->dst = rctx->dstbuf;
-
-	scatterwalk_done(&w.out, 0, 1);
-	sg = w.out.sg;
-	offset = w.out.offset;
-
-	if (rctx->dst != sg) {
-		rctx->dst[0] = *sg;
-		sg_unmark_end(rctx->dst);
-		scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 2);
-	}
-	rctx->dst[0].length -= offset - sg->offset;
-	rctx->dst[0].offset = offset;
-
-out:
-	return err;
-}
-
-static int pre_crypt(struct skcipher_request *req)
-{
-	struct rctx *rctx = skcipher_request_ctx(req);
-	le128 *buf = rctx->ext ?: rctx->buf;
-	struct skcipher_request *subreq;
-	const int bs = XTS_BLOCK_SIZE;
-	struct skcipher_walk w;
-	struct scatterlist *sg;
-	unsigned cryptlen;
-	unsigned offset;
-	bool more;
-	int err;
-
-	subreq = &rctx->subreq;
-	cryptlen = subreq->cryptlen;
-
-	more = rctx->left > cryptlen;
-	if (!more)
-		cryptlen = rctx->left;
-
-	skcipher_request_set_crypt(subreq, rctx->src, rctx->dst,
-				   cryptlen, NULL);
-
-	err = skcipher_walk_virt(&w, subreq, false);
+	err = skcipher_walk_virt(&w, req, false);
 
 	while (w.nbytes) {
 		unsigned int avail = w.nbytes;
@@ -181,180 +113,71 @@ static int pre_crypt(struct skcipher_request *req)
 		wdst = w.dst.virt.addr;
 
 		do {
-			*buf++ = rctx->t;
-			le128_xor(wdst++, &rctx->t, wsrc++);
-			gf128mul_x_ble(&rctx->t, &rctx->t);
+			le128_xor(wdst++, &t, wsrc++);
+			gf128mul_x_ble(&t, &t);
 		} while ((avail -= bs) >= bs);
 
 		err = skcipher_walk_done(&w, avail);
 	}
 
-	skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst,
-				   cryptlen, NULL);
-
-	if (err || !more)
-		goto out;
-
-	rctx->src = rctx->srcbuf;
-
-	scatterwalk_done(&w.in, 0, 1);
-	sg = w.in.sg;
-	offset = w.in.offset;
-
-	if (rctx->src != sg) {
-		rctx->src[0] = *sg;
-		sg_unmark_end(rctx->src);
-		scatterwalk_crypto_chain(rctx->src, sg_next(sg), 2);
-	}
-	rctx->src[0].length -= offset - sg->offset;
-	rctx->src[0].offset = offset;
-
-out:
 	return err;
 }
 
-static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
+static int xor_tweak_pre(struct skcipher_request *req)
 {
-	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
-	struct rctx *rctx = skcipher_request_ctx(req);
-	struct skcipher_request *subreq;
-	gfp_t gfp;
-
-	subreq = &rctx->subreq;
-	skcipher_request_set_tfm(subreq, ctx->child);
-	skcipher_request_set_callback(subreq, req->base.flags, done, req);
-
-	gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
-							   GFP_ATOMIC;
-	rctx->ext = NULL;
-
-	subreq->cryptlen = XTS_BUFFER_SIZE;
-	if (req->cryptlen > XTS_BUFFER_SIZE) {
-		unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE);
-
-		rctx->ext = kmalloc(n, gfp);
-		if (rctx->ext)
-			subreq->cryptlen = n;
-	}
-
-	rctx->src = req->src;
-	rctx->dst = req->dst;
-	rctx->left = req->cryptlen;
-
-	/* calculate first value of T */
-	crypto_cipher_encrypt_one(ctx->tweak, (u8 *)&rctx->t, req->iv);
-
-	return 0;
+	return xor_tweak(req, false);
 }
 
-static void exit_crypt(struct skcipher_request *req)
+static int xor_tweak_post(struct skcipher_request *req)
 {
-	struct rctx *rctx = skcipher_request_ctx(req);
-
-	rctx->left = 0;
-
-	if (rctx->ext)
-		kzfree(rctx->ext);
+	return xor_tweak(req, true);
 }
 
-static int do_encrypt(struct skcipher_request *req, int err)
-{
-	struct rctx *rctx = skcipher_request_ctx(req);
-	struct skcipher_request *subreq;
-
-	subreq = &rctx->subreq;
-
-	while (!err && rctx->left) {
-		err = pre_crypt(req) ?:
-		      crypto_skcipher_encrypt(subreq) ?:
-		      post_crypt(req);
-
-		if (err == -EINPROGRESS || err == -EBUSY)
-			return err;
-	}
-
-	exit_crypt(req);
-	return err;
-}
-
-static void encrypt_done(struct crypto_async_request *areq, int err)
+static void crypt_done(struct crypto_async_request *areq, int err)
 {
 	struct skcipher_request *req = areq->data;
-	struct skcipher_request *subreq;
-	struct rctx *rctx;
-
-	rctx = skcipher_request_ctx(req);
-
-	if (err == -EINPROGRESS) {
-		if (rctx->left != req->cryptlen)
-			return;
-		goto out;
-	}
-
-	subreq = &rctx->subreq;
-	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
 
-	err = do_encrypt(req, err ?: post_crypt(req));
-	if (rctx->left)
-		return;
+	if (!err)
+		err = xor_tweak_post(req);
 
-out:
 	skcipher_request_complete(req, err);
 }
 
-static int encrypt(struct skcipher_request *req)
-{
-	return do_encrypt(req, init_crypt(req, encrypt_done));
-}
-
-static int do_decrypt(struct skcipher_request *req, int err)
+static void init_crypt(struct skcipher_request *req)
 {
+	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	struct rctx *rctx = skcipher_request_ctx(req);
-	struct skcipher_request *subreq;
-
-	subreq = &rctx->subreq;
+	struct skcipher_request *subreq = &rctx->subreq;
 
-	while (!err && rctx->left) {
-		err = pre_crypt(req) ?:
-		      crypto_skcipher_decrypt(subreq) ?:
-		      post_crypt(req);
-
-		if (err == -EINPROGRESS || err == -EBUSY)
-			return err;
-	}
+	skcipher_request_set_tfm(subreq, ctx->child);
+	skcipher_request_set_callback(subreq, req->base.flags, crypt_done, req);
+	skcipher_request_set_crypt(subreq, req->dst, req->dst,
+				   req->cryptlen, NULL);
 
-	exit_crypt(req);
-	return err;
+	/* calculate first value of T */
+	crypto_cipher_encrypt_one(ctx->tweak, (u8 *)&rctx->t, req->iv);
 }
 
-static void decrypt_done(struct crypto_async_request *areq, int err)
+static int encrypt(struct skcipher_request *req)
 {
-	struct skcipher_request *req = areq->data;
-	struct skcipher_request *subreq;
-	struct rctx *rctx;
-
-	rctx = skcipher_request_ctx(req);
-
-	if (err == -EINPROGRESS) {
-		if (rctx->left != req->cryptlen)
-			return;
-		goto out;
-	}
-
-	subreq = &rctx->subreq;
-	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
-
-	err = do_decrypt(req, err ?: post_crypt(req));
-	if (rctx->left)
-		return;
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq = &rctx->subreq;
 
-out:
-	skcipher_request_complete(req, err);
+	init_crypt(req);
+	return xor_tweak_pre(req) ?:
+		crypto_skcipher_encrypt(subreq) ?:
+		xor_tweak_post(req);
 }
 
 static int decrypt(struct skcipher_request *req)
 {
-	return do_decrypt(req, init_crypt(req, decrypt_done));
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq = &rctx->subreq;
+
+	init_crypt(req);
+	return xor_tweak_pre(req) ?:
+		crypto_skcipher_decrypt(subreq) ?:
+		xor_tweak_post(req);
 }
 
 static int init_tfm(struct crypto_skcipher *tfm)
diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
index 432e9ad77070..51e8250d113f 100644
--- a/drivers/android/Kconfig
+++ b/drivers/android/Kconfig
@@ -10,7 +10,7 @@ if ANDROID
 
 config ANDROID_BINDER_IPC
 	bool "Android Binder IPC Driver"
-	depends on MMU
+	depends on MMU && !CPU_CACHE_VIVT
 	default n
 	---help---
 	  Binder is used in Android for both communication between processes,
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d58763b6b009..cb30a524d16d 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -71,6 +71,7 @@
 #include <linux/security.h>
 #include <linux/spinlock.h>
 #include <linux/ratelimit.h>
+#include <linux/syscalls.h>
 
 #include <uapi/linux/android/binder.h>
 
@@ -457,9 +458,8 @@ struct binder_ref {
 };
 
 enum binder_deferred_state {
-	BINDER_DEFERRED_PUT_FILES    = 0x01,
-	BINDER_DEFERRED_FLUSH        = 0x02,
-	BINDER_DEFERRED_RELEASE      = 0x04,
+	BINDER_DEFERRED_FLUSH        = 0x01,
+	BINDER_DEFERRED_RELEASE      = 0x02,
 };
 
 /**
@@ -480,9 +480,6 @@ enum binder_deferred_state {
  *                        (invariant after initialized)
  * @tsk                   task_struct for group_leader of process
  *                        (invariant after initialized)
- * @files                 files_struct for process
- *                        (protected by @files_lock)
- * @files_lock            mutex to protect @files
  * @deferred_work_node:   element for binder_deferred_list
  *                        (protected by binder_deferred_lock)
  * @deferred_work:        bitmap of deferred work to perform
@@ -527,8 +524,6 @@ struct binder_proc {
 	struct list_head waiting_threads;
 	int pid;
 	struct task_struct *tsk;
-	struct files_struct *files;
-	struct mutex files_lock;
 	struct hlist_node deferred_work_node;
 	int deferred_work;
 	bool is_dead;
@@ -611,6 +606,23 @@ struct binder_thread {
 	bool is_dead;
 };
 
+/**
+ * struct binder_txn_fd_fixup - transaction fd fixup list element
+ * @fixup_entry:          list entry
+ * @file:                 struct file to be associated with new fd
+ * @offset:               offset in buffer data to this fixup
+ *
+ * List element for fd fixups in a transaction. Since file
+ * descriptors need to be allocated in the context of the
+ * target process, we pass each fd to be processed in this
+ * struct.
+ */
+struct binder_txn_fd_fixup {
+	struct list_head fixup_entry;
+	struct file *file;
+	size_t offset;
+};
+
 struct binder_transaction {
 	int debug_id;
 	struct binder_work work;
@@ -628,6 +640,7 @@ struct binder_transaction {
 	long	priority;
 	long	saved_priority;
 	kuid_t	sender_euid;
+	struct list_head fd_fixups;
 	/**
 	 * @lock:  protects @from, @to_proc, and @to_thread
 	 *
@@ -822,6 +835,7 @@ static void
 binder_enqueue_deferred_thread_work_ilocked(struct binder_thread *thread,
 					    struct binder_work *work)
 {
+	WARN_ON(!list_empty(&thread->waiting_thread_node));
 	binder_enqueue_work_ilocked(work, &thread->todo);
 }
 
@@ -839,6 +853,7 @@ static void
 binder_enqueue_thread_work_ilocked(struct binder_thread *thread,
 				   struct binder_work *work)
 {
+	WARN_ON(!list_empty(&thread->waiting_thread_node));
 	binder_enqueue_work_ilocked(work, &thread->todo);
 	thread->process_todo = true;
 }
@@ -920,66 +935,6 @@ static void binder_free_thread(struct binder_thread *thread);
 static void binder_free_proc(struct binder_proc *proc);
 static void binder_inc_node_tmpref_ilocked(struct binder_node *node);
 
-static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
-{
-	unsigned long rlim_cur;
-	unsigned long irqs;
-	int ret;
-
-	mutex_lock(&proc->files_lock);
-	if (proc->files == NULL) {
-		ret = -ESRCH;
-		goto err;
-	}
-	if (!lock_task_sighand(proc->tsk, &irqs)) {
-		ret = -EMFILE;
-		goto err;
-	}
-	rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
-	unlock_task_sighand(proc->tsk, &irqs);
-
-	ret = __alloc_fd(proc->files, 0, rlim_cur, flags);
-err:
-	mutex_unlock(&proc->files_lock);
-	return ret;
-}
-
-/*
- * copied from fd_install
- */
-static void task_fd_install(
-	struct binder_proc *proc, unsigned int fd, struct file *file)
-{
-	mutex_lock(&proc->files_lock);
-	if (proc->files)
-		__fd_install(proc->files, fd, file);
-	mutex_unlock(&proc->files_lock);
-}
-
-/*
- * copied from sys_close
- */
-static long task_close_fd(struct binder_proc *proc, unsigned int fd)
-{
-	int retval;
-
-	mutex_lock(&proc->files_lock);
-	if (proc->files == NULL) {
-		retval = -ESRCH;
-		goto err;
-	}
-	retval = __close_fd(proc->files, fd);
-	/* can't restart close syscall because file table entry was cleared */
-	if (unlikely(retval == -ERESTARTSYS ||
-		     retval == -ERESTARTNOINTR ||
-		     retval == -ERESTARTNOHAND ||
-		     retval == -ERESTART_RESTARTBLOCK))
-		retval = -EINTR;
-err:
-	mutex_unlock(&proc->files_lock);
-	return retval;
-}
-
 static bool binder_has_work_ilocked(struct binder_thread *thread,
 				    bool do_proc_work)
 {
@@ -1270,19 +1225,12 @@ static int binder_inc_node_nilocked(struct binder_node *node, int strong,
 		} else
 			node->local_strong_refs++;
 		if (!node->has_strong_ref && target_list) {
+			struct binder_thread *thread = container_of(target_list,
+						    struct binder_thread, todo);
 			binder_dequeue_work_ilocked(&node->work);
-			/*
-			 * Note: this function is the only place where we queue
-			 * directly to a thread->todo without using the
-			 * corresponding binder_enqueue_thread_work() helper
-			 * functions; in this case it's ok to not set the
-			 * process_todo flag, since we know this node work will
-			 * always be followed by other work that starts queue
-			 * processing: in case of synchronous transactions, a
-			 * BR_REPLY or BR_ERROR; in case of oneway
-			 * transactions, a BR_TRANSACTION_COMPLETE.
-			 */
-			binder_enqueue_work_ilocked(&node->work, target_list);
+			BUG_ON(&thread->todo != target_list);
+			binder_enqueue_deferred_thread_work_ilocked(thread,
+								   &node->work);
 		}
 	} else {
 		if (!internal)
@@ -1958,10 +1906,32 @@ static struct binder_thread *binder_get_txn_from_and_acq_inner(
 	return NULL;
 }
 
+/**
+ * binder_free_txn_fixups() - free unprocessed fd fixups
+ * @t:	binder transaction for t->from
+ *
+ * If the transaction is being torn down prior to being
+ * processed by the target process, free all of the
+ * fd fixups and fput the file structs. It is safe to
+ * call this function after the fixups have been
+ * processed -- in that case, the list will be empty.
+ */
+static void binder_free_txn_fixups(struct binder_transaction *t)
+{
+	struct binder_txn_fd_fixup *fixup, *tmp;
+
+	list_for_each_entry_safe(fixup, tmp, &t->fd_fixups, fixup_entry) {
+		fput(fixup->file);
+		list_del(&fixup->fixup_entry);
+		kfree(fixup);
+	}
+}
+
 static void binder_free_transaction(struct binder_transaction *t)
 {
 	if (t->buffer)
 		t->buffer->transaction = NULL;
+	binder_free_txn_fixups(t);
 	kfree(t);
 	binder_stats_deleted(BINDER_STAT_TRANSACTION);
 }
@@ -2262,12 +2232,17 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
 		} break;
 
 		case BINDER_TYPE_FD: {
-			struct binder_fd_object *fp = to_binder_fd_object(hdr);
-
-			binder_debug(BINDER_DEBUG_TRANSACTION,
-				     "        fd %d\n", fp->fd);
-			if (failed_at)
-				task_close_fd(proc, fp->fd);
+			/*
+			 * No need to close the file here since user-space
+			 * closes it for for successfully delivered
+			 * transactions. For transactions that weren't
+			 * delivered, the new fd was never allocated so
+			 * there is no need to close and the fput on the
+			 * file is done when the transaction is torn
+			 * down.
+			 */
+			WARN_ON(failed_at &&
+				proc->tsk == current->group_leader);
 		} break;
 		case BINDER_TYPE_PTR:
 			/*
@@ -2283,6 +2258,15 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
 			size_t fd_index;
 			binder_size_t fd_buf_size;
 
+			if (proc->tsk != current->group_leader) {
+				/*
+				 * Nothing to do if running in sender context
+				 * The fd fixups have not been applied so no
+				 * fds need to be closed.
+				 */
+				continue;
+			}
+
 			fda = to_binder_fd_array_object(hdr);
 			parent = binder_validate_ptr(buffer, fda->parent,
 						     off_start,
@@ -2315,7 +2299,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
 			}
 			fd_array = (u32 *)(parent_buffer + (uintptr_t)fda->parent_offset);
 			for (fd_index = 0; fd_index < fda->num_fds; fd_index++)
-				task_close_fd(proc, fd_array[fd_index]);
+				ksys_close(fd_array[fd_index]);
 		} break;
 		default:
 			pr_err("transaction release %d bad object type %x\n",
@@ -2447,17 +2431,18 @@ done:
 	return ret;
 }
 
-static int binder_translate_fd(int fd,
+static int binder_translate_fd(u32 *fdp,
 			       struct binder_transaction *t,
 			       struct binder_thread *thread,
 			       struct binder_transaction *in_reply_to)
 {
 	struct binder_proc *proc = thread->proc;
 	struct binder_proc *target_proc = t->to_proc;
-	int target_fd;
+	struct binder_txn_fd_fixup *fixup;
 	struct file *file;
-	int ret;
+	int ret = 0;
 	bool target_allows_fd;
+	int fd = *fdp;
 
 	if (in_reply_to)
 		target_allows_fd = !!(in_reply_to->flags & TF_ACCEPT_FDS);
@@ -2485,19 +2470,24 @@ static int binder_translate_fd(int fd,
 		goto err_security;
 	}
 
-	target_fd = task_get_unused_fd_flags(target_proc, O_CLOEXEC);
-	if (target_fd < 0) {
+	/*
+	 * Add fixup record for this transaction. The allocation
+	 * of the fd in the target needs to be done from a
+	 * target thread.
+	 */
+	fixup = kzalloc(sizeof(*fixup), GFP_KERNEL);
+	if (!fixup) {
 		ret = -ENOMEM;
-		goto err_get_unused_fd;
+		goto err_alloc;
 	}
-	task_fd_install(target_proc, target_fd, file);
-	trace_binder_transaction_fd(t, fd, target_fd);
-	binder_debug(BINDER_DEBUG_TRANSACTION, "        fd %d -> %d\n",
-		     fd, target_fd);
+	fixup->file = file;
+	fixup->offset = (uintptr_t)fdp - (uintptr_t)t->buffer->data;
+	trace_binder_transaction_fd_send(t, fd, fixup->offset);
+	list_add_tail(&fixup->fixup_entry, &t->fd_fixups);
 
-	return target_fd;
+	return ret;
 
-err_get_unused_fd:
+err_alloc:
 err_security:
 	fput(file);
 err_fget:
@@ -2511,8 +2501,7 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda,
 				     struct binder_thread *thread,
 				     struct binder_transaction *in_reply_to)
 {
-	binder_size_t fdi, fd_buf_size, num_installed_fds;
-	int target_fd;
+	binder_size_t fdi, fd_buf_size;
 	uintptr_t parent_buffer;
 	u32 *fd_array;
 	struct binder_proc *proc = thread->proc;
@@ -2544,23 +2533,12 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda,
 		return -EINVAL;
 	}
 	for (fdi = 0; fdi < fda->num_fds; fdi++) {
-		target_fd = binder_translate_fd(fd_array[fdi], t, thread,
+		int ret = binder_translate_fd(&fd_array[fdi], t, thread,
 						in_reply_to);
-		if (target_fd < 0)
-			goto err_translate_fd_failed;
-		fd_array[fdi] = target_fd;
+		if (ret < 0)
+			return ret;
 	}
 	return 0;
-
-err_translate_fd_failed:
-	/*
-	 * Failed to allocate fd or security error, free fds
-	 * installed so far.
-	 */
-	num_installed_fds = fdi;
-	for (fdi = 0; fdi < num_installed_fds; fdi++)
-		task_close_fd(target_proc, fd_array[fdi]);
-	return target_fd;
 }
 
 static int binder_fixup_parent(struct binder_transaction *t,
@@ -2723,6 +2701,7 @@ static void binder_transaction(struct binder_proc *proc,
 {
 	int ret;
 	struct binder_transaction *t;
+	struct binder_work *w;
 	struct binder_work *tcomplete;
 	binder_size_t *offp, *off_end, *off_start;
 	binder_size_t off_min;
@@ -2864,6 +2843,29 @@ static void binder_transaction(struct binder_proc *proc,
 			goto err_invalid_target_handle;
 		}
 		binder_inner_proc_lock(proc);
+
+		w = list_first_entry_or_null(&thread->todo,
+					     struct binder_work, entry);
+		if (!(tr->flags & TF_ONE_WAY) && w &&
+		    w->type == BINDER_WORK_TRANSACTION) {
+			/*
+			 * Do not allow new outgoing transaction from a
+			 * thread that has a transaction at the head of
+			 * its todo list. Only need to check the head
+			 * because binder_select_thread_ilocked picks a
+			 * thread from proc->waiting_threads to enqueue
+			 * the transaction, and nothing is queued to the
+			 * todo list while the thread is on waiting_threads.
+			 */
+			binder_user_error("%d:%d new transaction not allowed when there is a transaction on thread todo\n",
+					  proc->pid, thread->pid);
+			binder_inner_proc_unlock(proc);
+			return_error = BR_FAILED_REPLY;
+			return_error_param = -EPROTO;
+			return_error_line = __LINE__;
+			goto err_bad_todo_list;
+		}
+
 		if (!(tr->flags & TF_ONE_WAY) && thread->transaction_stack) {
 			struct binder_transaction *tmp;
 
@@ -2911,6 +2913,7 @@ static void binder_transaction(struct binder_proc *proc,
 		return_error_line = __LINE__;
 		goto err_alloc_t_failed;
 	}
+	INIT_LIST_HEAD(&t->fd_fixups);
 	binder_stats_created(BINDER_STAT_TRANSACTION);
 	spin_lock_init(&t->lock);
 
@@ -3066,17 +3069,16 @@ static void binder_transaction(struct binder_proc *proc,
 
 		case BINDER_TYPE_FD: {
 			struct binder_fd_object *fp = to_binder_fd_object(hdr);
-			int target_fd = binder_translate_fd(fp->fd, t, thread,
-							    in_reply_to);
+			int ret = binder_translate_fd(&fp->fd, t, thread,
+						      in_reply_to);
 
-			if (target_fd < 0) {
+			if (ret < 0) {
 				return_error = BR_FAILED_REPLY;
-				return_error_param = target_fd;
+				return_error_param = ret;
 				return_error_line = __LINE__;
 				goto err_translate_failed;
 			}
 			fp->pad_binder = 0;
-			fp->fd = target_fd;
 		} break;
 		case BINDER_TYPE_FDA: {
 			struct binder_fd_array_object *fda =
@@ -3233,6 +3235,7 @@ err_bad_object_type:
 err_bad_offset:
 err_bad_parent:
 err_copy_data_failed:
+	binder_free_txn_fixups(t);
 	trace_binder_transaction_failed_buffer_release(t->buffer);
 	binder_transaction_buffer_release(target_proc, t->buffer, offp);
 	if (target_node)
@@ -3247,6 +3250,7 @@ err_alloc_tcomplete_failed:
 	kfree(t);
 	binder_stats_deleted(BINDER_STAT_TRANSACTION);
 err_alloc_t_failed:
+err_bad_todo_list:
 err_bad_call_stack:
 err_empty_call_stack:
 err_dead_binder:
@@ -3294,6 +3298,47 @@ err_invalid_target_handle:
 	}
 }
 
+/**
+ * binder_free_buf() - free the specified buffer
+ * @proc:	binder proc that owns buffer
+ * @buffer:	buffer to be freed
+ *
+ * If buffer for an async transaction, enqueue the next async
+ * transaction from the node.
+ *
+ * Cleanup buffer and free it.
+ */
+static void
+binder_free_buf(struct binder_proc *proc, struct binder_buffer *buffer)
+{
+	if (buffer->transaction) {
+		buffer->transaction->buffer = NULL;
+		buffer->transaction = NULL;
+	}
+	if (buffer->async_transaction && buffer->target_node) {
+		struct binder_node *buf_node;
+		struct binder_work *w;
+
+		buf_node = buffer->target_node;
+		binder_node_inner_lock(buf_node);
+		BUG_ON(!buf_node->has_async_transaction);
+		BUG_ON(buf_node->proc != proc);
+		w = binder_dequeue_work_head_ilocked(
+				&buf_node->async_todo);
+		if (!w) {
+			buf_node->has_async_transaction = false;
+		} else {
+			binder_enqueue_work_ilocked(
+					w, &proc->todo);
+			binder_wakeup_proc_ilocked(proc);
+		}
+		binder_node_inner_unlock(buf_node);
+	}
+	trace_binder_transaction_buffer_release(buffer);
+	binder_transaction_buffer_release(proc, buffer, NULL);
+	binder_alloc_free_buf(&proc->alloc, buffer);
+}
+
 static int binder_thread_write(struct binder_proc *proc,
 			struct binder_thread *thread,
 			binder_uintptr_t binder_buffer, size_t size,
@@ -3480,33 +3525,7 @@ static int binder_thread_write(struct binder_proc *proc,
 				     proc->pid, thread->pid, (u64)data_ptr,
 				     buffer->debug_id,
 				     buffer->transaction ? "active" : "finished");
-
-			if (buffer->transaction) {
-				buffer->transaction->buffer = NULL;
-				buffer->transaction = NULL;
-			}
-			if (buffer->async_transaction && buffer->target_node) {
-				struct binder_node *buf_node;
-				struct binder_work *w;
-
-				buf_node = buffer->target_node;
-				binder_node_inner_lock(buf_node);
-				BUG_ON(!buf_node->has_async_transaction);
-				BUG_ON(buf_node->proc != proc);
-				w = binder_dequeue_work_head_ilocked(
-						&buf_node->async_todo);
-				if (!w) {
-					buf_node->has_async_transaction = false;
-				} else {
-					binder_enqueue_work_ilocked(
-							w, &proc->todo);
-					binder_wakeup_proc_ilocked(proc);
-				}
-				binder_node_inner_unlock(buf_node);
-			}
-			trace_binder_transaction_buffer_release(buffer);
-			binder_transaction_buffer_release(proc, buffer, NULL);
-			binder_alloc_free_buf(&proc->alloc, buffer);
+			binder_free_buf(proc, buffer);
 			break;
 		}
 
@@ -3829,6 +3848,76 @@ static int binder_wait_for_work(struct binder_thread *thread,
 	return ret;
 }
 
+/**
+ * binder_apply_fd_fixups() - finish fd translation
+ * @t:	binder transaction with list of fd fixups
+ *
+ * Now that we are in the context of the transaction target
+ * process, we can allocate and install fds. Process the
+ * list of fds to translate and fixup the buffer with the
+ * new fds.
+ *
+ * If we fail to allocate an fd, then free the resources by
+ * fput'ing files that have not been processed and ksys_close'ing
+ * any fds that have already been allocated.
+ */
+static int binder_apply_fd_fixups(struct binder_transaction *t)
+{
+	struct binder_txn_fd_fixup *fixup, *tmp;
+	int ret = 0;
+
+	list_for_each_entry(fixup, &t->fd_fixups, fixup_entry) {
+		int fd = get_unused_fd_flags(O_CLOEXEC);
+		u32 *fdp;
+
+		if (fd < 0) {
+			binder_debug(BINDER_DEBUG_TRANSACTION,
+				     "failed fd fixup txn %d fd %d\n",
+				     t->debug_id, fd);
+			ret = -ENOMEM;
+			break;
+		}
+		binder_debug(BINDER_DEBUG_TRANSACTION,
+			     "fd fixup txn %d fd %d\n",
+			     t->debug_id, fd);
+		trace_binder_transaction_fd_recv(t, fd, fixup->offset);
+		fd_install(fd, fixup->file);
+		fixup->file = NULL;
+		fdp = (u32 *)(t->buffer->data + fixup->offset);
+		/*
+		 * This store can cause problems for CPUs with a
+		 * VIVT cache (eg ARMv5) since the cache cannot
+		 * detect virtual aliases to the same physical cacheline.
+		 * To support VIVT, this address and the user-space VA
+		 * would both need to be flushed. Since this kernel
+		 * VA is not constructed via page_to_virt(), we can't
+		 * use flush_dcache_page() on it, so we'd have to use
+		 * an internal function. If devices with VIVT ever
+		 * need to run Android, we'll either need to go back
+		 * to patching the translated fd from the sender side
+		 * (using the non-standard kernel functions), or rework
+		 * how the kernel uses the buffer to use page_to_virt()
+		 * addresses instead of allocating in our own vm area.
+		 *
+		 * For now, we disable compilation if CONFIG_CPU_CACHE_VIVT.
+		 */
+		*fdp = fd;
+	}
+	list_for_each_entry_safe(fixup, tmp, &t->fd_fixups, fixup_entry) {
+		if (fixup->file) {
+			fput(fixup->file);
+		} else if (ret) {
+			u32 *fdp = (u32 *)(t->buffer->data + fixup->offset);
+
+			ksys_close(*fdp);
+		}
+		list_del(&fixup->fixup_entry);
+		kfree(fixup);
+	}
+
+	return ret;
+}
+
 static int binder_thread_read(struct binder_proc *proc,
 			      struct binder_thread *thread,
 			      binder_uintptr_t binder_buffer, size_t size,
@@ -4110,6 +4199,34 @@ retry:
 			tr.sender_pid = 0;
 		}
 
+		ret = binder_apply_fd_fixups(t);
+		if (ret) {
+			struct binder_buffer *buffer = t->buffer;
+			bool oneway = !!(t->flags & TF_ONE_WAY);
+			int tid = t->debug_id;
+
+			if (t_from)
+				binder_thread_dec_tmpref(t_from);
+			buffer->transaction = NULL;
+			binder_cleanup_transaction(t, "fd fixups failed",
+						   BR_FAILED_REPLY);
+			binder_free_buf(proc, buffer);
+			binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
+				     "%d:%d %stransaction %d fd fixups failed %d/%d, line %d\n",
+				     proc->pid, thread->pid,
+				     oneway ? "async " :
+					(cmd == BR_REPLY ? "reply " : ""),
+				     tid, BR_FAILED_REPLY, ret, __LINE__);
+			if (cmd == BR_REPLY) {
+				cmd = BR_FAILED_REPLY;
+				if (put_user(cmd, (uint32_t __user *)ptr))
+					return -EFAULT;
+				ptr += sizeof(uint32_t);
+				binder_stat_br(proc, thread, cmd);
+				break;
+			}
+			continue;
+		}
 		tr.data_size = t->buffer->data_size;
 		tr.offsets_size = t->buffer->offsets_size;
 		tr.data.ptr.buffer = (binder_uintptr_t)
@@ -4544,6 +4661,42 @@ out:
 	return ret;
 }
 
+static int binder_ioctl_get_node_info_for_ref(struct binder_proc *proc,
+		struct binder_node_info_for_ref *info)
+{
+	struct binder_node *node;
+	struct binder_context *context = proc->context;
+	__u32 handle = info->handle;
+
+	if (info->strong_count || info->weak_count || info->reserved1 ||
+	    info->reserved2 || info->reserved3) {
+		binder_user_error("%d BINDER_GET_NODE_INFO_FOR_REF: only handle may be non-zero.",
+				  proc->pid);
+		return -EINVAL;
+	}
+
+	/* This ioctl may only be used by the context manager */
+	mutex_lock(&context->context_mgr_node_lock);
+	if (!context->binder_context_mgr_node ||
+		context->binder_context_mgr_node->proc != proc) {
+		mutex_unlock(&context->context_mgr_node_lock);
+		return -EPERM;
+	}
+	mutex_unlock(&context->context_mgr_node_lock);
+
+	node = binder_get_node_from_ref(proc, handle, true, NULL);
+	if (!node)
+		return -EINVAL;
+
+	info->strong_count = node->local_strong_refs +
+		node->internal_strong_refs;
+	info->weak_count = node->local_weak_refs;
+
+	binder_put_node(node);
+
+	return 0;
+}
+
 static int binder_ioctl_get_node_debug_info(struct binder_proc *proc,
 				struct binder_node_debug_info *info)
 {
@@ -4638,6 +4791,25 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		}
 		break;
 	}
+	case BINDER_GET_NODE_INFO_FOR_REF: {
+		struct binder_node_info_for_ref info;
+
+		if (copy_from_user(&info, ubuf, sizeof(info))) {
+			ret = -EFAULT;
+			goto err;
+		}
+
+		ret = binder_ioctl_get_node_info_for_ref(proc, &info);
+		if (ret < 0)
+			goto err;
+
+		if (copy_to_user(ubuf, &info, sizeof(info))) {
+			ret = -EFAULT;
+			goto err;
+		}
+
+		break;
+	}
 	case BINDER_GET_NODE_DEBUG_INFO: {
 		struct binder_node_debug_info info;
 
@@ -4693,7 +4865,6 @@ static void binder_vma_close(struct vm_area_struct *vma)
 		     (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags,
 		     (unsigned long)pgprot_val(vma->vm_page_prot));
 	binder_alloc_vma_close(&proc->alloc);
-	binder_defer_work(proc, BINDER_DEFERRED_PUT_FILES);
 }
 
 static vm_fault_t binder_vm_fault(struct vm_fault *vmf)
@@ -4739,9 +4910,6 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
 	ret = binder_alloc_mmap_handler(&proc->alloc, vma);
 	if (ret)
 		return ret;
-	mutex_lock(&proc->files_lock);
-	proc->files = get_files_struct(current);
-	mutex_unlock(&proc->files_lock);
 	return 0;
 
 err_bad_arg:
@@ -4765,7 +4933,6 @@ static int binder_open(struct inode *nodp, struct file *filp)
 	spin_lock_init(&proc->outer_lock);
 	get_task_struct(current->group_leader);
 	proc->tsk = current->group_leader;
-	mutex_init(&proc->files_lock);
 	INIT_LIST_HEAD(&proc->todo);
 	proc->default_priority = task_nice(current);
 	binder_dev = container_of(filp->private_data, struct binder_device,
@@ -4915,8 +5082,6 @@ static void binder_deferred_release(struct binder_proc *proc)
 	struct rb_node *n;
 	int threads, nodes, incoming_refs, outgoing_refs, active_transactions;
 
-	BUG_ON(proc->files);
-
 	mutex_lock(&binder_procs_lock);
 	hlist_del(&proc->proc_node);
 	mutex_unlock(&binder_procs_lock);
@@ -4998,7 +5163,6 @@ static void binder_deferred_release(struct binder_proc *proc)
 static void binder_deferred_func(struct work_struct *work)
 {
 	struct binder_proc *proc;
-	struct files_struct *files;
 
 	int defer;
 
@@ -5016,23 +5180,11 @@ static void binder_deferred_func(struct work_struct *work)
 		}
 		mutex_unlock(&binder_deferred_lock);
 
-		files = NULL;
-		if (defer & BINDER_DEFERRED_PUT_FILES) {
-			mutex_lock(&proc->files_lock);
-			files = proc->files;
-			if (files)
-				proc->files = NULL;
-			mutex_unlock(&proc->files_lock);
-		}
-
 		if (defer & BINDER_DEFERRED_FLUSH)
 			binder_deferred_flush(proc);
 
 		if (defer & BINDER_DEFERRED_RELEASE)
 			binder_deferred_release(proc); /* frees proc */
-
-		if (files)
-			put_files_struct(files);
 	} while (proc);
 }
 static DECLARE_WORK(binder_deferred_work, binder_deferred_func);
@@ -5667,12 +5819,11 @@ static int __init binder_init(void)
 	 * Copy the module_parameter string, because we don't want to
 	 * tokenize it in-place.
 	 */
-	device_names = kzalloc(strlen(binder_devices_param) + 1, GFP_KERNEL);
+	device_names = kstrdup(binder_devices_param, GFP_KERNEL);
 	if (!device_names) {
 		ret = -ENOMEM;
 		goto err_alloc_device_names_failed;
 	}
-	strcpy(device_names, binder_devices_param);
 
 	device_tmp = device_names;
 	while ((device_name = strsep(&device_tmp, ","))) {
diff --git a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h
index 588eb3ec3507..14de7ac57a34 100644
--- a/drivers/android/binder_trace.h
+++ b/drivers/android/binder_trace.h
@@ -223,22 +223,40 @@ TRACE_EVENT(binder_transaction_ref_to_ref,
 		  __entry->dest_ref_debug_id, __entry->dest_ref_desc)
 );
 
-TRACE_EVENT(binder_transaction_fd,
-	TP_PROTO(struct binder_transaction *t, int src_fd, int dest_fd),
-	TP_ARGS(t, src_fd, dest_fd),
+TRACE_EVENT(binder_transaction_fd_send,
+	TP_PROTO(struct binder_transaction *t, int fd, size_t offset),
+	TP_ARGS(t, fd, offset),
 
 	TP_STRUCT__entry(
 		__field(int, debug_id)
-		__field(int, src_fd)
-		__field(int, dest_fd)
+		__field(int, fd)
+		__field(size_t, offset)
+	),
+	TP_fast_assign(
+		__entry->debug_id = t->debug_id;
+		__entry->fd = fd;
+		__entry->offset = offset;
+	),
+	TP_printk("transaction=%d src_fd=%d offset=%zu",
+		  __entry->debug_id, __entry->fd, __entry->offset)
+);
+
+TRACE_EVENT(binder_transaction_fd_recv,
+	TP_PROTO(struct binder_transaction *t, int fd, size_t offset),
+	TP_ARGS(t, fd, offset),
+
+	TP_STRUCT__entry(
+		__field(int, debug_id)
+		__field(int, fd)
+		__field(size_t, offset)
 	),
 	TP_fast_assign(
 		__entry->debug_id = t->debug_id;
-		__entry->src_fd = src_fd;
-		__entry->dest_fd = dest_fd;
+		__entry->fd = fd;
+		__entry->offset = offset;
 	),
-	TP_printk("transaction=%d src_fd=%d ==> dest_fd=%d",
-		  __entry->debug_id, __entry->src_fd, __entry->dest_fd)
+	TP_printk("transaction=%d dest_fd=%d offset=%zu",
+		  __entry->debug_id, __entry->fd, __entry->offset)
 );
 
 DECLARE_EVENT_CLASS(binder_buffer_class,
diff --git a/drivers/base/component.c b/drivers/base/component.c
index 8946dfee4768..e8d676fad0c9 100644
--- a/drivers/base/component.c
+++ b/drivers/base/component.c
@@ -536,9 +536,9 @@ int component_bind_all(struct device *master_dev, void *data)
 		}
 
 	if (ret != 0) {
-		for (; i--; )
-			if (!master->match->compare[i].duplicate) {
-				c = master->match->compare[i].component;
+		for (; i > 0; i--)
+			if (!master->match->compare[i - 1].duplicate) {
+				c = master->match->compare[i - 1].component;
 				component_unbind(c, master, data);
 			}
 	}
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index f98a097e73f2..4aaf00d2098b 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -11,6 +11,8 @@
 #include <linux/slab.h>
 #include <linux/percpu.h>
 
+#include <asm/sections.h>
+
 #include "base.h"
 
 struct devres_node {
@@ -823,6 +825,28 @@ char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp)
 EXPORT_SYMBOL_GPL(devm_kstrdup);
 
 /**
+ * devm_kstrdup_const - resource managed conditional string duplication
+ * @dev: device for which to duplicate the string
+ * @s: the string to duplicate
+ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ *
+ * Strings allocated by devm_kstrdup_const will be automatically freed when
+ * the associated device is detached.
+ *
+ * RETURNS:
+ * Source string if it is in .rodata section otherwise it falls back to
+ * devm_kstrdup.
+ */
+const char *devm_kstrdup_const(struct device *dev, const char *s, gfp_t gfp)
+{
+	if (is_kernel_rodata((unsigned long)s))
+		return s;
+
+	return devm_kstrdup(dev, s, gfp);
+}
+EXPORT_SYMBOL_GPL(devm_kstrdup_const);
+
+/**
  * devm_kvasprintf - Allocate resource managed space and format a string
  *		     into that.
  * @dev: Device to allocate memory for
@@ -885,11 +909,19 @@ EXPORT_SYMBOL_GPL(devm_kasprintf);
  *
  * Free memory allocated with devm_kmalloc().
  */
-void devm_kfree(struct device *dev, void *p)
+void devm_kfree(struct device *dev, const void *p)
 {
 	int rc;
 
-	rc = devres_destroy(dev, devm_kmalloc_release, devm_kmalloc_match, p);
+	/*
+	 * Special case: pointer to a string in .rodata returned by
+	 * devm_kstrdup_const().
+	 */
+	if (unlikely(is_kernel_rodata((unsigned long)p)))
+		return;
+
+	rc = devres_destroy(dev, devm_kmalloc_release,
+			    devm_kmalloc_match, (void *)p);
 	WARN_ON(rc);
 }
 EXPORT_SYMBOL_GPL(devm_kfree);
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index f7768077e817..b93fc862d365 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -252,7 +252,7 @@ static int dev_rmdir(const char *name)
 
 static int delete_path(const char *nodepath)
 {
-	const char *path;
+	char *path;
 	int err = 0;
 
 	path = kstrdup(nodepath, GFP_KERNEL);
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 60d6cc618f1c..f39a920496fb 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -321,11 +321,12 @@ void *platform_msi_get_host_data(struct irq_domain *domain)
  * Returns an irqdomain for @nvec interrupts
  */
 struct irq_domain *
-platform_msi_create_device_domain(struct device *dev,
-				  unsigned int nvec,
-				  irq_write_msi_msg_t write_msi_msg,
-				  const struct irq_domain_ops *ops,
-				  void *host_data)
+__platform_msi_create_device_domain(struct device *dev,
+				    unsigned int nvec,
+				    bool is_tree,
+				    irq_write_msi_msg_t write_msi_msg,
+				    const struct irq_domain_ops *ops,
+				    void *host_data)
 {
 	struct platform_msi_priv_data *data;
 	struct irq_domain *domain;
@@ -336,7 +337,8 @@ platform_msi_create_device_domain(struct device *dev,
 		return NULL;
 
 	data->host_data = host_data;
-	domain = irq_domain_create_hierarchy(dev->msi_domain, 0, nvec,
+	domain = irq_domain_create_hierarchy(dev->msi_domain, 0,
+					     is_tree ? 0 : nvec,
 					     dev->fwnode, ops, data);
 	if (!domain)
 		goto free_priv;
diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c
index 7033a4beda66..254ee7d54e91 100644
--- a/drivers/block/cryptoloop.c
+++ b/drivers/block/cryptoloop.c
@@ -45,7 +45,7 @@ cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info)
 	char cms[LO_NAME_SIZE];			/* cipher-mode string */
 	char *mode;
 	char *cmsp = cms;			/* c-m string pointer */
-	struct crypto_skcipher *tfm;
+	struct crypto_sync_skcipher *tfm;
 
 	/* encryption breaks for non sector aligned offsets */
 
@@ -80,13 +80,13 @@ cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info)
 	*cmsp++ = ')';
 	*cmsp = 0;
 
-	tfm = crypto_alloc_skcipher(cms, 0, CRYPTO_ALG_ASYNC);
+	tfm = crypto_alloc_sync_skcipher(cms, 0, 0);
 	if (IS_ERR(tfm))
 		return PTR_ERR(tfm);
 
-	err = crypto_skcipher_setkey(tfm, info->lo_encrypt_key,
-				     info->lo_encrypt_key_size);
-	
+	err = crypto_sync_skcipher_setkey(tfm, info->lo_encrypt_key,
+					  info->lo_encrypt_key_size);
+
 	if (err != 0)
 		goto out_free_tfm;
 
@@ -94,7 +94,7 @@ cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info)
 	return 0;
 
  out_free_tfm:
-	crypto_free_skcipher(tfm);
+	crypto_free_sync_skcipher(tfm);
 
  out:
 	return err;
@@ -109,8 +109,8 @@ cryptoloop_transfer(struct loop_device *lo, int cmd,
 		    struct page *loop_page, unsigned loop_off,
 		    int size, sector_t IV)
 {
-	struct crypto_skcipher *tfm = lo->key_data;
-	SKCIPHER_REQUEST_ON_STACK(req, tfm);
+	struct crypto_sync_skcipher *tfm = lo->key_data;
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
 	struct scatterlist sg_out;
 	struct scatterlist sg_in;
 
@@ -119,7 +119,7 @@ cryptoloop_transfer(struct loop_device *lo, int cmd,
 	unsigned in_offs, out_offs;
 	int err;
 
-	skcipher_request_set_tfm(req, tfm);
+	skcipher_request_set_sync_tfm(req, tfm);
 	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
 				      NULL, NULL);
 
@@ -175,9 +175,9 @@ cryptoloop_ioctl(struct loop_device *lo, int cmd, unsigned long arg)
 static int
 cryptoloop_release(struct loop_device *lo)
 {
-	struct crypto_skcipher *tfm = lo->key_data;
+	struct crypto_sync_skcipher *tfm = lo->key_data;
 	if (tfm != NULL) {
-		crypto_free_skcipher(tfm);
+		crypto_free_sync_skcipher(tfm);
 		lo->key_data = NULL;
 		return 0;
 	}
diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
index 5d8266c6571f..f0404c6d1ff4 100644
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -127,6 +127,16 @@ static int fsl_mc_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
 	return 0;
 }
 
+static int fsl_mc_dma_configure(struct device *dev)
+{
+	struct device *dma_dev = dev;
+
+	while (dev_is_fsl_mc(dma_dev))
+		dma_dev = dma_dev->parent;
+
+	return of_dma_configure(dev, dma_dev->of_node, 0);
+}
+
 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
@@ -148,6 +158,7 @@ struct bus_type fsl_mc_bus_type = {
 	.name = "fsl-mc",
 	.match = fsl_mc_bus_match,
 	.uevent = fsl_mc_bus_uevent,
+	.dma_configure  = fsl_mc_dma_configure,
 	.dev_groups = fsl_mc_dev_groups,
 };
 EXPORT_SYMBOL_GPL(fsl_mc_bus_type);
@@ -188,6 +199,10 @@ struct device_type fsl_mc_bus_dprtc_type = {
 	.name = "fsl_mc_bus_dprtc"
 };
 
+struct device_type fsl_mc_bus_dpseci_type = {
+	.name = "fsl_mc_bus_dpseci"
+};
+
 static struct device_type *fsl_mc_get_device_type(const char *type)
 {
 	static const struct {
@@ -203,6 +218,7 @@ static struct device_type *fsl_mc_get_device_type(const char *type)
 		{ &fsl_mc_bus_dpmcp_type, "dpmcp" },
 		{ &fsl_mc_bus_dpmac_type, "dpmac" },
 		{ &fsl_mc_bus_dprtc_type, "dprtc" },
+		{ &fsl_mc_bus_dpseci_type, "dpseci" },
 		{ NULL, NULL }
 	};
 	int i;
@@ -616,6 +632,7 @@ int fsl_mc_device_add(struct fsl_mc_obj_desc *obj_desc,
 		mc_dev->icid = parent_mc_dev->icid;
 		mc_dev->dma_mask = FSL_MC_DEFAULT_DMA_MASK;
 		mc_dev->dev.dma_mask = &mc_dev->dma_mask;
+		mc_dev->dev.coherent_dma_mask = mc_dev->dma_mask;
 		dev_set_msi_domain(&mc_dev->dev,
 				   dev_get_msi_domain(&parent_mc_dev->dev));
 	}
@@ -633,10 +650,6 @@ int fsl_mc_device_add(struct fsl_mc_obj_desc *obj_desc,
 			goto error_cleanup_dev;
 	}
 
-	/* Objects are coherent, unless 'no shareability' flag set. */
-	if (!(obj_desc->flags & FSL_MC_OBJ_FLAG_NO_MEM_SHAREABILITY))
-		arch_setup_dma_ops(&mc_dev->dev, 0, 0, NULL, true);
-
 	/*
 	 * The device-specific probe callback will get invoked by device_add()
 	 */
@@ -693,8 +706,8 @@ static int parse_mc_ranges(struct device *dev,
 	*ranges_start = of_get_property(mc_node, "ranges", &ranges_len);
 	if (!(*ranges_start) || !ranges_len) {
 		dev_warn(dev,
-			 "missing or empty ranges property for device tree node '%s'\n",
-			 mc_node->name);
+			 "missing or empty ranges property for device tree node '%pOFn'\n",
+			 mc_node);
 		return 0;
 	}
 
@@ -717,7 +730,7 @@ static int parse_mc_ranges(struct device *dev,
 
 	tuple_len = range_tuple_cell_count * sizeof(__be32);
 	if (ranges_len % tuple_len != 0) {
-		dev_err(dev, "malformed ranges property '%s'\n", mc_node->name);
+		dev_err(dev, "malformed ranges property '%pOFn'\n", mc_node);
 		return -EINVAL;
 	}
 
diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c
index 70db4d5638a6..5b2a11a88951 100644
--- a/drivers/bus/mvebu-mbus.c
+++ b/drivers/bus/mvebu-mbus.c
@@ -1229,7 +1229,7 @@ mbus_parse_ranges(struct device_node *node,
 	tuple_len = (*cell_count) * sizeof(__be32);
 
 	if (ranges_len % tuple_len) {
-		pr_warn("malformed ranges entry '%s'\n", node->name);
+		pr_warn("malformed ranges entry '%pOFn'\n", node);
 		return -EINVAL;
 	}
 	return 0;
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index aaf9e5afaad4..95be7228f327 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -44,10 +44,10 @@ static unsigned short default_quality; /* = 0; default to "off" */
 
 module_param(current_quality, ushort, 0644);
 MODULE_PARM_DESC(current_quality,
-		 "current hwrng entropy estimation per mill");
+		 "current hwrng entropy estimation per 1024 bits of input");
 module_param(default_quality, ushort, 0644);
 MODULE_PARM_DESC(default_quality,
-		 "default entropy content of hwrng per mill");
+		 "default entropy content of hwrng per 1024 bits of input");
 
 static void drop_current_rng(void);
 static int hwrng_init(struct hwrng *rng);
diff --git a/drivers/char/random.c b/drivers/char/random.c
index c75b6cdf0053..2eb70e76ed35 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -433,9 +433,9 @@ static int crng_init_cnt = 0;
 static unsigned long crng_global_init_time = 0;
 #define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE)
 static void _extract_crng(struct crng_state *crng,
-			  __u32 out[CHACHA20_BLOCK_WORDS]);
+			  __u8 out[CHACHA20_BLOCK_SIZE]);
 static void _crng_backtrack_protect(struct crng_state *crng,
-				    __u32 tmp[CHACHA20_BLOCK_WORDS], int used);
+				    __u8 tmp[CHACHA20_BLOCK_SIZE], int used);
 static void process_random_ready_list(void);
 static void _get_random_bytes(void *buf, int nbytes);
 
@@ -926,7 +926,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 	unsigned long	flags;
 	int		i, num;
 	union {
-		__u32	block[CHACHA20_BLOCK_WORDS];
+		__u8	block[CHACHA20_BLOCK_SIZE];
 		__u32	key[8];
 	} buf;
 
@@ -973,7 +973,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 }
 
 static void _extract_crng(struct crng_state *crng,
-			  __u32 out[CHACHA20_BLOCK_WORDS])
+			  __u8 out[CHACHA20_BLOCK_SIZE])
 {
 	unsigned long v, flags;
 
@@ -990,7 +990,7 @@ static void _extract_crng(struct crng_state *crng,
 	spin_unlock_irqrestore(&crng->lock, flags);
 }
 
-static void extract_crng(__u32 out[CHACHA20_BLOCK_WORDS])
+static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
 {
 	struct crng_state *crng = NULL;
 
@@ -1008,7 +1008,7 @@ static void extract_crng(__u32 out[CHACHA20_BLOCK_WORDS])
  * enough) to mutate the CRNG key to provide backtracking protection.
  */
 static void _crng_backtrack_protect(struct crng_state *crng,
-				    __u32 tmp[CHACHA20_BLOCK_WORDS], int used)
+				    __u8 tmp[CHACHA20_BLOCK_SIZE], int used)
 {
 	unsigned long	flags;
 	__u32		*s, *d;
@@ -1020,14 +1020,14 @@ static void _crng_backtrack_protect(struct crng_state *crng,
 		used = 0;
 	}
 	spin_lock_irqsave(&crng->lock, flags);
-	s = &tmp[used / sizeof(__u32)];
+	s = (__u32 *) &tmp[used];
 	d = &crng->state[4];
 	for (i=0; i < 8; i++)
 		*d++ ^= *s++;
 	spin_unlock_irqrestore(&crng->lock, flags);
 }
 
-static void crng_backtrack_protect(__u32 tmp[CHACHA20_BLOCK_WORDS], int used)
+static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used)
 {
 	struct crng_state *crng = NULL;
 
@@ -1043,7 +1043,7 @@ static void crng_backtrack_protect(__u32 tmp[CHACHA20_BLOCK_WORDS], int used)
 static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
 {
 	ssize_t ret = 0, i = CHACHA20_BLOCK_SIZE;
-	__u32 tmp[CHACHA20_BLOCK_WORDS];
+	__u8 tmp[CHACHA20_BLOCK_SIZE] __aligned(4);
 	int large_request = (nbytes > 256);
 
 	while (nbytes) {
@@ -1622,7 +1622,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller,
  */
 static void _get_random_bytes(void *buf, int nbytes)
 {
-	__u32 tmp[CHACHA20_BLOCK_WORDS];
+	__u8 tmp[CHACHA20_BLOCK_SIZE] __aligned(4);
 
 	trace_get_random_bytes(nbytes, _RET_IP_);
 
@@ -2248,7 +2248,7 @@ u64 get_random_u64(void)
 	if (use_lock)
 		read_lock_irqsave(&batched_entropy_reset_lock, flags);
 	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
-		extract_crng((__u32 *)batch->entropy_u64);
+		extract_crng((u8 *)batch->entropy_u64);
 		batch->position = 0;
 	}
 	ret = batch->entropy_u64[batch->position++];
@@ -2278,7 +2278,7 @@ u32 get_random_u32(void)
 	if (use_lock)
 		read_lock_irqsave(&batched_entropy_reset_lock, flags);
 	if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
-		extract_crng(batch->entropy_u32);
+		extract_crng((u8 *)batch->entropy_u32);
 		batch->position = 0;
 	}
 	ret = batch->entropy_u32[batch->position++];
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index 18c81cbe4704..536e55d3919f 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -5,7 +5,7 @@
 menuconfig TCG_TPM
 	tristate "TPM Hardware Support"
 	depends on HAS_IOMEM
-	select SECURITYFS
+	imply SECURITYFS
 	select CRYPTO
 	select CRYPTO_HASH_INFO
 	---help---
diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c
index e4a04b2d3c32..99b5133a9d05 100644
--- a/drivers/char/tpm/tpm-dev-common.c
+++ b/drivers/char/tpm/tpm-dev-common.c
@@ -17,11 +17,36 @@
  * License.
  *
  */
+#include <linux/poll.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/workqueue.h>
 #include "tpm.h"
 #include "tpm-dev.h"
 
+static struct workqueue_struct *tpm_dev_wq;
+static DEFINE_MUTEX(tpm_dev_wq_lock);
+
+static void tpm_async_work(struct work_struct *work)
+{
+	struct file_priv *priv =
+			container_of(work, struct file_priv, async_work);
+	ssize_t ret;
+
+	mutex_lock(&priv->buffer_mutex);
+	priv->command_enqueued = false;
+	ret = tpm_transmit(priv->chip, priv->space, priv->data_buffer,
+			   sizeof(priv->data_buffer), 0);
+
+	tpm_put_ops(priv->chip);
+	if (ret > 0) {
+		priv->data_pending = ret;
+		mod_timer(&priv->user_read_timer, jiffies + (120 * HZ));
+	}
+	mutex_unlock(&priv->buffer_mutex);
+	wake_up_interruptible(&priv->async_wait);
+}
+
 static void user_reader_timeout(struct timer_list *t)
 {
 	struct file_priv *priv = from_timer(priv, t, user_read_timer);
@@ -29,27 +54,32 @@ static void user_reader_timeout(struct timer_list *t)
 	pr_warn("TPM user space timeout is deprecated (pid=%d)\n",
 		task_tgid_nr(current));
 
-	schedule_work(&priv->work);
+	schedule_work(&priv->timeout_work);
 }
 
-static void timeout_work(struct work_struct *work)
+static void tpm_timeout_work(struct work_struct *work)
 {
-	struct file_priv *priv = container_of(work, struct file_priv, work);
+	struct file_priv *priv = container_of(work, struct file_priv,
+					      timeout_work);
 
 	mutex_lock(&priv->buffer_mutex);
 	priv->data_pending = 0;
 	memset(priv->data_buffer, 0, sizeof(priv->data_buffer));
 	mutex_unlock(&priv->buffer_mutex);
+	wake_up_interruptible(&priv->async_wait);
 }
 
 void tpm_common_open(struct file *file, struct tpm_chip *chip,
-		     struct file_priv *priv)
+		     struct file_priv *priv, struct tpm_space *space)
 {
 	priv->chip = chip;
+	priv->space = space;
+
 	mutex_init(&priv->buffer_mutex);
 	timer_setup(&priv->user_read_timer, user_reader_timeout, 0);
-	INIT_WORK(&priv->work, timeout_work);
-
+	INIT_WORK(&priv->timeout_work, tpm_timeout_work);
+	INIT_WORK(&priv->async_work, tpm_async_work);
+	init_waitqueue_head(&priv->async_wait);
 	file->private_data = priv;
 }
 
@@ -61,15 +91,17 @@ ssize_t tpm_common_read(struct file *file, char __user *buf,
 	int rc;
 
 	del_singleshot_timer_sync(&priv->user_read_timer);
-	flush_work(&priv->work);
+	flush_work(&priv->timeout_work);
 	mutex_lock(&priv->buffer_mutex);
 
 	if (priv->data_pending) {
 		ret_size = min_t(ssize_t, size, priv->data_pending);
-		rc = copy_to_user(buf, priv->data_buffer, ret_size);
-		memset(priv->data_buffer, 0, priv->data_pending);
-		if (rc)
-			ret_size = -EFAULT;
+		if (ret_size > 0) {
+			rc = copy_to_user(buf, priv->data_buffer, ret_size);
+			memset(priv->data_buffer, 0, priv->data_pending);
+			if (rc)
+				ret_size = -EFAULT;
+		}
 
 		priv->data_pending = 0;
 	}
@@ -79,13 +111,12 @@ ssize_t tpm_common_read(struct file *file, char __user *buf,
 }
 
 ssize_t tpm_common_write(struct file *file, const char __user *buf,
-			 size_t size, loff_t *off, struct tpm_space *space)
+			 size_t size, loff_t *off)
 {
 	struct file_priv *priv = file->private_data;
-	size_t in_size = size;
-	ssize_t out_size;
+	int ret = 0;
 
-	if (in_size > TPM_BUFSIZE)
+	if (size > TPM_BUFSIZE)
 		return -E2BIG;
 
 	mutex_lock(&priv->buffer_mutex);
@@ -94,21 +125,20 @@ ssize_t tpm_common_write(struct file *file, const char __user *buf,
 	 * tpm_read or a user_read_timer timeout. This also prevents split
 	 * buffered writes from blocking here.
 	 */
-	if (priv->data_pending != 0) {
-		mutex_unlock(&priv->buffer_mutex);
-		return -EBUSY;
+	if (priv->data_pending != 0 || priv->command_enqueued) {
+		ret = -EBUSY;
+		goto out;
 	}
 
-	if (copy_from_user
-	    (priv->data_buffer, (void __user *) buf, in_size)) {
-		mutex_unlock(&priv->buffer_mutex);
-		return -EFAULT;
+	if (copy_from_user(priv->data_buffer, buf, size)) {
+		ret = -EFAULT;
+		goto out;
 	}
 
-	if (in_size < 6 ||
-	    in_size < be32_to_cpu(*((__be32 *) (priv->data_buffer + 2)))) {
-		mutex_unlock(&priv->buffer_mutex);
-		return -EINVAL;
+	if (size < 6 ||
+	    size < be32_to_cpu(*((__be32 *)(priv->data_buffer + 2)))) {
+		ret = -EINVAL;
+		goto out;
 	}
 
 	/* atomic tpm command send and result receive. We only hold the ops
@@ -116,25 +146,50 @@ ssize_t tpm_common_write(struct file *file, const char __user *buf,
 	 * the char dev is held open.
 	 */
 	if (tpm_try_get_ops(priv->chip)) {
-		mutex_unlock(&priv->buffer_mutex);
-		return -EPIPE;
+		ret = -EPIPE;
+		goto out;
 	}
-	out_size = tpm_transmit(priv->chip, space, priv->data_buffer,
-				sizeof(priv->data_buffer), 0);
 
-	tpm_put_ops(priv->chip);
-	if (out_size < 0) {
+	/*
+	 * If in nonblocking mode schedule an async job to send
+	 * the command return the size.
+	 * In case of error the err code will be returned in
+	 * the subsequent read call.
+	 */
+	if (file->f_flags & O_NONBLOCK) {
+		priv->command_enqueued = true;
+		queue_work(tpm_dev_wq, &priv->async_work);
 		mutex_unlock(&priv->buffer_mutex);
-		return out_size;
+		return size;
 	}
 
-	priv->data_pending = out_size;
+	ret = tpm_transmit(priv->chip, priv->space, priv->data_buffer,
+			   sizeof(priv->data_buffer), 0);
+	tpm_put_ops(priv->chip);
+
+	if (ret > 0) {
+		priv->data_pending = ret;
+		mod_timer(&priv->user_read_timer, jiffies + (120 * HZ));
+		ret = size;
+	}
+out:
 	mutex_unlock(&priv->buffer_mutex);
+	return ret;
+}
+
+__poll_t tpm_common_poll(struct file *file, poll_table *wait)
+{
+	struct file_priv *priv = file->private_data;
+	__poll_t mask = 0;
+
+	poll_wait(file, &priv->async_wait, wait);
 
-	/* Set a timeout by which the reader must come claim the result */
-	mod_timer(&priv->user_read_timer, jiffies + (120 * HZ));
+	if (priv->data_pending)
+		mask = EPOLLIN | EPOLLRDNORM;
+	else
+		mask = EPOLLOUT | EPOLLWRNORM;
 
-	return in_size;
+	return mask;
 }
 
 /*
@@ -142,8 +197,24 @@ ssize_t tpm_common_write(struct file *file, const char __user *buf,
  */
 void tpm_common_release(struct file *file, struct file_priv *priv)
 {
+	flush_work(&priv->async_work);
 	del_singleshot_timer_sync(&priv->user_read_timer);
-	flush_work(&priv->work);
+	flush_work(&priv->timeout_work);
 	file->private_data = NULL;
 	priv->data_pending = 0;
 }
+
+int __init tpm_dev_common_init(void)
+{
+	tpm_dev_wq = alloc_workqueue("tpm_dev_wq", WQ_MEM_RECLAIM, 0);
+
+	return !tpm_dev_wq ? -ENOMEM : 0;
+}
+
+void __exit tpm_dev_common_exit(void)
+{
+	if (tpm_dev_wq) {
+		destroy_workqueue(tpm_dev_wq);
+		tpm_dev_wq = NULL;
+	}
+}
diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c
index ebd74ab5abef..32f9738f1cb2 100644
--- a/drivers/char/tpm/tpm-dev.c
+++ b/drivers/char/tpm/tpm-dev.c
@@ -39,7 +39,7 @@ static int tpm_open(struct inode *inode, struct file *file)
 	if (priv == NULL)
 		goto out;
 
-	tpm_common_open(file, chip, priv);
+	tpm_common_open(file, chip, priv, NULL);
 
 	return 0;
 
@@ -48,12 +48,6 @@ static int tpm_open(struct inode *inode, struct file *file)
 	return -ENOMEM;
 }
 
-static ssize_t tpm_write(struct file *file, const char __user *buf,
-			 size_t size, loff_t *off)
-{
-	return tpm_common_write(file, buf, size, off, NULL);
-}
-
 /*
  * Called on file close
  */
@@ -73,6 +67,7 @@ const struct file_operations tpm_fops = {
 	.llseek = no_llseek,
 	.open = tpm_open,
 	.read = tpm_common_read,
-	.write = tpm_write,
+	.write = tpm_common_write,
+	.poll = tpm_common_poll,
 	.release = tpm_release,
 };
diff --git a/drivers/char/tpm/tpm-dev.h b/drivers/char/tpm/tpm-dev.h
index b24cfb4d3ee1..a126b575cb8c 100644
--- a/drivers/char/tpm/tpm-dev.h
+++ b/drivers/char/tpm/tpm-dev.h
@@ -2,27 +2,33 @@
 #ifndef _TPM_DEV_H
 #define _TPM_DEV_H
 
+#include <linux/poll.h>
 #include "tpm.h"
 
 struct file_priv {
 	struct tpm_chip *chip;
+	struct tpm_space *space;
 
-	/* Data passed to and from the tpm via the read/write calls */
-	size_t data_pending;
+	/* Holds the amount of data passed or an error code from async op */
+	ssize_t data_pending;
 	struct mutex buffer_mutex;
 
 	struct timer_list user_read_timer;      /* user needs to claim result */
-	struct work_struct work;
+	struct work_struct timeout_work;
+	struct work_struct async_work;
+	wait_queue_head_t async_wait;
+	bool command_enqueued;
 
 	u8 data_buffer[TPM_BUFSIZE];
 };
 
 void tpm_common_open(struct file *file, struct tpm_chip *chip,
-		     struct file_priv *priv);
+		     struct file_priv *priv, struct tpm_space *space);
 ssize_t tpm_common_read(struct file *file, char __user *buf,
 			size_t size, loff_t *off);
 ssize_t tpm_common_write(struct file *file, const char __user *buf,
-			 size_t size, loff_t *off, struct tpm_space *space);
-void tpm_common_release(struct file *file, struct file_priv *priv);
+			 size_t size, loff_t *off);
+__poll_t tpm_common_poll(struct file *file, poll_table *wait);
 
+void tpm_common_release(struct file *file, struct file_priv *priv);
 #endif
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 1a803b0cf980..129f640424b7 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -663,7 +663,8 @@ ssize_t tpm_transmit_cmd(struct tpm_chip *chip, struct tpm_space *space,
 		return len;
 
 	err = be32_to_cpu(header->return_code);
-	if (err != 0 && desc)
+	if (err != 0 && err != TPM_ERR_DISABLED && err != TPM_ERR_DEACTIVATED
+	    && desc)
 		dev_err(&chip->dev, "A TPM error (%d) occurred %s\n", err,
 			desc);
 	if (err)
@@ -1321,7 +1322,8 @@ int tpm_get_random(struct tpm_chip *chip, u8 *out, size_t max)
 		}
 
 		rlength = be32_to_cpu(tpm_cmd.header.out.length);
-		if (rlength < offsetof(struct tpm_getrandom_out, rng_data) +
+		if (rlength < TPM_HEADER_SIZE +
+			      offsetof(struct tpm_getrandom_out, rng_data) +
 			      recd) {
 			total = -EFAULT;
 			break;
@@ -1407,19 +1409,32 @@ static int __init tpm_init(void)
 	tpmrm_class = class_create(THIS_MODULE, "tpmrm");
 	if (IS_ERR(tpmrm_class)) {
 		pr_err("couldn't create tpmrm class\n");
-		class_destroy(tpm_class);
-		return PTR_ERR(tpmrm_class);
+		rc = PTR_ERR(tpmrm_class);
+		goto out_destroy_tpm_class;
 	}
 
 	rc = alloc_chrdev_region(&tpm_devt, 0, 2*TPM_NUM_DEVICES, "tpm");
 	if (rc < 0) {
 		pr_err("tpm: failed to allocate char dev region\n");
-		class_destroy(tpmrm_class);
-		class_destroy(tpm_class);
-		return rc;
+		goto out_destroy_tpmrm_class;
+	}
+
+	rc = tpm_dev_common_init();
+	if (rc) {
+		pr_err("tpm: failed to allocate char dev region\n");
+		goto out_unreg_chrdev;
 	}
 
 	return 0;
+
+out_unreg_chrdev:
+	unregister_chrdev_region(tpm_devt, 2 * TPM_NUM_DEVICES);
+out_destroy_tpmrm_class:
+	class_destroy(tpmrm_class);
+out_destroy_tpm_class:
+	class_destroy(tpm_class);
+
+	return rc;
 }
 
 static void __exit tpm_exit(void)
@@ -1428,6 +1443,7 @@ static void __exit tpm_exit(void)
 	class_destroy(tpm_class);
 	class_destroy(tpmrm_class);
 	unregister_chrdev_region(tpm_devt, 2*TPM_NUM_DEVICES);
+	tpm_dev_common_exit();
 }
 
 subsys_initcall(tpm_init);
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index f3501d05264f..f20dc8ece348 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -604,4 +604,6 @@ int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space,
 
 int tpm_bios_log_setup(struct tpm_chip *chip);
 void tpm_bios_log_teardown(struct tpm_chip *chip);
+int tpm_dev_common_init(void);
+void tpm_dev_common_exit(void);
 #endif
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index c31b490bd41d..3acf4fd4e5a5 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -329,7 +329,9 @@ int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max)
 			&buf.data[TPM_HEADER_SIZE];
 		recd = min_t(u32, be16_to_cpu(out->size), num_bytes);
 		if (tpm_buf_length(&buf) <
-		    offsetof(struct tpm2_get_random_out, buffer) + recd) {
+		    TPM_HEADER_SIZE +
+		    offsetof(struct tpm2_get_random_out, buffer) +
+		    recd) {
 			err = -EFAULT;
 			goto out;
 		}
diff --git a/drivers/char/tpm/tpmrm-dev.c b/drivers/char/tpm/tpmrm-dev.c
index 1a0e97a5da5a..0c751a79bbed 100644
--- a/drivers/char/tpm/tpmrm-dev.c
+++ b/drivers/char/tpm/tpmrm-dev.c
@@ -28,7 +28,7 @@ static int tpmrm_open(struct inode *inode, struct file *file)
 		return -ENOMEM;
 	}
 
-	tpm_common_open(file, chip, &priv->priv);
+	tpm_common_open(file, chip, &priv->priv, &priv->space);
 
 	return 0;
 }
@@ -45,21 +45,12 @@ static int tpmrm_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static ssize_t tpmrm_write(struct file *file, const char __user *buf,
-		   size_t size, loff_t *off)
-{
-	struct file_priv *fpriv = file->private_data;
-	struct tpmrm_priv *priv = container_of(fpriv, struct tpmrm_priv, priv);
-
-	return tpm_common_write(file, buf, size, off, &priv->space);
-}
-
 const struct file_operations tpmrm_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.open = tpmrm_open,
 	.read = tpm_common_read,
-	.write = tpmrm_write,
+	.write = tpm_common_write,
+	.poll = tpm_common_poll,
 	.release = tpmrm_release,
 };
-
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index 911475d36800..b150f87f38f5 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -264,7 +264,7 @@ static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv)
 		return -ENOMEM;
 	}
 
-	rv = xenbus_grant_ring(dev, &priv->shr, 1, &gref);
+	rv = xenbus_grant_ring(dev, priv->shr, 1, &gref);
 	if (rv < 0)
 		return rv;
 
diff --git a/drivers/clk/mvebu/clk-cpu.c b/drivers/clk/mvebu/clk-cpu.c
index 072aa38374ce..3045067448fb 100644
--- a/drivers/clk/mvebu/clk-cpu.c
+++ b/drivers/clk/mvebu/clk-cpu.c
@@ -183,7 +183,7 @@ static void __init of_cpu_clk_setup(struct device_node *node)
 		pr_warn("%s: pmu-dfs base register not set, dynamic frequency scaling not available\n",
 			__func__);
 
-	for_each_node_by_type(dn, "cpu")
+	for_each_of_cpu_node(dn)
 		ncpus++;
 
 	cpuclk = kcalloc(ncpus, sizeof(*cpuclk), GFP_KERNEL);
@@ -194,7 +194,7 @@ static void __init of_cpu_clk_setup(struct device_node *node)
 	if (WARN_ON(!clks))
 		goto clks_out;
 
-	for_each_node_by_type(dn, "cpu") {
+	for_each_of_cpu_node(dn) {
 		struct clk_init_data init;
 		struct clk *clk;
 		char *clk_name = kzalloc(5, GFP_KERNEL);
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index db51b2427e8a..e33b21d3f9d8 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -23,8 +23,8 @@ obj-$(CONFIG_FTTMR010_TIMER)	+= timer-fttmr010.o
 obj-$(CONFIG_ROCKCHIP_TIMER)      += rockchip_timer.o
 obj-$(CONFIG_CLKSRC_NOMADIK_MTU)	+= nomadik-mtu.o
 obj-$(CONFIG_CLKSRC_DBX500_PRCMU)	+= clksrc-dbx500-prcmu.o
-obj-$(CONFIG_ARMADA_370_XP_TIMER)	+= time-armada-370-xp.o
-obj-$(CONFIG_ORION_TIMER)	+= time-orion.o
+obj-$(CONFIG_ARMADA_370_XP_TIMER)	+= timer-armada-370-xp.o
+obj-$(CONFIG_ORION_TIMER)	+= timer-orion.o
 obj-$(CONFIG_BCM2835_TIMER)	+= bcm2835_timer.o
 obj-$(CONFIG_CLPS711X_TIMER)	+= clps711x-timer.o
 obj-$(CONFIG_ATLAS7_TIMER)	+= timer-atlas7.o
@@ -36,25 +36,25 @@ obj-$(CONFIG_SUN4I_TIMER)	+= sun4i_timer.o
 obj-$(CONFIG_SUN5I_HSTIMER)	+= timer-sun5i.o
 obj-$(CONFIG_MESON6_TIMER)	+= meson6_timer.o
 obj-$(CONFIG_TEGRA_TIMER)	+= tegra20_timer.o
-obj-$(CONFIG_VT8500_TIMER)	+= vt8500_timer.o
-obj-$(CONFIG_NSPIRE_TIMER)	+= zevio-timer.o
+obj-$(CONFIG_VT8500_TIMER)	+= timer-vt8500.o
+obj-$(CONFIG_NSPIRE_TIMER)	+= timer-zevio.o
 obj-$(CONFIG_BCM_KONA_TIMER)	+= bcm_kona_timer.o
-obj-$(CONFIG_CADENCE_TTC_TIMER)	+= cadence_ttc_timer.o
-obj-$(CONFIG_CLKSRC_EFM32)	+= time-efm32.o
+obj-$(CONFIG_CADENCE_TTC_TIMER)	+= timer-cadence-ttc.o
+obj-$(CONFIG_CLKSRC_EFM32)	+= timer-efm32.o
 obj-$(CONFIG_CLKSRC_STM32)	+= timer-stm32.o
 obj-$(CONFIG_CLKSRC_EXYNOS_MCT)	+= exynos_mct.o
-obj-$(CONFIG_CLKSRC_LPC32XX)	+= time-lpc32xx.o
+obj-$(CONFIG_CLKSRC_LPC32XX)	+= timer-lpc32xx.o
 obj-$(CONFIG_CLKSRC_MPS2)	+= mps2-timer.o
 obj-$(CONFIG_CLKSRC_SAMSUNG_PWM)	+= samsung_pwm_timer.o
-obj-$(CONFIG_FSL_FTM_TIMER)	+= fsl_ftm_timer.o
-obj-$(CONFIG_VF_PIT_TIMER)	+= vf_pit_timer.o
-obj-$(CONFIG_CLKSRC_QCOM)	+= qcom-timer.o
+obj-$(CONFIG_FSL_FTM_TIMER)	+= timer-fsl-ftm.o
+obj-$(CONFIG_VF_PIT_TIMER)	+= timer-vf-pit.o
+obj-$(CONFIG_CLKSRC_QCOM)	+= timer-qcom.o
 obj-$(CONFIG_MTK_TIMER)		+= timer-mediatek.o
-obj-$(CONFIG_CLKSRC_PISTACHIO)	+= time-pistachio.o
+obj-$(CONFIG_CLKSRC_PISTACHIO)	+= timer-pistachio.o
 obj-$(CONFIG_CLKSRC_TI_32K)	+= timer-ti-32k.o
 obj-$(CONFIG_CLKSRC_NPS)	+= timer-nps.o
 obj-$(CONFIG_OXNAS_RPS_TIMER)	+= timer-oxnas-rps.o
-obj-$(CONFIG_OWL_TIMER)		+= owl-timer.o
+obj-$(CONFIG_OWL_TIMER)		+= timer-owl.o
 obj-$(CONFIG_SPRD_TIMER)	+= timer-sprd.o
 obj-$(CONFIG_NPCM7XX_TIMER)	+= timer-npcm7xx.o
 
@@ -66,7 +66,7 @@ obj-$(CONFIG_ARM_TIMER_SP804)		+= timer-sp804.o
 obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST)	+= dummy_timer.o
 obj-$(CONFIG_KEYSTONE_TIMER)		+= timer-keystone.o
 obj-$(CONFIG_INTEGRATOR_AP_TIMER)	+= timer-integrator-ap.o
-obj-$(CONFIG_CLKSRC_VERSATILE)		+= versatile.o
+obj-$(CONFIG_CLKSRC_VERSATILE)		+= timer-versatile.o
 obj-$(CONFIG_CLKSRC_MIPS_GIC)		+= mips-gic-timer.o
 obj-$(CONFIG_CLKSRC_TANGO_XTAL)		+= tango_xtal.o
 obj-$(CONFIG_CLKSRC_IMX_GPT)		+= timer-imx-gpt.o
diff --git a/drivers/clocksource/asm9260_timer.c b/drivers/clocksource/asm9260_timer.c
index 38cd2feb87c4..fbaee04fd1d9 100644
--- a/drivers/clocksource/asm9260_timer.c
+++ b/drivers/clocksource/asm9260_timer.c
@@ -193,7 +193,7 @@ static int __init asm9260_timer_init(struct device_node *np)
 
 	priv.base = of_io_request_and_map(np, 0, np->name);
 	if (IS_ERR(priv.base)) {
-		pr_err("%s: unable to map resource\n", np->name);
+		pr_err("%pOFn: unable to map resource\n", np);
 		return PTR_ERR(priv.base);
 	}
 
diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c
index 69866cd8f4bb..db410acd8964 100644
--- a/drivers/clocksource/dw_apb_timer_of.c
+++ b/drivers/clocksource/dw_apb_timer_of.c
@@ -22,6 +22,7 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/clk.h>
+#include <linux/reset.h>
 #include <linux/sched_clock.h>
 
 static void __init timer_get_base_and_rate(struct device_node *np,
@@ -29,11 +30,22 @@ static void __init timer_get_base_and_rate(struct device_node *np,
 {
 	struct clk *timer_clk;
 	struct clk *pclk;
+	struct reset_control *rstc;
 
 	*base = of_iomap(np, 0);
 
 	if (!*base)
-		panic("Unable to map regs for %s", np->name);
+		panic("Unable to map regs for %pOFn", np);
+
+	/*
+	 * Reset the timer if the reset control is available, wiping
+	 * out the state the firmware may have left it
+	 */
+	rstc = of_reset_control_get(np, NULL);
+	if (!IS_ERR(rstc)) {
+		reset_control_assert(rstc);
+		reset_control_deassert(rstc);
+	}
 
 	/*
 	 * Not all implementations use a periphal clock, so don't panic
@@ -42,8 +54,8 @@ static void __init timer_get_base_and_rate(struct device_node *np,
 	pclk = of_clk_get_by_name(np, "pclk");
 	if (!IS_ERR(pclk))
 		if (clk_prepare_enable(pclk))
-			pr_warn("pclk for %s is present, but could not be activated\n",
-				np->name);
+			pr_warn("pclk for %pOFn is present, but could not be activated\n",
+				np);
 
 	timer_clk = of_clk_get_by_name(np, "timer");
 	if (IS_ERR(timer_clk))
@@ -57,7 +69,7 @@ static void __init timer_get_base_and_rate(struct device_node *np,
 try_clock_freq:
 	if (of_property_read_u32(np, "clock-freq", rate) &&
 	    of_property_read_u32(np, "clock-frequency", rate))
-		panic("No clock nor clock-frequency property for %s", np->name);
+		panic("No clock nor clock-frequency property for %pOFn", np);
 }
 
 static void __init add_clockevent(struct device_node *event_timer)
diff --git a/drivers/clocksource/pxa_timer.c b/drivers/clocksource/pxa_timer.c
index 08cd6eaf3795..395837938301 100644
--- a/drivers/clocksource/pxa_timer.c
+++ b/drivers/clocksource/pxa_timer.c
@@ -191,13 +191,13 @@ static int __init pxa_timer_dt_init(struct device_node *np)
 	/* timer registers are shared with watchdog timer */
 	timer_base = of_iomap(np, 0);
 	if (!timer_base) {
-		pr_err("%s: unable to map resource\n", np->name);
+		pr_err("%pOFn: unable to map resource\n", np);
 		return -ENXIO;
 	}
 
 	clk = of_clk_get(np, 0);
 	if (IS_ERR(clk)) {
-		pr_crit("%s: unable to get clk\n", np->name);
+		pr_crit("%pOFn: unable to get clk\n", np);
 		return PTR_ERR(clk);
 	}
 
@@ -210,7 +210,7 @@ static int __init pxa_timer_dt_init(struct device_node *np)
 	/* we are only interested in OS-timer0 irq */
 	irq = irq_of_parse_and_map(np, 0);
 	if (irq <= 0) {
-		pr_crit("%s: unable to parse OS-timer0 irq\n", np->name);
+		pr_crit("%pOFn: unable to parse OS-timer0 irq\n", np);
 		return -EINVAL;
 	}
 
diff --git a/drivers/clocksource/renesas-ostm.c b/drivers/clocksource/renesas-ostm.c
index 6cffd7c6001a..61d5f3b539ce 100644
--- a/drivers/clocksource/renesas-ostm.c
+++ b/drivers/clocksource/renesas-ostm.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Renesas Timer Support - OSTM
  *
  * Copyright (C) 2017 Renesas Electronics America, Inc.
  * Copyright (C) 2017 Chris Brandt
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #include <linux/of_address.h>
diff --git a/drivers/clocksource/riscv_timer.c b/drivers/clocksource/riscv_timer.c
index 4e8b347e43e2..084e97dc10ed 100644
--- a/drivers/clocksource/riscv_timer.c
+++ b/drivers/clocksource/riscv_timer.c
@@ -8,6 +8,7 @@
 #include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/irq.h>
+#include <asm/smp.h>
 #include <asm/sbi.h>
 
 /*
@@ -84,13 +85,16 @@ void riscv_timer_interrupt(void)
 
 static int __init riscv_timer_init_dt(struct device_node *n)
 {
-	int cpu_id = riscv_of_processor_hart(n), error;
+	int cpuid, hartid, error;
 	struct clocksource *cs;
 
-	if (cpu_id != smp_processor_id())
+	hartid = riscv_of_processor_hartid(n);
+	cpuid = riscv_hartid_to_cpuid(hartid);
+
+	if (cpuid != smp_processor_id())
 		return 0;
 
-	cs = per_cpu_ptr(&riscv_clocksource, cpu_id);
+	cs = per_cpu_ptr(&riscv_clocksource, cpuid);
 	clocksource_register_hz(cs, riscv_timebase);
 
 	error = cpuhp_setup_state(CPUHP_AP_RISCV_TIMER_STARTING,
@@ -98,7 +102,7 @@ static int __init riscv_timer_init_dt(struct device_node *n)
 			 riscv_timer_starting_cpu, riscv_timer_dying_cpu);
 	if (error)
 		pr_err("RISCV timer register failed [%d] for cpu = [%d]\n",
-		       error, cpu_id);
+		       error, cpuid);
 	return error;
 }
 
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index bbbf37c471a3..55d3e03f2cd4 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * SuperH Timer Support - CMT
  *
  *  Copyright (C) 2008 Magnus Damm
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/clk.h>
@@ -78,18 +70,17 @@ struct sh_cmt_info {
 	unsigned int channels_mask;
 
 	unsigned long width; /* 16 or 32 bit version of hardware block */
-	unsigned long overflow_bit;
-	unsigned long clear_bits;
+	u32 overflow_bit;
+	u32 clear_bits;
 
 	/* callbacks for CMSTR and CMCSR access */
-	unsigned long (*read_control)(void __iomem *base, unsigned long offs);
+	u32 (*read_control)(void __iomem *base, unsigned long offs);
 	void (*write_control)(void __iomem *base, unsigned long offs,
-			      unsigned long value);
+			      u32 value);
 
 	/* callbacks for CMCNT and CMCOR access */
-	unsigned long (*read_count)(void __iomem *base, unsigned long offs);
-	void (*write_count)(void __iomem *base, unsigned long offs,
-			    unsigned long value);
+	u32 (*read_count)(void __iomem *base, unsigned long offs);
+	void (*write_count)(void __iomem *base, unsigned long offs, u32 value);
 };
 
 struct sh_cmt_channel {
@@ -103,13 +94,13 @@ struct sh_cmt_channel {
 
 	unsigned int timer_bit;
 	unsigned long flags;
-	unsigned long match_value;
-	unsigned long next_match_value;
-	unsigned long max_match_value;
+	u32 match_value;
+	u32 next_match_value;
+	u32 max_match_value;
 	raw_spinlock_t lock;
 	struct clock_event_device ced;
 	struct clocksource cs;
-	unsigned long total_cycles;
+	u64 total_cycles;
 	bool cs_enabled;
 };
 
@@ -160,24 +151,22 @@ struct sh_cmt_device {
 #define SH_CMT32_CMCSR_CKS_RCLK1	(7 << 0)
 #define SH_CMT32_CMCSR_CKS_MASK		(7 << 0)
 
-static unsigned long sh_cmt_read16(void __iomem *base, unsigned long offs)
+static u32 sh_cmt_read16(void __iomem *base, unsigned long offs)
 {
 	return ioread16(base + (offs << 1));
 }
 
-static unsigned long sh_cmt_read32(void __iomem *base, unsigned long offs)
+static u32 sh_cmt_read32(void __iomem *base, unsigned long offs)
 {
 	return ioread32(base + (offs << 2));
 }
 
-static void sh_cmt_write16(void __iomem *base, unsigned long offs,
-			   unsigned long value)
+static void sh_cmt_write16(void __iomem *base, unsigned long offs, u32 value)
 {
 	iowrite16(value, base + (offs << 1));
 }
 
-static void sh_cmt_write32(void __iomem *base, unsigned long offs,
-			   unsigned long value)
+static void sh_cmt_write32(void __iomem *base, unsigned long offs, u32 value)
 {
 	iowrite32(value, base + (offs << 2));
 }
@@ -242,7 +231,7 @@ static const struct sh_cmt_info sh_cmt_info[] = {
 #define CMCNT 1 /* channel register */
 #define CMCOR 2 /* channel register */
 
-static inline unsigned long sh_cmt_read_cmstr(struct sh_cmt_channel *ch)
+static inline u32 sh_cmt_read_cmstr(struct sh_cmt_channel *ch)
 {
 	if (ch->iostart)
 		return ch->cmt->info->read_control(ch->iostart, 0);
@@ -250,8 +239,7 @@ static inline unsigned long sh_cmt_read_cmstr(struct sh_cmt_channel *ch)
 		return ch->cmt->info->read_control(ch->cmt->mapbase, 0);
 }
 
-static inline void sh_cmt_write_cmstr(struct sh_cmt_channel *ch,
-				      unsigned long value)
+static inline void sh_cmt_write_cmstr(struct sh_cmt_channel *ch, u32 value)
 {
 	if (ch->iostart)
 		ch->cmt->info->write_control(ch->iostart, 0, value);
@@ -259,39 +247,35 @@ static inline void sh_cmt_write_cmstr(struct sh_cmt_channel *ch,
 		ch->cmt->info->write_control(ch->cmt->mapbase, 0, value);
 }
 
-static inline unsigned long sh_cmt_read_cmcsr(struct sh_cmt_channel *ch)
+static inline u32 sh_cmt_read_cmcsr(struct sh_cmt_channel *ch)
 {
 	return ch->cmt->info->read_control(ch->ioctrl, CMCSR);
 }
 
-static inline void sh_cmt_write_cmcsr(struct sh_cmt_channel *ch,
-				      unsigned long value)
+static inline void sh_cmt_write_cmcsr(struct sh_cmt_channel *ch, u32 value)
 {
 	ch->cmt->info->write_control(ch->ioctrl, CMCSR, value);
 }
 
-static inline unsigned long sh_cmt_read_cmcnt(struct sh_cmt_channel *ch)
+static inline u32 sh_cmt_read_cmcnt(struct sh_cmt_channel *ch)
 {
 	return ch->cmt->info->read_count(ch->ioctrl, CMCNT);
 }
 
-static inline void sh_cmt_write_cmcnt(struct sh_cmt_channel *ch,
-				      unsigned long value)
+static inline void sh_cmt_write_cmcnt(struct sh_cmt_channel *ch, u32 value)
 {
 	ch->cmt->info->write_count(ch->ioctrl, CMCNT, value);
 }
 
-static inline void sh_cmt_write_cmcor(struct sh_cmt_channel *ch,
-				      unsigned long value)
+static inline void sh_cmt_write_cmcor(struct sh_cmt_channel *ch, u32 value)
 {
 	ch->cmt->info->write_count(ch->ioctrl, CMCOR, value);
 }
 
-static unsigned long sh_cmt_get_counter(struct sh_cmt_channel *ch,
-					int *has_wrapped)
+static u32 sh_cmt_get_counter(struct sh_cmt_channel *ch, u32 *has_wrapped)
 {
-	unsigned long v1, v2, v3;
-	int o1, o2;
+	u32 v1, v2, v3;
+	u32 o1, o2;
 
 	o1 = sh_cmt_read_cmcsr(ch) & ch->cmt->info->overflow_bit;
 
@@ -311,7 +295,8 @@ static unsigned long sh_cmt_get_counter(struct sh_cmt_channel *ch,
 
 static void sh_cmt_start_stop_ch(struct sh_cmt_channel *ch, int start)
 {
-	unsigned long flags, value;
+	unsigned long flags;
+	u32 value;
 
 	/* start stop register shared by multiple timer channels */
 	raw_spin_lock_irqsave(&ch->cmt->lock, flags);
@@ -418,11 +403,11 @@ static void sh_cmt_disable(struct sh_cmt_channel *ch)
 static void sh_cmt_clock_event_program_verify(struct sh_cmt_channel *ch,
 					      int absolute)
 {
-	unsigned long new_match;
-	unsigned long value = ch->next_match_value;
-	unsigned long delay = 0;
-	unsigned long now = 0;
-	int has_wrapped;
+	u32 value = ch->next_match_value;
+	u32 new_match;
+	u32 delay = 0;
+	u32 now = 0;
+	u32 has_wrapped;
 
 	now = sh_cmt_get_counter(ch, &has_wrapped);
 	ch->flags |= FLAG_REPROGRAM; /* force reprogram */
@@ -619,9 +604,10 @@ static struct sh_cmt_channel *cs_to_sh_cmt(struct clocksource *cs)
 static u64 sh_cmt_clocksource_read(struct clocksource *cs)
 {
 	struct sh_cmt_channel *ch = cs_to_sh_cmt(cs);
-	unsigned long flags, raw;
-	unsigned long value;
-	int has_wrapped;
+	unsigned long flags;
+	u32 has_wrapped;
+	u64 value;
+	u32 raw;
 
 	raw_spin_lock_irqsave(&ch->lock, flags);
 	value = ch->total_cycles;
@@ -694,7 +680,7 @@ static int sh_cmt_register_clocksource(struct sh_cmt_channel *ch,
 	cs->disable = sh_cmt_clocksource_disable;
 	cs->suspend = sh_cmt_clocksource_suspend;
 	cs->resume = sh_cmt_clocksource_resume;
-	cs->mask = CLOCKSOURCE_MASK(sizeof(unsigned long) * 8);
+	cs->mask = CLOCKSOURCE_MASK(sizeof(u64) * 8);
 	cs->flags = CLOCK_SOURCE_IS_CONTINUOUS;
 
 	dev_info(&ch->cmt->pdev->dev, "ch%u: used as clock source\n",
@@ -941,8 +927,22 @@ static const struct of_device_id sh_cmt_of_table[] __maybe_unused = {
 		.compatible = "renesas,cmt-48-gen2",
 		.data = &sh_cmt_info[SH_CMT0_RCAR_GEN2]
 	},
-	{ .compatible = "renesas,rcar-gen2-cmt0", .data = &sh_cmt_info[SH_CMT0_RCAR_GEN2] },
-	{ .compatible = "renesas,rcar-gen2-cmt1", .data = &sh_cmt_info[SH_CMT1_RCAR_GEN2] },
+	{
+		.compatible = "renesas,rcar-gen2-cmt0",
+		.data = &sh_cmt_info[SH_CMT0_RCAR_GEN2]
+	},
+	{
+		.compatible = "renesas,rcar-gen2-cmt1",
+		.data = &sh_cmt_info[SH_CMT1_RCAR_GEN2]
+	},
+	{
+		.compatible = "renesas,rcar-gen3-cmt0",
+		.data = &sh_cmt_info[SH_CMT0_RCAR_GEN2]
+	},
+	{
+		.compatible = "renesas,rcar-gen3-cmt1",
+		.data = &sh_cmt_info[SH_CMT1_RCAR_GEN2]
+	},
 	{ }
 };
 MODULE_DEVICE_TABLE(of, sh_cmt_of_table);
diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c
index 6812e099b6a3..354b27d14a19 100644
--- a/drivers/clocksource/sh_mtu2.c
+++ b/drivers/clocksource/sh_mtu2.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * SuperH Timer Support - MTU2
  *
  *  Copyright (C) 2009 Magnus Damm
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/clk.h>
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
index c74a6c543ca2..49f1c805fc95 100644
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * SuperH Timer Support - TMU
  *
  *  Copyright (C) 2009 Magnus Damm
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/clk.h>
diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/timer-armada-370-xp.c
index edf1a46269f1..edf1a46269f1 100644
--- a/drivers/clocksource/time-armada-370-xp.c
+++ b/drivers/clocksource/timer-armada-370-xp.c
diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/timer-cadence-ttc.c
index 29d51755e18b..b33402980b6f 100644
--- a/drivers/clocksource/cadence_ttc_timer.c
+++ b/drivers/clocksource/timer-cadence-ttc.c
@@ -535,7 +535,7 @@ static int __init ttc_timer_init(struct device_node *timer)
 	if (ret)
 		return ret;
 
-	pr_info("%s #0 at %p, irq=%d\n", timer->name, timer_baseaddr, irq);
+	pr_info("%pOFn #0 at %p, irq=%d\n", timer, timer_baseaddr, irq);
 
 	return 0;
 }
diff --git a/drivers/clocksource/time-efm32.c b/drivers/clocksource/timer-efm32.c
index 257e810ec1ad..257e810ec1ad 100644
--- a/drivers/clocksource/time-efm32.c
+++ b/drivers/clocksource/timer-efm32.c
diff --git a/drivers/clocksource/fsl_ftm_timer.c b/drivers/clocksource/timer-fsl-ftm.c
index 846d18daf893..846d18daf893 100644
--- a/drivers/clocksource/fsl_ftm_timer.c
+++ b/drivers/clocksource/timer-fsl-ftm.c
diff --git a/drivers/clocksource/timer-integrator-ap.c b/drivers/clocksource/timer-integrator-ap.c
index 62d24690ba02..76e526f58620 100644
--- a/drivers/clocksource/timer-integrator-ap.c
+++ b/drivers/clocksource/timer-integrator-ap.c
@@ -190,7 +190,7 @@ static int __init integrator_ap_timer_init_of(struct device_node *node)
 
 	clk = of_clk_get(node, 0);
 	if (IS_ERR(clk)) {
-		pr_err("No clock for %s\n", node->name);
+		pr_err("No clock for %pOFn\n", node);
 		return PTR_ERR(clk);
 	}
 	clk_prepare_enable(clk);
diff --git a/drivers/clocksource/time-lpc32xx.c b/drivers/clocksource/timer-lpc32xx.c
index d51a62a79ef7..d51a62a79ef7 100644
--- a/drivers/clocksource/time-lpc32xx.c
+++ b/drivers/clocksource/timer-lpc32xx.c
diff --git a/drivers/clocksource/time-orion.c b/drivers/clocksource/timer-orion.c
index 12202067fe4b..7d487107e3cd 100644
--- a/drivers/clocksource/time-orion.c
+++ b/drivers/clocksource/timer-orion.c
@@ -129,13 +129,13 @@ static int __init orion_timer_init(struct device_node *np)
 	/* timer registers are shared with watchdog timer */
 	timer_base = of_iomap(np, 0);
 	if (!timer_base) {
-		pr_err("%s: unable to map resource\n", np->name);
+		pr_err("%pOFn: unable to map resource\n", np);
 		return -ENXIO;
 	}
 
 	clk = of_clk_get(np, 0);
 	if (IS_ERR(clk)) {
-		pr_err("%s: unable to get clk\n", np->name);
+		pr_err("%pOFn: unable to get clk\n", np);
 		return PTR_ERR(clk);
 	}
 
@@ -148,7 +148,7 @@ static int __init orion_timer_init(struct device_node *np)
 	/* we are only interested in timer1 irq */
 	irq = irq_of_parse_and_map(np, 1);
 	if (irq <= 0) {
-		pr_err("%s: unable to parse timer1 irq\n", np->name);
+		pr_err("%pOFn: unable to parse timer1 irq\n", np);
 		return -EINVAL;
 	}
 
@@ -174,7 +174,7 @@ static int __init orion_timer_init(struct device_node *np)
 	/* setup timer1 as clockevent timer */
 	ret = setup_irq(irq, &orion_clkevt_irq);
 	if (ret) {
-		pr_err("%s: unable to setup irq\n", np->name);
+		pr_err("%pOFn: unable to setup irq\n", np);
 		return ret;
 	}
 
diff --git a/drivers/clocksource/owl-timer.c b/drivers/clocksource/timer-owl.c
index ea00a5e8f95d..ea00a5e8f95d 100644
--- a/drivers/clocksource/owl-timer.c
+++ b/drivers/clocksource/timer-owl.c
diff --git a/drivers/clocksource/time-pistachio.c b/drivers/clocksource/timer-pistachio.c
index a2dd85d0c1d7..a2dd85d0c1d7 100644
--- a/drivers/clocksource/time-pistachio.c
+++ b/drivers/clocksource/timer-pistachio.c
diff --git a/drivers/clocksource/qcom-timer.c b/drivers/clocksource/timer-qcom.c
index 89816f89ff3f..89816f89ff3f 100644
--- a/drivers/clocksource/qcom-timer.c
+++ b/drivers/clocksource/timer-qcom.c
diff --git a/drivers/clocksource/timer-sp804.c b/drivers/clocksource/timer-sp804.c
index e01222ea888f..052b230ca312 100644
--- a/drivers/clocksource/timer-sp804.c
+++ b/drivers/clocksource/timer-sp804.c
@@ -249,7 +249,7 @@ static int __init sp804_of_init(struct device_node *np)
 	if (of_clk_get_parent_count(np) == 3) {
 		clk2 = of_clk_get(np, 1);
 		if (IS_ERR(clk2)) {
-			pr_err("sp804: %s clock not found: %d\n", np->name,
+			pr_err("sp804: %pOFn clock not found: %d\n", np,
 				(int)PTR_ERR(clk2));
 			clk2 = NULL;
 		}
diff --git a/drivers/clocksource/versatile.c b/drivers/clocksource/timer-versatile.c
index 39725d38aede..39725d38aede 100644
--- a/drivers/clocksource/versatile.c
+++ b/drivers/clocksource/timer-versatile.c
diff --git a/drivers/clocksource/vf_pit_timer.c b/drivers/clocksource/timer-vf-pit.c
index 0f92089ec08c..0f92089ec08c 100644
--- a/drivers/clocksource/vf_pit_timer.c
+++ b/drivers/clocksource/timer-vf-pit.c
diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/timer-vt8500.c
index e0f7489cfc8e..e0f7489cfc8e 100644
--- a/drivers/clocksource/vt8500_timer.c
+++ b/drivers/clocksource/timer-vt8500.c
diff --git a/drivers/clocksource/zevio-timer.c b/drivers/clocksource/timer-zevio.c
index f74689334f7c..6127e8062a71 100644
--- a/drivers/clocksource/zevio-timer.c
+++ b/drivers/clocksource/timer-zevio.c
@@ -148,12 +148,12 @@ static int __init zevio_timer_add(struct device_node *node)
 
 	of_address_to_resource(node, 0, &res);
 	scnprintf(timer->clocksource_name, sizeof(timer->clocksource_name),
-			"%llx.%s_clocksource",
-			(unsigned long long)res.start, node->name);
+			"%llx.%pOFn_clocksource",
+			(unsigned long long)res.start, node);
 
 	scnprintf(timer->clockevent_name, sizeof(timer->clockevent_name),
-			"%llx.%s_clockevent",
-			(unsigned long long)res.start, node->name);
+			"%llx.%pOFn_clockevent",
+			(unsigned long long)res.start, node);
 
 	if (timer->interrupt_regs && irqnr) {
 		timer->clkevt.name		= timer->clockevent_name;
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index c23396f32c8a..8e7e225d2446 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/
 obj-$(CONFIG_CRYPTO_DEV_CPT) += cavium/cpt/
 obj-$(CONFIG_CRYPTO_DEV_NITROX) += cavium/nitrox/
 obj-$(CONFIG_CRYPTO_DEV_EXYNOS_RNG) += exynos-rng.o
-obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
+obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_COMMON) += caam/
 obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
 obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
 obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 801aeab5ab1e..2b7af44c7b85 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Cryptographic API.
  *
@@ -6,10 +7,6 @@
  * Copyright (c) 2012 Eukréa Electromatique - ATMEL
  * Author: Nicolas Royer <nicolas@eukrea.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
  * Some ideas are from omap-aes.c driver.
  */
 
diff --git a/drivers/crypto/atmel-authenc.h b/drivers/crypto/atmel-authenc.h
index 2a60d1224143..cbd37a2edada 100644
--- a/drivers/crypto/atmel-authenc.h
+++ b/drivers/crypto/atmel-authenc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * API for Atmel Secure Protocol Layers Improved Performances (SPLIP)
  *
@@ -5,18 +6,6 @@
  *
  * Author: Cyrille Pitchen <cyrille.pitchen@atmel.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
  * This driver is based on drivers/mtd/spi-nor/fsl-quadspi.c from Freescale.
  */
 
diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
index 74f083f45e97..ba00e4563ca0 100644
--- a/drivers/crypto/atmel-ecc.c
+++ b/drivers/crypto/atmel-ecc.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Microchip / Atmel ECC (I2C) driver.
  *
  * Copyright (c) 2017, Microchip Technology Inc.
  * Author: Tudor Ambarus <tudor.ambarus@microchip.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
  */
 
 #include <linux/bitrev.h>
diff --git a/drivers/crypto/atmel-ecc.h b/drivers/crypto/atmel-ecc.h
index 25232c8abcc2..643a3b947338 100644
--- a/drivers/crypto/atmel-ecc.h
+++ b/drivers/crypto/atmel-ecc.h
@@ -1,19 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2017, Microchip Technology Inc.
  * Author: Tudor Ambarus <tudor.ambarus@microchip.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
  */
 
 #ifndef __ATMEL_ECC_H__
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 8a19df2fba6a..ab0cfe748931 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Cryptographic API.
  *
@@ -6,10 +7,6 @@
  * Copyright (c) 2012 Eukréa Electromatique - ATMEL
  * Author: Nicolas Royer <nicolas@eukrea.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
  * Some ideas are from omap-sham.c drivers.
  */
 
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index 97b0423efa7f..438e1ffb2ec0 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Cryptographic API.
  *
@@ -6,10 +7,6 @@
  * Copyright (c) 2012 Eukréa Electromatique - ATMEL
  * Author: Nicolas Royer <nicolas@eukrea.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
  * Some ideas are from omap-aes.c drivers.
  */
 
diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c
index 7f07a5085e9b..f3442c2bdbdc 100644
--- a/drivers/crypto/axis/artpec6_crypto.c
+++ b/drivers/crypto/axis/artpec6_crypto.c
@@ -330,7 +330,7 @@ struct artpec6_cryptotfm_context {
 	size_t key_length;
 	u32 key_md;
 	int crypto_type;
-	struct crypto_skcipher *fallback;
+	struct crypto_sync_skcipher *fallback;
 };
 
 struct artpec6_crypto_aead_hw_ctx {
@@ -1199,15 +1199,15 @@ artpec6_crypto_ctr_crypt(struct skcipher_request *req, bool encrypt)
 		pr_debug("counter %x will overflow (nblks %u), falling back\n",
 			 counter, counter + nblks);
 
-		ret = crypto_skcipher_setkey(ctx->fallback, ctx->aes_key,
-					     ctx->key_length);
+		ret = crypto_sync_skcipher_setkey(ctx->fallback, ctx->aes_key,
+						  ctx->key_length);
 		if (ret)
 			return ret;
 
 		{
-			SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
+			SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
 
-			skcipher_request_set_tfm(subreq, ctx->fallback);
+			skcipher_request_set_sync_tfm(subreq, ctx->fallback);
 			skcipher_request_set_callback(subreq, req->base.flags,
 						      NULL, NULL);
 			skcipher_request_set_crypt(subreq, req->src, req->dst,
@@ -1561,10 +1561,9 @@ static int artpec6_crypto_aes_ctr_init(struct crypto_skcipher *tfm)
 {
 	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
 
-	ctx->fallback = crypto_alloc_skcipher(crypto_tfm_alg_name(&tfm->base),
-					      0,
-					      CRYPTO_ALG_ASYNC |
-					      CRYPTO_ALG_NEED_FALLBACK);
+	ctx->fallback =
+		crypto_alloc_sync_skcipher(crypto_tfm_alg_name(&tfm->base),
+					   0, CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(ctx->fallback))
 		return PTR_ERR(ctx->fallback);
 
@@ -1605,7 +1604,7 @@ static void artpec6_crypto_aes_ctr_exit(struct crypto_skcipher *tfm)
 {
 	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
 
-	crypto_free_skcipher(ctx->fallback);
+	crypto_free_sync_skcipher(ctx->fallback);
 	artpec6_crypto_aes_exit(tfm);
 }
 
@@ -3174,7 +3173,6 @@ static struct platform_driver artpec6_crypto_driver = {
 	.remove  = artpec6_crypto_remove,
 	.driver  = {
 		.name  = "artpec6-crypto",
-		.owner = THIS_MODULE,
 		.of_match_table = artpec6_crypto_of_match,
 	},
 };
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
index 1eb852765469..c4b1cade55c1 100644
--- a/drivers/crypto/caam/Kconfig
+++ b/drivers/crypto/caam/Kconfig
@@ -1,7 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+config CRYPTO_DEV_FSL_CAAM_COMMON
+	tristate
+
 config CRYPTO_DEV_FSL_CAAM
-	tristate "Freescale CAAM-Multicore driver backend"
+	tristate "Freescale CAAM-Multicore platform driver backend"
 	depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE
 	select SOC_BUS
+	select CRYPTO_DEV_FSL_CAAM_COMMON
 	help
 	  Enables the driver module for Freescale's Cryptographic Accelerator
 	  and Assurance Module (CAAM), also known as the SEC version 4 (SEC4).
@@ -12,9 +17,16 @@ config CRYPTO_DEV_FSL_CAAM
 	  To compile this driver as a module, choose M here: the module
 	  will be called caam.
 
+if CRYPTO_DEV_FSL_CAAM
+
+config CRYPTO_DEV_FSL_CAAM_DEBUG
+	bool "Enable debug output in CAAM driver"
+	help
+	  Selecting this will enable printing of various debug
+	  information in the CAAM driver.
+
 config CRYPTO_DEV_FSL_CAAM_JR
 	tristate "Freescale CAAM Job Ring driver backend"
-	depends on CRYPTO_DEV_FSL_CAAM
 	default y
 	help
 	  Enables the driver module for Job Rings which are part of
@@ -25,9 +37,10 @@ config CRYPTO_DEV_FSL_CAAM_JR
 	  To compile this driver as a module, choose M here: the module
 	  will be called caam_jr.
 
+if CRYPTO_DEV_FSL_CAAM_JR
+
 config CRYPTO_DEV_FSL_CAAM_RINGSIZE
 	int "Job Ring size"
-	depends on CRYPTO_DEV_FSL_CAAM_JR
 	range 2 9
 	default "9"
 	help
@@ -45,7 +58,6 @@ config CRYPTO_DEV_FSL_CAAM_RINGSIZE
 
 config CRYPTO_DEV_FSL_CAAM_INTC
 	bool "Job Ring interrupt coalescing"
-	depends on CRYPTO_DEV_FSL_CAAM_JR
 	help
 	  Enable the Job Ring's interrupt coalescing feature.
 
@@ -75,7 +87,6 @@ config CRYPTO_DEV_FSL_CAAM_INTC_TIME_THLD
 
 config CRYPTO_DEV_FSL_CAAM_CRYPTO_API
 	tristate "Register algorithm implementations with the Crypto API"
-	depends on CRYPTO_DEV_FSL_CAAM_JR
 	default y
 	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
@@ -90,7 +101,7 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API
 
 config CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI
 	tristate "Queue Interface as Crypto API backend"
-	depends on CRYPTO_DEV_FSL_CAAM_JR && FSL_DPAA && NET
+	depends on FSL_DPAA && NET
 	default y
 	select CRYPTO_AUTHENC
 	select CRYPTO_BLKCIPHER
@@ -107,7 +118,6 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI
 
 config CRYPTO_DEV_FSL_CAAM_AHASH_API
 	tristate "Register hash algorithm implementations with Crypto API"
-	depends on CRYPTO_DEV_FSL_CAAM_JR
 	default y
 	select CRYPTO_HASH
 	help
@@ -119,7 +129,6 @@ config CRYPTO_DEV_FSL_CAAM_AHASH_API
 
 config CRYPTO_DEV_FSL_CAAM_PKC_API
         tristate "Register public key cryptography implementations with Crypto API"
-        depends on CRYPTO_DEV_FSL_CAAM_JR
         default y
         select CRYPTO_RSA
         help
@@ -131,7 +140,6 @@ config CRYPTO_DEV_FSL_CAAM_PKC_API
 
 config CRYPTO_DEV_FSL_CAAM_RNG_API
 	tristate "Register caam device for hwrng API"
-	depends on CRYPTO_DEV_FSL_CAAM_JR
 	default y
 	select CRYPTO_RNG
 	select HW_RANDOM
@@ -142,13 +150,32 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API
 	  To compile this as a module, choose M here: the module
 	  will be called caamrng.
 
-config CRYPTO_DEV_FSL_CAAM_DEBUG
-	bool "Enable debug output in CAAM driver"
-	depends on CRYPTO_DEV_FSL_CAAM
+endif # CRYPTO_DEV_FSL_CAAM_JR
+
+endif # CRYPTO_DEV_FSL_CAAM
+
+config CRYPTO_DEV_FSL_DPAA2_CAAM
+	tristate "QorIQ DPAA2 CAAM (DPSECI) driver"
+	depends on FSL_MC_DPIO
+	depends on NETDEVICES
+	select CRYPTO_DEV_FSL_CAAM_COMMON
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_AUTHENC
+	select CRYPTO_AEAD
+	select CRYPTO_HASH
 	help
-	  Selecting this will enable printing of various debug
-	  information in the CAAM driver.
+	  CAAM driver for QorIQ Data Path Acceleration Architecture 2.
+	  It handles DPSECI DPAA2 objects that sit on the Management Complex
+	  (MC) fsl-mc bus.
+
+	  To compile this as a module, choose M here: the module
+	  will be called dpaa2_caam.
 
 config CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC
 	def_tristate (CRYPTO_DEV_FSL_CAAM_CRYPTO_API || \
-		      CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI)
+		      CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI || \
+		      CRYPTO_DEV_FSL_DPAA2_CAAM)
+
+config CRYPTO_DEV_FSL_CAAM_AHASH_API_DESC
+	def_tristate (CRYPTO_DEV_FSL_CAAM_AHASH_API || \
+		      CRYPTO_DEV_FSL_DPAA2_CAAM)
diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile
index cb652ee7dfc8..7bbfd06a11ff 100644
--- a/drivers/crypto/caam/Makefile
+++ b/drivers/crypto/caam/Makefile
@@ -6,19 +6,27 @@ ifeq ($(CONFIG_CRYPTO_DEV_FSL_CAAM_DEBUG), y)
 	ccflags-y := -DDEBUG
 endif
 
+ccflags-y += -DVERSION=\"\"
+
+obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_COMMON) += error.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_JR) += caam_jr.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI) += caamalg_qi.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC) += caamalg_desc.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o
+obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API_DESC) += caamhash_desc.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caam_pkc.o
 
 caam-objs := ctrl.o
-caam_jr-objs := jr.o key_gen.o error.o
+caam_jr-objs := jr.o key_gen.o
 caam_pkc-y := caampkc.o pkc_desc.o
 ifneq ($(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI),)
 	ccflags-y += -DCONFIG_CAAM_QI
 	caam-objs += qi.o
 endif
+
+obj-$(CONFIG_CRYPTO_DEV_FSL_DPAA2_CAAM) += dpaa2_caam.o
+
+dpaa2_caam-y    := caamalg_qi2.o dpseci.o
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index ec40f991e6c6..869f092432de 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -1,8 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * caam - Freescale FSL CAAM support for crypto API
  *
  * Copyright 2008-2011 Freescale Semiconductor, Inc.
- * Copyright 2016 NXP
+ * Copyright 2016-2018 NXP
  *
  * Based on talitos crypto API driver.
  *
@@ -81,8 +82,6 @@
 #define debug(format, arg...)
 #endif
 
-static struct list_head alg_list;
-
 struct caam_alg_entry {
 	int class1_alg_type;
 	int class2_alg_type;
@@ -96,17 +95,21 @@ struct caam_aead_alg {
 	bool registered;
 };
 
+struct caam_skcipher_alg {
+	struct skcipher_alg skcipher;
+	struct caam_alg_entry caam;
+	bool registered;
+};
+
 /*
  * per-session context
  */
 struct caam_ctx {
 	u32 sh_desc_enc[DESC_MAX_USED_LEN];
 	u32 sh_desc_dec[DESC_MAX_USED_LEN];
-	u32 sh_desc_givenc[DESC_MAX_USED_LEN];
 	u8 key[CAAM_MAX_KEY_SIZE];
 	dma_addr_t sh_desc_enc_dma;
 	dma_addr_t sh_desc_dec_dma;
-	dma_addr_t sh_desc_givenc_dma;
 	dma_addr_t key_dma;
 	enum dma_data_direction dir;
 	struct device *jrdev;
@@ -648,20 +651,20 @@ static int rfc4543_setkey(struct crypto_aead *aead,
 	return rfc4543_set_sh_desc(aead);
 }
 
-static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
-			     const u8 *key, unsigned int keylen)
+static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
+			   unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(ablkcipher);
-	const char *alg_name = crypto_tfm_alg_name(tfm);
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_skcipher_alg *alg =
+		container_of(crypto_skcipher_alg(skcipher), typeof(*alg),
+			     skcipher);
 	struct device *jrdev = ctx->jrdev;
-	unsigned int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
+	unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
 	u32 *desc;
 	u32 ctx1_iv_off = 0;
 	const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) ==
 			       OP_ALG_AAI_CTR_MOD128);
-	const bool is_rfc3686 = (ctr_mode &&
-				 (strstr(alg_name, "rfc3686") != NULL));
+	const bool is_rfc3686 = alg->caam.rfc3686;
 
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ",
@@ -689,40 +692,32 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 	ctx->cdata.key_virt = key;
 	ctx->cdata.key_inline = true;
 
-	/* ablkcipher_encrypt shared descriptor */
+	/* skcipher_encrypt shared descriptor */
 	desc = ctx->sh_desc_enc;
-	cnstr_shdsc_ablkcipher_encap(desc, &ctx->cdata, ivsize, is_rfc3686,
-				     ctx1_iv_off);
+	cnstr_shdsc_skcipher_encap(desc, &ctx->cdata, ivsize, is_rfc3686,
+				   ctx1_iv_off);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
 				   desc_bytes(desc), ctx->dir);
 
-	/* ablkcipher_decrypt shared descriptor */
+	/* skcipher_decrypt shared descriptor */
 	desc = ctx->sh_desc_dec;
-	cnstr_shdsc_ablkcipher_decap(desc, &ctx->cdata, ivsize, is_rfc3686,
-				     ctx1_iv_off);
+	cnstr_shdsc_skcipher_decap(desc, &ctx->cdata, ivsize, is_rfc3686,
+				   ctx1_iv_off);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
 				   desc_bytes(desc), ctx->dir);
 
-	/* ablkcipher_givencrypt shared descriptor */
-	desc = ctx->sh_desc_givenc;
-	cnstr_shdsc_ablkcipher_givencap(desc, &ctx->cdata, ivsize, is_rfc3686,
-					ctx1_iv_off);
-	dma_sync_single_for_device(jrdev, ctx->sh_desc_givenc_dma,
-				   desc_bytes(desc), ctx->dir);
-
 	return 0;
 }
 
-static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
-				 const u8 *key, unsigned int keylen)
+static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
+			       unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
 	struct device *jrdev = ctx->jrdev;
 	u32 *desc;
 
 	if (keylen != 2 * AES_MIN_KEY_SIZE  && keylen != 2 * AES_MAX_KEY_SIZE) {
-		crypto_ablkcipher_set_flags(ablkcipher,
-					    CRYPTO_TFM_RES_BAD_KEY_LEN);
+		crypto_skcipher_set_flags(skcipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		dev_err(jrdev, "key size mismatch\n");
 		return -EINVAL;
 	}
@@ -731,15 +726,15 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 	ctx->cdata.key_virt = key;
 	ctx->cdata.key_inline = true;
 
-	/* xts_ablkcipher_encrypt shared descriptor */
+	/* xts_skcipher_encrypt shared descriptor */
 	desc = ctx->sh_desc_enc;
-	cnstr_shdsc_xts_ablkcipher_encap(desc, &ctx->cdata);
+	cnstr_shdsc_xts_skcipher_encap(desc, &ctx->cdata);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
 				   desc_bytes(desc), ctx->dir);
 
-	/* xts_ablkcipher_decrypt shared descriptor */
+	/* xts_skcipher_decrypt shared descriptor */
 	desc = ctx->sh_desc_dec;
-	cnstr_shdsc_xts_ablkcipher_decap(desc, &ctx->cdata);
+	cnstr_shdsc_xts_skcipher_decap(desc, &ctx->cdata);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
 				   desc_bytes(desc), ctx->dir);
 
@@ -765,22 +760,20 @@ struct aead_edesc {
 };
 
 /*
- * ablkcipher_edesc - s/w-extended ablkcipher descriptor
+ * skcipher_edesc - s/w-extended skcipher descriptor
  * @src_nents: number of segments in input s/w scatterlist
  * @dst_nents: number of segments in output s/w scatterlist
  * @iv_dma: dma address of iv for checking continuity and link table
- * @iv_dir: DMA mapping direction for IV
  * @sec4_sg_bytes: length of dma mapped sec4_sg space
  * @sec4_sg_dma: bus physical mapped address of h/w link table
  * @sec4_sg: pointer to h/w link table
  * @hw_desc: the h/w job descriptor followed by any referenced link tables
  *	     and IV
  */
-struct ablkcipher_edesc {
+struct skcipher_edesc {
 	int src_nents;
 	int dst_nents;
 	dma_addr_t iv_dma;
-	enum dma_data_direction iv_dir;
 	int sec4_sg_bytes;
 	dma_addr_t sec4_sg_dma;
 	struct sec4_sg_entry *sec4_sg;
@@ -790,8 +783,7 @@ struct ablkcipher_edesc {
 static void caam_unmap(struct device *dev, struct scatterlist *src,
 		       struct scatterlist *dst, int src_nents,
 		       int dst_nents,
-		       dma_addr_t iv_dma, int ivsize,
-		       enum dma_data_direction iv_dir, dma_addr_t sec4_sg_dma,
+		       dma_addr_t iv_dma, int ivsize, dma_addr_t sec4_sg_dma,
 		       int sec4_sg_bytes)
 {
 	if (dst != src) {
@@ -803,7 +795,7 @@ static void caam_unmap(struct device *dev, struct scatterlist *src,
 	}
 
 	if (iv_dma)
-		dma_unmap_single(dev, iv_dma, ivsize, iv_dir);
+		dma_unmap_single(dev, iv_dma, ivsize, DMA_TO_DEVICE);
 	if (sec4_sg_bytes)
 		dma_unmap_single(dev, sec4_sg_dma, sec4_sg_bytes,
 				 DMA_TO_DEVICE);
@@ -814,20 +806,19 @@ static void aead_unmap(struct device *dev,
 		       struct aead_request *req)
 {
 	caam_unmap(dev, req->src, req->dst,
-		   edesc->src_nents, edesc->dst_nents, 0, 0, DMA_NONE,
+		   edesc->src_nents, edesc->dst_nents, 0, 0,
 		   edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
 }
 
-static void ablkcipher_unmap(struct device *dev,
-			     struct ablkcipher_edesc *edesc,
-			     struct ablkcipher_request *req)
+static void skcipher_unmap(struct device *dev, struct skcipher_edesc *edesc,
+			   struct skcipher_request *req)
 {
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	int ivsize = crypto_skcipher_ivsize(skcipher);
 
 	caam_unmap(dev, req->src, req->dst,
 		   edesc->src_nents, edesc->dst_nents,
-		   edesc->iv_dma, ivsize, edesc->iv_dir,
+		   edesc->iv_dma, ivsize,
 		   edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
 }
 
@@ -881,87 +872,74 @@ static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
 	aead_request_complete(req, err);
 }
 
-static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
-				   void *context)
+static void skcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
+				  void *context)
 {
-	struct ablkcipher_request *req = context;
-	struct ablkcipher_edesc *edesc;
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
+	struct skcipher_request *req = context;
+	struct skcipher_edesc *edesc;
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	int ivsize = crypto_skcipher_ivsize(skcipher);
 
 #ifdef DEBUG
 	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 #endif
 
-	edesc = container_of(desc, struct ablkcipher_edesc, hw_desc[0]);
+	edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]);
 
 	if (err)
 		caam_jr_strstatus(jrdev, err);
 
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
+		       DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
 		       edesc->src_nents > 1 ? 100 : ivsize, 1);
 #endif
 	caam_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",
 		     DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
-		     edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
+		     edesc->dst_nents > 1 ? 100 : req->cryptlen, 1);
 
-	ablkcipher_unmap(jrdev, edesc, req);
+	skcipher_unmap(jrdev, edesc, req);
 
 	/*
-	 * The crypto API expects us to set the IV (req->info) to the last
+	 * The crypto API expects us to set the IV (req->iv) to the last
 	 * ciphertext block. This is used e.g. by the CTS mode.
 	 */
-	scatterwalk_map_and_copy(req->info, req->dst, req->nbytes - ivsize,
+	scatterwalk_map_and_copy(req->iv, req->dst, req->cryptlen - ivsize,
 				 ivsize, 0);
 
-	/* In case initial IV was generated, copy it in GIVCIPHER request */
-	if (edesc->iv_dir == DMA_FROM_DEVICE) {
-		u8 *iv;
-		struct skcipher_givcrypt_request *greq;
-
-		greq = container_of(req, struct skcipher_givcrypt_request,
-				    creq);
-		iv = (u8 *)edesc->hw_desc + desc_bytes(edesc->hw_desc) +
-		     edesc->sec4_sg_bytes;
-		memcpy(greq->giv, iv, ivsize);
-	}
-
 	kfree(edesc);
 
-	ablkcipher_request_complete(req, err);
+	skcipher_request_complete(req, err);
 }
 
-static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
-				    void *context)
+static void skcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
+				  void *context)
 {
-	struct ablkcipher_request *req = context;
-	struct ablkcipher_edesc *edesc;
+	struct skcipher_request *req = context;
+	struct skcipher_edesc *edesc;
 #ifdef DEBUG
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	int ivsize = crypto_skcipher_ivsize(skcipher);
 
 	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 #endif
 
-	edesc = container_of(desc, struct ablkcipher_edesc, hw_desc[0]);
+	edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]);
 	if (err)
 		caam_jr_strstatus(jrdev, err);
 
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
-		       ivsize, 1);
+		       DUMP_PREFIX_ADDRESS, 16, 4, req->iv, ivsize, 1);
 #endif
 	caam_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",
 		     DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
-		     edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
+		     edesc->dst_nents > 1 ? 100 : req->cryptlen, 1);
 
-	ablkcipher_unmap(jrdev, edesc, req);
+	skcipher_unmap(jrdev, edesc, req);
 	kfree(edesc);
 
-	ablkcipher_request_complete(req, err);
+	skcipher_request_complete(req, err);
 }
 
 /*
@@ -1103,34 +1081,38 @@ static void init_authenc_job(struct aead_request *req,
 }
 
 /*
- * Fill in ablkcipher job descriptor
+ * Fill in skcipher job descriptor
  */
-static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
-				struct ablkcipher_edesc *edesc,
-				struct ablkcipher_request *req)
+static void init_skcipher_job(struct skcipher_request *req,
+			      struct skcipher_edesc *edesc,
+			      const bool encrypt)
 {
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	int ivsize = crypto_skcipher_ivsize(skcipher);
 	u32 *desc = edesc->hw_desc;
+	u32 *sh_desc;
 	u32 out_options = 0;
-	dma_addr_t dst_dma;
+	dma_addr_t dst_dma, ptr;
 	int len;
 
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
-		       ivsize, 1);
-	pr_err("asked=%d, nbytes%d\n",
-	       (int)edesc->src_nents > 1 ? 100 : req->nbytes, req->nbytes);
+		       DUMP_PREFIX_ADDRESS, 16, 4, req->iv, ivsize, 1);
+	pr_err("asked=%d, cryptlen%d\n",
+	       (int)edesc->src_nents > 1 ? 100 : req->cryptlen, req->cryptlen);
 #endif
 	caam_dump_sg(KERN_ERR, "src    @" __stringify(__LINE__)": ",
 		     DUMP_PREFIX_ADDRESS, 16, 4, req->src,
-		     edesc->src_nents > 1 ? 100 : req->nbytes, 1);
+		     edesc->src_nents > 1 ? 100 : req->cryptlen, 1);
+
+	sh_desc = encrypt ? ctx->sh_desc_enc : ctx->sh_desc_dec;
+	ptr = encrypt ? ctx->sh_desc_enc_dma : ctx->sh_desc_dec_dma;
 
 	len = desc_len(sh_desc);
 	init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE);
 
-	append_seq_in_ptr(desc, edesc->sec4_sg_dma, req->nbytes + ivsize,
+	append_seq_in_ptr(desc, edesc->sec4_sg_dma, req->cryptlen + ivsize,
 			  LDST_SGF);
 
 	if (likely(req->src == req->dst)) {
@@ -1145,48 +1127,7 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
 			out_options = LDST_SGF;
 		}
 	}
-	append_seq_out_ptr(desc, dst_dma, req->nbytes, out_options);
-}
-
-/*
- * Fill in ablkcipher givencrypt job descriptor
- */
-static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr,
-				    struct ablkcipher_edesc *edesc,
-				    struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
-	u32 *desc = edesc->hw_desc;
-	u32 in_options;
-	dma_addr_t dst_dma, src_dma;
-	int len, sec4_sg_index = 0;
-
-#ifdef DEBUG
-	print_hex_dump(KERN_ERR, "presciv@" __stringify(__LINE__) ": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
-		       ivsize, 1);
-#endif
-	caam_dump_sg(KERN_ERR, "src    @" __stringify(__LINE__) ": ",
-		     DUMP_PREFIX_ADDRESS, 16, 4, req->src,
-		     edesc->src_nents > 1 ? 100 : req->nbytes, 1);
-
-	len = desc_len(sh_desc);
-	init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE);
-
-	if (edesc->src_nents == 1) {
-		src_dma = sg_dma_address(req->src);
-		in_options = 0;
-	} else {
-		src_dma = edesc->sec4_sg_dma;
-		sec4_sg_index += edesc->src_nents;
-		in_options = LDST_SGF;
-	}
-	append_seq_in_ptr(desc, src_dma, req->nbytes, in_options);
-
-	dst_dma = edesc->sec4_sg_dma + sec4_sg_index *
-		  sizeof(struct sec4_sg_entry);
-	append_seq_out_ptr(desc, dst_dma, req->nbytes + ivsize, LDST_SGF);
+	append_seq_out_ptr(desc, dst_dma, req->cryptlen, out_options);
 }
 
 /*
@@ -1275,7 +1216,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 			GFP_DMA | flags);
 	if (!edesc) {
 		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
-			   0, DMA_NONE, 0, 0);
+			   0, 0, 0);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -1476,35 +1417,35 @@ static int aead_decrypt(struct aead_request *req)
 }
 
 /*
- * allocate and map the ablkcipher extended descriptor for ablkcipher
+ * allocate and map the skcipher extended descriptor for skcipher
  */
-static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
-						       *req, int desc_bytes)
+static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
+						   int desc_bytes)
 {
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
 	struct device *jrdev = ctx->jrdev;
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
-	struct ablkcipher_edesc *edesc;
+	struct skcipher_edesc *edesc;
 	dma_addr_t iv_dma;
 	u8 *iv;
-	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
+	int ivsize = crypto_skcipher_ivsize(skcipher);
 	int dst_sg_idx, sec4_sg_ents, sec4_sg_bytes;
 
-	src_nents = sg_nents_for_len(req->src, req->nbytes);
+	src_nents = sg_nents_for_len(req->src, req->cryptlen);
 	if (unlikely(src_nents < 0)) {
 		dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n",
-			req->nbytes);
+			req->cryptlen);
 		return ERR_PTR(src_nents);
 	}
 
 	if (req->dst != req->src) {
-		dst_nents = sg_nents_for_len(req->dst, req->nbytes);
+		dst_nents = sg_nents_for_len(req->dst, req->cryptlen);
 		if (unlikely(dst_nents < 0)) {
 			dev_err(jrdev, "Insufficient bytes (%d) in dst S/G\n",
-				req->nbytes);
+				req->cryptlen);
 			return ERR_PTR(dst_nents);
 		}
 	}
@@ -1546,7 +1487,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	if (!edesc) {
 		dev_err(jrdev, "could not allocate extended descriptor\n");
 		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
-			   0, DMA_NONE, 0, 0);
+			   0, 0, 0);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -1555,17 +1496,16 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	edesc->sec4_sg_bytes = sec4_sg_bytes;
 	edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc +
 						  desc_bytes);
-	edesc->iv_dir = DMA_TO_DEVICE;
 
 	/* Make sure IV is located in a DMAable area */
 	iv = (u8 *)edesc->hw_desc + desc_bytes + sec4_sg_bytes;
-	memcpy(iv, req->info, ivsize);
+	memcpy(iv, req->iv, ivsize);
 
 	iv_dma = dma_map_single(jrdev, iv, ivsize, DMA_TO_DEVICE);
 	if (dma_mapping_error(jrdev, iv_dma)) {
 		dev_err(jrdev, "unable to map IV\n");
 		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
-			   0, DMA_NONE, 0, 0);
+			   0, 0, 0);
 		kfree(edesc);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1583,7 +1523,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
 		dev_err(jrdev, "unable to map S/G table\n");
 		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
-			   iv_dma, ivsize, DMA_TO_DEVICE, 0, 0);
+			   iv_dma, ivsize, 0, 0);
 		kfree(edesc);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1591,7 +1531,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	edesc->iv_dma = iv_dma;
 
 #ifdef DEBUG
-	print_hex_dump(KERN_ERR, "ablkcipher sec4_sg@"__stringify(__LINE__)": ",
+	print_hex_dump(KERN_ERR, "skcipher sec4_sg@" __stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, edesc->sec4_sg,
 		       sec4_sg_bytes, 1);
 #endif
@@ -1599,362 +1539,187 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	return edesc;
 }
 
-static int ablkcipher_encrypt(struct ablkcipher_request *req)
+static int skcipher_encrypt(struct skcipher_request *req)
 {
-	struct ablkcipher_edesc *edesc;
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
+	struct skcipher_edesc *edesc;
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
 	struct device *jrdev = ctx->jrdev;
 	u32 *desc;
 	int ret = 0;
 
 	/* allocate extended descriptor */
-	edesc = ablkcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
+	edesc = skcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
 
 	/* Create and submit job descriptor*/
-	init_ablkcipher_job(ctx->sh_desc_enc, ctx->sh_desc_enc_dma, edesc, req);
+	init_skcipher_job(req, edesc, true);
 #ifdef DEBUG
-	print_hex_dump(KERN_ERR, "ablkcipher jobdesc@"__stringify(__LINE__)": ",
+	print_hex_dump(KERN_ERR, "skcipher jobdesc@" __stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
 		       desc_bytes(edesc->hw_desc), 1);
 #endif
 	desc = edesc->hw_desc;
-	ret = caam_jr_enqueue(jrdev, desc, ablkcipher_encrypt_done, req);
+	ret = caam_jr_enqueue(jrdev, desc, skcipher_encrypt_done, req);
 
 	if (!ret) {
 		ret = -EINPROGRESS;
 	} else {
-		ablkcipher_unmap(jrdev, edesc, req);
+		skcipher_unmap(jrdev, edesc, req);
 		kfree(edesc);
 	}
 
 	return ret;
 }
 
-static int ablkcipher_decrypt(struct ablkcipher_request *req)
+static int skcipher_decrypt(struct skcipher_request *req)
 {
-	struct ablkcipher_edesc *edesc;
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
-	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
+	struct skcipher_edesc *edesc;
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	int ivsize = crypto_skcipher_ivsize(skcipher);
 	struct device *jrdev = ctx->jrdev;
 	u32 *desc;
 	int ret = 0;
 
 	/* allocate extended descriptor */
-	edesc = ablkcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
+	edesc = skcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
 
 	/*
-	 * The crypto API expects us to set the IV (req->info) to the last
+	 * The crypto API expects us to set the IV (req->iv) to the last
 	 * ciphertext block.
 	 */
-	scatterwalk_map_and_copy(req->info, req->src, req->nbytes - ivsize,
+	scatterwalk_map_and_copy(req->iv, req->src, req->cryptlen - ivsize,
 				 ivsize, 0);
 
 	/* Create and submit job descriptor*/
-	init_ablkcipher_job(ctx->sh_desc_dec, ctx->sh_desc_dec_dma, edesc, req);
+	init_skcipher_job(req, edesc, false);
 	desc = edesc->hw_desc;
 #ifdef DEBUG
-	print_hex_dump(KERN_ERR, "ablkcipher jobdesc@"__stringify(__LINE__)": ",
+	print_hex_dump(KERN_ERR, "skcipher jobdesc@" __stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
 		       desc_bytes(edesc->hw_desc), 1);
 #endif
 
-	ret = caam_jr_enqueue(jrdev, desc, ablkcipher_decrypt_done, req);
+	ret = caam_jr_enqueue(jrdev, desc, skcipher_decrypt_done, req);
 	if (!ret) {
 		ret = -EINPROGRESS;
 	} else {
-		ablkcipher_unmap(jrdev, edesc, req);
+		skcipher_unmap(jrdev, edesc, req);
 		kfree(edesc);
 	}
 
 	return ret;
 }
 
-/*
- * allocate and map the ablkcipher extended descriptor
- * for ablkcipher givencrypt
- */
-static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
-				struct skcipher_givcrypt_request *greq,
-				int desc_bytes)
-{
-	struct ablkcipher_request *req = &greq->creq;
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
-	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags &  CRYPTO_TFM_REQ_MAY_SLEEP) ?
-		       GFP_KERNEL : GFP_ATOMIC;
-	int src_nents, mapped_src_nents, dst_nents, mapped_dst_nents;
-	struct ablkcipher_edesc *edesc;
-	dma_addr_t iv_dma;
-	u8 *iv;
-	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
-	int dst_sg_idx, sec4_sg_ents, sec4_sg_bytes;
-
-	src_nents = sg_nents_for_len(req->src, req->nbytes);
-	if (unlikely(src_nents < 0)) {
-		dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n",
-			req->nbytes);
-		return ERR_PTR(src_nents);
-	}
-
-	if (likely(req->src == req->dst)) {
-		mapped_src_nents = dma_map_sg(jrdev, req->src, src_nents,
-					      DMA_BIDIRECTIONAL);
-		if (unlikely(!mapped_src_nents)) {
-			dev_err(jrdev, "unable to map source\n");
-			return ERR_PTR(-ENOMEM);
-		}
-
-		dst_nents = src_nents;
-		mapped_dst_nents = src_nents;
-	} else {
-		mapped_src_nents = dma_map_sg(jrdev, req->src, src_nents,
-					      DMA_TO_DEVICE);
-		if (unlikely(!mapped_src_nents)) {
-			dev_err(jrdev, "unable to map source\n");
-			return ERR_PTR(-ENOMEM);
-		}
-
-		dst_nents = sg_nents_for_len(req->dst, req->nbytes);
-		if (unlikely(dst_nents < 0)) {
-			dev_err(jrdev, "Insufficient bytes (%d) in dst S/G\n",
-				req->nbytes);
-			return ERR_PTR(dst_nents);
-		}
-
-		mapped_dst_nents = dma_map_sg(jrdev, req->dst, dst_nents,
-					      DMA_FROM_DEVICE);
-		if (unlikely(!mapped_dst_nents)) {
-			dev_err(jrdev, "unable to map destination\n");
-			dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
-			return ERR_PTR(-ENOMEM);
-		}
-	}
-
-	sec4_sg_ents = mapped_src_nents > 1 ? mapped_src_nents : 0;
-	dst_sg_idx = sec4_sg_ents;
-	sec4_sg_ents += 1 + mapped_dst_nents;
-
-	/*
-	 * allocate space for base edesc and hw desc commands, link tables, IV
-	 */
-	sec4_sg_bytes = sec4_sg_ents * sizeof(struct sec4_sg_entry);
-	edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes + ivsize,
-			GFP_DMA | flags);
-	if (!edesc) {
-		dev_err(jrdev, "could not allocate extended descriptor\n");
-		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
-			   0, DMA_NONE, 0, 0);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	edesc->src_nents = src_nents;
-	edesc->dst_nents = dst_nents;
-	edesc->sec4_sg_bytes = sec4_sg_bytes;
-	edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc +
-						  desc_bytes);
-	edesc->iv_dir = DMA_FROM_DEVICE;
-
-	/* Make sure IV is located in a DMAable area */
-	iv = (u8 *)edesc->hw_desc + desc_bytes + sec4_sg_bytes;
-	iv_dma = dma_map_single(jrdev, iv, ivsize, DMA_FROM_DEVICE);
-	if (dma_mapping_error(jrdev, iv_dma)) {
-		dev_err(jrdev, "unable to map IV\n");
-		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
-			   0, DMA_NONE, 0, 0);
-		kfree(edesc);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	if (mapped_src_nents > 1)
-		sg_to_sec4_sg_last(req->src, mapped_src_nents, edesc->sec4_sg,
-				   0);
-
-	dma_to_sec4_sg_one(edesc->sec4_sg + dst_sg_idx, iv_dma, ivsize, 0);
-	sg_to_sec4_sg_last(req->dst, mapped_dst_nents, edesc->sec4_sg +
-			   dst_sg_idx + 1, 0);
-
-	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
-					    sec4_sg_bytes, DMA_TO_DEVICE);
-	if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
-		dev_err(jrdev, "unable to map S/G table\n");
-		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
-			   iv_dma, ivsize, DMA_FROM_DEVICE, 0, 0);
-		kfree(edesc);
-		return ERR_PTR(-ENOMEM);
-	}
-	edesc->iv_dma = iv_dma;
-
-#ifdef DEBUG
-	print_hex_dump(KERN_ERR,
-		       "ablkcipher sec4_sg@" __stringify(__LINE__) ": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, edesc->sec4_sg,
-		       sec4_sg_bytes, 1);
-#endif
-
-	return edesc;
-}
-
-static int ablkcipher_givencrypt(struct skcipher_givcrypt_request *creq)
-{
-	struct ablkcipher_request *req = &creq->creq;
-	struct ablkcipher_edesc *edesc;
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
-	struct device *jrdev = ctx->jrdev;
-	u32 *desc;
-	int ret = 0;
-
-	/* allocate extended descriptor */
-	edesc = ablkcipher_giv_edesc_alloc(creq, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
-	if (IS_ERR(edesc))
-		return PTR_ERR(edesc);
-
-	/* Create and submit job descriptor*/
-	init_ablkcipher_giv_job(ctx->sh_desc_givenc, ctx->sh_desc_givenc_dma,
-				edesc, req);
-#ifdef DEBUG
-	print_hex_dump(KERN_ERR,
-		       "ablkcipher jobdesc@" __stringify(__LINE__) ": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
-		       desc_bytes(edesc->hw_desc), 1);
-#endif
-	desc = edesc->hw_desc;
-	ret = caam_jr_enqueue(jrdev, desc, ablkcipher_encrypt_done, req);
-
-	if (!ret) {
-		ret = -EINPROGRESS;
-	} else {
-		ablkcipher_unmap(jrdev, edesc, req);
-		kfree(edesc);
-	}
-
-	return ret;
-}
-
-#define template_aead		template_u.aead
-#define template_ablkcipher	template_u.ablkcipher
-struct caam_alg_template {
-	char name[CRYPTO_MAX_ALG_NAME];
-	char driver_name[CRYPTO_MAX_ALG_NAME];
-	unsigned int blocksize;
-	u32 type;
-	union {
-		struct ablkcipher_alg ablkcipher;
-	} template_u;
-	u32 class1_alg_type;
-	u32 class2_alg_type;
-};
-
-static struct caam_alg_template driver_algs[] = {
-	/* ablkcipher descriptor */
+static struct caam_skcipher_alg driver_algs[] = {
 	{
-		.name = "cbc(aes)",
-		.driver_name = "cbc-aes-caam",
-		.blocksize = AES_BLOCK_SIZE,
-		.type = CRYPTO_ALG_TYPE_GIVCIPHER,
-		.template_ablkcipher = {
-			.setkey = ablkcipher_setkey,
-			.encrypt = ablkcipher_encrypt,
-			.decrypt = ablkcipher_decrypt,
-			.givencrypt = ablkcipher_givencrypt,
-			.geniv = "<built-in>",
+		.skcipher = {
+			.base = {
+				.cra_name = "cbc(aes)",
+				.cra_driver_name = "cbc-aes-caam",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE,
 			.max_keysize = AES_MAX_KEY_SIZE,
 			.ivsize = AES_BLOCK_SIZE,
-			},
-		.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+		},
+		.caam.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
 	},
 	{
-		.name = "cbc(des3_ede)",
-		.driver_name = "cbc-3des-caam",
-		.blocksize = DES3_EDE_BLOCK_SIZE,
-		.type = CRYPTO_ALG_TYPE_GIVCIPHER,
-		.template_ablkcipher = {
-			.setkey = ablkcipher_setkey,
-			.encrypt = ablkcipher_encrypt,
-			.decrypt = ablkcipher_decrypt,
-			.givencrypt = ablkcipher_givencrypt,
-			.geniv = "<built-in>",
+		.skcipher = {
+			.base = {
+				.cra_name = "cbc(des3_ede)",
+				.cra_driver_name = "cbc-3des-caam",
+				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			},
+			.setkey = skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
 			.min_keysize = DES3_EDE_KEY_SIZE,
 			.max_keysize = DES3_EDE_KEY_SIZE,
 			.ivsize = DES3_EDE_BLOCK_SIZE,
-			},
-		.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+		},
+		.caam.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
 	},
 	{
-		.name = "cbc(des)",
-		.driver_name = "cbc-des-caam",
-		.blocksize = DES_BLOCK_SIZE,
-		.type = CRYPTO_ALG_TYPE_GIVCIPHER,
-		.template_ablkcipher = {
-			.setkey = ablkcipher_setkey,
-			.encrypt = ablkcipher_encrypt,
-			.decrypt = ablkcipher_decrypt,
-			.givencrypt = ablkcipher_givencrypt,
-			.geniv = "<built-in>",
+		.skcipher = {
+			.base = {
+				.cra_name = "cbc(des)",
+				.cra_driver_name = "cbc-des-caam",
+				.cra_blocksize = DES_BLOCK_SIZE,
+			},
+			.setkey = skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
 			.min_keysize = DES_KEY_SIZE,
 			.max_keysize = DES_KEY_SIZE,
 			.ivsize = DES_BLOCK_SIZE,
-			},
-		.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+		},
+		.caam.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
 	},
 	{
-		.name = "ctr(aes)",
-		.driver_name = "ctr-aes-caam",
-		.blocksize = 1,
-		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.template_ablkcipher = {
-			.setkey = ablkcipher_setkey,
-			.encrypt = ablkcipher_encrypt,
-			.decrypt = ablkcipher_decrypt,
-			.geniv = "chainiv",
+		.skcipher = {
+			.base = {
+				.cra_name = "ctr(aes)",
+				.cra_driver_name = "ctr-aes-caam",
+				.cra_blocksize = 1,
+			},
+			.setkey = skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE,
 			.max_keysize = AES_MAX_KEY_SIZE,
 			.ivsize = AES_BLOCK_SIZE,
-			},
-		.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CTR_MOD128,
+			.chunksize = AES_BLOCK_SIZE,
+		},
+		.caam.class1_alg_type = OP_ALG_ALGSEL_AES |
+					OP_ALG_AAI_CTR_MOD128,
 	},
 	{
-		.name = "rfc3686(ctr(aes))",
-		.driver_name = "rfc3686-ctr-aes-caam",
-		.blocksize = 1,
-		.type = CRYPTO_ALG_TYPE_GIVCIPHER,
-		.template_ablkcipher = {
-			.setkey = ablkcipher_setkey,
-			.encrypt = ablkcipher_encrypt,
-			.decrypt = ablkcipher_decrypt,
-			.givencrypt = ablkcipher_givencrypt,
-			.geniv = "<built-in>",
+		.skcipher = {
+			.base = {
+				.cra_name = "rfc3686(ctr(aes))",
+				.cra_driver_name = "rfc3686-ctr-aes-caam",
+				.cra_blocksize = 1,
+			},
+			.setkey = skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE +
 				       CTR_RFC3686_NONCE_SIZE,
 			.max_keysize = AES_MAX_KEY_SIZE +
 				       CTR_RFC3686_NONCE_SIZE,
 			.ivsize = CTR_RFC3686_IV_SIZE,
-			},
-		.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CTR_MOD128,
+			.chunksize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES |
+					   OP_ALG_AAI_CTR_MOD128,
+			.rfc3686 = true,
+		},
 	},
 	{
-		.name = "xts(aes)",
-		.driver_name = "xts-aes-caam",
-		.blocksize = AES_BLOCK_SIZE,
-		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.template_ablkcipher = {
-			.setkey = xts_ablkcipher_setkey,
-			.encrypt = ablkcipher_encrypt,
-			.decrypt = ablkcipher_decrypt,
-			.geniv = "eseqiv",
+		.skcipher = {
+			.base = {
+				.cra_name = "xts(aes)",
+				.cra_driver_name = "xts-aes-caam",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = xts_skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
 			.min_keysize = 2 * AES_MIN_KEY_SIZE,
 			.max_keysize = 2 * AES_MAX_KEY_SIZE,
 			.ivsize = AES_BLOCK_SIZE,
-			},
-		.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_XTS,
+		},
+		.caam.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_XTS,
 	},
 };
 
@@ -3239,12 +3004,6 @@ static struct caam_aead_alg driver_aeads[] = {
 	},
 };
 
-struct caam_crypto_alg {
-	struct crypto_alg crypto_alg;
-	struct list_head entry;
-	struct caam_alg_entry caam;
-};
-
 static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam,
 			    bool uses_dkp)
 {
@@ -3276,8 +3035,6 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam,
 	ctx->sh_desc_enc_dma = dma_addr;
 	ctx->sh_desc_dec_dma = dma_addr + offsetof(struct caam_ctx,
 						   sh_desc_dec);
-	ctx->sh_desc_givenc_dma = dma_addr + offsetof(struct caam_ctx,
-						      sh_desc_givenc);
 	ctx->key_dma = dma_addr + offsetof(struct caam_ctx, key);
 
 	/* copy descriptor header template value */
@@ -3287,14 +3044,14 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam,
 	return 0;
 }
 
-static int caam_cra_init(struct crypto_tfm *tfm)
+static int caam_cra_init(struct crypto_skcipher *tfm)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
-	struct caam_crypto_alg *caam_alg =
-		 container_of(alg, struct caam_crypto_alg, crypto_alg);
-	struct caam_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct caam_skcipher_alg *caam_alg =
+		container_of(alg, typeof(*caam_alg), skcipher);
 
-	return caam_init_common(ctx, &caam_alg->caam, false);
+	return caam_init_common(crypto_skcipher_ctx(tfm), &caam_alg->caam,
+				false);
 }
 
 static int caam_aead_init(struct crypto_aead *tfm)
@@ -3316,9 +3073,9 @@ static void caam_exit_common(struct caam_ctx *ctx)
 	caam_jr_free(ctx->jrdev);
 }
 
-static void caam_cra_exit(struct crypto_tfm *tfm)
+static void caam_cra_exit(struct crypto_skcipher *tfm)
 {
-	caam_exit_common(crypto_tfm_ctx(tfm));
+	caam_exit_common(crypto_skcipher_ctx(tfm));
 }
 
 static void caam_aead_exit(struct crypto_aead *tfm)
@@ -3328,8 +3085,6 @@ static void caam_aead_exit(struct crypto_aead *tfm)
 
 static void __exit caam_algapi_exit(void)
 {
-
-	struct caam_crypto_alg *t_alg, *n;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(driver_aeads); i++) {
@@ -3339,57 +3094,25 @@ static void __exit caam_algapi_exit(void)
 			crypto_unregister_aead(&t_alg->aead);
 	}
 
-	if (!alg_list.next)
-		return;
+	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
+		struct caam_skcipher_alg *t_alg = driver_algs + i;
 
-	list_for_each_entry_safe(t_alg, n, &alg_list, entry) {
-		crypto_unregister_alg(&t_alg->crypto_alg);
-		list_del(&t_alg->entry);
-		kfree(t_alg);
+		if (t_alg->registered)
+			crypto_unregister_skcipher(&t_alg->skcipher);
 	}
 }
 
-static struct caam_crypto_alg *caam_alg_alloc(struct caam_alg_template
-					      *template)
+static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg)
 {
-	struct caam_crypto_alg *t_alg;
-	struct crypto_alg *alg;
+	struct skcipher_alg *alg = &t_alg->skcipher;
 
-	t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL);
-	if (!t_alg) {
-		pr_err("failed to allocate t_alg\n");
-		return ERR_PTR(-ENOMEM);
-	}
-
-	alg = &t_alg->crypto_alg;
-
-	snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name);
-	snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
-		 template->driver_name);
-	alg->cra_module = THIS_MODULE;
-	alg->cra_init = caam_cra_init;
-	alg->cra_exit = caam_cra_exit;
-	alg->cra_priority = CAAM_CRA_PRIORITY;
-	alg->cra_blocksize = template->blocksize;
-	alg->cra_alignmask = 0;
-	alg->cra_ctxsize = sizeof(struct caam_ctx);
-	alg->cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY |
-			 template->type;
-	switch (template->type) {
-	case CRYPTO_ALG_TYPE_GIVCIPHER:
-		alg->cra_type = &crypto_givcipher_type;
-		alg->cra_ablkcipher = template->template_ablkcipher;
-		break;
-	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		alg->cra_type = &crypto_ablkcipher_type;
-		alg->cra_ablkcipher = template->template_ablkcipher;
-		break;
-	}
-
-	t_alg->caam.class1_alg_type = template->class1_alg_type;
-	t_alg->caam.class2_alg_type = template->class2_alg_type;
+	alg->base.cra_module = THIS_MODULE;
+	alg->base.cra_priority = CAAM_CRA_PRIORITY;
+	alg->base.cra_ctxsize = sizeof(struct caam_ctx);
+	alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
 
-	return t_alg;
+	alg->init = caam_cra_init;
+	alg->exit = caam_cra_exit;
 }
 
 static void caam_aead_alg_init(struct caam_aead_alg *t_alg)
@@ -3441,8 +3164,6 @@ static int __init caam_algapi_init(void)
 		return -ENODEV;
 
 
-	INIT_LIST_HEAD(&alg_list);
-
 	/*
 	 * Register crypto algorithms the device supports.
 	 * First, detect presence and attributes of DES, AES, and MD blocks.
@@ -3458,9 +3179,8 @@ static int __init caam_algapi_init(void)
 		md_limit = SHA256_DIGEST_SIZE;
 
 	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
-		struct caam_crypto_alg *t_alg;
-		struct caam_alg_template *alg = driver_algs + i;
-		u32 alg_sel = alg->class1_alg_type & OP_ALG_ALGSEL_MASK;
+		struct caam_skcipher_alg *t_alg = driver_algs + i;
+		u32 alg_sel = t_alg->caam.class1_alg_type & OP_ALG_ALGSEL_MASK;
 
 		/* Skip DES algorithms if not supported by device */
 		if (!des_inst &&
@@ -3477,26 +3197,20 @@ static int __init caam_algapi_init(void)
 		 * on LP devices.
 		 */
 		if ((cha_vid & CHA_ID_LS_AES_MASK) == CHA_ID_LS_AES_LP)
-			if ((alg->class1_alg_type & OP_ALG_AAI_MASK) ==
+			if ((t_alg->caam.class1_alg_type & OP_ALG_AAI_MASK) ==
 			     OP_ALG_AAI_XTS)
 				continue;
 
-		t_alg = caam_alg_alloc(alg);
-		if (IS_ERR(t_alg)) {
-			err = PTR_ERR(t_alg);
-			pr_warn("%s alg allocation failed\n", alg->driver_name);
-			continue;
-		}
+		caam_skcipher_alg_init(t_alg);
 
-		err = crypto_register_alg(&t_alg->crypto_alg);
+		err = crypto_register_skcipher(&t_alg->skcipher);
 		if (err) {
 			pr_warn("%s alg registration failed\n",
-				t_alg->crypto_alg.cra_driver_name);
-			kfree(t_alg);
+				t_alg->skcipher.base.cra_driver_name);
 			continue;
 		}
 
-		list_add_tail(&t_alg->entry, &alg_list);
+		t_alg->registered = true;
 		registered = true;
 	}
 
diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c
index a408edd84f34..1a6f0da14106 100644
--- a/drivers/crypto/caam/caamalg_desc.c
+++ b/drivers/crypto/caam/caamalg_desc.c
@@ -1,7 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
- * Shared descriptors for aead, ablkcipher algorithms
+ * Shared descriptors for aead, skcipher algorithms
  *
- * Copyright 2016 NXP
+ * Copyright 2016-2018 NXP
  */
 
 #include "compat.h"
@@ -1212,11 +1213,8 @@ void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
 }
 EXPORT_SYMBOL(cnstr_shdsc_rfc4543_decap);
 
-/*
- * For ablkcipher encrypt and decrypt, read from req->src and
- * write to req->dst
- */
-static inline void ablkcipher_append_src_dst(u32 *desc)
+/* For skcipher encrypt and decrypt, read from req->src and write to req->dst */
+static inline void skcipher_append_src_dst(u32 *desc)
 {
 	append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
 	append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
@@ -1226,7 +1224,7 @@ static inline void ablkcipher_append_src_dst(u32 *desc)
 }
 
 /**
- * cnstr_shdsc_ablkcipher_encap - ablkcipher encapsulation shared descriptor
+ * cnstr_shdsc_skcipher_encap - skcipher encapsulation shared descriptor
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed
@@ -1235,9 +1233,9 @@ static inline void ablkcipher_append_src_dst(u32 *desc)
  * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template
  * @ctx1_iv_off: IV offset in CONTEXT1 register
  */
-void cnstr_shdsc_ablkcipher_encap(u32 * const desc, struct alginfo *cdata,
-				  unsigned int ivsize, const bool is_rfc3686,
-				  const u32 ctx1_iv_off)
+void cnstr_shdsc_skcipher_encap(u32 * const desc, struct alginfo *cdata,
+				unsigned int ivsize, const bool is_rfc3686,
+				const u32 ctx1_iv_off)
 {
 	u32 *key_jump_cmd;
 
@@ -1280,18 +1278,18 @@ void cnstr_shdsc_ablkcipher_encap(u32 * const desc, struct alginfo *cdata,
 			 OP_ALG_ENCRYPT);
 
 	/* Perform operation */
-	ablkcipher_append_src_dst(desc);
+	skcipher_append_src_dst(desc);
 
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR,
-		       "ablkcipher enc shdesc@" __stringify(__LINE__)": ",
+		       "skcipher enc shdesc@" __stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
 #endif
 }
-EXPORT_SYMBOL(cnstr_shdsc_ablkcipher_encap);
+EXPORT_SYMBOL(cnstr_shdsc_skcipher_encap);
 
 /**
- * cnstr_shdsc_ablkcipher_decap - ablkcipher decapsulation shared descriptor
+ * cnstr_shdsc_skcipher_decap - skcipher decapsulation shared descriptor
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed
@@ -1300,9 +1298,9 @@ EXPORT_SYMBOL(cnstr_shdsc_ablkcipher_encap);
  * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template
  * @ctx1_iv_off: IV offset in CONTEXT1 register
  */
-void cnstr_shdsc_ablkcipher_decap(u32 * const desc, struct alginfo *cdata,
-				  unsigned int ivsize, const bool is_rfc3686,
-				  const u32 ctx1_iv_off)
+void cnstr_shdsc_skcipher_decap(u32 * const desc, struct alginfo *cdata,
+				unsigned int ivsize, const bool is_rfc3686,
+				const u32 ctx1_iv_off)
 {
 	u32 *key_jump_cmd;
 
@@ -1348,105 +1346,23 @@ void cnstr_shdsc_ablkcipher_decap(u32 * const desc, struct alginfo *cdata,
 		append_dec_op1(desc, cdata->algtype);
 
 	/* Perform operation */
-	ablkcipher_append_src_dst(desc);
-
-#ifdef DEBUG
-	print_hex_dump(KERN_ERR,
-		       "ablkcipher dec shdesc@" __stringify(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
-}
-EXPORT_SYMBOL(cnstr_shdsc_ablkcipher_decap);
-
-/**
- * cnstr_shdsc_ablkcipher_givencap - ablkcipher encapsulation shared descriptor
- *                                   with HW-generated initialization vector.
- * @desc: pointer to buffer used for descriptor construction
- * @cdata: pointer to block cipher transform definitions
- *         Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed
- *         with OP_ALG_AAI_CBC.
- * @ivsize: initialization vector size
- * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template
- * @ctx1_iv_off: IV offset in CONTEXT1 register
- */
-void cnstr_shdsc_ablkcipher_givencap(u32 * const desc, struct alginfo *cdata,
-				     unsigned int ivsize, const bool is_rfc3686,
-				     const u32 ctx1_iv_off)
-{
-	u32 *key_jump_cmd, geniv;
-
-	init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
-	/* Skip if already shared */
-	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-				   JUMP_COND_SHRD);
-
-	/* Load class1 key only */
-	append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
-			  cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
-
-	/* Load Nonce into CONTEXT1 reg */
-	if (is_rfc3686) {
-		const u8 *nonce = cdata->key_virt + cdata->keylen;
-
-		append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
-				   LDST_CLASS_IND_CCB |
-				   LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
-		append_move(desc, MOVE_WAITCOMP | MOVE_SRC_OUTFIFO |
-			    MOVE_DEST_CLASS1CTX | (16 << MOVE_OFFSET_SHIFT) |
-			    (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT));
-	}
-	set_jump_tgt_here(desc, key_jump_cmd);
-
-	/* Generate IV */
-	geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO |
-		NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | NFIFOENTRY_PTYPE_RND |
-		(ivsize << NFIFOENTRY_DLEN_SHIFT);
-	append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB |
-			    LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM);
-	append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
-	append_move(desc, MOVE_WAITCOMP | MOVE_SRC_INFIFO |
-		    MOVE_DEST_CLASS1CTX | (ivsize << MOVE_LEN_SHIFT) |
-		    (ctx1_iv_off << MOVE_OFFSET_SHIFT));
-	append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
-
-	/* Copy generated IV to memory */
-	append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT |
-			 LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT));
-
-	/* Load Counter into CONTEXT1 reg */
-	if (is_rfc3686)
-		append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
-				     LDST_SRCDST_BYTE_CONTEXT |
-				     ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-				      LDST_OFFSET_SHIFT));
-
-	if (ctx1_iv_off)
-		append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_NCP |
-			    (1 << JUMP_OFFSET_SHIFT));
-
-	/* Load operation */
-	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
-			 OP_ALG_ENCRYPT);
-
-	/* Perform operation */
-	ablkcipher_append_src_dst(desc);
+	skcipher_append_src_dst(desc);
 
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR,
-		       "ablkcipher givenc shdesc@" __stringify(__LINE__) ": ",
+		       "skcipher dec shdesc@" __stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
 #endif
 }
-EXPORT_SYMBOL(cnstr_shdsc_ablkcipher_givencap);
+EXPORT_SYMBOL(cnstr_shdsc_skcipher_decap);
 
 /**
- * cnstr_shdsc_xts_ablkcipher_encap - xts ablkcipher encapsulation shared
- *                                    descriptor
+ * cnstr_shdsc_xts_skcipher_encap - xts skcipher encapsulation shared descriptor
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_XTS.
  */
-void cnstr_shdsc_xts_ablkcipher_encap(u32 * const desc, struct alginfo *cdata)
+void cnstr_shdsc_xts_skcipher_encap(u32 * const desc, struct alginfo *cdata)
 {
 	__be64 sector_size = cpu_to_be64(512);
 	u32 *key_jump_cmd;
@@ -1481,24 +1397,23 @@ void cnstr_shdsc_xts_ablkcipher_encap(u32 * const desc, struct alginfo *cdata)
 			 OP_ALG_ENCRYPT);
 
 	/* Perform operation */
-	ablkcipher_append_src_dst(desc);
+	skcipher_append_src_dst(desc);
 
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR,
-		       "xts ablkcipher enc shdesc@" __stringify(__LINE__) ": ",
+		       "xts skcipher enc shdesc@" __stringify(__LINE__) ": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
 #endif
 }
-EXPORT_SYMBOL(cnstr_shdsc_xts_ablkcipher_encap);
+EXPORT_SYMBOL(cnstr_shdsc_xts_skcipher_encap);
 
 /**
- * cnstr_shdsc_xts_ablkcipher_decap - xts ablkcipher decapsulation shared
- *                                    descriptor
+ * cnstr_shdsc_xts_skcipher_decap - xts skcipher decapsulation shared descriptor
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_XTS.
  */
-void cnstr_shdsc_xts_ablkcipher_decap(u32 * const desc, struct alginfo *cdata)
+void cnstr_shdsc_xts_skcipher_decap(u32 * const desc, struct alginfo *cdata)
 {
 	__be64 sector_size = cpu_to_be64(512);
 	u32 *key_jump_cmd;
@@ -1532,15 +1447,15 @@ void cnstr_shdsc_xts_ablkcipher_decap(u32 * const desc, struct alginfo *cdata)
 	append_dec_op1(desc, cdata->algtype);
 
 	/* Perform operation */
-	ablkcipher_append_src_dst(desc);
+	skcipher_append_src_dst(desc);
 
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR,
-		       "xts ablkcipher dec shdesc@" __stringify(__LINE__) ": ",
+		       "xts skcipher dec shdesc@" __stringify(__LINE__) ": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
 #endif
 }
-EXPORT_SYMBOL(cnstr_shdsc_xts_ablkcipher_decap);
+EXPORT_SYMBOL(cnstr_shdsc_xts_skcipher_decap);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("FSL CAAM descriptor support");
diff --git a/drivers/crypto/caam/caamalg_desc.h b/drivers/crypto/caam/caamalg_desc.h
index a917af5776ce..1315c8f6f951 100644
--- a/drivers/crypto/caam/caamalg_desc.h
+++ b/drivers/crypto/caam/caamalg_desc.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Shared descriptors for aead, ablkcipher algorithms
+ * Shared descriptors for aead, skcipher algorithms
  *
  * Copyright 2016 NXP
  */
@@ -42,10 +42,10 @@
 #define DESC_QI_RFC4543_ENC_LEN		(DESC_RFC4543_ENC_LEN + 4 * CAAM_CMD_SZ)
 #define DESC_QI_RFC4543_DEC_LEN		(DESC_RFC4543_DEC_LEN + 4 * CAAM_CMD_SZ)
 
-#define DESC_ABLKCIPHER_BASE		(3 * CAAM_CMD_SZ)
-#define DESC_ABLKCIPHER_ENC_LEN		(DESC_ABLKCIPHER_BASE + \
+#define DESC_SKCIPHER_BASE		(3 * CAAM_CMD_SZ)
+#define DESC_SKCIPHER_ENC_LEN		(DESC_SKCIPHER_BASE + \
 					 20 * CAAM_CMD_SZ)
-#define DESC_ABLKCIPHER_DEC_LEN		(DESC_ABLKCIPHER_BASE + \
+#define DESC_SKCIPHER_DEC_LEN		(DESC_SKCIPHER_BASE + \
 					 15 * CAAM_CMD_SZ)
 
 void cnstr_shdsc_aead_null_encap(u32 * const desc, struct alginfo *adata,
@@ -96,20 +96,16 @@ void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
 			       unsigned int ivsize, unsigned int icvsize,
 			       const bool is_qi);
 
-void cnstr_shdsc_ablkcipher_encap(u32 * const desc, struct alginfo *cdata,
-				  unsigned int ivsize, const bool is_rfc3686,
-				  const u32 ctx1_iv_off);
+void cnstr_shdsc_skcipher_encap(u32 * const desc, struct alginfo *cdata,
+				unsigned int ivsize, const bool is_rfc3686,
+				const u32 ctx1_iv_off);
 
-void cnstr_shdsc_ablkcipher_decap(u32 * const desc, struct alginfo *cdata,
-				  unsigned int ivsize, const bool is_rfc3686,
-				  const u32 ctx1_iv_off);
+void cnstr_shdsc_skcipher_decap(u32 * const desc, struct alginfo *cdata,
+				unsigned int ivsize, const bool is_rfc3686,
+				const u32 ctx1_iv_off);
 
-void cnstr_shdsc_ablkcipher_givencap(u32 * const desc, struct alginfo *cdata,
-				     unsigned int ivsize, const bool is_rfc3686,
-				     const u32 ctx1_iv_off);
+void cnstr_shdsc_xts_skcipher_encap(u32 * const desc, struct alginfo *cdata);
 
-void cnstr_shdsc_xts_ablkcipher_encap(u32 * const desc, struct alginfo *cdata);
-
-void cnstr_shdsc_xts_ablkcipher_decap(u32 * const desc, struct alginfo *cdata);
+void cnstr_shdsc_xts_skcipher_decap(u32 * const desc, struct alginfo *cdata);
 
 #endif /* _CAAMALG_DESC_H_ */
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index d7aa7d7ff102..23c9fc4975f8 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -1,9 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Freescale FSL CAAM support for crypto API over QI backend.
  * Based on caamalg.c
  *
  * Copyright 2013-2016 Freescale Semiconductor, Inc.
- * Copyright 2016-2017 NXP
+ * Copyright 2016-2018 NXP
  */
 
 #include "compat.h"
@@ -43,6 +44,12 @@ struct caam_aead_alg {
 	bool registered;
 };
 
+struct caam_skcipher_alg {
+	struct skcipher_alg skcipher;
+	struct caam_alg_entry caam;
+	bool registered;
+};
+
 /*
  * per-session context
  */
@@ -50,7 +57,6 @@ struct caam_ctx {
 	struct device *jrdev;
 	u32 sh_desc_enc[DESC_MAX_USED_LEN];
 	u32 sh_desc_dec[DESC_MAX_USED_LEN];
-	u32 sh_desc_givenc[DESC_MAX_USED_LEN];
 	u8 key[CAAM_MAX_KEY_SIZE];
 	dma_addr_t key_dma;
 	enum dma_data_direction dir;
@@ -589,18 +595,19 @@ static int rfc4543_setkey(struct crypto_aead *aead,
 	return 0;
 }
 
-static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
-			     const u8 *key, unsigned int keylen)
+static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
+			   unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(ablkcipher);
-	const char *alg_name = crypto_tfm_alg_name(tfm);
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_skcipher_alg *alg =
+		container_of(crypto_skcipher_alg(skcipher), typeof(*alg),
+			     skcipher);
 	struct device *jrdev = ctx->jrdev;
-	unsigned int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
+	unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
 	u32 ctx1_iv_off = 0;
 	const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) ==
 			       OP_ALG_AAI_CTR_MOD128);
-	const bool is_rfc3686 = (ctr_mode && strstr(alg_name, "rfc3686"));
+	const bool is_rfc3686 = alg->caam.rfc3686;
 	int ret = 0;
 
 #ifdef DEBUG
@@ -629,13 +636,11 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 	ctx->cdata.key_virt = key;
 	ctx->cdata.key_inline = true;
 
-	/* ablkcipher encrypt, decrypt, givencrypt shared descriptors */
-	cnstr_shdsc_ablkcipher_encap(ctx->sh_desc_enc, &ctx->cdata, ivsize,
-				     is_rfc3686, ctx1_iv_off);
-	cnstr_shdsc_ablkcipher_decap(ctx->sh_desc_dec, &ctx->cdata, ivsize,
-				     is_rfc3686, ctx1_iv_off);
-	cnstr_shdsc_ablkcipher_givencap(ctx->sh_desc_givenc, &ctx->cdata,
-					ivsize, is_rfc3686, ctx1_iv_off);
+	/* skcipher encrypt, decrypt shared descriptors */
+	cnstr_shdsc_skcipher_encap(ctx->sh_desc_enc, &ctx->cdata, ivsize,
+				   is_rfc3686, ctx1_iv_off);
+	cnstr_shdsc_skcipher_decap(ctx->sh_desc_dec, &ctx->cdata, ivsize,
+				   is_rfc3686, ctx1_iv_off);
 
 	/* Now update the driver contexts with the new shared descriptor */
 	if (ctx->drv_ctx[ENCRYPT]) {
@@ -656,25 +661,16 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 		}
 	}
 
-	if (ctx->drv_ctx[GIVENCRYPT]) {
-		ret = caam_drv_ctx_update(ctx->drv_ctx[GIVENCRYPT],
-					  ctx->sh_desc_givenc);
-		if (ret) {
-			dev_err(jrdev, "driver givenc context update failed\n");
-			goto badkey;
-		}
-	}
-
 	return ret;
 badkey:
-	crypto_ablkcipher_set_flags(ablkcipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	crypto_skcipher_set_flags(skcipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	return -EINVAL;
 }
 
-static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
-				 const u8 *key, unsigned int keylen)
+static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
+			       unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
 	struct device *jrdev = ctx->jrdev;
 	int ret = 0;
 
@@ -687,9 +683,9 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 	ctx->cdata.key_virt = key;
 	ctx->cdata.key_inline = true;
 
-	/* xts ablkcipher encrypt, decrypt shared descriptors */
-	cnstr_shdsc_xts_ablkcipher_encap(ctx->sh_desc_enc, &ctx->cdata);
-	cnstr_shdsc_xts_ablkcipher_decap(ctx->sh_desc_dec, &ctx->cdata);
+	/* xts skcipher encrypt, decrypt shared descriptors */
+	cnstr_shdsc_xts_skcipher_encap(ctx->sh_desc_enc, &ctx->cdata);
+	cnstr_shdsc_xts_skcipher_decap(ctx->sh_desc_dec, &ctx->cdata);
 
 	/* Now update the driver contexts with the new shared descriptor */
 	if (ctx->drv_ctx[ENCRYPT]) {
@@ -712,7 +708,7 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
 	return ret;
 badkey:
-	crypto_ablkcipher_set_flags(ablkcipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	crypto_skcipher_set_flags(skcipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	return -EINVAL;
 }
 
@@ -741,7 +737,7 @@ struct aead_edesc {
 };
 
 /*
- * ablkcipher_edesc - s/w-extended ablkcipher descriptor
+ * skcipher_edesc - s/w-extended skcipher descriptor
  * @src_nents: number of segments in input scatterlist
  * @dst_nents: number of segments in output scatterlist
  * @iv_dma: dma address of iv for checking continuity and link table
@@ -750,7 +746,7 @@ struct aead_edesc {
  * @drv_req: driver-specific request structure
  * @sgt: the h/w link table, followed by IV
  */
-struct ablkcipher_edesc {
+struct skcipher_edesc {
 	int src_nents;
 	int dst_nents;
 	dma_addr_t iv_dma;
@@ -781,10 +777,8 @@ static struct caam_drv_ctx *get_drv_ctx(struct caam_ctx *ctx,
 
 			if (type == ENCRYPT)
 				desc = ctx->sh_desc_enc;
-			else if (type == DECRYPT)
+			else /* (type == DECRYPT) */
 				desc = ctx->sh_desc_dec;
-			else /* (type == GIVENCRYPT) */
-				desc = ctx->sh_desc_givenc;
 
 			cpu = smp_processor_id();
 			drv_ctx = caam_drv_ctx_init(ctx->qidev, &cpu, desc);
@@ -803,8 +797,7 @@ static struct caam_drv_ctx *get_drv_ctx(struct caam_ctx *ctx,
 static void caam_unmap(struct device *dev, struct scatterlist *src,
 		       struct scatterlist *dst, int src_nents,
 		       int dst_nents, dma_addr_t iv_dma, int ivsize,
-		       enum optype op_type, dma_addr_t qm_sg_dma,
-		       int qm_sg_bytes)
+		       dma_addr_t qm_sg_dma, int qm_sg_bytes)
 {
 	if (dst != src) {
 		if (src_nents)
@@ -815,9 +808,7 @@ static void caam_unmap(struct device *dev, struct scatterlist *src,
 	}
 
 	if (iv_dma)
-		dma_unmap_single(dev, iv_dma, ivsize,
-				 op_type == GIVENCRYPT ? DMA_FROM_DEVICE :
-							 DMA_TO_DEVICE);
+		dma_unmap_single(dev, iv_dma, ivsize, DMA_TO_DEVICE);
 	if (qm_sg_bytes)
 		dma_unmap_single(dev, qm_sg_dma, qm_sg_bytes, DMA_TO_DEVICE);
 }
@@ -830,21 +821,18 @@ static void aead_unmap(struct device *dev,
 	int ivsize = crypto_aead_ivsize(aead);
 
 	caam_unmap(dev, req->src, req->dst, edesc->src_nents, edesc->dst_nents,
-		   edesc->iv_dma, ivsize, edesc->drv_req.drv_ctx->op_type,
-		   edesc->qm_sg_dma, edesc->qm_sg_bytes);
+		   edesc->iv_dma, ivsize, edesc->qm_sg_dma, edesc->qm_sg_bytes);
 	dma_unmap_single(dev, edesc->assoclen_dma, 4, DMA_TO_DEVICE);
 }
 
-static void ablkcipher_unmap(struct device *dev,
-			     struct ablkcipher_edesc *edesc,
-			     struct ablkcipher_request *req)
+static void skcipher_unmap(struct device *dev, struct skcipher_edesc *edesc,
+			   struct skcipher_request *req)
 {
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	int ivsize = crypto_skcipher_ivsize(skcipher);
 
 	caam_unmap(dev, req->src, req->dst, edesc->src_nents, edesc->dst_nents,
-		   edesc->iv_dma, ivsize, edesc->drv_req.drv_ctx->op_type,
-		   edesc->qm_sg_dma, edesc->qm_sg_bytes);
+		   edesc->iv_dma, ivsize, edesc->qm_sg_dma, edesc->qm_sg_bytes);
 }
 
 static void aead_done(struct caam_drv_req *drv_req, u32 status)
@@ -902,9 +890,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	int in_len, out_len;
 	struct qm_sg_entry *sg_table, *fd_sgt;
 	struct caam_drv_ctx *drv_ctx;
-	enum optype op_type = encrypt ? ENCRYPT : DECRYPT;
 
-	drv_ctx = get_drv_ctx(ctx, op_type);
+	drv_ctx = get_drv_ctx(ctx, encrypt ? ENCRYPT : DECRYPT);
 	if (unlikely(IS_ERR_OR_NULL(drv_ctx)))
 		return (struct aead_edesc *)drv_ctx;
 
@@ -994,7 +981,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 		dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
 			qm_sg_ents, ivsize);
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
-			   0, 0, 0, 0);
+			   0, 0, 0);
 		qi_cache_free(edesc);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1009,7 +996,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 		if (dma_mapping_error(qidev, iv_dma)) {
 			dev_err(qidev, "unable to map IV\n");
 			caam_unmap(qidev, req->src, req->dst, src_nents,
-				   dst_nents, 0, 0, 0, 0, 0);
+				   dst_nents, 0, 0, 0, 0);
 			qi_cache_free(edesc);
 			return ERR_PTR(-ENOMEM);
 		}
@@ -1028,7 +1015,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	if (dma_mapping_error(qidev, edesc->assoclen_dma)) {
 		dev_err(qidev, "unable to map assoclen\n");
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
-			   iv_dma, ivsize, op_type, 0, 0);
+			   iv_dma, ivsize, 0, 0);
 		qi_cache_free(edesc);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1051,7 +1038,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 		dev_err(qidev, "unable to map S/G table\n");
 		dma_unmap_single(qidev, edesc->assoclen_dma, 4, DMA_TO_DEVICE);
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
-			   iv_dma, ivsize, op_type, 0, 0);
+			   iv_dma, ivsize, 0, 0);
 		qi_cache_free(edesc);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1138,14 +1125,14 @@ static int ipsec_gcm_decrypt(struct aead_request *req)
 	return aead_crypt(req, false);
 }
 
-static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status)
+static void skcipher_done(struct caam_drv_req *drv_req, u32 status)
 {
-	struct ablkcipher_edesc *edesc;
-	struct ablkcipher_request *req = drv_req->app_ctx;
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	struct caam_ctx *caam_ctx = crypto_ablkcipher_ctx(ablkcipher);
+	struct skcipher_edesc *edesc;
+	struct skcipher_request *req = drv_req->app_ctx;
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct caam_ctx *caam_ctx = crypto_skcipher_ctx(skcipher);
 	struct device *qidev = caam_ctx->qidev;
-	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
+	int ivsize = crypto_skcipher_ivsize(skcipher);
 
 #ifdef DEBUG
 	dev_err(qidev, "%s %d: status 0x%x\n", __func__, __LINE__, status);
@@ -1158,72 +1145,60 @@ static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status)
 
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "dstiv  @" __stringify(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
+		       DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
 		       edesc->src_nents > 1 ? 100 : ivsize, 1);
 	caam_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",
 		     DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
-		     edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
+		     edesc->dst_nents > 1 ? 100 : req->cryptlen, 1);
 #endif
 
-	ablkcipher_unmap(qidev, edesc, req);
-
-	/* In case initial IV was generated, copy it in GIVCIPHER request */
-	if (edesc->drv_req.drv_ctx->op_type == GIVENCRYPT) {
-		u8 *iv;
-		struct skcipher_givcrypt_request *greq;
-
-		greq = container_of(req, struct skcipher_givcrypt_request,
-				    creq);
-		iv = (u8 *)edesc->sgt + edesc->qm_sg_bytes;
-		memcpy(greq->giv, iv, ivsize);
-	}
+	skcipher_unmap(qidev, edesc, req);
 
 	/*
-	 * The crypto API expects us to set the IV (req->info) to the last
+	 * The crypto API expects us to set the IV (req->iv) to the last
 	 * ciphertext block. This is used e.g. by the CTS mode.
 	 */
-	if (edesc->drv_req.drv_ctx->op_type != DECRYPT)
-		scatterwalk_map_and_copy(req->info, req->dst, req->nbytes -
+	if (edesc->drv_req.drv_ctx->op_type == ENCRYPT)
+		scatterwalk_map_and_copy(req->iv, req->dst, req->cryptlen -
 					 ivsize, ivsize, 0);
 
 	qi_cache_free(edesc);
-	ablkcipher_request_complete(req, status);
+	skcipher_request_complete(req, status);
 }
 
-static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
-						       *req, bool encrypt)
+static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
+						   bool encrypt)
 {
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
 	struct device *qidev = ctx->qidev;
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
-	struct ablkcipher_edesc *edesc;
+	struct skcipher_edesc *edesc;
 	dma_addr_t iv_dma;
 	u8 *iv;
-	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
+	int ivsize = crypto_skcipher_ivsize(skcipher);
 	int dst_sg_idx, qm_sg_ents, qm_sg_bytes;
 	struct qm_sg_entry *sg_table, *fd_sgt;
 	struct caam_drv_ctx *drv_ctx;
-	enum optype op_type = encrypt ? ENCRYPT : DECRYPT;
 
-	drv_ctx = get_drv_ctx(ctx, op_type);
+	drv_ctx = get_drv_ctx(ctx, encrypt ? ENCRYPT : DECRYPT);
 	if (unlikely(IS_ERR_OR_NULL(drv_ctx)))
-		return (struct ablkcipher_edesc *)drv_ctx;
+		return (struct skcipher_edesc *)drv_ctx;
 
-	src_nents = sg_nents_for_len(req->src, req->nbytes);
+	src_nents = sg_nents_for_len(req->src, req->cryptlen);
 	if (unlikely(src_nents < 0)) {
 		dev_err(qidev, "Insufficient bytes (%d) in src S/G\n",
-			req->nbytes);
+			req->cryptlen);
 		return ERR_PTR(src_nents);
 	}
 
 	if (unlikely(req->src != req->dst)) {
-		dst_nents = sg_nents_for_len(req->dst, req->nbytes);
+		dst_nents = sg_nents_for_len(req->dst, req->cryptlen);
 		if (unlikely(dst_nents < 0)) {
 			dev_err(qidev, "Insufficient bytes (%d) in dst S/G\n",
-				req->nbytes);
+				req->cryptlen);
 			return ERR_PTR(dst_nents);
 		}
 
@@ -1255,12 +1230,12 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 
 	qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
 	qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
-	if (unlikely(offsetof(struct ablkcipher_edesc, sgt) + qm_sg_bytes +
+	if (unlikely(offsetof(struct skcipher_edesc, sgt) + qm_sg_bytes +
 		     ivsize > CAAM_QI_MEMCACHE_SIZE)) {
 		dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
 			qm_sg_ents, ivsize);
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
-			   0, 0, 0, 0);
+			   0, 0, 0);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -1269,20 +1244,20 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	if (unlikely(!edesc)) {
 		dev_err(qidev, "could not allocate extended descriptor\n");
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
-			   0, 0, 0, 0);
+			   0, 0, 0);
 		return ERR_PTR(-ENOMEM);
 	}
 
 	/* Make sure IV is located in a DMAable area */
 	sg_table = &edesc->sgt[0];
 	iv = (u8 *)(sg_table + qm_sg_ents);
-	memcpy(iv, req->info, ivsize);
+	memcpy(iv, req->iv, ivsize);
 
 	iv_dma = dma_map_single(qidev, iv, ivsize, DMA_TO_DEVICE);
 	if (dma_mapping_error(qidev, iv_dma)) {
 		dev_err(qidev, "unable to map IV\n");
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
-			   0, 0, 0, 0);
+			   0, 0, 0);
 		qi_cache_free(edesc);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1292,7 +1267,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	edesc->iv_dma = iv_dma;
 	edesc->qm_sg_bytes = qm_sg_bytes;
 	edesc->drv_req.app_ctx = req;
-	edesc->drv_req.cbk = ablkcipher_done;
+	edesc->drv_req.cbk = skcipher_done;
 	edesc->drv_req.drv_ctx = drv_ctx;
 
 	dma_to_qm_sg_one(sg_table, iv_dma, ivsize, 0);
@@ -1307,7 +1282,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	if (dma_mapping_error(qidev, edesc->qm_sg_dma)) {
 		dev_err(qidev, "unable to map S/G table\n");
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
-			   iv_dma, ivsize, op_type, 0, 0);
+			   iv_dma, ivsize, 0, 0);
 		qi_cache_free(edesc);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1315,348 +1290,172 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	fd_sgt = &edesc->drv_req.fd_sgt[0];
 
 	dma_to_qm_sg_one_last_ext(&fd_sgt[1], edesc->qm_sg_dma,
-				  ivsize + req->nbytes, 0);
+				  ivsize + req->cryptlen, 0);
 
 	if (req->src == req->dst) {
 		dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma +
-				     sizeof(*sg_table), req->nbytes, 0);
+				     sizeof(*sg_table), req->cryptlen, 0);
 	} else if (mapped_dst_nents > 1) {
 		dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma + dst_sg_idx *
-				     sizeof(*sg_table), req->nbytes, 0);
+				     sizeof(*sg_table), req->cryptlen, 0);
 	} else {
 		dma_to_qm_sg_one(&fd_sgt[0], sg_dma_address(req->dst),
-				 req->nbytes, 0);
-	}
-
-	return edesc;
-}
-
-static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
-	struct skcipher_givcrypt_request *creq)
-{
-	struct ablkcipher_request *req = &creq->creq;
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
-	struct device *qidev = ctx->qidev;
-	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
-		       GFP_KERNEL : GFP_ATOMIC;
-	int src_nents, mapped_src_nents, dst_nents, mapped_dst_nents;
-	struct ablkcipher_edesc *edesc;
-	dma_addr_t iv_dma;
-	u8 *iv;
-	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
-	struct qm_sg_entry *sg_table, *fd_sgt;
-	int dst_sg_idx, qm_sg_ents, qm_sg_bytes;
-	struct caam_drv_ctx *drv_ctx;
-
-	drv_ctx = get_drv_ctx(ctx, GIVENCRYPT);
-	if (unlikely(IS_ERR_OR_NULL(drv_ctx)))
-		return (struct ablkcipher_edesc *)drv_ctx;
-
-	src_nents = sg_nents_for_len(req->src, req->nbytes);
-	if (unlikely(src_nents < 0)) {
-		dev_err(qidev, "Insufficient bytes (%d) in src S/G\n",
-			req->nbytes);
-		return ERR_PTR(src_nents);
-	}
-
-	if (unlikely(req->src != req->dst)) {
-		dst_nents = sg_nents_for_len(req->dst, req->nbytes);
-		if (unlikely(dst_nents < 0)) {
-			dev_err(qidev, "Insufficient bytes (%d) in dst S/G\n",
-				req->nbytes);
-			return ERR_PTR(dst_nents);
-		}
-
-		mapped_src_nents = dma_map_sg(qidev, req->src, src_nents,
-					      DMA_TO_DEVICE);
-		if (unlikely(!mapped_src_nents)) {
-			dev_err(qidev, "unable to map source\n");
-			return ERR_PTR(-ENOMEM);
-		}
-
-		mapped_dst_nents = dma_map_sg(qidev, req->dst, dst_nents,
-					      DMA_FROM_DEVICE);
-		if (unlikely(!mapped_dst_nents)) {
-			dev_err(qidev, "unable to map destination\n");
-			dma_unmap_sg(qidev, req->src, src_nents, DMA_TO_DEVICE);
-			return ERR_PTR(-ENOMEM);
-		}
-	} else {
-		mapped_src_nents = dma_map_sg(qidev, req->src, src_nents,
-					      DMA_BIDIRECTIONAL);
-		if (unlikely(!mapped_src_nents)) {
-			dev_err(qidev, "unable to map source\n");
-			return ERR_PTR(-ENOMEM);
-		}
-
-		dst_nents = src_nents;
-		mapped_dst_nents = src_nents;
-	}
-
-	qm_sg_ents = mapped_src_nents > 1 ? mapped_src_nents : 0;
-	dst_sg_idx = qm_sg_ents;
-
-	qm_sg_ents += 1 + mapped_dst_nents;
-	qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
-	if (unlikely(offsetof(struct ablkcipher_edesc, sgt) + qm_sg_bytes +
-		     ivsize > CAAM_QI_MEMCACHE_SIZE)) {
-		dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
-			qm_sg_ents, ivsize);
-		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
-			   0, 0, 0, 0);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	/* allocate space for base edesc, link tables and IV */
-	edesc = qi_cache_alloc(GFP_DMA | flags);
-	if (!edesc) {
-		dev_err(qidev, "could not allocate extended descriptor\n");
-		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
-			   0, 0, 0, 0);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	/* Make sure IV is located in a DMAable area */
-	sg_table = &edesc->sgt[0];
-	iv = (u8 *)(sg_table + qm_sg_ents);
-	iv_dma = dma_map_single(qidev, iv, ivsize, DMA_FROM_DEVICE);
-	if (dma_mapping_error(qidev, iv_dma)) {
-		dev_err(qidev, "unable to map IV\n");
-		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
-			   0, 0, 0, 0);
-		qi_cache_free(edesc);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	edesc->src_nents = src_nents;
-	edesc->dst_nents = dst_nents;
-	edesc->iv_dma = iv_dma;
-	edesc->qm_sg_bytes = qm_sg_bytes;
-	edesc->drv_req.app_ctx = req;
-	edesc->drv_req.cbk = ablkcipher_done;
-	edesc->drv_req.drv_ctx = drv_ctx;
-
-	if (mapped_src_nents > 1)
-		sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table, 0);
-
-	dma_to_qm_sg_one(sg_table + dst_sg_idx, iv_dma, ivsize, 0);
-	sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table + dst_sg_idx + 1,
-			 0);
-
-	edesc->qm_sg_dma = dma_map_single(qidev, sg_table, edesc->qm_sg_bytes,
-					  DMA_TO_DEVICE);
-	if (dma_mapping_error(qidev, edesc->qm_sg_dma)) {
-		dev_err(qidev, "unable to map S/G table\n");
-		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
-			   iv_dma, ivsize, GIVENCRYPT, 0, 0);
-		qi_cache_free(edesc);
-		return ERR_PTR(-ENOMEM);
+				 req->cryptlen, 0);
 	}
 
-	fd_sgt = &edesc->drv_req.fd_sgt[0];
-
-	if (mapped_src_nents > 1)
-		dma_to_qm_sg_one_ext(&fd_sgt[1], edesc->qm_sg_dma, req->nbytes,
-				     0);
-	else
-		dma_to_qm_sg_one(&fd_sgt[1], sg_dma_address(req->src),
-				 req->nbytes, 0);
-
-	dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma + dst_sg_idx *
-			     sizeof(*sg_table), ivsize + req->nbytes, 0);
-
 	return edesc;
 }
 
-static inline int ablkcipher_crypt(struct ablkcipher_request *req, bool encrypt)
+static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt)
 {
-	struct ablkcipher_edesc *edesc;
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
-	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
+	struct skcipher_edesc *edesc;
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	int ivsize = crypto_skcipher_ivsize(skcipher);
 	int ret;
 
 	if (unlikely(caam_congested))
 		return -EAGAIN;
 
 	/* allocate extended descriptor */
-	edesc = ablkcipher_edesc_alloc(req, encrypt);
+	edesc = skcipher_edesc_alloc(req, encrypt);
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
 
 	/*
-	 * The crypto API expects us to set the IV (req->info) to the last
+	 * The crypto API expects us to set the IV (req->iv) to the last
 	 * ciphertext block.
 	 */
 	if (!encrypt)
-		scatterwalk_map_and_copy(req->info, req->src, req->nbytes -
+		scatterwalk_map_and_copy(req->iv, req->src, req->cryptlen -
 					 ivsize, ivsize, 0);
 
 	ret = caam_qi_enqueue(ctx->qidev, &edesc->drv_req);
 	if (!ret) {
 		ret = -EINPROGRESS;
 	} else {
-		ablkcipher_unmap(ctx->qidev, edesc, req);
+		skcipher_unmap(ctx->qidev, edesc, req);
 		qi_cache_free(edesc);
 	}
 
 	return ret;
 }
 
-static int ablkcipher_encrypt(struct ablkcipher_request *req)
+static int skcipher_encrypt(struct skcipher_request *req)
 {
-	return ablkcipher_crypt(req, true);
+	return skcipher_crypt(req, true);
 }
 
-static int ablkcipher_decrypt(struct ablkcipher_request *req)
+static int skcipher_decrypt(struct skcipher_request *req)
 {
-	return ablkcipher_crypt(req, false);
-}
-
-static int ablkcipher_givencrypt(struct skcipher_givcrypt_request *creq)
-{
-	struct ablkcipher_request *req = &creq->creq;
-	struct ablkcipher_edesc *edesc;
-	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
-	int ret;
-
-	if (unlikely(caam_congested))
-		return -EAGAIN;
-
-	/* allocate extended descriptor */
-	edesc = ablkcipher_giv_edesc_alloc(creq);
-	if (IS_ERR(edesc))
-		return PTR_ERR(edesc);
-
-	ret = caam_qi_enqueue(ctx->qidev, &edesc->drv_req);
-	if (!ret) {
-		ret = -EINPROGRESS;
-	} else {
-		ablkcipher_unmap(ctx->qidev, edesc, req);
-		qi_cache_free(edesc);
-	}
-
-	return ret;
+	return skcipher_crypt(req, false);
 }
 
-#define template_ablkcipher	template_u.ablkcipher
-struct caam_alg_template {
-	char name[CRYPTO_MAX_ALG_NAME];
-	char driver_name[CRYPTO_MAX_ALG_NAME];
-	unsigned int blocksize;
-	u32 type;
-	union {
-		struct ablkcipher_alg ablkcipher;
-	} template_u;
-	u32 class1_alg_type;
-	u32 class2_alg_type;
-};
-
-static struct caam_alg_template driver_algs[] = {
-	/* ablkcipher descriptor */
+static struct caam_skcipher_alg driver_algs[] = {
 	{
-		.name = "cbc(aes)",
-		.driver_name = "cbc-aes-caam-qi",
-		.blocksize = AES_BLOCK_SIZE,
-		.type = CRYPTO_ALG_TYPE_GIVCIPHER,
-		.template_ablkcipher = {
-			.setkey = ablkcipher_setkey,
-			.encrypt = ablkcipher_encrypt,
-			.decrypt = ablkcipher_decrypt,
-			.givencrypt = ablkcipher_givencrypt,
-			.geniv = "<built-in>",
+		.skcipher = {
+			.base = {
+				.cra_name = "cbc(aes)",
+				.cra_driver_name = "cbc-aes-caam-qi",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE,
 			.max_keysize = AES_MAX_KEY_SIZE,
 			.ivsize = AES_BLOCK_SIZE,
 		},
-		.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+		.caam.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
 	},
 	{
-		.name = "cbc(des3_ede)",
-		.driver_name = "cbc-3des-caam-qi",
-		.blocksize = DES3_EDE_BLOCK_SIZE,
-		.type = CRYPTO_ALG_TYPE_GIVCIPHER,
-		.template_ablkcipher = {
-			.setkey = ablkcipher_setkey,
-			.encrypt = ablkcipher_encrypt,
-			.decrypt = ablkcipher_decrypt,
-			.givencrypt = ablkcipher_givencrypt,
-			.geniv = "<built-in>",
+		.skcipher = {
+			.base = {
+				.cra_name = "cbc(des3_ede)",
+				.cra_driver_name = "cbc-3des-caam-qi",
+				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			},
+			.setkey = skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
 			.min_keysize = DES3_EDE_KEY_SIZE,
 			.max_keysize = DES3_EDE_KEY_SIZE,
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 		},
-		.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+		.caam.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
 	},
 	{
-		.name = "cbc(des)",
-		.driver_name = "cbc-des-caam-qi",
-		.blocksize = DES_BLOCK_SIZE,
-		.type = CRYPTO_ALG_TYPE_GIVCIPHER,
-		.template_ablkcipher = {
-			.setkey = ablkcipher_setkey,
-			.encrypt = ablkcipher_encrypt,
-			.decrypt = ablkcipher_decrypt,
-			.givencrypt = ablkcipher_givencrypt,
-			.geniv = "<built-in>",
+		.skcipher = {
+			.base = {
+				.cra_name = "cbc(des)",
+				.cra_driver_name = "cbc-des-caam-qi",
+				.cra_blocksize = DES_BLOCK_SIZE,
+			},
+			.setkey = skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
 			.min_keysize = DES_KEY_SIZE,
 			.max_keysize = DES_KEY_SIZE,
 			.ivsize = DES_BLOCK_SIZE,
 		},
-		.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+		.caam.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
 	},
 	{
-		.name = "ctr(aes)",
-		.driver_name = "ctr-aes-caam-qi",
-		.blocksize = 1,
-		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.template_ablkcipher = {
-			.setkey = ablkcipher_setkey,
-			.encrypt = ablkcipher_encrypt,
-			.decrypt = ablkcipher_decrypt,
-			.geniv = "chainiv",
+		.skcipher = {
+			.base = {
+				.cra_name = "ctr(aes)",
+				.cra_driver_name = "ctr-aes-caam-qi",
+				.cra_blocksize = 1,
+			},
+			.setkey = skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE,
 			.max_keysize = AES_MAX_KEY_SIZE,
 			.ivsize = AES_BLOCK_SIZE,
+			.chunksize = AES_BLOCK_SIZE,
 		},
-		.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CTR_MOD128,
+		.caam.class1_alg_type = OP_ALG_ALGSEL_AES |
+					OP_ALG_AAI_CTR_MOD128,
 	},
 	{
-		.name = "rfc3686(ctr(aes))",
-		.driver_name = "rfc3686-ctr-aes-caam-qi",
-		.blocksize = 1,
-		.type = CRYPTO_ALG_TYPE_GIVCIPHER,
-		.template_ablkcipher = {
-			.setkey = ablkcipher_setkey,
-			.encrypt = ablkcipher_encrypt,
-			.decrypt = ablkcipher_decrypt,
-			.givencrypt = ablkcipher_givencrypt,
-			.geniv = "<built-in>",
+		.skcipher = {
+			.base = {
+				.cra_name = "rfc3686(ctr(aes))",
+				.cra_driver_name = "rfc3686-ctr-aes-caam-qi",
+				.cra_blocksize = 1,
+			},
+			.setkey = skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE +
 				       CTR_RFC3686_NONCE_SIZE,
 			.max_keysize = AES_MAX_KEY_SIZE +
 				       CTR_RFC3686_NONCE_SIZE,
 			.ivsize = CTR_RFC3686_IV_SIZE,
+			.chunksize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES |
+					   OP_ALG_AAI_CTR_MOD128,
+			.rfc3686 = true,
 		},
-		.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CTR_MOD128,
 	},
 	{
-		.name = "xts(aes)",
-		.driver_name = "xts-aes-caam-qi",
-		.blocksize = AES_BLOCK_SIZE,
-		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.template_ablkcipher = {
-			.setkey = xts_ablkcipher_setkey,
-			.encrypt = ablkcipher_encrypt,
-			.decrypt = ablkcipher_decrypt,
-			.geniv = "eseqiv",
+		.skcipher = {
+			.base = {
+				.cra_name = "xts(aes)",
+				.cra_driver_name = "xts-aes-caam-qi",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = xts_skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
 			.min_keysize = 2 * AES_MIN_KEY_SIZE,
 			.max_keysize = 2 * AES_MAX_KEY_SIZE,
 			.ivsize = AES_BLOCK_SIZE,
 		},
-		.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_XTS,
+		.caam.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_XTS,
 	},
 };
 
@@ -2528,12 +2327,6 @@ static struct caam_aead_alg driver_aeads[] = {
 	},
 };
 
-struct caam_crypto_alg {
-	struct list_head entry;
-	struct crypto_alg crypto_alg;
-	struct caam_alg_entry caam;
-};
-
 static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam,
 			    bool uses_dkp)
 {
@@ -2572,19 +2365,18 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam,
 	spin_lock_init(&ctx->lock);
 	ctx->drv_ctx[ENCRYPT] = NULL;
 	ctx->drv_ctx[DECRYPT] = NULL;
-	ctx->drv_ctx[GIVENCRYPT] = NULL;
 
 	return 0;
 }
 
-static int caam_cra_init(struct crypto_tfm *tfm)
+static int caam_cra_init(struct crypto_skcipher *tfm)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
-	struct caam_crypto_alg *caam_alg = container_of(alg, typeof(*caam_alg),
-							crypto_alg);
-	struct caam_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct caam_skcipher_alg *caam_alg =
+		container_of(alg, typeof(*caam_alg), skcipher);
 
-	return caam_init_common(ctx, &caam_alg->caam, false);
+	return caam_init_common(crypto_skcipher_ctx(tfm), &caam_alg->caam,
+				false);
 }
 
 static int caam_aead_init(struct crypto_aead *tfm)
@@ -2602,16 +2394,15 @@ static void caam_exit_common(struct caam_ctx *ctx)
 {
 	caam_drv_ctx_rel(ctx->drv_ctx[ENCRYPT]);
 	caam_drv_ctx_rel(ctx->drv_ctx[DECRYPT]);
-	caam_drv_ctx_rel(ctx->drv_ctx[GIVENCRYPT]);
 
 	dma_unmap_single(ctx->jrdev, ctx->key_dma, sizeof(ctx->key), ctx->dir);
 
 	caam_jr_free(ctx->jrdev);
 }
 
-static void caam_cra_exit(struct crypto_tfm *tfm)
+static void caam_cra_exit(struct crypto_skcipher *tfm)
 {
-	caam_exit_common(crypto_tfm_ctx(tfm));
+	caam_exit_common(crypto_skcipher_ctx(tfm));
 }
 
 static void caam_aead_exit(struct crypto_aead *tfm)
@@ -2619,10 +2410,8 @@ static void caam_aead_exit(struct crypto_aead *tfm)
 	caam_exit_common(crypto_aead_ctx(tfm));
 }
 
-static struct list_head alg_list;
 static void __exit caam_qi_algapi_exit(void)
 {
-	struct caam_crypto_alg *t_alg, *n;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(driver_aeads); i++) {
@@ -2632,55 +2421,25 @@ static void __exit caam_qi_algapi_exit(void)
 			crypto_unregister_aead(&t_alg->aead);
 	}
 
-	if (!alg_list.next)
-		return;
+	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
+		struct caam_skcipher_alg *t_alg = driver_algs + i;
 
-	list_for_each_entry_safe(t_alg, n, &alg_list, entry) {
-		crypto_unregister_alg(&t_alg->crypto_alg);
-		list_del(&t_alg->entry);
-		kfree(t_alg);
+		if (t_alg->registered)
+			crypto_unregister_skcipher(&t_alg->skcipher);
 	}
 }
 
-static struct caam_crypto_alg *caam_alg_alloc(struct caam_alg_template
-					      *template)
+static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg)
 {
-	struct caam_crypto_alg *t_alg;
-	struct crypto_alg *alg;
-
-	t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL);
-	if (!t_alg)
-		return ERR_PTR(-ENOMEM);
+	struct skcipher_alg *alg = &t_alg->skcipher;
 
-	alg = &t_alg->crypto_alg;
-
-	snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name);
-	snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
-		 template->driver_name);
-	alg->cra_module = THIS_MODULE;
-	alg->cra_init = caam_cra_init;
-	alg->cra_exit = caam_cra_exit;
-	alg->cra_priority = CAAM_CRA_PRIORITY;
-	alg->cra_blocksize = template->blocksize;
-	alg->cra_alignmask = 0;
-	alg->cra_ctxsize = sizeof(struct caam_ctx);
-	alg->cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY |
-			 template->type;
-	switch (template->type) {
-	case CRYPTO_ALG_TYPE_GIVCIPHER:
-		alg->cra_type = &crypto_givcipher_type;
-		alg->cra_ablkcipher = template->template_ablkcipher;
-		break;
-	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		alg->cra_type = &crypto_ablkcipher_type;
-		alg->cra_ablkcipher = template->template_ablkcipher;
-		break;
-	}
-
-	t_alg->caam.class1_alg_type = template->class1_alg_type;
-	t_alg->caam.class2_alg_type = template->class2_alg_type;
+	alg->base.cra_module = THIS_MODULE;
+	alg->base.cra_priority = CAAM_CRA_PRIORITY;
+	alg->base.cra_ctxsize = sizeof(struct caam_ctx);
+	alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
 
-	return t_alg;
+	alg->init = caam_cra_init;
+	alg->exit = caam_cra_exit;
 }
 
 static void caam_aead_alg_init(struct caam_aead_alg *t_alg)
@@ -2734,8 +2493,6 @@ static int __init caam_qi_algapi_init(void)
 		return -ENODEV;
 	}
 
-	INIT_LIST_HEAD(&alg_list);
-
 	/*
 	 * Register crypto algorithms the device supports.
 	 * First, detect presence and attributes of DES, AES, and MD blocks.
@@ -2751,9 +2508,8 @@ static int __init caam_qi_algapi_init(void)
 		md_limit = SHA256_DIGEST_SIZE;
 
 	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
-		struct caam_crypto_alg *t_alg;
-		struct caam_alg_template *alg = driver_algs + i;
-		u32 alg_sel = alg->class1_alg_type & OP_ALG_ALGSEL_MASK;
+		struct caam_skcipher_alg *t_alg = driver_algs + i;
+		u32 alg_sel = t_alg->caam.class1_alg_type & OP_ALG_ALGSEL_MASK;
 
 		/* Skip DES algorithms if not supported by device */
 		if (!des_inst &&
@@ -2765,23 +2521,16 @@ static int __init caam_qi_algapi_init(void)
 		if (!aes_inst && (alg_sel == OP_ALG_ALGSEL_AES))
 			continue;
 
-		t_alg = caam_alg_alloc(alg);
-		if (IS_ERR(t_alg)) {
-			err = PTR_ERR(t_alg);
-			dev_warn(priv->qidev, "%s alg allocation failed\n",
-				 alg->driver_name);
-			continue;
-		}
+		caam_skcipher_alg_init(t_alg);
 
-		err = crypto_register_alg(&t_alg->crypto_alg);
+		err = crypto_register_skcipher(&t_alg->skcipher);
 		if (err) {
 			dev_warn(priv->qidev, "%s alg registration failed\n",
-				 t_alg->crypto_alg.cra_driver_name);
-			kfree(t_alg);
+				 t_alg->skcipher.base.cra_driver_name);
 			continue;
 		}
 
-		list_add_tail(&t_alg->entry, &alg_list);
+		t_alg->registered = true;
 		registered = true;
 	}
 
diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c
new file mode 100644
index 000000000000..7d8ac0222fa3
--- /dev/null
+++ b/drivers/crypto/caam/caamalg_qi2.c
@@ -0,0 +1,5165 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * Copyright 2015-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2018 NXP
+ */
+
+#include "compat.h"
+#include "regs.h"
+#include "caamalg_qi2.h"
+#include "dpseci_cmd.h"
+#include "desc_constr.h"
+#include "error.h"
+#include "sg_sw_sec4.h"
+#include "sg_sw_qm2.h"
+#include "key_gen.h"
+#include "caamalg_desc.h"
+#include "caamhash_desc.h"
+#include <linux/fsl/mc.h>
+#include <soc/fsl/dpaa2-io.h>
+#include <soc/fsl/dpaa2-fd.h>
+
+#define CAAM_CRA_PRIORITY	2000
+
+/* max key is sum of AES_MAX_KEY_SIZE, max split key size */
+#define CAAM_MAX_KEY_SIZE	(AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE + \
+				 SHA512_DIGEST_SIZE * 2)
+
+#if !IS_ENABLED(CONFIG_CRYPTO_DEV_FSL_CAAM)
+bool caam_little_end;
+EXPORT_SYMBOL(caam_little_end);
+bool caam_imx;
+EXPORT_SYMBOL(caam_imx);
+#endif
+
+/*
+ * This is a a cache of buffers, from which the users of CAAM QI driver
+ * can allocate short buffers. It's speedier than doing kmalloc on the hotpath.
+ * NOTE: A more elegant solution would be to have some headroom in the frames
+ *       being processed. This can be added by the dpaa2-eth driver. This would
+ *       pose a problem for userspace application processing which cannot
+ *       know of this limitation. So for now, this will work.
+ * NOTE: The memcache is SMP-safe. No need to handle spinlocks in-here
+ */
+static struct kmem_cache *qi_cache;
+
+struct caam_alg_entry {
+	struct device *dev;
+	int class1_alg_type;
+	int class2_alg_type;
+	bool rfc3686;
+	bool geniv;
+};
+
+struct caam_aead_alg {
+	struct aead_alg aead;
+	struct caam_alg_entry caam;
+	bool registered;
+};
+
+struct caam_skcipher_alg {
+	struct skcipher_alg skcipher;
+	struct caam_alg_entry caam;
+	bool registered;
+};
+
+/**
+ * caam_ctx - per-session context
+ * @flc: Flow Contexts array
+ * @key:  [authentication key], encryption key
+ * @flc_dma: I/O virtual addresses of the Flow Contexts
+ * @key_dma: I/O virtual address of the key
+ * @dir: DMA direction for mapping key and Flow Contexts
+ * @dev: dpseci device
+ * @adata: authentication algorithm details
+ * @cdata: encryption algorithm details
+ * @authsize: authentication tag (a.k.a. ICV / MAC) size
+ */
+struct caam_ctx {
+	struct caam_flc flc[NUM_OP];
+	u8 key[CAAM_MAX_KEY_SIZE];
+	dma_addr_t flc_dma[NUM_OP];
+	dma_addr_t key_dma;
+	enum dma_data_direction dir;
+	struct device *dev;
+	struct alginfo adata;
+	struct alginfo cdata;
+	unsigned int authsize;
+};
+
+static void *dpaa2_caam_iova_to_virt(struct dpaa2_caam_priv *priv,
+				     dma_addr_t iova_addr)
+{
+	phys_addr_t phys_addr;
+
+	phys_addr = priv->domain ? iommu_iova_to_phys(priv->domain, iova_addr) :
+				   iova_addr;
+
+	return phys_to_virt(phys_addr);
+}
+
+/*
+ * qi_cache_zalloc - Allocate buffers from CAAM-QI cache
+ *
+ * Allocate data on the hotpath. Instead of using kzalloc, one can use the
+ * services of the CAAM QI memory cache (backed by kmem_cache). The buffers
+ * will have a size of CAAM_QI_MEMCACHE_SIZE, which should be sufficient for
+ * hosting 16 SG entries.
+ *
+ * @flags - flags that would be used for the equivalent kmalloc(..) call
+ *
+ * Returns a pointer to a retrieved buffer on success or NULL on failure.
+ */
+static inline void *qi_cache_zalloc(gfp_t flags)
+{
+	return kmem_cache_zalloc(qi_cache, flags);
+}
+
+/*
+ * qi_cache_free - Frees buffers allocated from CAAM-QI cache
+ *
+ * @obj - buffer previously allocated by qi_cache_zalloc
+ *
+ * No checking is being done, the call is a passthrough call to
+ * kmem_cache_free(...)
+ */
+static inline void qi_cache_free(void *obj)
+{
+	kmem_cache_free(qi_cache, obj);
+}
+
+static struct caam_request *to_caam_req(struct crypto_async_request *areq)
+{
+	switch (crypto_tfm_alg_type(areq->tfm)) {
+	case CRYPTO_ALG_TYPE_SKCIPHER:
+		return skcipher_request_ctx(skcipher_request_cast(areq));
+	case CRYPTO_ALG_TYPE_AEAD:
+		return aead_request_ctx(container_of(areq, struct aead_request,
+						     base));
+	case CRYPTO_ALG_TYPE_AHASH:
+		return ahash_request_ctx(ahash_request_cast(areq));
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+}
+
+static void caam_unmap(struct device *dev, struct scatterlist *src,
+		       struct scatterlist *dst, int src_nents,
+		       int dst_nents, dma_addr_t iv_dma, int ivsize,
+		       dma_addr_t qm_sg_dma, int qm_sg_bytes)
+{
+	if (dst != src) {
+		if (src_nents)
+			dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE);
+		dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE);
+	} else {
+		dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL);
+	}
+
+	if (iv_dma)
+		dma_unmap_single(dev, iv_dma, ivsize, DMA_TO_DEVICE);
+
+	if (qm_sg_bytes)
+		dma_unmap_single(dev, qm_sg_dma, qm_sg_bytes, DMA_TO_DEVICE);
+}
+
+static int aead_set_sh_desc(struct crypto_aead *aead)
+{
+	struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead),
+						 typeof(*alg), aead);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int ivsize = crypto_aead_ivsize(aead);
+	struct device *dev = ctx->dev;
+	struct dpaa2_caam_priv *priv = dev_get_drvdata(dev);
+	struct caam_flc *flc;
+	u32 *desc;
+	u32 ctx1_iv_off = 0;
+	u32 *nonce = NULL;
+	unsigned int data_len[2];
+	u32 inl_mask;
+	const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) ==
+			       OP_ALG_AAI_CTR_MOD128);
+	const bool is_rfc3686 = alg->caam.rfc3686;
+
+	if (!ctx->cdata.keylen || !ctx->authsize)
+		return 0;
+
+	/*
+	 * AES-CTR needs to load IV in CONTEXT1 reg
+	 * at an offset of 128bits (16bytes)
+	 * CONTEXT1[255:128] = IV
+	 */
+	if (ctr_mode)
+		ctx1_iv_off = 16;
+
+	/*
+	 * RFC3686 specific:
+	 *	CONTEXT1[255:128] = {NONCE, IV, COUNTER}
+	 */
+	if (is_rfc3686) {
+		ctx1_iv_off = 16 + CTR_RFC3686_NONCE_SIZE;
+		nonce = (u32 *)((void *)ctx->key + ctx->adata.keylen_pad +
+				ctx->cdata.keylen - CTR_RFC3686_NONCE_SIZE);
+	}
+
+	data_len[0] = ctx->adata.keylen_pad;
+	data_len[1] = ctx->cdata.keylen;
+
+	/* aead_encrypt shared descriptor */
+	if (desc_inline_query((alg->caam.geniv ? DESC_QI_AEAD_GIVENC_LEN :
+						 DESC_QI_AEAD_ENC_LEN) +
+			      (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0),
+			      DESC_JOB_IO_LEN, data_len, &inl_mask,
+			      ARRAY_SIZE(data_len)) < 0)
+		return -EINVAL;
+
+	if (inl_mask & 1)
+		ctx->adata.key_virt = ctx->key;
+	else
+		ctx->adata.key_dma = ctx->key_dma;
+
+	if (inl_mask & 2)
+		ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
+	else
+		ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
+
+	ctx->adata.key_inline = !!(inl_mask & 1);
+	ctx->cdata.key_inline = !!(inl_mask & 2);
+
+	flc = &ctx->flc[ENCRYPT];
+	desc = flc->sh_desc;
+
+	if (alg->caam.geniv)
+		cnstr_shdsc_aead_givencap(desc, &ctx->cdata, &ctx->adata,
+					  ivsize, ctx->authsize, is_rfc3686,
+					  nonce, ctx1_iv_off, true,
+					  priv->sec_attr.era);
+	else
+		cnstr_shdsc_aead_encap(desc, &ctx->cdata, &ctx->adata,
+				       ivsize, ctx->authsize, is_rfc3686, nonce,
+				       ctx1_iv_off, true, priv->sec_attr.era);
+
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(dev, ctx->flc_dma[ENCRYPT],
+				   sizeof(flc->flc) + desc_bytes(desc),
+				   ctx->dir);
+
+	/* aead_decrypt shared descriptor */
+	if (desc_inline_query(DESC_QI_AEAD_DEC_LEN +
+			      (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0),
+			      DESC_JOB_IO_LEN, data_len, &inl_mask,
+			      ARRAY_SIZE(data_len)) < 0)
+		return -EINVAL;
+
+	if (inl_mask & 1)
+		ctx->adata.key_virt = ctx->key;
+	else
+		ctx->adata.key_dma = ctx->key_dma;
+
+	if (inl_mask & 2)
+		ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
+	else
+		ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
+
+	ctx->adata.key_inline = !!(inl_mask & 1);
+	ctx->cdata.key_inline = !!(inl_mask & 2);
+
+	flc = &ctx->flc[DECRYPT];
+	desc = flc->sh_desc;
+	cnstr_shdsc_aead_decap(desc, &ctx->cdata, &ctx->adata,
+			       ivsize, ctx->authsize, alg->caam.geniv,
+			       is_rfc3686, nonce, ctx1_iv_off, true,
+			       priv->sec_attr.era);
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(dev, ctx->flc_dma[DECRYPT],
+				   sizeof(flc->flc) + desc_bytes(desc),
+				   ctx->dir);
+
+	return 0;
+}
+
+static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+
+	ctx->authsize = authsize;
+	aead_set_sh_desc(authenc);
+
+	return 0;
+}
+
+static int aead_setkey(struct crypto_aead *aead, const u8 *key,
+		       unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *dev = ctx->dev;
+	struct crypto_authenc_keys keys;
+
+	if (crypto_authenc_extractkeys(&keys, key, keylen) != 0)
+		goto badkey;
+
+	dev_dbg(dev, "keylen %d enckeylen %d authkeylen %d\n",
+		keys.authkeylen + keys.enckeylen, keys.enckeylen,
+		keys.authkeylen);
+	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+
+	ctx->adata.keylen = keys.authkeylen;
+	ctx->adata.keylen_pad = split_key_len(ctx->adata.algtype &
+					      OP_ALG_ALGSEL_MASK);
+
+	if (ctx->adata.keylen_pad + keys.enckeylen > CAAM_MAX_KEY_SIZE)
+		goto badkey;
+
+	memcpy(ctx->key, keys.authkey, keys.authkeylen);
+	memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey, keys.enckeylen);
+	dma_sync_single_for_device(dev, ctx->key_dma, ctx->adata.keylen_pad +
+				   keys.enckeylen, ctx->dir);
+	print_hex_dump_debug("ctx.key@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
+			     ctx->adata.keylen_pad + keys.enckeylen, 1);
+
+	ctx->cdata.keylen = keys.enckeylen;
+
+	memzero_explicit(&keys, sizeof(keys));
+	return aead_set_sh_desc(aead);
+badkey:
+	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	memzero_explicit(&keys, sizeof(keys));
+	return -EINVAL;
+}
+
+static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
+					   bool encrypt)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct caam_request *req_ctx = aead_request_ctx(req);
+	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
+	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead),
+						 typeof(*alg), aead);
+	struct device *dev = ctx->dev;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		      GFP_KERNEL : GFP_ATOMIC;
+	int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
+	struct aead_edesc *edesc;
+	dma_addr_t qm_sg_dma, iv_dma = 0;
+	int ivsize = 0;
+	unsigned int authsize = ctx->authsize;
+	int qm_sg_index = 0, qm_sg_nents = 0, qm_sg_bytes;
+	int in_len, out_len;
+	struct dpaa2_sg_entry *sg_table;
+
+	/* allocate space for base edesc, link tables and IV */
+	edesc = qi_cache_zalloc(GFP_DMA | flags);
+	if (unlikely(!edesc)) {
+		dev_err(dev, "could not allocate extended descriptor\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (unlikely(req->dst != req->src)) {
+		src_nents = sg_nents_for_len(req->src, req->assoclen +
+					     req->cryptlen);
+		if (unlikely(src_nents < 0)) {
+			dev_err(dev, "Insufficient bytes (%d) in src S/G\n",
+				req->assoclen + req->cryptlen);
+			qi_cache_free(edesc);
+			return ERR_PTR(src_nents);
+		}
+
+		dst_nents = sg_nents_for_len(req->dst, req->assoclen +
+					     req->cryptlen +
+					     (encrypt ? authsize :
+							(-authsize)));
+		if (unlikely(dst_nents < 0)) {
+			dev_err(dev, "Insufficient bytes (%d) in dst S/G\n",
+				req->assoclen + req->cryptlen +
+				(encrypt ? authsize : (-authsize)));
+			qi_cache_free(edesc);
+			return ERR_PTR(dst_nents);
+		}
+
+		if (src_nents) {
+			mapped_src_nents = dma_map_sg(dev, req->src, src_nents,
+						      DMA_TO_DEVICE);
+			if (unlikely(!mapped_src_nents)) {
+				dev_err(dev, "unable to map source\n");
+				qi_cache_free(edesc);
+				return ERR_PTR(-ENOMEM);
+			}
+		} else {
+			mapped_src_nents = 0;
+		}
+
+		mapped_dst_nents = dma_map_sg(dev, req->dst, dst_nents,
+					      DMA_FROM_DEVICE);
+		if (unlikely(!mapped_dst_nents)) {
+			dev_err(dev, "unable to map destination\n");
+			dma_unmap_sg(dev, req->src, src_nents, DMA_TO_DEVICE);
+			qi_cache_free(edesc);
+			return ERR_PTR(-ENOMEM);
+		}
+	} else {
+		src_nents = sg_nents_for_len(req->src, req->assoclen +
+					     req->cryptlen +
+						(encrypt ? authsize : 0));
+		if (unlikely(src_nents < 0)) {
+			dev_err(dev, "Insufficient bytes (%d) in src S/G\n",
+				req->assoclen + req->cryptlen +
+				(encrypt ? authsize : 0));
+			qi_cache_free(edesc);
+			return ERR_PTR(src_nents);
+		}
+
+		mapped_src_nents = dma_map_sg(dev, req->src, src_nents,
+					      DMA_BIDIRECTIONAL);
+		if (unlikely(!mapped_src_nents)) {
+			dev_err(dev, "unable to map source\n");
+			qi_cache_free(edesc);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
+
+	if ((alg->caam.rfc3686 && encrypt) || !alg->caam.geniv)
+		ivsize = crypto_aead_ivsize(aead);
+
+	/*
+	 * Create S/G table: req->assoclen, [IV,] req->src [, req->dst].
+	 * Input is not contiguous.
+	 */
+	qm_sg_nents = 1 + !!ivsize + mapped_src_nents +
+		      (mapped_dst_nents > 1 ? mapped_dst_nents : 0);
+	sg_table = &edesc->sgt[0];
+	qm_sg_bytes = qm_sg_nents * sizeof(*sg_table);
+	if (unlikely(offsetof(struct aead_edesc, sgt) + qm_sg_bytes + ivsize >
+		     CAAM_QI_MEMCACHE_SIZE)) {
+		dev_err(dev, "No space for %d S/G entries and/or %dB IV\n",
+			qm_sg_nents, ivsize);
+		caam_unmap(dev, req->src, req->dst, src_nents, dst_nents, 0,
+			   0, 0, 0);
+		qi_cache_free(edesc);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (ivsize) {
+		u8 *iv = (u8 *)(sg_table + qm_sg_nents);
+
+		/* Make sure IV is located in a DMAable area */
+		memcpy(iv, req->iv, ivsize);
+
+		iv_dma = dma_map_single(dev, iv, ivsize, DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, iv_dma)) {
+			dev_err(dev, "unable to map IV\n");
+			caam_unmap(dev, req->src, req->dst, src_nents,
+				   dst_nents, 0, 0, 0, 0);
+			qi_cache_free(edesc);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
+
+	edesc->src_nents = src_nents;
+	edesc->dst_nents = dst_nents;
+	edesc->iv_dma = iv_dma;
+
+	edesc->assoclen = cpu_to_caam32(req->assoclen);
+	edesc->assoclen_dma = dma_map_single(dev, &edesc->assoclen, 4,
+					     DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, edesc->assoclen_dma)) {
+		dev_err(dev, "unable to map assoclen\n");
+		caam_unmap(dev, req->src, req->dst, src_nents, dst_nents,
+			   iv_dma, ivsize, 0, 0);
+		qi_cache_free(edesc);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	dma_to_qm_sg_one(sg_table, edesc->assoclen_dma, 4, 0);
+	qm_sg_index++;
+	if (ivsize) {
+		dma_to_qm_sg_one(sg_table + qm_sg_index, iv_dma, ivsize, 0);
+		qm_sg_index++;
+	}
+	sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table + qm_sg_index, 0);
+	qm_sg_index += mapped_src_nents;
+
+	if (mapped_dst_nents > 1)
+		sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table +
+				 qm_sg_index, 0);
+
+	qm_sg_dma = dma_map_single(dev, sg_table, qm_sg_bytes, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, qm_sg_dma)) {
+		dev_err(dev, "unable to map S/G table\n");
+		dma_unmap_single(dev, edesc->assoclen_dma, 4, DMA_TO_DEVICE);
+		caam_unmap(dev, req->src, req->dst, src_nents, dst_nents,
+			   iv_dma, ivsize, 0, 0);
+		qi_cache_free(edesc);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	edesc->qm_sg_dma = qm_sg_dma;
+	edesc->qm_sg_bytes = qm_sg_bytes;
+
+	out_len = req->assoclen + req->cryptlen +
+		  (encrypt ? ctx->authsize : (-ctx->authsize));
+	in_len = 4 + ivsize + req->assoclen + req->cryptlen;
+
+	memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt));
+	dpaa2_fl_set_final(in_fle, true);
+	dpaa2_fl_set_format(in_fle, dpaa2_fl_sg);
+	dpaa2_fl_set_addr(in_fle, qm_sg_dma);
+	dpaa2_fl_set_len(in_fle, in_len);
+
+	if (req->dst == req->src) {
+		if (mapped_src_nents == 1) {
+			dpaa2_fl_set_format(out_fle, dpaa2_fl_single);
+			dpaa2_fl_set_addr(out_fle, sg_dma_address(req->src));
+		} else {
+			dpaa2_fl_set_format(out_fle, dpaa2_fl_sg);
+			dpaa2_fl_set_addr(out_fle, qm_sg_dma +
+					  (1 + !!ivsize) * sizeof(*sg_table));
+		}
+	} else if (mapped_dst_nents == 1) {
+		dpaa2_fl_set_format(out_fle, dpaa2_fl_single);
+		dpaa2_fl_set_addr(out_fle, sg_dma_address(req->dst));
+	} else {
+		dpaa2_fl_set_format(out_fle, dpaa2_fl_sg);
+		dpaa2_fl_set_addr(out_fle, qm_sg_dma + qm_sg_index *
+				  sizeof(*sg_table));
+	}
+
+	dpaa2_fl_set_len(out_fle, out_len);
+
+	return edesc;
+}
+
+static int gcm_set_sh_desc(struct crypto_aead *aead)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *dev = ctx->dev;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
+	struct caam_flc *flc;
+	u32 *desc;
+	int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN -
+			ctx->cdata.keylen;
+
+	if (!ctx->cdata.keylen || !ctx->authsize)
+		return 0;
+
+	/*
+	 * AES GCM encrypt shared descriptor
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_GCM_ENC_LEN) {
+		ctx->cdata.key_inline = true;
+		ctx->cdata.key_virt = ctx->key;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	flc = &ctx->flc[ENCRYPT];
+	desc = flc->sh_desc;
+	cnstr_shdsc_gcm_encap(desc, &ctx->cdata, ivsize, ctx->authsize, true);
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(dev, ctx->flc_dma[ENCRYPT],
+				   sizeof(flc->flc) + desc_bytes(desc),
+				   ctx->dir);
+
+	/*
+	 * Job Descriptor and Shared Descriptors
+	 * must all fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_GCM_DEC_LEN) {
+		ctx->cdata.key_inline = true;
+		ctx->cdata.key_virt = ctx->key;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	flc = &ctx->flc[DECRYPT];
+	desc = flc->sh_desc;
+	cnstr_shdsc_gcm_decap(desc, &ctx->cdata, ivsize, ctx->authsize, true);
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(dev, ctx->flc_dma[DECRYPT],
+				   sizeof(flc->flc) + desc_bytes(desc),
+				   ctx->dir);
+
+	return 0;
+}
+
+static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+
+	ctx->authsize = authsize;
+	gcm_set_sh_desc(authenc);
+
+	return 0;
+}
+
+static int gcm_setkey(struct crypto_aead *aead,
+		      const u8 *key, unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *dev = ctx->dev;
+
+	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+
+	memcpy(ctx->key, key, keylen);
+	dma_sync_single_for_device(dev, ctx->key_dma, keylen, ctx->dir);
+	ctx->cdata.keylen = keylen;
+
+	return gcm_set_sh_desc(aead);
+}
+
+static int rfc4106_set_sh_desc(struct crypto_aead *aead)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *dev = ctx->dev;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
+	struct caam_flc *flc;
+	u32 *desc;
+	int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN -
+			ctx->cdata.keylen;
+
+	if (!ctx->cdata.keylen || !ctx->authsize)
+		return 0;
+
+	ctx->cdata.key_virt = ctx->key;
+
+	/*
+	 * RFC4106 encrypt shared descriptor
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4106_ENC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	flc = &ctx->flc[ENCRYPT];
+	desc = flc->sh_desc;
+	cnstr_shdsc_rfc4106_encap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  true);
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(dev, ctx->flc_dma[ENCRYPT],
+				   sizeof(flc->flc) + desc_bytes(desc),
+				   ctx->dir);
+
+	/*
+	 * Job Descriptor and Shared Descriptors
+	 * must all fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4106_DEC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	flc = &ctx->flc[DECRYPT];
+	desc = flc->sh_desc;
+	cnstr_shdsc_rfc4106_decap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  true);
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(dev, ctx->flc_dma[DECRYPT],
+				   sizeof(flc->flc) + desc_bytes(desc),
+				   ctx->dir);
+
+	return 0;
+}
+
+static int rfc4106_setauthsize(struct crypto_aead *authenc,
+			       unsigned int authsize)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+
+	ctx->authsize = authsize;
+	rfc4106_set_sh_desc(authenc);
+
+	return 0;
+}
+
+static int rfc4106_setkey(struct crypto_aead *aead,
+			  const u8 *key, unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *dev = ctx->dev;
+
+	if (keylen < 4)
+		return -EINVAL;
+
+	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+
+	memcpy(ctx->key, key, keylen);
+	/*
+	 * The last four bytes of the key material are used as the salt value
+	 * in the nonce. Update the AES key length.
+	 */
+	ctx->cdata.keylen = keylen - 4;
+	dma_sync_single_for_device(dev, ctx->key_dma, ctx->cdata.keylen,
+				   ctx->dir);
+
+	return rfc4106_set_sh_desc(aead);
+}
+
+static int rfc4543_set_sh_desc(struct crypto_aead *aead)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *dev = ctx->dev;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
+	struct caam_flc *flc;
+	u32 *desc;
+	int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN -
+			ctx->cdata.keylen;
+
+	if (!ctx->cdata.keylen || !ctx->authsize)
+		return 0;
+
+	ctx->cdata.key_virt = ctx->key;
+
+	/*
+	 * RFC4543 encrypt shared descriptor
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4543_ENC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	flc = &ctx->flc[ENCRYPT];
+	desc = flc->sh_desc;
+	cnstr_shdsc_rfc4543_encap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  true);
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(dev, ctx->flc_dma[ENCRYPT],
+				   sizeof(flc->flc) + desc_bytes(desc),
+				   ctx->dir);
+
+	/*
+	 * Job Descriptor and Shared Descriptors
+	 * must all fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4543_DEC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	flc = &ctx->flc[DECRYPT];
+	desc = flc->sh_desc;
+	cnstr_shdsc_rfc4543_decap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  true);
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(dev, ctx->flc_dma[DECRYPT],
+				   sizeof(flc->flc) + desc_bytes(desc),
+				   ctx->dir);
+
+	return 0;
+}
+
+static int rfc4543_setauthsize(struct crypto_aead *authenc,
+			       unsigned int authsize)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+
+	ctx->authsize = authsize;
+	rfc4543_set_sh_desc(authenc);
+
+	return 0;
+}
+
+static int rfc4543_setkey(struct crypto_aead *aead,
+			  const u8 *key, unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *dev = ctx->dev;
+
+	if (keylen < 4)
+		return -EINVAL;
+
+	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+
+	memcpy(ctx->key, key, keylen);
+	/*
+	 * The last four bytes of the key material are used as the salt value
+	 * in the nonce. Update the AES key length.
+	 */
+	ctx->cdata.keylen = keylen - 4;
+	dma_sync_single_for_device(dev, ctx->key_dma, ctx->cdata.keylen,
+				   ctx->dir);
+
+	return rfc4543_set_sh_desc(aead);
+}
+
+static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
+			   unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_skcipher_alg *alg =
+		container_of(crypto_skcipher_alg(skcipher),
+			     struct caam_skcipher_alg, skcipher);
+	struct device *dev = ctx->dev;
+	struct caam_flc *flc;
+	unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
+	u32 *desc;
+	u32 ctx1_iv_off = 0;
+	const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) ==
+			       OP_ALG_AAI_CTR_MOD128);
+	const bool is_rfc3686 = alg->caam.rfc3686;
+
+	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+
+	/*
+	 * AES-CTR needs to load IV in CONTEXT1 reg
+	 * at an offset of 128bits (16bytes)
+	 * CONTEXT1[255:128] = IV
+	 */
+	if (ctr_mode)
+		ctx1_iv_off = 16;
+
+	/*
+	 * RFC3686 specific:
+	 *	| CONTEXT1[255:128] = {NONCE, IV, COUNTER}
+	 *	| *key = {KEY, NONCE}
+	 */
+	if (is_rfc3686) {
+		ctx1_iv_off = 16 + CTR_RFC3686_NONCE_SIZE;
+		keylen -= CTR_RFC3686_NONCE_SIZE;
+	}
+
+	ctx->cdata.keylen = keylen;
+	ctx->cdata.key_virt = key;
+	ctx->cdata.key_inline = true;
+
+	/* skcipher_encrypt shared descriptor */
+	flc = &ctx->flc[ENCRYPT];
+	desc = flc->sh_desc;
+	cnstr_shdsc_skcipher_encap(desc, &ctx->cdata, ivsize, is_rfc3686,
+				   ctx1_iv_off);
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(dev, ctx->flc_dma[ENCRYPT],
+				   sizeof(flc->flc) + desc_bytes(desc),
+				   ctx->dir);
+
+	/* skcipher_decrypt shared descriptor */
+	flc = &ctx->flc[DECRYPT];
+	desc = flc->sh_desc;
+	cnstr_shdsc_skcipher_decap(desc, &ctx->cdata, ivsize, is_rfc3686,
+				   ctx1_iv_off);
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(dev, ctx->flc_dma[DECRYPT],
+				   sizeof(flc->flc) + desc_bytes(desc),
+				   ctx->dir);
+
+	return 0;
+}
+
+static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
+			       unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct device *dev = ctx->dev;
+	struct caam_flc *flc;
+	u32 *desc;
+
+	if (keylen != 2 * AES_MIN_KEY_SIZE  && keylen != 2 * AES_MAX_KEY_SIZE) {
+		dev_err(dev, "key size mismatch\n");
+		crypto_skcipher_set_flags(skcipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	ctx->cdata.keylen = keylen;
+	ctx->cdata.key_virt = key;
+	ctx->cdata.key_inline = true;
+
+	/* xts_skcipher_encrypt shared descriptor */
+	flc = &ctx->flc[ENCRYPT];
+	desc = flc->sh_desc;
+	cnstr_shdsc_xts_skcipher_encap(desc, &ctx->cdata);
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(dev, ctx->flc_dma[ENCRYPT],
+				   sizeof(flc->flc) + desc_bytes(desc),
+				   ctx->dir);
+
+	/* xts_skcipher_decrypt shared descriptor */
+	flc = &ctx->flc[DECRYPT];
+	desc = flc->sh_desc;
+	cnstr_shdsc_xts_skcipher_decap(desc, &ctx->cdata);
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(dev, ctx->flc_dma[DECRYPT],
+				   sizeof(flc->flc) + desc_bytes(desc),
+				   ctx->dir);
+
+	return 0;
+}
+
+static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req)
+{
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct caam_request *req_ctx = skcipher_request_ctx(req);
+	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
+	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct device *dev = ctx->dev;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
+	int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
+	struct skcipher_edesc *edesc;
+	dma_addr_t iv_dma;
+	u8 *iv;
+	int ivsize = crypto_skcipher_ivsize(skcipher);
+	int dst_sg_idx, qm_sg_ents, qm_sg_bytes;
+	struct dpaa2_sg_entry *sg_table;
+
+	src_nents = sg_nents_for_len(req->src, req->cryptlen);
+	if (unlikely(src_nents < 0)) {
+		dev_err(dev, "Insufficient bytes (%d) in src S/G\n",
+			req->cryptlen);
+		return ERR_PTR(src_nents);
+	}
+
+	if (unlikely(req->dst != req->src)) {
+		dst_nents = sg_nents_for_len(req->dst, req->cryptlen);
+		if (unlikely(dst_nents < 0)) {
+			dev_err(dev, "Insufficient bytes (%d) in dst S/G\n",
+				req->cryptlen);
+			return ERR_PTR(dst_nents);
+		}
+
+		mapped_src_nents = dma_map_sg(dev, req->src, src_nents,
+					      DMA_TO_DEVICE);
+		if (unlikely(!mapped_src_nents)) {
+			dev_err(dev, "unable to map source\n");
+			return ERR_PTR(-ENOMEM);
+		}
+
+		mapped_dst_nents = dma_map_sg(dev, req->dst, dst_nents,
+					      DMA_FROM_DEVICE);
+		if (unlikely(!mapped_dst_nents)) {
+			dev_err(dev, "unable to map destination\n");
+			dma_unmap_sg(dev, req->src, src_nents, DMA_TO_DEVICE);
+			return ERR_PTR(-ENOMEM);
+		}
+	} else {
+		mapped_src_nents = dma_map_sg(dev, req->src, src_nents,
+					      DMA_BIDIRECTIONAL);
+		if (unlikely(!mapped_src_nents)) {
+			dev_err(dev, "unable to map source\n");
+			return ERR_PTR(-ENOMEM);
+		}
+	}
+
+	qm_sg_ents = 1 + mapped_src_nents;
+	dst_sg_idx = qm_sg_ents;
+
+	qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
+	qm_sg_bytes = qm_sg_ents * sizeof(struct dpaa2_sg_entry);
+	if (unlikely(offsetof(struct skcipher_edesc, sgt) + qm_sg_bytes +
+		     ivsize > CAAM_QI_MEMCACHE_SIZE)) {
+		dev_err(dev, "No space for %d S/G entries and/or %dB IV\n",
+			qm_sg_ents, ivsize);
+		caam_unmap(dev, req->src, req->dst, src_nents, dst_nents, 0,
+			   0, 0, 0);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* allocate space for base edesc, link tables and IV */
+	edesc = qi_cache_zalloc(GFP_DMA | flags);
+	if (unlikely(!edesc)) {
+		dev_err(dev, "could not allocate extended descriptor\n");
+		caam_unmap(dev, req->src, req->dst, src_nents, dst_nents, 0,
+			   0, 0, 0);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* Make sure IV is located in a DMAable area */
+	sg_table = &edesc->sgt[0];
+	iv = (u8 *)(sg_table + qm_sg_ents);
+	memcpy(iv, req->iv, ivsize);
+
+	iv_dma = dma_map_single(dev, iv, ivsize, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, iv_dma)) {
+		dev_err(dev, "unable to map IV\n");
+		caam_unmap(dev, req->src, req->dst, src_nents, dst_nents, 0,
+			   0, 0, 0);
+		qi_cache_free(edesc);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	edesc->src_nents = src_nents;
+	edesc->dst_nents = dst_nents;
+	edesc->iv_dma = iv_dma;
+	edesc->qm_sg_bytes = qm_sg_bytes;
+
+	dma_to_qm_sg_one(sg_table, iv_dma, ivsize, 0);
+	sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table + 1, 0);
+
+	if (mapped_dst_nents > 1)
+		sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table +
+				 dst_sg_idx, 0);
+
+	edesc->qm_sg_dma = dma_map_single(dev, sg_table, edesc->qm_sg_bytes,
+					  DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, edesc->qm_sg_dma)) {
+		dev_err(dev, "unable to map S/G table\n");
+		caam_unmap(dev, req->src, req->dst, src_nents, dst_nents,
+			   iv_dma, ivsize, 0, 0);
+		qi_cache_free(edesc);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt));
+	dpaa2_fl_set_final(in_fle, true);
+	dpaa2_fl_set_len(in_fle, req->cryptlen + ivsize);
+	dpaa2_fl_set_len(out_fle, req->cryptlen);
+
+	dpaa2_fl_set_format(in_fle, dpaa2_fl_sg);
+	dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma);
+
+	if (req->src == req->dst) {
+		dpaa2_fl_set_format(out_fle, dpaa2_fl_sg);
+		dpaa2_fl_set_addr(out_fle, edesc->qm_sg_dma +
+				  sizeof(*sg_table));
+	} else if (mapped_dst_nents > 1) {
+		dpaa2_fl_set_format(out_fle, dpaa2_fl_sg);
+		dpaa2_fl_set_addr(out_fle, edesc->qm_sg_dma + dst_sg_idx *
+				  sizeof(*sg_table));
+	} else {
+		dpaa2_fl_set_format(out_fle, dpaa2_fl_single);
+		dpaa2_fl_set_addr(out_fle, sg_dma_address(req->dst));
+	}
+
+	return edesc;
+}
+
+static void aead_unmap(struct device *dev, struct aead_edesc *edesc,
+		       struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	int ivsize = crypto_aead_ivsize(aead);
+
+	caam_unmap(dev, req->src, req->dst, edesc->src_nents, edesc->dst_nents,
+		   edesc->iv_dma, ivsize, edesc->qm_sg_dma, edesc->qm_sg_bytes);
+	dma_unmap_single(dev, edesc->assoclen_dma, 4, DMA_TO_DEVICE);
+}
+
+static void skcipher_unmap(struct device *dev, struct skcipher_edesc *edesc,
+			   struct skcipher_request *req)
+{
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	int ivsize = crypto_skcipher_ivsize(skcipher);
+
+	caam_unmap(dev, req->src, req->dst, edesc->src_nents, edesc->dst_nents,
+		   edesc->iv_dma, ivsize, edesc->qm_sg_dma, edesc->qm_sg_bytes);
+}
+
+static void aead_encrypt_done(void *cbk_ctx, u32 status)
+{
+	struct crypto_async_request *areq = cbk_ctx;
+	struct aead_request *req = container_of(areq, struct aead_request,
+						base);
+	struct caam_request *req_ctx = to_caam_req(areq);
+	struct aead_edesc *edesc = req_ctx->edesc;
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	int ecode = 0;
+
+	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
+
+	if (unlikely(status)) {
+		caam_qi2_strstatus(ctx->dev, status);
+		ecode = -EIO;
+	}
+
+	aead_unmap(ctx->dev, edesc, req);
+	qi_cache_free(edesc);
+	aead_request_complete(req, ecode);
+}
+
+static void aead_decrypt_done(void *cbk_ctx, u32 status)
+{
+	struct crypto_async_request *areq = cbk_ctx;
+	struct aead_request *req = container_of(areq, struct aead_request,
+						base);
+	struct caam_request *req_ctx = to_caam_req(areq);
+	struct aead_edesc *edesc = req_ctx->edesc;
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	int ecode = 0;
+
+	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
+
+	if (unlikely(status)) {
+		caam_qi2_strstatus(ctx->dev, status);
+		/*
+		 * verify hw auth check passed else return -EBADMSG
+		 */
+		if ((status & JRSTA_CCBERR_ERRID_MASK) ==
+		     JRSTA_CCBERR_ERRID_ICVCHK)
+			ecode = -EBADMSG;
+		else
+			ecode = -EIO;
+	}
+
+	aead_unmap(ctx->dev, edesc, req);
+	qi_cache_free(edesc);
+	aead_request_complete(req, ecode);
+}
+
+static int aead_encrypt(struct aead_request *req)
+{
+	struct aead_edesc *edesc;
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_request *caam_req = aead_request_ctx(req);
+	int ret;
+
+	/* allocate extended descriptor */
+	edesc = aead_edesc_alloc(req, true);
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	caam_req->flc = &ctx->flc[ENCRYPT];
+	caam_req->flc_dma = ctx->flc_dma[ENCRYPT];
+	caam_req->cbk = aead_encrypt_done;
+	caam_req->ctx = &req->base;
+	caam_req->edesc = edesc;
+	ret = dpaa2_caam_enqueue(ctx->dev, caam_req);
+	if (ret != -EINPROGRESS &&
+	    !(ret == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
+		aead_unmap(ctx->dev, edesc, req);
+		qi_cache_free(edesc);
+	}
+
+	return ret;
+}
+
+static int aead_decrypt(struct aead_request *req)
+{
+	struct aead_edesc *edesc;
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_request *caam_req = aead_request_ctx(req);
+	int ret;
+
+	/* allocate extended descriptor */
+	edesc = aead_edesc_alloc(req, false);
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	caam_req->flc = &ctx->flc[DECRYPT];
+	caam_req->flc_dma = ctx->flc_dma[DECRYPT];
+	caam_req->cbk = aead_decrypt_done;
+	caam_req->ctx = &req->base;
+	caam_req->edesc = edesc;
+	ret = dpaa2_caam_enqueue(ctx->dev, caam_req);
+	if (ret != -EINPROGRESS &&
+	    !(ret == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
+		aead_unmap(ctx->dev, edesc, req);
+		qi_cache_free(edesc);
+	}
+
+	return ret;
+}
+
+static int ipsec_gcm_encrypt(struct aead_request *req)
+{
+	if (req->assoclen < 8)
+		return -EINVAL;
+
+	return aead_encrypt(req);
+}
+
+static int ipsec_gcm_decrypt(struct aead_request *req)
+{
+	if (req->assoclen < 8)
+		return -EINVAL;
+
+	return aead_decrypt(req);
+}
+
+static void skcipher_encrypt_done(void *cbk_ctx, u32 status)
+{
+	struct crypto_async_request *areq = cbk_ctx;
+	struct skcipher_request *req = skcipher_request_cast(areq);
+	struct caam_request *req_ctx = to_caam_req(areq);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct skcipher_edesc *edesc = req_ctx->edesc;
+	int ecode = 0;
+	int ivsize = crypto_skcipher_ivsize(skcipher);
+
+	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
+
+	if (unlikely(status)) {
+		caam_qi2_strstatus(ctx->dev, status);
+		ecode = -EIO;
+	}
+
+	print_hex_dump_debug("dstiv  @" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
+			     edesc->src_nents > 1 ? 100 : ivsize, 1);
+	caam_dump_sg(KERN_DEBUG, "dst    @" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+		     edesc->dst_nents > 1 ? 100 : req->cryptlen, 1);
+
+	skcipher_unmap(ctx->dev, edesc, req);
+
+	/*
+	 * The crypto API expects us to set the IV (req->iv) to the last
+	 * ciphertext block. This is used e.g. by the CTS mode.
+	 */
+	scatterwalk_map_and_copy(req->iv, req->dst, req->cryptlen - ivsize,
+				 ivsize, 0);
+
+	qi_cache_free(edesc);
+	skcipher_request_complete(req, ecode);
+}
+
+static void skcipher_decrypt_done(void *cbk_ctx, u32 status)
+{
+	struct crypto_async_request *areq = cbk_ctx;
+	struct skcipher_request *req = skcipher_request_cast(areq);
+	struct caam_request *req_ctx = to_caam_req(areq);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct skcipher_edesc *edesc = req_ctx->edesc;
+	int ecode = 0;
+	int ivsize = crypto_skcipher_ivsize(skcipher);
+
+	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
+
+	if (unlikely(status)) {
+		caam_qi2_strstatus(ctx->dev, status);
+		ecode = -EIO;
+	}
+
+	print_hex_dump_debug("dstiv  @" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
+			     edesc->src_nents > 1 ? 100 : ivsize, 1);
+	caam_dump_sg(KERN_DEBUG, "dst    @" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+		     edesc->dst_nents > 1 ? 100 : req->cryptlen, 1);
+
+	skcipher_unmap(ctx->dev, edesc, req);
+	qi_cache_free(edesc);
+	skcipher_request_complete(req, ecode);
+}
+
+static int skcipher_encrypt(struct skcipher_request *req)
+{
+	struct skcipher_edesc *edesc;
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_request *caam_req = skcipher_request_ctx(req);
+	int ret;
+
+	/* allocate extended descriptor */
+	edesc = skcipher_edesc_alloc(req);
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	caam_req->flc = &ctx->flc[ENCRYPT];
+	caam_req->flc_dma = ctx->flc_dma[ENCRYPT];
+	caam_req->cbk = skcipher_encrypt_done;
+	caam_req->ctx = &req->base;
+	caam_req->edesc = edesc;
+	ret = dpaa2_caam_enqueue(ctx->dev, caam_req);
+	if (ret != -EINPROGRESS &&
+	    !(ret == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
+		skcipher_unmap(ctx->dev, edesc, req);
+		qi_cache_free(edesc);
+	}
+
+	return ret;
+}
+
+static int skcipher_decrypt(struct skcipher_request *req)
+{
+	struct skcipher_edesc *edesc;
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_request *caam_req = skcipher_request_ctx(req);
+	int ivsize = crypto_skcipher_ivsize(skcipher);
+	int ret;
+
+	/* allocate extended descriptor */
+	edesc = skcipher_edesc_alloc(req);
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	/*
+	 * The crypto API expects us to set the IV (req->iv) to the last
+	 * ciphertext block.
+	 */
+	scatterwalk_map_and_copy(req->iv, req->src, req->cryptlen - ivsize,
+				 ivsize, 0);
+
+	caam_req->flc = &ctx->flc[DECRYPT];
+	caam_req->flc_dma = ctx->flc_dma[DECRYPT];
+	caam_req->cbk = skcipher_decrypt_done;
+	caam_req->ctx = &req->base;
+	caam_req->edesc = edesc;
+	ret = dpaa2_caam_enqueue(ctx->dev, caam_req);
+	if (ret != -EINPROGRESS &&
+	    !(ret == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
+		skcipher_unmap(ctx->dev, edesc, req);
+		qi_cache_free(edesc);
+	}
+
+	return ret;
+}
+
+static int caam_cra_init(struct caam_ctx *ctx, struct caam_alg_entry *caam,
+			 bool uses_dkp)
+{
+	dma_addr_t dma_addr;
+	int i;
+
+	/* copy descriptor header template value */
+	ctx->cdata.algtype = OP_TYPE_CLASS1_ALG | caam->class1_alg_type;
+	ctx->adata.algtype = OP_TYPE_CLASS2_ALG | caam->class2_alg_type;
+
+	ctx->dev = caam->dev;
+	ctx->dir = uses_dkp ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
+	dma_addr = dma_map_single_attrs(ctx->dev, ctx->flc,
+					offsetof(struct caam_ctx, flc_dma),
+					ctx->dir, DMA_ATTR_SKIP_CPU_SYNC);
+	if (dma_mapping_error(ctx->dev, dma_addr)) {
+		dev_err(ctx->dev, "unable to map key, shared descriptors\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < NUM_OP; i++)
+		ctx->flc_dma[i] = dma_addr + i * sizeof(ctx->flc[i]);
+	ctx->key_dma = dma_addr + NUM_OP * sizeof(ctx->flc[0]);
+
+	return 0;
+}
+
+static int caam_cra_init_skcipher(struct crypto_skcipher *tfm)
+{
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct caam_skcipher_alg *caam_alg =
+		container_of(alg, typeof(*caam_alg), skcipher);
+
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct caam_request));
+	return caam_cra_init(crypto_skcipher_ctx(tfm), &caam_alg->caam, false);
+}
+
+static int caam_cra_init_aead(struct crypto_aead *tfm)
+{
+	struct aead_alg *alg = crypto_aead_alg(tfm);
+	struct caam_aead_alg *caam_alg = container_of(alg, typeof(*caam_alg),
+						      aead);
+
+	crypto_aead_set_reqsize(tfm, sizeof(struct caam_request));
+	return caam_cra_init(crypto_aead_ctx(tfm), &caam_alg->caam,
+			     alg->setkey == aead_setkey);
+}
+
+static void caam_exit_common(struct caam_ctx *ctx)
+{
+	dma_unmap_single_attrs(ctx->dev, ctx->flc_dma[0],
+			       offsetof(struct caam_ctx, flc_dma), ctx->dir,
+			       DMA_ATTR_SKIP_CPU_SYNC);
+}
+
+static void caam_cra_exit(struct crypto_skcipher *tfm)
+{
+	caam_exit_common(crypto_skcipher_ctx(tfm));
+}
+
+static void caam_cra_exit_aead(struct crypto_aead *tfm)
+{
+	caam_exit_common(crypto_aead_ctx(tfm));
+}
+
+static struct caam_skcipher_alg driver_algs[] = {
+	{
+		.skcipher = {
+			.base = {
+				.cra_name = "cbc(aes)",
+				.cra_driver_name = "cbc-aes-caam-qi2",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+		},
+		.caam.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+	},
+	{
+		.skcipher = {
+			.base = {
+				.cra_name = "cbc(des3_ede)",
+				.cra_driver_name = "cbc-3des-caam-qi2",
+				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			},
+			.setkey = skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
+			.min_keysize = DES3_EDE_KEY_SIZE,
+			.max_keysize = DES3_EDE_KEY_SIZE,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+		},
+		.caam.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+	},
+	{
+		.skcipher = {
+			.base = {
+				.cra_name = "cbc(des)",
+				.cra_driver_name = "cbc-des-caam-qi2",
+				.cra_blocksize = DES_BLOCK_SIZE,
+			},
+			.setkey = skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
+			.min_keysize = DES_KEY_SIZE,
+			.max_keysize = DES_KEY_SIZE,
+			.ivsize = DES_BLOCK_SIZE,
+		},
+		.caam.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+	},
+	{
+		.skcipher = {
+			.base = {
+				.cra_name = "ctr(aes)",
+				.cra_driver_name = "ctr-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			.chunksize = AES_BLOCK_SIZE,
+		},
+		.caam.class1_alg_type = OP_ALG_ALGSEL_AES |
+					OP_ALG_AAI_CTR_MOD128,
+	},
+	{
+		.skcipher = {
+			.base = {
+				.cra_name = "rfc3686(ctr(aes))",
+				.cra_driver_name = "rfc3686-ctr-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE +
+				       CTR_RFC3686_NONCE_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE +
+				       CTR_RFC3686_NONCE_SIZE,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.chunksize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES |
+					   OP_ALG_AAI_CTR_MOD128,
+			.rfc3686 = true,
+		},
+	},
+	{
+		.skcipher = {
+			.base = {
+				.cra_name = "xts(aes)",
+				.cra_driver_name = "xts-aes-caam-qi2",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = xts_skcipher_setkey,
+			.encrypt = skcipher_encrypt,
+			.decrypt = skcipher_decrypt,
+			.min_keysize = 2 * AES_MIN_KEY_SIZE,
+			.max_keysize = 2 * AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+		},
+		.caam.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_XTS,
+	}
+};
+
+static struct caam_aead_alg driver_aeads[] = {
+	{
+		.aead = {
+			.base = {
+				.cra_name = "rfc4106(gcm(aes))",
+				.cra_driver_name = "rfc4106-gcm-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = rfc4106_setkey,
+			.setauthsize = rfc4106_setauthsize,
+			.encrypt = ipsec_gcm_encrypt,
+			.decrypt = ipsec_gcm_decrypt,
+			.ivsize = 8,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "rfc4543(gcm(aes))",
+				.cra_driver_name = "rfc4543-gcm-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = rfc4543_setkey,
+			.setauthsize = rfc4543_setauthsize,
+			.encrypt = ipsec_gcm_encrypt,
+			.decrypt = ipsec_gcm_decrypt,
+			.ivsize = 8,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
+		},
+	},
+	/* Galois Counter Mode */
+	{
+		.aead = {
+			.base = {
+				.cra_name = "gcm(aes)",
+				.cra_driver_name = "gcm-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = gcm_setkey,
+			.setauthsize = gcm_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = 12,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
+		}
+	},
+	/* single-pass ipsec_esp descriptor */
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(md5),cbc(aes))",
+				.cra_driver_name = "authenc-hmac-md5-"
+						   "cbc-aes-caam-qi2",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = MD5_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_MD5 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(md5),"
+					    "cbc(aes)))",
+				.cra_driver_name = "echainiv-authenc-hmac-md5-"
+						   "cbc-aes-caam-qi2",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = MD5_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_MD5 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha1),cbc(aes))",
+				.cra_driver_name = "authenc-hmac-sha1-"
+						   "cbc-aes-caam-qi2",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA1 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(sha1),"
+					    "cbc(aes)))",
+				.cra_driver_name = "echainiv-authenc-"
+						   "hmac-sha1-cbc-aes-caam-qi2",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA1 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha224),cbc(aes))",
+				.cra_driver_name = "authenc-hmac-sha224-"
+						   "cbc-aes-caam-qi2",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = SHA224_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(sha224),"
+					    "cbc(aes)))",
+				.cra_driver_name = "echainiv-authenc-"
+						   "hmac-sha224-cbc-aes-caam-qi2",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = SHA224_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha256),cbc(aes))",
+				.cra_driver_name = "authenc-hmac-sha256-"
+						   "cbc-aes-caam-qi2",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = SHA256_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(sha256),"
+					    "cbc(aes)))",
+				.cra_driver_name = "echainiv-authenc-"
+						   "hmac-sha256-cbc-aes-"
+						   "caam-qi2",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = SHA256_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha384),cbc(aes))",
+				.cra_driver_name = "authenc-hmac-sha384-"
+						   "cbc-aes-caam-qi2",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = SHA384_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(sha384),"
+					    "cbc(aes)))",
+				.cra_driver_name = "echainiv-authenc-"
+						   "hmac-sha384-cbc-aes-"
+						   "caam-qi2",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = SHA384_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha512),cbc(aes))",
+				.cra_driver_name = "authenc-hmac-sha512-"
+						   "cbc-aes-caam-qi2",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = SHA512_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(sha512),"
+					    "cbc(aes)))",
+				.cra_driver_name = "echainiv-authenc-"
+						   "hmac-sha512-cbc-aes-"
+						   "caam-qi2",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = SHA512_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(md5),cbc(des3_ede))",
+				.cra_driver_name = "authenc-hmac-md5-"
+						   "cbc-des3_ede-caam-qi2",
+				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = MD5_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_MD5 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(md5),"
+					    "cbc(des3_ede)))",
+				.cra_driver_name = "echainiv-authenc-hmac-md5-"
+						   "cbc-des3_ede-caam-qi2",
+				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = MD5_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_MD5 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha1),"
+					    "cbc(des3_ede))",
+				.cra_driver_name = "authenc-hmac-sha1-"
+						   "cbc-des3_ede-caam-qi2",
+				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA1 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(sha1),"
+					    "cbc(des3_ede)))",
+				.cra_driver_name = "echainiv-authenc-"
+						   "hmac-sha1-"
+						   "cbc-des3_ede-caam-qi2",
+				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA1 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha224),"
+					    "cbc(des3_ede))",
+				.cra_driver_name = "authenc-hmac-sha224-"
+						   "cbc-des3_ede-caam-qi2",
+				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = SHA224_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(sha224),"
+					    "cbc(des3_ede)))",
+				.cra_driver_name = "echainiv-authenc-"
+						   "hmac-sha224-"
+						   "cbc-des3_ede-caam-qi2",
+				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = SHA224_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha256),"
+					    "cbc(des3_ede))",
+				.cra_driver_name = "authenc-hmac-sha256-"
+						   "cbc-des3_ede-caam-qi2",
+				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = SHA256_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(sha256),"
+					    "cbc(des3_ede)))",
+				.cra_driver_name = "echainiv-authenc-"
+						   "hmac-sha256-"
+						   "cbc-des3_ede-caam-qi2",
+				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = SHA256_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha384),"
+					    "cbc(des3_ede))",
+				.cra_driver_name = "authenc-hmac-sha384-"
+						   "cbc-des3_ede-caam-qi2",
+				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = SHA384_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(sha384),"
+					    "cbc(des3_ede)))",
+				.cra_driver_name = "echainiv-authenc-"
+						   "hmac-sha384-"
+						   "cbc-des3_ede-caam-qi2",
+				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = SHA384_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha512),"
+					    "cbc(des3_ede))",
+				.cra_driver_name = "authenc-hmac-sha512-"
+						   "cbc-des3_ede-caam-qi2",
+				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = SHA512_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(sha512),"
+					    "cbc(des3_ede)))",
+				.cra_driver_name = "echainiv-authenc-"
+						   "hmac-sha512-"
+						   "cbc-des3_ede-caam-qi2",
+				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = SHA512_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(md5),cbc(des))",
+				.cra_driver_name = "authenc-hmac-md5-"
+						   "cbc-des-caam-qi2",
+				.cra_blocksize = DES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES_BLOCK_SIZE,
+			.maxauthsize = MD5_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_MD5 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(md5),"
+					    "cbc(des)))",
+				.cra_driver_name = "echainiv-authenc-hmac-md5-"
+						   "cbc-des-caam-qi2",
+				.cra_blocksize = DES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES_BLOCK_SIZE,
+			.maxauthsize = MD5_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_MD5 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha1),cbc(des))",
+				.cra_driver_name = "authenc-hmac-sha1-"
+						   "cbc-des-caam-qi2",
+				.cra_blocksize = DES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES_BLOCK_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA1 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(sha1),"
+					    "cbc(des)))",
+				.cra_driver_name = "echainiv-authenc-"
+						   "hmac-sha1-cbc-des-caam-qi2",
+				.cra_blocksize = DES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES_BLOCK_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA1 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha224),cbc(des))",
+				.cra_driver_name = "authenc-hmac-sha224-"
+						   "cbc-des-caam-qi2",
+				.cra_blocksize = DES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES_BLOCK_SIZE,
+			.maxauthsize = SHA224_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(sha224),"
+					    "cbc(des)))",
+				.cra_driver_name = "echainiv-authenc-"
+						   "hmac-sha224-cbc-des-"
+						   "caam-qi2",
+				.cra_blocksize = DES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES_BLOCK_SIZE,
+			.maxauthsize = SHA224_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha256),cbc(des))",
+				.cra_driver_name = "authenc-hmac-sha256-"
+						   "cbc-des-caam-qi2",
+				.cra_blocksize = DES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES_BLOCK_SIZE,
+			.maxauthsize = SHA256_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(sha256),"
+					    "cbc(des)))",
+				.cra_driver_name = "echainiv-authenc-"
+						   "hmac-sha256-cbc-desi-"
+						   "caam-qi2",
+				.cra_blocksize = DES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES_BLOCK_SIZE,
+			.maxauthsize = SHA256_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha384),cbc(des))",
+				.cra_driver_name = "authenc-hmac-sha384-"
+						   "cbc-des-caam-qi2",
+				.cra_blocksize = DES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES_BLOCK_SIZE,
+			.maxauthsize = SHA384_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(sha384),"
+					    "cbc(des)))",
+				.cra_driver_name = "echainiv-authenc-"
+						   "hmac-sha384-cbc-des-"
+						   "caam-qi2",
+				.cra_blocksize = DES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES_BLOCK_SIZE,
+			.maxauthsize = SHA384_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha512),cbc(des))",
+				.cra_driver_name = "authenc-hmac-sha512-"
+						   "cbc-des-caam-qi2",
+				.cra_blocksize = DES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES_BLOCK_SIZE,
+			.maxauthsize = SHA512_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "echainiv(authenc(hmac(sha512),"
+					    "cbc(des)))",
+				.cra_driver_name = "echainiv-authenc-"
+						   "hmac-sha512-cbc-des-"
+						   "caam-qi2",
+				.cra_blocksize = DES_BLOCK_SIZE,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = DES_BLOCK_SIZE,
+			.maxauthsize = SHA512_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.geniv = true,
+		}
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(md5),"
+					    "rfc3686(ctr(aes)))",
+				.cra_driver_name = "authenc-hmac-md5-"
+						   "rfc3686-ctr-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = MD5_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES |
+					   OP_ALG_AAI_CTR_MOD128,
+			.class2_alg_type = OP_ALG_ALGSEL_MD5 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.rfc3686 = true,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "seqiv(authenc("
+					    "hmac(md5),rfc3686(ctr(aes))))",
+				.cra_driver_name = "seqiv-authenc-hmac-md5-"
+						   "rfc3686-ctr-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = MD5_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES |
+					   OP_ALG_AAI_CTR_MOD128,
+			.class2_alg_type = OP_ALG_ALGSEL_MD5 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.rfc3686 = true,
+			.geniv = true,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha1),"
+					    "rfc3686(ctr(aes)))",
+				.cra_driver_name = "authenc-hmac-sha1-"
+						   "rfc3686-ctr-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES |
+					   OP_ALG_AAI_CTR_MOD128,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA1 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.rfc3686 = true,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "seqiv(authenc("
+					    "hmac(sha1),rfc3686(ctr(aes))))",
+				.cra_driver_name = "seqiv-authenc-hmac-sha1-"
+						   "rfc3686-ctr-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES |
+					   OP_ALG_AAI_CTR_MOD128,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA1 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.rfc3686 = true,
+			.geniv = true,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha224),"
+					    "rfc3686(ctr(aes)))",
+				.cra_driver_name = "authenc-hmac-sha224-"
+						   "rfc3686-ctr-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA224_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES |
+					   OP_ALG_AAI_CTR_MOD128,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.rfc3686 = true,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "seqiv(authenc("
+					    "hmac(sha224),rfc3686(ctr(aes))))",
+				.cra_driver_name = "seqiv-authenc-hmac-sha224-"
+						   "rfc3686-ctr-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA224_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES |
+					   OP_ALG_AAI_CTR_MOD128,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.rfc3686 = true,
+			.geniv = true,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha256),"
+					    "rfc3686(ctr(aes)))",
+				.cra_driver_name = "authenc-hmac-sha256-"
+						   "rfc3686-ctr-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA256_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES |
+					   OP_ALG_AAI_CTR_MOD128,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.rfc3686 = true,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "seqiv(authenc(hmac(sha256),"
+					    "rfc3686(ctr(aes))))",
+				.cra_driver_name = "seqiv-authenc-hmac-sha256-"
+						   "rfc3686-ctr-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA256_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES |
+					   OP_ALG_AAI_CTR_MOD128,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.rfc3686 = true,
+			.geniv = true,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha384),"
+					    "rfc3686(ctr(aes)))",
+				.cra_driver_name = "authenc-hmac-sha384-"
+						   "rfc3686-ctr-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA384_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES |
+					   OP_ALG_AAI_CTR_MOD128,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.rfc3686 = true,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "seqiv(authenc(hmac(sha384),"
+					    "rfc3686(ctr(aes))))",
+				.cra_driver_name = "seqiv-authenc-hmac-sha384-"
+						   "rfc3686-ctr-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA384_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES |
+					   OP_ALG_AAI_CTR_MOD128,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.rfc3686 = true,
+			.geniv = true,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha512),"
+					    "rfc3686(ctr(aes)))",
+				.cra_driver_name = "authenc-hmac-sha512-"
+						   "rfc3686-ctr-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA512_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES |
+					   OP_ALG_AAI_CTR_MOD128,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.rfc3686 = true,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "seqiv(authenc(hmac(sha512),"
+					    "rfc3686(ctr(aes))))",
+				.cra_driver_name = "seqiv-authenc-hmac-sha512-"
+						   "rfc3686-ctr-aes-caam-qi2",
+				.cra_blocksize = 1,
+			},
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA512_DIGEST_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES |
+					   OP_ALG_AAI_CTR_MOD128,
+			.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
+					   OP_ALG_AAI_HMAC_PRECOMP,
+			.rfc3686 = true,
+			.geniv = true,
+		},
+	},
+};
+
+static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg)
+{
+	struct skcipher_alg *alg = &t_alg->skcipher;
+
+	alg->base.cra_module = THIS_MODULE;
+	alg->base.cra_priority = CAAM_CRA_PRIORITY;
+	alg->base.cra_ctxsize = sizeof(struct caam_ctx);
+	alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
+
+	alg->init = caam_cra_init_skcipher;
+	alg->exit = caam_cra_exit;
+}
+
+static void caam_aead_alg_init(struct caam_aead_alg *t_alg)
+{
+	struct aead_alg *alg = &t_alg->aead;
+
+	alg->base.cra_module = THIS_MODULE;
+	alg->base.cra_priority = CAAM_CRA_PRIORITY;
+	alg->base.cra_ctxsize = sizeof(struct caam_ctx);
+	alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
+
+	alg->init = caam_cra_init_aead;
+	alg->exit = caam_cra_exit_aead;
+}
+
+/* max hash key is max split key size */
+#define CAAM_MAX_HASH_KEY_SIZE		(SHA512_DIGEST_SIZE * 2)
+
+#define CAAM_MAX_HASH_BLOCK_SIZE	SHA512_BLOCK_SIZE
+
+/* caam context sizes for hashes: running digest + 8 */
+#define HASH_MSG_LEN			8
+#define MAX_CTX_LEN			(HASH_MSG_LEN + SHA512_DIGEST_SIZE)
+
+enum hash_optype {
+	UPDATE = 0,
+	UPDATE_FIRST,
+	FINALIZE,
+	DIGEST,
+	HASH_NUM_OP
+};
+
+/**
+ * caam_hash_ctx - ahash per-session context
+ * @flc: Flow Contexts array
+ * @flc_dma: I/O virtual addresses of the Flow Contexts
+ * @dev: dpseci device
+ * @ctx_len: size of Context Register
+ * @adata: hashing algorithm details
+ */
+struct caam_hash_ctx {
+	struct caam_flc flc[HASH_NUM_OP];
+	dma_addr_t flc_dma[HASH_NUM_OP];
+	struct device *dev;
+	int ctx_len;
+	struct alginfo adata;
+};
+
+/* ahash state */
+struct caam_hash_state {
+	struct caam_request caam_req;
+	dma_addr_t buf_dma;
+	dma_addr_t ctx_dma;
+	u8 buf_0[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned;
+	int buflen_0;
+	u8 buf_1[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned;
+	int buflen_1;
+	u8 caam_ctx[MAX_CTX_LEN] ____cacheline_aligned;
+	int (*update)(struct ahash_request *req);
+	int (*final)(struct ahash_request *req);
+	int (*finup)(struct ahash_request *req);
+	int current_buf;
+};
+
+struct caam_export_state {
+	u8 buf[CAAM_MAX_HASH_BLOCK_SIZE];
+	u8 caam_ctx[MAX_CTX_LEN];
+	int buflen;
+	int (*update)(struct ahash_request *req);
+	int (*final)(struct ahash_request *req);
+	int (*finup)(struct ahash_request *req);
+};
+
+static inline void switch_buf(struct caam_hash_state *state)
+{
+	state->current_buf ^= 1;
+}
+
+static inline u8 *current_buf(struct caam_hash_state *state)
+{
+	return state->current_buf ? state->buf_1 : state->buf_0;
+}
+
+static inline u8 *alt_buf(struct caam_hash_state *state)
+{
+	return state->current_buf ? state->buf_0 : state->buf_1;
+}
+
+static inline int *current_buflen(struct caam_hash_state *state)
+{
+	return state->current_buf ? &state->buflen_1 : &state->buflen_0;
+}
+
+static inline int *alt_buflen(struct caam_hash_state *state)
+{
+	return state->current_buf ? &state->buflen_0 : &state->buflen_1;
+}
+
+/* Map current buffer in state (if length > 0) and put it in link table */
+static inline int buf_map_to_qm_sg(struct device *dev,
+				   struct dpaa2_sg_entry *qm_sg,
+				   struct caam_hash_state *state)
+{
+	int buflen = *current_buflen(state);
+
+	if (!buflen)
+		return 0;
+
+	state->buf_dma = dma_map_single(dev, current_buf(state), buflen,
+					DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, state->buf_dma)) {
+		dev_err(dev, "unable to map buf\n");
+		state->buf_dma = 0;
+		return -ENOMEM;
+	}
+
+	dma_to_qm_sg_one(qm_sg, state->buf_dma, buflen, 0);
+
+	return 0;
+}
+
+/* Map state->caam_ctx, and add it to link table */
+static inline int ctx_map_to_qm_sg(struct device *dev,
+				   struct caam_hash_state *state, int ctx_len,
+				   struct dpaa2_sg_entry *qm_sg, u32 flag)
+{
+	state->ctx_dma = dma_map_single(dev, state->caam_ctx, ctx_len, flag);
+	if (dma_mapping_error(dev, state->ctx_dma)) {
+		dev_err(dev, "unable to map ctx\n");
+		state->ctx_dma = 0;
+		return -ENOMEM;
+	}
+
+	dma_to_qm_sg_one(qm_sg, state->ctx_dma, ctx_len, 0);
+
+	return 0;
+}
+
+static int ahash_set_sh_desc(struct crypto_ahash *ahash)
+{
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	int digestsize = crypto_ahash_digestsize(ahash);
+	struct dpaa2_caam_priv *priv = dev_get_drvdata(ctx->dev);
+	struct caam_flc *flc;
+	u32 *desc;
+
+	/* ahash_update shared descriptor */
+	flc = &ctx->flc[UPDATE];
+	desc = flc->sh_desc;
+	cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_UPDATE, ctx->ctx_len,
+			  ctx->ctx_len, true, priv->sec_attr.era);
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(ctx->dev, ctx->flc_dma[UPDATE],
+				   desc_bytes(desc), DMA_BIDIRECTIONAL);
+	print_hex_dump_debug("ahash update shdesc@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+			     1);
+
+	/* ahash_update_first shared descriptor */
+	flc = &ctx->flc[UPDATE_FIRST];
+	desc = flc->sh_desc;
+	cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_INIT, ctx->ctx_len,
+			  ctx->ctx_len, false, priv->sec_attr.era);
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(ctx->dev, ctx->flc_dma[UPDATE_FIRST],
+				   desc_bytes(desc), DMA_BIDIRECTIONAL);
+	print_hex_dump_debug("ahash update first shdesc@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+			     1);
+
+	/* ahash_final shared descriptor */
+	flc = &ctx->flc[FINALIZE];
+	desc = flc->sh_desc;
+	cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_FINALIZE, digestsize,
+			  ctx->ctx_len, true, priv->sec_attr.era);
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(ctx->dev, ctx->flc_dma[FINALIZE],
+				   desc_bytes(desc), DMA_BIDIRECTIONAL);
+	print_hex_dump_debug("ahash final shdesc@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+			     1);
+
+	/* ahash_digest shared descriptor */
+	flc = &ctx->flc[DIGEST];
+	desc = flc->sh_desc;
+	cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_INITFINAL, digestsize,
+			  ctx->ctx_len, false, priv->sec_attr.era);
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	dma_sync_single_for_device(ctx->dev, ctx->flc_dma[DIGEST],
+				   desc_bytes(desc), DMA_BIDIRECTIONAL);
+	print_hex_dump_debug("ahash digest shdesc@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+			     1);
+
+	return 0;
+}
+
+struct split_key_sh_result {
+	struct completion completion;
+	int err;
+	struct device *dev;
+};
+
+static void split_key_sh_done(void *cbk_ctx, u32 err)
+{
+	struct split_key_sh_result *res = cbk_ctx;
+
+	dev_dbg(res->dev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+
+	if (err)
+		caam_qi2_strstatus(res->dev, err);
+
+	res->err = err;
+	complete(&res->completion);
+}
+
+/* Digest hash size if it is too large */
+static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in,
+			   u32 *keylen, u8 *key_out, u32 digestsize)
+{
+	struct caam_request *req_ctx;
+	u32 *desc;
+	struct split_key_sh_result result;
+	dma_addr_t src_dma, dst_dma;
+	struct caam_flc *flc;
+	dma_addr_t flc_dma;
+	int ret = -ENOMEM;
+	struct dpaa2_fl_entry *in_fle, *out_fle;
+
+	req_ctx = kzalloc(sizeof(*req_ctx), GFP_KERNEL | GFP_DMA);
+	if (!req_ctx)
+		return -ENOMEM;
+
+	in_fle = &req_ctx->fd_flt[1];
+	out_fle = &req_ctx->fd_flt[0];
+
+	flc = kzalloc(sizeof(*flc), GFP_KERNEL | GFP_DMA);
+	if (!flc)
+		goto err_flc;
+
+	src_dma = dma_map_single(ctx->dev, (void *)key_in, *keylen,
+				 DMA_TO_DEVICE);
+	if (dma_mapping_error(ctx->dev, src_dma)) {
+		dev_err(ctx->dev, "unable to map key input memory\n");
+		goto err_src_dma;
+	}
+	dst_dma = dma_map_single(ctx->dev, (void *)key_out, digestsize,
+				 DMA_FROM_DEVICE);
+	if (dma_mapping_error(ctx->dev, dst_dma)) {
+		dev_err(ctx->dev, "unable to map key output memory\n");
+		goto err_dst_dma;
+	}
+
+	desc = flc->sh_desc;
+
+	init_sh_desc(desc, 0);
+
+	/* descriptor to perform unkeyed hash on key_in */
+	append_operation(desc, ctx->adata.algtype | OP_ALG_ENCRYPT |
+			 OP_ALG_AS_INITFINAL);
+	append_seq_fifo_load(desc, *keylen, FIFOLD_CLASS_CLASS2 |
+			     FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_MSG);
+	append_seq_store(desc, digestsize, LDST_CLASS_2_CCB |
+			 LDST_SRCDST_BYTE_CONTEXT);
+
+	flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */
+	flc_dma = dma_map_single(ctx->dev, flc, sizeof(flc->flc) +
+				 desc_bytes(desc), DMA_TO_DEVICE);
+	if (dma_mapping_error(ctx->dev, flc_dma)) {
+		dev_err(ctx->dev, "unable to map shared descriptor\n");
+		goto err_flc_dma;
+	}
+
+	dpaa2_fl_set_final(in_fle, true);
+	dpaa2_fl_set_format(in_fle, dpaa2_fl_single);
+	dpaa2_fl_set_addr(in_fle, src_dma);
+	dpaa2_fl_set_len(in_fle, *keylen);
+	dpaa2_fl_set_format(out_fle, dpaa2_fl_single);
+	dpaa2_fl_set_addr(out_fle, dst_dma);
+	dpaa2_fl_set_len(out_fle, digestsize);
+
+	print_hex_dump_debug("key_in@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, key_in, *keylen, 1);
+	print_hex_dump_debug("shdesc@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+			     1);
+
+	result.err = 0;
+	init_completion(&result.completion);
+	result.dev = ctx->dev;
+
+	req_ctx->flc = flc;
+	req_ctx->flc_dma = flc_dma;
+	req_ctx->cbk = split_key_sh_done;
+	req_ctx->ctx = &result;
+
+	ret = dpaa2_caam_enqueue(ctx->dev, req_ctx);
+	if (ret == -EINPROGRESS) {
+		/* in progress */
+		wait_for_completion(&result.completion);
+		ret = result.err;
+		print_hex_dump_debug("digested key@" __stringify(__LINE__)": ",
+				     DUMP_PREFIX_ADDRESS, 16, 4, key_in,
+				     digestsize, 1);
+	}
+
+	dma_unmap_single(ctx->dev, flc_dma, sizeof(flc->flc) + desc_bytes(desc),
+			 DMA_TO_DEVICE);
+err_flc_dma:
+	dma_unmap_single(ctx->dev, dst_dma, digestsize, DMA_FROM_DEVICE);
+err_dst_dma:
+	dma_unmap_single(ctx->dev, src_dma, *keylen, DMA_TO_DEVICE);
+err_src_dma:
+	kfree(flc);
+err_flc:
+	kfree(req_ctx);
+
+	*keylen = digestsize;
+
+	return ret;
+}
+
+static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key,
+			unsigned int keylen)
+{
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	unsigned int blocksize = crypto_tfm_alg_blocksize(&ahash->base);
+	unsigned int digestsize = crypto_ahash_digestsize(ahash);
+	int ret;
+	u8 *hashed_key = NULL;
+
+	dev_dbg(ctx->dev, "keylen %d blocksize %d\n", keylen, blocksize);
+
+	if (keylen > blocksize) {
+		hashed_key = kmalloc_array(digestsize, sizeof(*hashed_key),
+					   GFP_KERNEL | GFP_DMA);
+		if (!hashed_key)
+			return -ENOMEM;
+		ret = hash_digest_key(ctx, key, &keylen, hashed_key,
+				      digestsize);
+		if (ret)
+			goto bad_free_key;
+		key = hashed_key;
+	}
+
+	ctx->adata.keylen = keylen;
+	ctx->adata.keylen_pad = split_key_len(ctx->adata.algtype &
+					      OP_ALG_ALGSEL_MASK);
+	if (ctx->adata.keylen_pad > CAAM_MAX_HASH_KEY_SIZE)
+		goto bad_free_key;
+
+	ctx->adata.key_virt = key;
+	ctx->adata.key_inline = true;
+
+	ret = ahash_set_sh_desc(ahash);
+	kfree(hashed_key);
+	return ret;
+bad_free_key:
+	kfree(hashed_key);
+	crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	return -EINVAL;
+}
+
+static inline void ahash_unmap(struct device *dev, struct ahash_edesc *edesc,
+			       struct ahash_request *req, int dst_len)
+{
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	if (edesc->src_nents)
+		dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE);
+	if (edesc->dst_dma)
+		dma_unmap_single(dev, edesc->dst_dma, dst_len, DMA_FROM_DEVICE);
+
+	if (edesc->qm_sg_bytes)
+		dma_unmap_single(dev, edesc->qm_sg_dma, edesc->qm_sg_bytes,
+				 DMA_TO_DEVICE);
+
+	if (state->buf_dma) {
+		dma_unmap_single(dev, state->buf_dma, *current_buflen(state),
+				 DMA_TO_DEVICE);
+		state->buf_dma = 0;
+	}
+}
+
+static inline void ahash_unmap_ctx(struct device *dev,
+				   struct ahash_edesc *edesc,
+				   struct ahash_request *req, int dst_len,
+				   u32 flag)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	if (state->ctx_dma) {
+		dma_unmap_single(dev, state->ctx_dma, ctx->ctx_len, flag);
+		state->ctx_dma = 0;
+	}
+	ahash_unmap(dev, edesc, req, dst_len);
+}
+
+static void ahash_done(void *cbk_ctx, u32 status)
+{
+	struct crypto_async_request *areq = cbk_ctx;
+	struct ahash_request *req = ahash_request_cast(areq);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct ahash_edesc *edesc = state->caam_req.edesc;
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	int digestsize = crypto_ahash_digestsize(ahash);
+	int ecode = 0;
+
+	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
+
+	if (unlikely(status)) {
+		caam_qi2_strstatus(ctx->dev, status);
+		ecode = -EIO;
+	}
+
+	ahash_unmap(ctx->dev, edesc, req, digestsize);
+	qi_cache_free(edesc);
+
+	print_hex_dump_debug("ctx@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
+			     ctx->ctx_len, 1);
+	if (req->result)
+		print_hex_dump_debug("result@" __stringify(__LINE__)": ",
+				     DUMP_PREFIX_ADDRESS, 16, 4, req->result,
+				     digestsize, 1);
+
+	req->base.complete(&req->base, ecode);
+}
+
+static void ahash_done_bi(void *cbk_ctx, u32 status)
+{
+	struct crypto_async_request *areq = cbk_ctx;
+	struct ahash_request *req = ahash_request_cast(areq);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct ahash_edesc *edesc = state->caam_req.edesc;
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	int ecode = 0;
+
+	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
+
+	if (unlikely(status)) {
+		caam_qi2_strstatus(ctx->dev, status);
+		ecode = -EIO;
+	}
+
+	ahash_unmap_ctx(ctx->dev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL);
+	switch_buf(state);
+	qi_cache_free(edesc);
+
+	print_hex_dump_debug("ctx@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
+			     ctx->ctx_len, 1);
+	if (req->result)
+		print_hex_dump_debug("result@" __stringify(__LINE__)": ",
+				     DUMP_PREFIX_ADDRESS, 16, 4, req->result,
+				     crypto_ahash_digestsize(ahash), 1);
+
+	req->base.complete(&req->base, ecode);
+}
+
+static void ahash_done_ctx_src(void *cbk_ctx, u32 status)
+{
+	struct crypto_async_request *areq = cbk_ctx;
+	struct ahash_request *req = ahash_request_cast(areq);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct ahash_edesc *edesc = state->caam_req.edesc;
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	int digestsize = crypto_ahash_digestsize(ahash);
+	int ecode = 0;
+
+	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
+
+	if (unlikely(status)) {
+		caam_qi2_strstatus(ctx->dev, status);
+		ecode = -EIO;
+	}
+
+	ahash_unmap_ctx(ctx->dev, edesc, req, digestsize, DMA_TO_DEVICE);
+	qi_cache_free(edesc);
+
+	print_hex_dump_debug("ctx@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
+			     ctx->ctx_len, 1);
+	if (req->result)
+		print_hex_dump_debug("result@" __stringify(__LINE__)": ",
+				     DUMP_PREFIX_ADDRESS, 16, 4, req->result,
+				     digestsize, 1);
+
+	req->base.complete(&req->base, ecode);
+}
+
+static void ahash_done_ctx_dst(void *cbk_ctx, u32 status)
+{
+	struct crypto_async_request *areq = cbk_ctx;
+	struct ahash_request *req = ahash_request_cast(areq);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct ahash_edesc *edesc = state->caam_req.edesc;
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	int ecode = 0;
+
+	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
+
+	if (unlikely(status)) {
+		caam_qi2_strstatus(ctx->dev, status);
+		ecode = -EIO;
+	}
+
+	ahash_unmap_ctx(ctx->dev, edesc, req, ctx->ctx_len, DMA_FROM_DEVICE);
+	switch_buf(state);
+	qi_cache_free(edesc);
+
+	print_hex_dump_debug("ctx@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
+			     ctx->ctx_len, 1);
+	if (req->result)
+		print_hex_dump_debug("result@" __stringify(__LINE__)": ",
+				     DUMP_PREFIX_ADDRESS, 16, 4, req->result,
+				     crypto_ahash_digestsize(ahash), 1);
+
+	req->base.complete(&req->base, ecode);
+}
+
+static int ahash_update_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_request *req_ctx = &state->caam_req;
+	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
+	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		      GFP_KERNEL : GFP_ATOMIC;
+	u8 *buf = current_buf(state);
+	int *buflen = current_buflen(state);
+	u8 *next_buf = alt_buf(state);
+	int *next_buflen = alt_buflen(state), last_buflen;
+	int in_len = *buflen + req->nbytes, to_hash;
+	int src_nents, mapped_nents, qm_sg_bytes, qm_sg_src_index;
+	struct ahash_edesc *edesc;
+	int ret = 0;
+
+	last_buflen = *next_buflen;
+	*next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1);
+	to_hash = in_len - *next_buflen;
+
+	if (to_hash) {
+		struct dpaa2_sg_entry *sg_table;
+
+		src_nents = sg_nents_for_len(req->src,
+					     req->nbytes - (*next_buflen));
+		if (src_nents < 0) {
+			dev_err(ctx->dev, "Invalid number of src SG.\n");
+			return src_nents;
+		}
+
+		if (src_nents) {
+			mapped_nents = dma_map_sg(ctx->dev, req->src, src_nents,
+						  DMA_TO_DEVICE);
+			if (!mapped_nents) {
+				dev_err(ctx->dev, "unable to DMA map source\n");
+				return -ENOMEM;
+			}
+		} else {
+			mapped_nents = 0;
+		}
+
+		/* allocate space for base edesc and link tables */
+		edesc = qi_cache_zalloc(GFP_DMA | flags);
+		if (!edesc) {
+			dma_unmap_sg(ctx->dev, req->src, src_nents,
+				     DMA_TO_DEVICE);
+			return -ENOMEM;
+		}
+
+		edesc->src_nents = src_nents;
+		qm_sg_src_index = 1 + (*buflen ? 1 : 0);
+		qm_sg_bytes = (qm_sg_src_index + mapped_nents) *
+			      sizeof(*sg_table);
+		sg_table = &edesc->sgt[0];
+
+		ret = ctx_map_to_qm_sg(ctx->dev, state, ctx->ctx_len, sg_table,
+				       DMA_BIDIRECTIONAL);
+		if (ret)
+			goto unmap_ctx;
+
+		ret = buf_map_to_qm_sg(ctx->dev, sg_table + 1, state);
+		if (ret)
+			goto unmap_ctx;
+
+		if (mapped_nents) {
+			sg_to_qm_sg_last(req->src, mapped_nents,
+					 sg_table + qm_sg_src_index, 0);
+			if (*next_buflen)
+				scatterwalk_map_and_copy(next_buf, req->src,
+							 to_hash - *buflen,
+							 *next_buflen, 0);
+		} else {
+			dpaa2_sg_set_final(sg_table + qm_sg_src_index - 1,
+					   true);
+		}
+
+		edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table,
+						  qm_sg_bytes, DMA_TO_DEVICE);
+		if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) {
+			dev_err(ctx->dev, "unable to map S/G table\n");
+			ret = -ENOMEM;
+			goto unmap_ctx;
+		}
+		edesc->qm_sg_bytes = qm_sg_bytes;
+
+		memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt));
+		dpaa2_fl_set_final(in_fle, true);
+		dpaa2_fl_set_format(in_fle, dpaa2_fl_sg);
+		dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma);
+		dpaa2_fl_set_len(in_fle, ctx->ctx_len + to_hash);
+		dpaa2_fl_set_format(out_fle, dpaa2_fl_single);
+		dpaa2_fl_set_addr(out_fle, state->ctx_dma);
+		dpaa2_fl_set_len(out_fle, ctx->ctx_len);
+
+		req_ctx->flc = &ctx->flc[UPDATE];
+		req_ctx->flc_dma = ctx->flc_dma[UPDATE];
+		req_ctx->cbk = ahash_done_bi;
+		req_ctx->ctx = &req->base;
+		req_ctx->edesc = edesc;
+
+		ret = dpaa2_caam_enqueue(ctx->dev, req_ctx);
+		if (ret != -EINPROGRESS &&
+		    !(ret == -EBUSY &&
+		      req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			goto unmap_ctx;
+	} else if (*next_buflen) {
+		scatterwalk_map_and_copy(buf + *buflen, req->src, 0,
+					 req->nbytes, 0);
+		*buflen = *next_buflen;
+		*next_buflen = last_buflen;
+	}
+
+	print_hex_dump_debug("buf@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1);
+	print_hex_dump_debug("next buf@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, next_buf, *next_buflen,
+			     1);
+
+	return ret;
+unmap_ctx:
+	ahash_unmap_ctx(ctx->dev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL);
+	qi_cache_free(edesc);
+	return ret;
+}
+
+static int ahash_final_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_request *req_ctx = &state->caam_req;
+	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
+	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		      GFP_KERNEL : GFP_ATOMIC;
+	int buflen = *current_buflen(state);
+	int qm_sg_bytes, qm_sg_src_index;
+	int digestsize = crypto_ahash_digestsize(ahash);
+	struct ahash_edesc *edesc;
+	struct dpaa2_sg_entry *sg_table;
+	int ret;
+
+	/* allocate space for base edesc and link tables */
+	edesc = qi_cache_zalloc(GFP_DMA | flags);
+	if (!edesc)
+		return -ENOMEM;
+
+	qm_sg_src_index = 1 + (buflen ? 1 : 0);
+	qm_sg_bytes = qm_sg_src_index * sizeof(*sg_table);
+	sg_table = &edesc->sgt[0];
+
+	ret = ctx_map_to_qm_sg(ctx->dev, state, ctx->ctx_len, sg_table,
+			       DMA_TO_DEVICE);
+	if (ret)
+		goto unmap_ctx;
+
+	ret = buf_map_to_qm_sg(ctx->dev, sg_table + 1, state);
+	if (ret)
+		goto unmap_ctx;
+
+	dpaa2_sg_set_final(sg_table + qm_sg_src_index - 1, true);
+
+	edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table, qm_sg_bytes,
+					  DMA_TO_DEVICE);
+	if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) {
+		dev_err(ctx->dev, "unable to map S/G table\n");
+		ret = -ENOMEM;
+		goto unmap_ctx;
+	}
+	edesc->qm_sg_bytes = qm_sg_bytes;
+
+	edesc->dst_dma = dma_map_single(ctx->dev, req->result, digestsize,
+					DMA_FROM_DEVICE);
+	if (dma_mapping_error(ctx->dev, edesc->dst_dma)) {
+		dev_err(ctx->dev, "unable to map dst\n");
+		edesc->dst_dma = 0;
+		ret = -ENOMEM;
+		goto unmap_ctx;
+	}
+
+	memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt));
+	dpaa2_fl_set_final(in_fle, true);
+	dpaa2_fl_set_format(in_fle, dpaa2_fl_sg);
+	dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma);
+	dpaa2_fl_set_len(in_fle, ctx->ctx_len + buflen);
+	dpaa2_fl_set_format(out_fle, dpaa2_fl_single);
+	dpaa2_fl_set_addr(out_fle, edesc->dst_dma);
+	dpaa2_fl_set_len(out_fle, digestsize);
+
+	req_ctx->flc = &ctx->flc[FINALIZE];
+	req_ctx->flc_dma = ctx->flc_dma[FINALIZE];
+	req_ctx->cbk = ahash_done_ctx_src;
+	req_ctx->ctx = &req->base;
+	req_ctx->edesc = edesc;
+
+	ret = dpaa2_caam_enqueue(ctx->dev, req_ctx);
+	if (ret == -EINPROGRESS ||
+	    (ret == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+		return ret;
+
+unmap_ctx:
+	ahash_unmap_ctx(ctx->dev, edesc, req, digestsize, DMA_FROM_DEVICE);
+	qi_cache_free(edesc);
+	return ret;
+}
+
+static int ahash_finup_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_request *req_ctx = &state->caam_req;
+	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
+	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		      GFP_KERNEL : GFP_ATOMIC;
+	int buflen = *current_buflen(state);
+	int qm_sg_bytes, qm_sg_src_index;
+	int src_nents, mapped_nents;
+	int digestsize = crypto_ahash_digestsize(ahash);
+	struct ahash_edesc *edesc;
+	struct dpaa2_sg_entry *sg_table;
+	int ret;
+
+	src_nents = sg_nents_for_len(req->src, req->nbytes);
+	if (src_nents < 0) {
+		dev_err(ctx->dev, "Invalid number of src SG.\n");
+		return src_nents;
+	}
+
+	if (src_nents) {
+		mapped_nents = dma_map_sg(ctx->dev, req->src, src_nents,
+					  DMA_TO_DEVICE);
+		if (!mapped_nents) {
+			dev_err(ctx->dev, "unable to DMA map source\n");
+			return -ENOMEM;
+		}
+	} else {
+		mapped_nents = 0;
+	}
+
+	/* allocate space for base edesc and link tables */
+	edesc = qi_cache_zalloc(GFP_DMA | flags);
+	if (!edesc) {
+		dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE);
+		return -ENOMEM;
+	}
+
+	edesc->src_nents = src_nents;
+	qm_sg_src_index = 1 + (buflen ? 1 : 0);
+	qm_sg_bytes = (qm_sg_src_index + mapped_nents) * sizeof(*sg_table);
+	sg_table = &edesc->sgt[0];
+
+	ret = ctx_map_to_qm_sg(ctx->dev, state, ctx->ctx_len, sg_table,
+			       DMA_TO_DEVICE);
+	if (ret)
+		goto unmap_ctx;
+
+	ret = buf_map_to_qm_sg(ctx->dev, sg_table + 1, state);
+	if (ret)
+		goto unmap_ctx;
+
+	sg_to_qm_sg_last(req->src, mapped_nents, sg_table + qm_sg_src_index, 0);
+
+	edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table, qm_sg_bytes,
+					  DMA_TO_DEVICE);
+	if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) {
+		dev_err(ctx->dev, "unable to map S/G table\n");
+		ret = -ENOMEM;
+		goto unmap_ctx;
+	}
+	edesc->qm_sg_bytes = qm_sg_bytes;
+
+	edesc->dst_dma = dma_map_single(ctx->dev, req->result, digestsize,
+					DMA_FROM_DEVICE);
+	if (dma_mapping_error(ctx->dev, edesc->dst_dma)) {
+		dev_err(ctx->dev, "unable to map dst\n");
+		edesc->dst_dma = 0;
+		ret = -ENOMEM;
+		goto unmap_ctx;
+	}
+
+	memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt));
+	dpaa2_fl_set_final(in_fle, true);
+	dpaa2_fl_set_format(in_fle, dpaa2_fl_sg);
+	dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma);
+	dpaa2_fl_set_len(in_fle, ctx->ctx_len + buflen + req->nbytes);
+	dpaa2_fl_set_format(out_fle, dpaa2_fl_single);
+	dpaa2_fl_set_addr(out_fle, edesc->dst_dma);
+	dpaa2_fl_set_len(out_fle, digestsize);
+
+	req_ctx->flc = &ctx->flc[FINALIZE];
+	req_ctx->flc_dma = ctx->flc_dma[FINALIZE];
+	req_ctx->cbk = ahash_done_ctx_src;
+	req_ctx->ctx = &req->base;
+	req_ctx->edesc = edesc;
+
+	ret = dpaa2_caam_enqueue(ctx->dev, req_ctx);
+	if (ret == -EINPROGRESS ||
+	    (ret == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+		return ret;
+
+unmap_ctx:
+	ahash_unmap_ctx(ctx->dev, edesc, req, digestsize, DMA_FROM_DEVICE);
+	qi_cache_free(edesc);
+	return ret;
+}
+
+static int ahash_digest(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_request *req_ctx = &state->caam_req;
+	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
+	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		      GFP_KERNEL : GFP_ATOMIC;
+	int digestsize = crypto_ahash_digestsize(ahash);
+	int src_nents, mapped_nents;
+	struct ahash_edesc *edesc;
+	int ret = -ENOMEM;
+
+	state->buf_dma = 0;
+
+	src_nents = sg_nents_for_len(req->src, req->nbytes);
+	if (src_nents < 0) {
+		dev_err(ctx->dev, "Invalid number of src SG.\n");
+		return src_nents;
+	}
+
+	if (src_nents) {
+		mapped_nents = dma_map_sg(ctx->dev, req->src, src_nents,
+					  DMA_TO_DEVICE);
+		if (!mapped_nents) {
+			dev_err(ctx->dev, "unable to map source for DMA\n");
+			return ret;
+		}
+	} else {
+		mapped_nents = 0;
+	}
+
+	/* allocate space for base edesc and link tables */
+	edesc = qi_cache_zalloc(GFP_DMA | flags);
+	if (!edesc) {
+		dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE);
+		return ret;
+	}
+
+	edesc->src_nents = src_nents;
+	memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt));
+
+	if (mapped_nents > 1) {
+		int qm_sg_bytes;
+		struct dpaa2_sg_entry *sg_table = &edesc->sgt[0];
+
+		qm_sg_bytes = mapped_nents * sizeof(*sg_table);
+		sg_to_qm_sg_last(req->src, mapped_nents, sg_table, 0);
+		edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table,
+						  qm_sg_bytes, DMA_TO_DEVICE);
+		if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) {
+			dev_err(ctx->dev, "unable to map S/G table\n");
+			goto unmap;
+		}
+		edesc->qm_sg_bytes = qm_sg_bytes;
+		dpaa2_fl_set_format(in_fle, dpaa2_fl_sg);
+		dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma);
+	} else {
+		dpaa2_fl_set_format(in_fle, dpaa2_fl_single);
+		dpaa2_fl_set_addr(in_fle, sg_dma_address(req->src));
+	}
+
+	edesc->dst_dma = dma_map_single(ctx->dev, req->result, digestsize,
+					DMA_FROM_DEVICE);
+	if (dma_mapping_error(ctx->dev, edesc->dst_dma)) {
+		dev_err(ctx->dev, "unable to map dst\n");
+		edesc->dst_dma = 0;
+		goto unmap;
+	}
+
+	dpaa2_fl_set_final(in_fle, true);
+	dpaa2_fl_set_len(in_fle, req->nbytes);
+	dpaa2_fl_set_format(out_fle, dpaa2_fl_single);
+	dpaa2_fl_set_addr(out_fle, edesc->dst_dma);
+	dpaa2_fl_set_len(out_fle, digestsize);
+
+	req_ctx->flc = &ctx->flc[DIGEST];
+	req_ctx->flc_dma = ctx->flc_dma[DIGEST];
+	req_ctx->cbk = ahash_done;
+	req_ctx->ctx = &req->base;
+	req_ctx->edesc = edesc;
+	ret = dpaa2_caam_enqueue(ctx->dev, req_ctx);
+	if (ret == -EINPROGRESS ||
+	    (ret == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+		return ret;
+
+unmap:
+	ahash_unmap(ctx->dev, edesc, req, digestsize);
+	qi_cache_free(edesc);
+	return ret;
+}
+
+static int ahash_final_no_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_request *req_ctx = &state->caam_req;
+	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
+	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		      GFP_KERNEL : GFP_ATOMIC;
+	u8 *buf = current_buf(state);
+	int buflen = *current_buflen(state);
+	int digestsize = crypto_ahash_digestsize(ahash);
+	struct ahash_edesc *edesc;
+	int ret = -ENOMEM;
+
+	/* allocate space for base edesc and link tables */
+	edesc = qi_cache_zalloc(GFP_DMA | flags);
+	if (!edesc)
+		return ret;
+
+	state->buf_dma = dma_map_single(ctx->dev, buf, buflen, DMA_TO_DEVICE);
+	if (dma_mapping_error(ctx->dev, state->buf_dma)) {
+		dev_err(ctx->dev, "unable to map src\n");
+		goto unmap;
+	}
+
+	edesc->dst_dma = dma_map_single(ctx->dev, req->result, digestsize,
+					DMA_FROM_DEVICE);
+	if (dma_mapping_error(ctx->dev, edesc->dst_dma)) {
+		dev_err(ctx->dev, "unable to map dst\n");
+		edesc->dst_dma = 0;
+		goto unmap;
+	}
+
+	memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt));
+	dpaa2_fl_set_final(in_fle, true);
+	dpaa2_fl_set_format(in_fle, dpaa2_fl_single);
+	dpaa2_fl_set_addr(in_fle, state->buf_dma);
+	dpaa2_fl_set_len(in_fle, buflen);
+	dpaa2_fl_set_format(out_fle, dpaa2_fl_single);
+	dpaa2_fl_set_addr(out_fle, edesc->dst_dma);
+	dpaa2_fl_set_len(out_fle, digestsize);
+
+	req_ctx->flc = &ctx->flc[DIGEST];
+	req_ctx->flc_dma = ctx->flc_dma[DIGEST];
+	req_ctx->cbk = ahash_done;
+	req_ctx->ctx = &req->base;
+	req_ctx->edesc = edesc;
+
+	ret = dpaa2_caam_enqueue(ctx->dev, req_ctx);
+	if (ret == -EINPROGRESS ||
+	    (ret == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+		return ret;
+
+unmap:
+	ahash_unmap(ctx->dev, edesc, req, digestsize);
+	qi_cache_free(edesc);
+	return ret;
+}
+
+static int ahash_update_no_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_request *req_ctx = &state->caam_req;
+	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
+	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		      GFP_KERNEL : GFP_ATOMIC;
+	u8 *buf = current_buf(state);
+	int *buflen = current_buflen(state);
+	u8 *next_buf = alt_buf(state);
+	int *next_buflen = alt_buflen(state);
+	int in_len = *buflen + req->nbytes, to_hash;
+	int qm_sg_bytes, src_nents, mapped_nents;
+	struct ahash_edesc *edesc;
+	int ret = 0;
+
+	*next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1);
+	to_hash = in_len - *next_buflen;
+
+	if (to_hash) {
+		struct dpaa2_sg_entry *sg_table;
+
+		src_nents = sg_nents_for_len(req->src,
+					     req->nbytes - *next_buflen);
+		if (src_nents < 0) {
+			dev_err(ctx->dev, "Invalid number of src SG.\n");
+			return src_nents;
+		}
+
+		if (src_nents) {
+			mapped_nents = dma_map_sg(ctx->dev, req->src, src_nents,
+						  DMA_TO_DEVICE);
+			if (!mapped_nents) {
+				dev_err(ctx->dev, "unable to DMA map source\n");
+				return -ENOMEM;
+			}
+		} else {
+			mapped_nents = 0;
+		}
+
+		/* allocate space for base edesc and link tables */
+		edesc = qi_cache_zalloc(GFP_DMA | flags);
+		if (!edesc) {
+			dma_unmap_sg(ctx->dev, req->src, src_nents,
+				     DMA_TO_DEVICE);
+			return -ENOMEM;
+		}
+
+		edesc->src_nents = src_nents;
+		qm_sg_bytes = (1 + mapped_nents) * sizeof(*sg_table);
+		sg_table = &edesc->sgt[0];
+
+		ret = buf_map_to_qm_sg(ctx->dev, sg_table, state);
+		if (ret)
+			goto unmap_ctx;
+
+		sg_to_qm_sg_last(req->src, mapped_nents, sg_table + 1, 0);
+
+		if (*next_buflen)
+			scatterwalk_map_and_copy(next_buf, req->src,
+						 to_hash - *buflen,
+						 *next_buflen, 0);
+
+		edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table,
+						  qm_sg_bytes, DMA_TO_DEVICE);
+		if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) {
+			dev_err(ctx->dev, "unable to map S/G table\n");
+			ret = -ENOMEM;
+			goto unmap_ctx;
+		}
+		edesc->qm_sg_bytes = qm_sg_bytes;
+
+		state->ctx_dma = dma_map_single(ctx->dev, state->caam_ctx,
+						ctx->ctx_len, DMA_FROM_DEVICE);
+		if (dma_mapping_error(ctx->dev, state->ctx_dma)) {
+			dev_err(ctx->dev, "unable to map ctx\n");
+			state->ctx_dma = 0;
+			ret = -ENOMEM;
+			goto unmap_ctx;
+		}
+
+		memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt));
+		dpaa2_fl_set_final(in_fle, true);
+		dpaa2_fl_set_format(in_fle, dpaa2_fl_sg);
+		dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma);
+		dpaa2_fl_set_len(in_fle, to_hash);
+		dpaa2_fl_set_format(out_fle, dpaa2_fl_single);
+		dpaa2_fl_set_addr(out_fle, state->ctx_dma);
+		dpaa2_fl_set_len(out_fle, ctx->ctx_len);
+
+		req_ctx->flc = &ctx->flc[UPDATE_FIRST];
+		req_ctx->flc_dma = ctx->flc_dma[UPDATE_FIRST];
+		req_ctx->cbk = ahash_done_ctx_dst;
+		req_ctx->ctx = &req->base;
+		req_ctx->edesc = edesc;
+
+		ret = dpaa2_caam_enqueue(ctx->dev, req_ctx);
+		if (ret != -EINPROGRESS &&
+		    !(ret == -EBUSY &&
+		      req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			goto unmap_ctx;
+
+		state->update = ahash_update_ctx;
+		state->finup = ahash_finup_ctx;
+		state->final = ahash_final_ctx;
+	} else if (*next_buflen) {
+		scatterwalk_map_and_copy(buf + *buflen, req->src, 0,
+					 req->nbytes, 0);
+		*buflen = *next_buflen;
+		*next_buflen = 0;
+	}
+
+	print_hex_dump_debug("buf@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1);
+	print_hex_dump_debug("next buf@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, next_buf, *next_buflen,
+			     1);
+
+	return ret;
+unmap_ctx:
+	ahash_unmap_ctx(ctx->dev, edesc, req, ctx->ctx_len, DMA_TO_DEVICE);
+	qi_cache_free(edesc);
+	return ret;
+}
+
+static int ahash_finup_no_ctx(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_request *req_ctx = &state->caam_req;
+	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
+	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		      GFP_KERNEL : GFP_ATOMIC;
+	int buflen = *current_buflen(state);
+	int qm_sg_bytes, src_nents, mapped_nents;
+	int digestsize = crypto_ahash_digestsize(ahash);
+	struct ahash_edesc *edesc;
+	struct dpaa2_sg_entry *sg_table;
+	int ret;
+
+	src_nents = sg_nents_for_len(req->src, req->nbytes);
+	if (src_nents < 0) {
+		dev_err(ctx->dev, "Invalid number of src SG.\n");
+		return src_nents;
+	}
+
+	if (src_nents) {
+		mapped_nents = dma_map_sg(ctx->dev, req->src, src_nents,
+					  DMA_TO_DEVICE);
+		if (!mapped_nents) {
+			dev_err(ctx->dev, "unable to DMA map source\n");
+			return -ENOMEM;
+		}
+	} else {
+		mapped_nents = 0;
+	}
+
+	/* allocate space for base edesc and link tables */
+	edesc = qi_cache_zalloc(GFP_DMA | flags);
+	if (!edesc) {
+		dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE);
+		return -ENOMEM;
+	}
+
+	edesc->src_nents = src_nents;
+	qm_sg_bytes = (2 + mapped_nents) * sizeof(*sg_table);
+	sg_table = &edesc->sgt[0];
+
+	ret = buf_map_to_qm_sg(ctx->dev, sg_table, state);
+	if (ret)
+		goto unmap;
+
+	sg_to_qm_sg_last(req->src, mapped_nents, sg_table + 1, 0);
+
+	edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table, qm_sg_bytes,
+					  DMA_TO_DEVICE);
+	if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) {
+		dev_err(ctx->dev, "unable to map S/G table\n");
+		ret = -ENOMEM;
+		goto unmap;
+	}
+	edesc->qm_sg_bytes = qm_sg_bytes;
+
+	edesc->dst_dma = dma_map_single(ctx->dev, req->result, digestsize,
+					DMA_FROM_DEVICE);
+	if (dma_mapping_error(ctx->dev, edesc->dst_dma)) {
+		dev_err(ctx->dev, "unable to map dst\n");
+		edesc->dst_dma = 0;
+		ret = -ENOMEM;
+		goto unmap;
+	}
+
+	memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt));
+	dpaa2_fl_set_final(in_fle, true);
+	dpaa2_fl_set_format(in_fle, dpaa2_fl_sg);
+	dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma);
+	dpaa2_fl_set_len(in_fle, buflen + req->nbytes);
+	dpaa2_fl_set_format(out_fle, dpaa2_fl_single);
+	dpaa2_fl_set_addr(out_fle, edesc->dst_dma);
+	dpaa2_fl_set_len(out_fle, digestsize);
+
+	req_ctx->flc = &ctx->flc[DIGEST];
+	req_ctx->flc_dma = ctx->flc_dma[DIGEST];
+	req_ctx->cbk = ahash_done;
+	req_ctx->ctx = &req->base;
+	req_ctx->edesc = edesc;
+	ret = dpaa2_caam_enqueue(ctx->dev, req_ctx);
+	if (ret != -EINPROGRESS &&
+	    !(ret == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+		goto unmap;
+
+	return ret;
+unmap:
+	ahash_unmap(ctx->dev, edesc, req, digestsize);
+	qi_cache_free(edesc);
+	return -ENOMEM;
+}
+
+static int ahash_update_first(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_request *req_ctx = &state->caam_req;
+	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
+	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		      GFP_KERNEL : GFP_ATOMIC;
+	u8 *next_buf = alt_buf(state);
+	int *next_buflen = alt_buflen(state);
+	int to_hash;
+	int src_nents, mapped_nents;
+	struct ahash_edesc *edesc;
+	int ret = 0;
+
+	*next_buflen = req->nbytes & (crypto_tfm_alg_blocksize(&ahash->base) -
+				      1);
+	to_hash = req->nbytes - *next_buflen;
+
+	if (to_hash) {
+		struct dpaa2_sg_entry *sg_table;
+
+		src_nents = sg_nents_for_len(req->src,
+					     req->nbytes - (*next_buflen));
+		if (src_nents < 0) {
+			dev_err(ctx->dev, "Invalid number of src SG.\n");
+			return src_nents;
+		}
+
+		if (src_nents) {
+			mapped_nents = dma_map_sg(ctx->dev, req->src, src_nents,
+						  DMA_TO_DEVICE);
+			if (!mapped_nents) {
+				dev_err(ctx->dev, "unable to map source for DMA\n");
+				return -ENOMEM;
+			}
+		} else {
+			mapped_nents = 0;
+		}
+
+		/* allocate space for base edesc and link tables */
+		edesc = qi_cache_zalloc(GFP_DMA | flags);
+		if (!edesc) {
+			dma_unmap_sg(ctx->dev, req->src, src_nents,
+				     DMA_TO_DEVICE);
+			return -ENOMEM;
+		}
+
+		edesc->src_nents = src_nents;
+		sg_table = &edesc->sgt[0];
+
+		memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt));
+		dpaa2_fl_set_final(in_fle, true);
+		dpaa2_fl_set_len(in_fle, to_hash);
+
+		if (mapped_nents > 1) {
+			int qm_sg_bytes;
+
+			sg_to_qm_sg_last(req->src, mapped_nents, sg_table, 0);
+			qm_sg_bytes = mapped_nents * sizeof(*sg_table);
+			edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table,
+							  qm_sg_bytes,
+							  DMA_TO_DEVICE);
+			if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) {
+				dev_err(ctx->dev, "unable to map S/G table\n");
+				ret = -ENOMEM;
+				goto unmap_ctx;
+			}
+			edesc->qm_sg_bytes = qm_sg_bytes;
+			dpaa2_fl_set_format(in_fle, dpaa2_fl_sg);
+			dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma);
+		} else {
+			dpaa2_fl_set_format(in_fle, dpaa2_fl_single);
+			dpaa2_fl_set_addr(in_fle, sg_dma_address(req->src));
+		}
+
+		if (*next_buflen)
+			scatterwalk_map_and_copy(next_buf, req->src, to_hash,
+						 *next_buflen, 0);
+
+		state->ctx_dma = dma_map_single(ctx->dev, state->caam_ctx,
+						ctx->ctx_len, DMA_FROM_DEVICE);
+		if (dma_mapping_error(ctx->dev, state->ctx_dma)) {
+			dev_err(ctx->dev, "unable to map ctx\n");
+			state->ctx_dma = 0;
+			ret = -ENOMEM;
+			goto unmap_ctx;
+		}
+
+		dpaa2_fl_set_format(out_fle, dpaa2_fl_single);
+		dpaa2_fl_set_addr(out_fle, state->ctx_dma);
+		dpaa2_fl_set_len(out_fle, ctx->ctx_len);
+
+		req_ctx->flc = &ctx->flc[UPDATE_FIRST];
+		req_ctx->flc_dma = ctx->flc_dma[UPDATE_FIRST];
+		req_ctx->cbk = ahash_done_ctx_dst;
+		req_ctx->ctx = &req->base;
+		req_ctx->edesc = edesc;
+
+		ret = dpaa2_caam_enqueue(ctx->dev, req_ctx);
+		if (ret != -EINPROGRESS &&
+		    !(ret == -EBUSY && req->base.flags &
+		      CRYPTO_TFM_REQ_MAY_BACKLOG))
+			goto unmap_ctx;
+
+		state->update = ahash_update_ctx;
+		state->finup = ahash_finup_ctx;
+		state->final = ahash_final_ctx;
+	} else if (*next_buflen) {
+		state->update = ahash_update_no_ctx;
+		state->finup = ahash_finup_no_ctx;
+		state->final = ahash_final_no_ctx;
+		scatterwalk_map_and_copy(next_buf, req->src, 0,
+					 req->nbytes, 0);
+		switch_buf(state);
+	}
+
+	print_hex_dump_debug("next buf@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, next_buf, *next_buflen,
+			     1);
+
+	return ret;
+unmap_ctx:
+	ahash_unmap_ctx(ctx->dev, edesc, req, ctx->ctx_len, DMA_TO_DEVICE);
+	qi_cache_free(edesc);
+	return ret;
+}
+
+static int ahash_finup_first(struct ahash_request *req)
+{
+	return ahash_digest(req);
+}
+
+static int ahash_init(struct ahash_request *req)
+{
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	state->update = ahash_update_first;
+	state->finup = ahash_finup_first;
+	state->final = ahash_final_no_ctx;
+
+	state->ctx_dma = 0;
+	state->current_buf = 0;
+	state->buf_dma = 0;
+	state->buflen_0 = 0;
+	state->buflen_1 = 0;
+
+	return 0;
+}
+
+static int ahash_update(struct ahash_request *req)
+{
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	return state->update(req);
+}
+
+static int ahash_finup(struct ahash_request *req)
+{
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	return state->finup(req);
+}
+
+static int ahash_final(struct ahash_request *req)
+{
+	struct caam_hash_state *state = ahash_request_ctx(req);
+
+	return state->final(req);
+}
+
+static int ahash_export(struct ahash_request *req, void *out)
+{
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_export_state *export = out;
+	int len;
+	u8 *buf;
+
+	if (state->current_buf) {
+		buf = state->buf_1;
+		len = state->buflen_1;
+	} else {
+		buf = state->buf_0;
+		len = state->buflen_0;
+	}
+
+	memcpy(export->buf, buf, len);
+	memcpy(export->caam_ctx, state->caam_ctx, sizeof(export->caam_ctx));
+	export->buflen = len;
+	export->update = state->update;
+	export->final = state->final;
+	export->finup = state->finup;
+
+	return 0;
+}
+
+static int ahash_import(struct ahash_request *req, const void *in)
+{
+	struct caam_hash_state *state = ahash_request_ctx(req);
+	const struct caam_export_state *export = in;
+
+	memset(state, 0, sizeof(*state));
+	memcpy(state->buf_0, export->buf, export->buflen);
+	memcpy(state->caam_ctx, export->caam_ctx, sizeof(state->caam_ctx));
+	state->buflen_0 = export->buflen;
+	state->update = export->update;
+	state->final = export->final;
+	state->finup = export->finup;
+
+	return 0;
+}
+
+struct caam_hash_template {
+	char name[CRYPTO_MAX_ALG_NAME];
+	char driver_name[CRYPTO_MAX_ALG_NAME];
+	char hmac_name[CRYPTO_MAX_ALG_NAME];
+	char hmac_driver_name[CRYPTO_MAX_ALG_NAME];
+	unsigned int blocksize;
+	struct ahash_alg template_ahash;
+	u32 alg_type;
+};
+
+/* ahash descriptors */
+static struct caam_hash_template driver_hash[] = {
+	{
+		.name = "sha1",
+		.driver_name = "sha1-caam-qi2",
+		.hmac_name = "hmac(sha1)",
+		.hmac_driver_name = "hmac-sha1-caam-qi2",
+		.blocksize = SHA1_BLOCK_SIZE,
+		.template_ahash = {
+			.init = ahash_init,
+			.update = ahash_update,
+			.final = ahash_final,
+			.finup = ahash_finup,
+			.digest = ahash_digest,
+			.export = ahash_export,
+			.import = ahash_import,
+			.setkey = ahash_setkey,
+			.halg = {
+				.digestsize = SHA1_DIGEST_SIZE,
+				.statesize = sizeof(struct caam_export_state),
+			},
+		},
+		.alg_type = OP_ALG_ALGSEL_SHA1,
+	}, {
+		.name = "sha224",
+		.driver_name = "sha224-caam-qi2",
+		.hmac_name = "hmac(sha224)",
+		.hmac_driver_name = "hmac-sha224-caam-qi2",
+		.blocksize = SHA224_BLOCK_SIZE,
+		.template_ahash = {
+			.init = ahash_init,
+			.update = ahash_update,
+			.final = ahash_final,
+			.finup = ahash_finup,
+			.digest = ahash_digest,
+			.export = ahash_export,
+			.import = ahash_import,
+			.setkey = ahash_setkey,
+			.halg = {
+				.digestsize = SHA224_DIGEST_SIZE,
+				.statesize = sizeof(struct caam_export_state),
+			},
+		},
+		.alg_type = OP_ALG_ALGSEL_SHA224,
+	}, {
+		.name = "sha256",
+		.driver_name = "sha256-caam-qi2",
+		.hmac_name = "hmac(sha256)",
+		.hmac_driver_name = "hmac-sha256-caam-qi2",
+		.blocksize = SHA256_BLOCK_SIZE,
+		.template_ahash = {
+			.init = ahash_init,
+			.update = ahash_update,
+			.final = ahash_final,
+			.finup = ahash_finup,
+			.digest = ahash_digest,
+			.export = ahash_export,
+			.import = ahash_import,
+			.setkey = ahash_setkey,
+			.halg = {
+				.digestsize = SHA256_DIGEST_SIZE,
+				.statesize = sizeof(struct caam_export_state),
+			},
+		},
+		.alg_type = OP_ALG_ALGSEL_SHA256,
+	}, {
+		.name = "sha384",
+		.driver_name = "sha384-caam-qi2",
+		.hmac_name = "hmac(sha384)",
+		.hmac_driver_name = "hmac-sha384-caam-qi2",
+		.blocksize = SHA384_BLOCK_SIZE,
+		.template_ahash = {
+			.init = ahash_init,
+			.update = ahash_update,
+			.final = ahash_final,
+			.finup = ahash_finup,
+			.digest = ahash_digest,
+			.export = ahash_export,
+			.import = ahash_import,
+			.setkey = ahash_setkey,
+			.halg = {
+				.digestsize = SHA384_DIGEST_SIZE,
+				.statesize = sizeof(struct caam_export_state),
+			},
+		},
+		.alg_type = OP_ALG_ALGSEL_SHA384,
+	}, {
+		.name = "sha512",
+		.driver_name = "sha512-caam-qi2",
+		.hmac_name = "hmac(sha512)",
+		.hmac_driver_name = "hmac-sha512-caam-qi2",
+		.blocksize = SHA512_BLOCK_SIZE,
+		.template_ahash = {
+			.init = ahash_init,
+			.update = ahash_update,
+			.final = ahash_final,
+			.finup = ahash_finup,
+			.digest = ahash_digest,
+			.export = ahash_export,
+			.import = ahash_import,
+			.setkey = ahash_setkey,
+			.halg = {
+				.digestsize = SHA512_DIGEST_SIZE,
+				.statesize = sizeof(struct caam_export_state),
+			},
+		},
+		.alg_type = OP_ALG_ALGSEL_SHA512,
+	}, {
+		.name = "md5",
+		.driver_name = "md5-caam-qi2",
+		.hmac_name = "hmac(md5)",
+		.hmac_driver_name = "hmac-md5-caam-qi2",
+		.blocksize = MD5_BLOCK_WORDS * 4,
+		.template_ahash = {
+			.init = ahash_init,
+			.update = ahash_update,
+			.final = ahash_final,
+			.finup = ahash_finup,
+			.digest = ahash_digest,
+			.export = ahash_export,
+			.import = ahash_import,
+			.setkey = ahash_setkey,
+			.halg = {
+				.digestsize = MD5_DIGEST_SIZE,
+				.statesize = sizeof(struct caam_export_state),
+			},
+		},
+		.alg_type = OP_ALG_ALGSEL_MD5,
+	}
+};
+
+struct caam_hash_alg {
+	struct list_head entry;
+	struct device *dev;
+	int alg_type;
+	struct ahash_alg ahash_alg;
+};
+
+static int caam_hash_cra_init(struct crypto_tfm *tfm)
+{
+	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
+	struct crypto_alg *base = tfm->__crt_alg;
+	struct hash_alg_common *halg =
+		 container_of(base, struct hash_alg_common, base);
+	struct ahash_alg *alg =
+		 container_of(halg, struct ahash_alg, halg);
+	struct caam_hash_alg *caam_hash =
+		 container_of(alg, struct caam_hash_alg, ahash_alg);
+	struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+	/* Sizes for MDHA running digests: MD5, SHA1, 224, 256, 384, 512 */
+	static const u8 runninglen[] = { HASH_MSG_LEN + MD5_DIGEST_SIZE,
+					 HASH_MSG_LEN + SHA1_DIGEST_SIZE,
+					 HASH_MSG_LEN + 32,
+					 HASH_MSG_LEN + SHA256_DIGEST_SIZE,
+					 HASH_MSG_LEN + 64,
+					 HASH_MSG_LEN + SHA512_DIGEST_SIZE };
+	dma_addr_t dma_addr;
+	int i;
+
+	ctx->dev = caam_hash->dev;
+
+	dma_addr = dma_map_single_attrs(ctx->dev, ctx->flc, sizeof(ctx->flc),
+					DMA_BIDIRECTIONAL,
+					DMA_ATTR_SKIP_CPU_SYNC);
+	if (dma_mapping_error(ctx->dev, dma_addr)) {
+		dev_err(ctx->dev, "unable to map shared descriptors\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < HASH_NUM_OP; i++)
+		ctx->flc_dma[i] = dma_addr + i * sizeof(ctx->flc[i]);
+
+	/* copy descriptor header template value */
+	ctx->adata.algtype = OP_TYPE_CLASS2_ALG | caam_hash->alg_type;
+
+	ctx->ctx_len = runninglen[(ctx->adata.algtype &
+				   OP_ALG_ALGSEL_SUBMASK) >>
+				  OP_ALG_ALGSEL_SHIFT];
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct caam_hash_state));
+
+	return ahash_set_sh_desc(ahash);
+}
+
+static void caam_hash_cra_exit(struct crypto_tfm *tfm)
+{
+	struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	dma_unmap_single_attrs(ctx->dev, ctx->flc_dma[0], sizeof(ctx->flc),
+			       DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
+static struct caam_hash_alg *caam_hash_alloc(struct device *dev,
+	struct caam_hash_template *template, bool keyed)
+{
+	struct caam_hash_alg *t_alg;
+	struct ahash_alg *halg;
+	struct crypto_alg *alg;
+
+	t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL);
+	if (!t_alg)
+		return ERR_PTR(-ENOMEM);
+
+	t_alg->ahash_alg = template->template_ahash;
+	halg = &t_alg->ahash_alg;
+	alg = &halg->halg.base;
+
+	if (keyed) {
+		snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s",
+			 template->hmac_name);
+		snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+			 template->hmac_driver_name);
+	} else {
+		snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s",
+			 template->name);
+		snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+			 template->driver_name);
+		t_alg->ahash_alg.setkey = NULL;
+	}
+	alg->cra_module = THIS_MODULE;
+	alg->cra_init = caam_hash_cra_init;
+	alg->cra_exit = caam_hash_cra_exit;
+	alg->cra_ctxsize = sizeof(struct caam_hash_ctx);
+	alg->cra_priority = CAAM_CRA_PRIORITY;
+	alg->cra_blocksize = template->blocksize;
+	alg->cra_alignmask = 0;
+	alg->cra_flags = CRYPTO_ALG_ASYNC;
+
+	t_alg->alg_type = template->alg_type;
+	t_alg->dev = dev;
+
+	return t_alg;
+}
+
+static void dpaa2_caam_fqdan_cb(struct dpaa2_io_notification_ctx *nctx)
+{
+	struct dpaa2_caam_priv_per_cpu *ppriv;
+
+	ppriv = container_of(nctx, struct dpaa2_caam_priv_per_cpu, nctx);
+	napi_schedule_irqoff(&ppriv->napi);
+}
+
+static int __cold dpaa2_dpseci_dpio_setup(struct dpaa2_caam_priv *priv)
+{
+	struct device *dev = priv->dev;
+	struct dpaa2_io_notification_ctx *nctx;
+	struct dpaa2_caam_priv_per_cpu *ppriv;
+	int err, i = 0, cpu;
+
+	for_each_online_cpu(cpu) {
+		ppriv = per_cpu_ptr(priv->ppriv, cpu);
+		ppriv->priv = priv;
+		nctx = &ppriv->nctx;
+		nctx->is_cdan = 0;
+		nctx->id = ppriv->rsp_fqid;
+		nctx->desired_cpu = cpu;
+		nctx->cb = dpaa2_caam_fqdan_cb;
+
+		/* Register notification callbacks */
+		err = dpaa2_io_service_register(NULL, nctx);
+		if (unlikely(err)) {
+			dev_dbg(dev, "No affine DPIO for cpu %d\n", cpu);
+			nctx->cb = NULL;
+			/*
+			 * If no affine DPIO for this core, there's probably
+			 * none available for next cores either. Signal we want
+			 * to retry later, in case the DPIO devices weren't
+			 * probed yet.
+			 */
+			err = -EPROBE_DEFER;
+			goto err;
+		}
+
+		ppriv->store = dpaa2_io_store_create(DPAA2_CAAM_STORE_SIZE,
+						     dev);
+		if (unlikely(!ppriv->store)) {
+			dev_err(dev, "dpaa2_io_store_create() failed\n");
+			err = -ENOMEM;
+			goto err;
+		}
+
+		if (++i == priv->num_pairs)
+			break;
+	}
+
+	return 0;
+
+err:
+	for_each_online_cpu(cpu) {
+		ppriv = per_cpu_ptr(priv->ppriv, cpu);
+		if (!ppriv->nctx.cb)
+			break;
+		dpaa2_io_service_deregister(NULL, &ppriv->nctx);
+	}
+
+	for_each_online_cpu(cpu) {
+		ppriv = per_cpu_ptr(priv->ppriv, cpu);
+		if (!ppriv->store)
+			break;
+		dpaa2_io_store_destroy(ppriv->store);
+	}
+
+	return err;
+}
+
+static void __cold dpaa2_dpseci_dpio_free(struct dpaa2_caam_priv *priv)
+{
+	struct dpaa2_caam_priv_per_cpu *ppriv;
+	int i = 0, cpu;
+
+	for_each_online_cpu(cpu) {
+		ppriv = per_cpu_ptr(priv->ppriv, cpu);
+		dpaa2_io_service_deregister(NULL, &ppriv->nctx);
+		dpaa2_io_store_destroy(ppriv->store);
+
+		if (++i == priv->num_pairs)
+			return;
+	}
+}
+
+static int dpaa2_dpseci_bind(struct dpaa2_caam_priv *priv)
+{
+	struct dpseci_rx_queue_cfg rx_queue_cfg;
+	struct device *dev = priv->dev;
+	struct fsl_mc_device *ls_dev = to_fsl_mc_device(dev);
+	struct dpaa2_caam_priv_per_cpu *ppriv;
+	int err = 0, i = 0, cpu;
+
+	/* Configure Rx queues */
+	for_each_online_cpu(cpu) {
+		ppriv = per_cpu_ptr(priv->ppriv, cpu);
+
+		rx_queue_cfg.options = DPSECI_QUEUE_OPT_DEST |
+				       DPSECI_QUEUE_OPT_USER_CTX;
+		rx_queue_cfg.order_preservation_en = 0;
+		rx_queue_cfg.dest_cfg.dest_type = DPSECI_DEST_DPIO;
+		rx_queue_cfg.dest_cfg.dest_id = ppriv->nctx.dpio_id;
+		/*
+		 * Rx priority (WQ) doesn't really matter, since we use
+		 * pull mode, i.e. volatile dequeues from specific FQs
+		 */
+		rx_queue_cfg.dest_cfg.priority = 0;
+		rx_queue_cfg.user_ctx = ppriv->nctx.qman64;
+
+		err = dpseci_set_rx_queue(priv->mc_io, 0, ls_dev->mc_handle, i,
+					  &rx_queue_cfg);
+		if (err) {
+			dev_err(dev, "dpseci_set_rx_queue() failed with err %d\n",
+				err);
+			return err;
+		}
+
+		if (++i == priv->num_pairs)
+			break;
+	}
+
+	return err;
+}
+
+static void dpaa2_dpseci_congestion_free(struct dpaa2_caam_priv *priv)
+{
+	struct device *dev = priv->dev;
+
+	if (!priv->cscn_mem)
+		return;
+
+	dma_unmap_single(dev, priv->cscn_dma, DPAA2_CSCN_SIZE, DMA_FROM_DEVICE);
+	kfree(priv->cscn_mem);
+}
+
+static void dpaa2_dpseci_free(struct dpaa2_caam_priv *priv)
+{
+	struct device *dev = priv->dev;
+	struct fsl_mc_device *ls_dev = to_fsl_mc_device(dev);
+
+	dpaa2_dpseci_congestion_free(priv);
+	dpseci_close(priv->mc_io, 0, ls_dev->mc_handle);
+}
+
+static void dpaa2_caam_process_fd(struct dpaa2_caam_priv *priv,
+				  const struct dpaa2_fd *fd)
+{
+	struct caam_request *req;
+	u32 fd_err;
+
+	if (dpaa2_fd_get_format(fd) != dpaa2_fd_list) {
+		dev_err(priv->dev, "Only Frame List FD format is supported!\n");
+		return;
+	}
+
+	fd_err = dpaa2_fd_get_ctrl(fd) & FD_CTRL_ERR_MASK;
+	if (unlikely(fd_err))
+		dev_err(priv->dev, "FD error: %08x\n", fd_err);
+
+	/*
+	 * FD[ADDR] is guaranteed to be valid, irrespective of errors reported
+	 * in FD[ERR] or FD[FRC].
+	 */
+	req = dpaa2_caam_iova_to_virt(priv, dpaa2_fd_get_addr(fd));
+	dma_unmap_single(priv->dev, req->fd_flt_dma, sizeof(req->fd_flt),
+			 DMA_BIDIRECTIONAL);
+	req->cbk(req->ctx, dpaa2_fd_get_frc(fd));
+}
+
+static int dpaa2_caam_pull_fq(struct dpaa2_caam_priv_per_cpu *ppriv)
+{
+	int err;
+
+	/* Retry while portal is busy */
+	do {
+		err = dpaa2_io_service_pull_fq(NULL, ppriv->rsp_fqid,
+					       ppriv->store);
+	} while (err == -EBUSY);
+
+	if (unlikely(err))
+		dev_err(ppriv->priv->dev, "dpaa2_io_service_pull err %d", err);
+
+	return err;
+}
+
+static int dpaa2_caam_store_consume(struct dpaa2_caam_priv_per_cpu *ppriv)
+{
+	struct dpaa2_dq *dq;
+	int cleaned = 0, is_last;
+
+	do {
+		dq = dpaa2_io_store_next(ppriv->store, &is_last);
+		if (unlikely(!dq)) {
+			if (unlikely(!is_last)) {
+				dev_dbg(ppriv->priv->dev,
+					"FQ %d returned no valid frames\n",
+					ppriv->rsp_fqid);
+				/*
+				 * MUST retry until we get some sort of
+				 * valid response token (be it "empty dequeue"
+				 * or a valid frame).
+				 */
+				continue;
+			}
+			break;
+		}
+
+		/* Process FD */
+		dpaa2_caam_process_fd(ppriv->priv, dpaa2_dq_fd(dq));
+		cleaned++;
+	} while (!is_last);
+
+	return cleaned;
+}
+
+static int dpaa2_dpseci_poll(struct napi_struct *napi, int budget)
+{
+	struct dpaa2_caam_priv_per_cpu *ppriv;
+	struct dpaa2_caam_priv *priv;
+	int err, cleaned = 0, store_cleaned;
+
+	ppriv = container_of(napi, struct dpaa2_caam_priv_per_cpu, napi);
+	priv = ppriv->priv;
+
+	if (unlikely(dpaa2_caam_pull_fq(ppriv)))
+		return 0;
+
+	do {
+		store_cleaned = dpaa2_caam_store_consume(ppriv);
+		cleaned += store_cleaned;
+
+		if (store_cleaned == 0 ||
+		    cleaned > budget - DPAA2_CAAM_STORE_SIZE)
+			break;
+
+		/* Try to dequeue some more */
+		err = dpaa2_caam_pull_fq(ppriv);
+		if (unlikely(err))
+			break;
+	} while (1);
+
+	if (cleaned < budget) {
+		napi_complete_done(napi, cleaned);
+		err = dpaa2_io_service_rearm(NULL, &ppriv->nctx);
+		if (unlikely(err))
+			dev_err(priv->dev, "Notification rearm failed: %d\n",
+				err);
+	}
+
+	return cleaned;
+}
+
+static int dpaa2_dpseci_congestion_setup(struct dpaa2_caam_priv *priv,
+					 u16 token)
+{
+	struct dpseci_congestion_notification_cfg cong_notif_cfg = { 0 };
+	struct device *dev = priv->dev;
+	int err;
+
+	/*
+	 * Congestion group feature supported starting with DPSECI API v5.1
+	 * and only when object has been created with this capability.
+	 */
+	if ((DPSECI_VER(priv->major_ver, priv->minor_ver) < DPSECI_VER(5, 1)) ||
+	    !(priv->dpseci_attr.options & DPSECI_OPT_HAS_CG))
+		return 0;
+
+	priv->cscn_mem = kzalloc(DPAA2_CSCN_SIZE + DPAA2_CSCN_ALIGN,
+				 GFP_KERNEL | GFP_DMA);
+	if (!priv->cscn_mem)
+		return -ENOMEM;
+
+	priv->cscn_mem_aligned = PTR_ALIGN(priv->cscn_mem, DPAA2_CSCN_ALIGN);
+	priv->cscn_dma = dma_map_single(dev, priv->cscn_mem_aligned,
+					DPAA2_CSCN_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, priv->cscn_dma)) {
+		dev_err(dev, "Error mapping CSCN memory area\n");
+		err = -ENOMEM;
+		goto err_dma_map;
+	}
+
+	cong_notif_cfg.units = DPSECI_CONGESTION_UNIT_BYTES;
+	cong_notif_cfg.threshold_entry = DPAA2_SEC_CONG_ENTRY_THRESH;
+	cong_notif_cfg.threshold_exit = DPAA2_SEC_CONG_EXIT_THRESH;
+	cong_notif_cfg.message_ctx = (uintptr_t)priv;
+	cong_notif_cfg.message_iova = priv->cscn_dma;
+	cong_notif_cfg.notification_mode = DPSECI_CGN_MODE_WRITE_MEM_ON_ENTER |
+					DPSECI_CGN_MODE_WRITE_MEM_ON_EXIT |
+					DPSECI_CGN_MODE_COHERENT_WRITE;
+
+	err = dpseci_set_congestion_notification(priv->mc_io, 0, token,
+						 &cong_notif_cfg);
+	if (err) {
+		dev_err(dev, "dpseci_set_congestion_notification failed\n");
+		goto err_set_cong;
+	}
+
+	return 0;
+
+err_set_cong:
+	dma_unmap_single(dev, priv->cscn_dma, DPAA2_CSCN_SIZE, DMA_FROM_DEVICE);
+err_dma_map:
+	kfree(priv->cscn_mem);
+
+	return err;
+}
+
+static int __cold dpaa2_dpseci_setup(struct fsl_mc_device *ls_dev)
+{
+	struct device *dev = &ls_dev->dev;
+	struct dpaa2_caam_priv *priv;
+	struct dpaa2_caam_priv_per_cpu *ppriv;
+	int err, cpu;
+	u8 i;
+
+	priv = dev_get_drvdata(dev);
+
+	priv->dev = dev;
+	priv->dpsec_id = ls_dev->obj_desc.id;
+
+	/* Get a handle for the DPSECI this interface is associate with */
+	err = dpseci_open(priv->mc_io, 0, priv->dpsec_id, &ls_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpseci_open() failed: %d\n", err);
+		goto err_open;
+	}
+
+	err = dpseci_get_api_version(priv->mc_io, 0, &priv->major_ver,
+				     &priv->minor_ver);
+	if (err) {
+		dev_err(dev, "dpseci_get_api_version() failed\n");
+		goto err_get_vers;
+	}
+
+	dev_info(dev, "dpseci v%d.%d\n", priv->major_ver, priv->minor_ver);
+
+	err = dpseci_get_attributes(priv->mc_io, 0, ls_dev->mc_handle,
+				    &priv->dpseci_attr);
+	if (err) {
+		dev_err(dev, "dpseci_get_attributes() failed\n");
+		goto err_get_vers;
+	}
+
+	err = dpseci_get_sec_attr(priv->mc_io, 0, ls_dev->mc_handle,
+				  &priv->sec_attr);
+	if (err) {
+		dev_err(dev, "dpseci_get_sec_attr() failed\n");
+		goto err_get_vers;
+	}
+
+	err = dpaa2_dpseci_congestion_setup(priv, ls_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "setup_congestion() failed\n");
+		goto err_get_vers;
+	}
+
+	priv->num_pairs = min(priv->dpseci_attr.num_rx_queues,
+			      priv->dpseci_attr.num_tx_queues);
+	if (priv->num_pairs > num_online_cpus()) {
+		dev_warn(dev, "%d queues won't be used\n",
+			 priv->num_pairs - num_online_cpus());
+		priv->num_pairs = num_online_cpus();
+	}
+
+	for (i = 0; i < priv->dpseci_attr.num_rx_queues; i++) {
+		err = dpseci_get_rx_queue(priv->mc_io, 0, ls_dev->mc_handle, i,
+					  &priv->rx_queue_attr[i]);
+		if (err) {
+			dev_err(dev, "dpseci_get_rx_queue() failed\n");
+			goto err_get_rx_queue;
+		}
+	}
+
+	for (i = 0; i < priv->dpseci_attr.num_tx_queues; i++) {
+		err = dpseci_get_tx_queue(priv->mc_io, 0, ls_dev->mc_handle, i,
+					  &priv->tx_queue_attr[i]);
+		if (err) {
+			dev_err(dev, "dpseci_get_tx_queue() failed\n");
+			goto err_get_rx_queue;
+		}
+	}
+
+	i = 0;
+	for_each_online_cpu(cpu) {
+		dev_dbg(dev, "pair %d: rx queue %d, tx queue %d\n", i,
+			priv->rx_queue_attr[i].fqid,
+			priv->tx_queue_attr[i].fqid);
+
+		ppriv = per_cpu_ptr(priv->ppriv, cpu);
+		ppriv->req_fqid = priv->tx_queue_attr[i].fqid;
+		ppriv->rsp_fqid = priv->rx_queue_attr[i].fqid;
+		ppriv->prio = i;
+
+		ppriv->net_dev.dev = *dev;
+		INIT_LIST_HEAD(&ppriv->net_dev.napi_list);
+		netif_napi_add(&ppriv->net_dev, &ppriv->napi, dpaa2_dpseci_poll,
+			       DPAA2_CAAM_NAPI_WEIGHT);
+		if (++i == priv->num_pairs)
+			break;
+	}
+
+	return 0;
+
+err_get_rx_queue:
+	dpaa2_dpseci_congestion_free(priv);
+err_get_vers:
+	dpseci_close(priv->mc_io, 0, ls_dev->mc_handle);
+err_open:
+	return err;
+}
+
+static int dpaa2_dpseci_enable(struct dpaa2_caam_priv *priv)
+{
+	struct device *dev = priv->dev;
+	struct fsl_mc_device *ls_dev = to_fsl_mc_device(dev);
+	struct dpaa2_caam_priv_per_cpu *ppriv;
+	int i;
+
+	for (i = 0; i < priv->num_pairs; i++) {
+		ppriv = per_cpu_ptr(priv->ppriv, i);
+		napi_enable(&ppriv->napi);
+	}
+
+	return dpseci_enable(priv->mc_io, 0, ls_dev->mc_handle);
+}
+
+static int __cold dpaa2_dpseci_disable(struct dpaa2_caam_priv *priv)
+{
+	struct device *dev = priv->dev;
+	struct dpaa2_caam_priv_per_cpu *ppriv;
+	struct fsl_mc_device *ls_dev = to_fsl_mc_device(dev);
+	int i, err = 0, enabled;
+
+	err = dpseci_disable(priv->mc_io, 0, ls_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpseci_disable() failed\n");
+		return err;
+	}
+
+	err = dpseci_is_enabled(priv->mc_io, 0, ls_dev->mc_handle, &enabled);
+	if (err) {
+		dev_err(dev, "dpseci_is_enabled() failed\n");
+		return err;
+	}
+
+	dev_dbg(dev, "disable: %s\n", enabled ? "false" : "true");
+
+	for (i = 0; i < priv->num_pairs; i++) {
+		ppriv = per_cpu_ptr(priv->ppriv, i);
+		napi_disable(&ppriv->napi);
+		netif_napi_del(&ppriv->napi);
+	}
+
+	return 0;
+}
+
+static struct list_head hash_list;
+
+static int dpaa2_caam_probe(struct fsl_mc_device *dpseci_dev)
+{
+	struct device *dev;
+	struct dpaa2_caam_priv *priv;
+	int i, err = 0;
+	bool registered = false;
+
+	/*
+	 * There is no way to get CAAM endianness - there is no direct register
+	 * space access and MC f/w does not provide this attribute.
+	 * All DPAA2-based SoCs have little endian CAAM, thus hard-code this
+	 * property.
+	 */
+	caam_little_end = true;
+
+	caam_imx = false;
+
+	dev = &dpseci_dev->dev;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, priv);
+
+	priv->domain = iommu_get_domain_for_dev(dev);
+
+	qi_cache = kmem_cache_create("dpaa2_caamqicache", CAAM_QI_MEMCACHE_SIZE,
+				     0, SLAB_CACHE_DMA, NULL);
+	if (!qi_cache) {
+		dev_err(dev, "Can't allocate SEC cache\n");
+		return -ENOMEM;
+	}
+
+	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(49));
+	if (err) {
+		dev_err(dev, "dma_set_mask_and_coherent() failed\n");
+		goto err_dma_mask;
+	}
+
+	/* Obtain a MC portal */
+	err = fsl_mc_portal_allocate(dpseci_dev, 0, &priv->mc_io);
+	if (err) {
+		if (err == -ENXIO)
+			err = -EPROBE_DEFER;
+		else
+			dev_err(dev, "MC portal allocation failed\n");
+
+		goto err_dma_mask;
+	}
+
+	priv->ppriv = alloc_percpu(*priv->ppriv);
+	if (!priv->ppriv) {
+		dev_err(dev, "alloc_percpu() failed\n");
+		err = -ENOMEM;
+		goto err_alloc_ppriv;
+	}
+
+	/* DPSECI initialization */
+	err = dpaa2_dpseci_setup(dpseci_dev);
+	if (err) {
+		dev_err(dev, "dpaa2_dpseci_setup() failed\n");
+		goto err_dpseci_setup;
+	}
+
+	/* DPIO */
+	err = dpaa2_dpseci_dpio_setup(priv);
+	if (err) {
+		if (err != -EPROBE_DEFER)
+			dev_err(dev, "dpaa2_dpseci_dpio_setup() failed\n");
+		goto err_dpio_setup;
+	}
+
+	/* DPSECI binding to DPIO */
+	err = dpaa2_dpseci_bind(priv);
+	if (err) {
+		dev_err(dev, "dpaa2_dpseci_bind() failed\n");
+		goto err_bind;
+	}
+
+	/* DPSECI enable */
+	err = dpaa2_dpseci_enable(priv);
+	if (err) {
+		dev_err(dev, "dpaa2_dpseci_enable() failed\n");
+		goto err_bind;
+	}
+
+	/* register crypto algorithms the device supports */
+	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
+		struct caam_skcipher_alg *t_alg = driver_algs + i;
+		u32 alg_sel = t_alg->caam.class1_alg_type & OP_ALG_ALGSEL_MASK;
+
+		/* Skip DES algorithms if not supported by device */
+		if (!priv->sec_attr.des_acc_num &&
+		    (alg_sel == OP_ALG_ALGSEL_3DES ||
+		     alg_sel == OP_ALG_ALGSEL_DES))
+			continue;
+
+		/* Skip AES algorithms if not supported by device */
+		if (!priv->sec_attr.aes_acc_num &&
+		    alg_sel == OP_ALG_ALGSEL_AES)
+			continue;
+
+		t_alg->caam.dev = dev;
+		caam_skcipher_alg_init(t_alg);
+
+		err = crypto_register_skcipher(&t_alg->skcipher);
+		if (err) {
+			dev_warn(dev, "%s alg registration failed: %d\n",
+				 t_alg->skcipher.base.cra_driver_name, err);
+			continue;
+		}
+
+		t_alg->registered = true;
+		registered = true;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(driver_aeads); i++) {
+		struct caam_aead_alg *t_alg = driver_aeads + i;
+		u32 c1_alg_sel = t_alg->caam.class1_alg_type &
+				 OP_ALG_ALGSEL_MASK;
+		u32 c2_alg_sel = t_alg->caam.class2_alg_type &
+				 OP_ALG_ALGSEL_MASK;
+
+		/* Skip DES algorithms if not supported by device */
+		if (!priv->sec_attr.des_acc_num &&
+		    (c1_alg_sel == OP_ALG_ALGSEL_3DES ||
+		     c1_alg_sel == OP_ALG_ALGSEL_DES))
+			continue;
+
+		/* Skip AES algorithms if not supported by device */
+		if (!priv->sec_attr.aes_acc_num &&
+		    c1_alg_sel == OP_ALG_ALGSEL_AES)
+			continue;
+
+		/*
+		 * Skip algorithms requiring message digests
+		 * if MD not supported by device.
+		 */
+		if (!priv->sec_attr.md_acc_num && c2_alg_sel)
+			continue;
+
+		t_alg->caam.dev = dev;
+		caam_aead_alg_init(t_alg);
+
+		err = crypto_register_aead(&t_alg->aead);
+		if (err) {
+			dev_warn(dev, "%s alg registration failed: %d\n",
+				 t_alg->aead.base.cra_driver_name, err);
+			continue;
+		}
+
+		t_alg->registered = true;
+		registered = true;
+	}
+	if (registered)
+		dev_info(dev, "algorithms registered in /proc/crypto\n");
+
+	/* register hash algorithms the device supports */
+	INIT_LIST_HEAD(&hash_list);
+
+	/*
+	 * Skip registration of any hashing algorithms if MD block
+	 * is not present.
+	 */
+	if (!priv->sec_attr.md_acc_num)
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(driver_hash); i++) {
+		struct caam_hash_alg *t_alg;
+		struct caam_hash_template *alg = driver_hash + i;
+
+		/* register hmac version */
+		t_alg = caam_hash_alloc(dev, alg, true);
+		if (IS_ERR(t_alg)) {
+			err = PTR_ERR(t_alg);
+			dev_warn(dev, "%s hash alg allocation failed: %d\n",
+				 alg->driver_name, err);
+			continue;
+		}
+
+		err = crypto_register_ahash(&t_alg->ahash_alg);
+		if (err) {
+			dev_warn(dev, "%s alg registration failed: %d\n",
+				 t_alg->ahash_alg.halg.base.cra_driver_name,
+				 err);
+			kfree(t_alg);
+		} else {
+			list_add_tail(&t_alg->entry, &hash_list);
+		}
+
+		/* register unkeyed version */
+		t_alg = caam_hash_alloc(dev, alg, false);
+		if (IS_ERR(t_alg)) {
+			err = PTR_ERR(t_alg);
+			dev_warn(dev, "%s alg allocation failed: %d\n",
+				 alg->driver_name, err);
+			continue;
+		}
+
+		err = crypto_register_ahash(&t_alg->ahash_alg);
+		if (err) {
+			dev_warn(dev, "%s alg registration failed: %d\n",
+				 t_alg->ahash_alg.halg.base.cra_driver_name,
+				 err);
+			kfree(t_alg);
+		} else {
+			list_add_tail(&t_alg->entry, &hash_list);
+		}
+	}
+	if (!list_empty(&hash_list))
+		dev_info(dev, "hash algorithms registered in /proc/crypto\n");
+
+	return err;
+
+err_bind:
+	dpaa2_dpseci_dpio_free(priv);
+err_dpio_setup:
+	dpaa2_dpseci_free(priv);
+err_dpseci_setup:
+	free_percpu(priv->ppriv);
+err_alloc_ppriv:
+	fsl_mc_portal_free(priv->mc_io);
+err_dma_mask:
+	kmem_cache_destroy(qi_cache);
+
+	return err;
+}
+
+static int __cold dpaa2_caam_remove(struct fsl_mc_device *ls_dev)
+{
+	struct device *dev;
+	struct dpaa2_caam_priv *priv;
+	int i;
+
+	dev = &ls_dev->dev;
+	priv = dev_get_drvdata(dev);
+
+	for (i = 0; i < ARRAY_SIZE(driver_aeads); i++) {
+		struct caam_aead_alg *t_alg = driver_aeads + i;
+
+		if (t_alg->registered)
+			crypto_unregister_aead(&t_alg->aead);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
+		struct caam_skcipher_alg *t_alg = driver_algs + i;
+
+		if (t_alg->registered)
+			crypto_unregister_skcipher(&t_alg->skcipher);
+	}
+
+	if (hash_list.next) {
+		struct caam_hash_alg *t_hash_alg, *p;
+
+		list_for_each_entry_safe(t_hash_alg, p, &hash_list, entry) {
+			crypto_unregister_ahash(&t_hash_alg->ahash_alg);
+			list_del(&t_hash_alg->entry);
+			kfree(t_hash_alg);
+		}
+	}
+
+	dpaa2_dpseci_disable(priv);
+	dpaa2_dpseci_dpio_free(priv);
+	dpaa2_dpseci_free(priv);
+	free_percpu(priv->ppriv);
+	fsl_mc_portal_free(priv->mc_io);
+	kmem_cache_destroy(qi_cache);
+
+	return 0;
+}
+
+int dpaa2_caam_enqueue(struct device *dev, struct caam_request *req)
+{
+	struct dpaa2_fd fd;
+	struct dpaa2_caam_priv *priv = dev_get_drvdata(dev);
+	int err = 0, i, id;
+
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	if (priv->cscn_mem) {
+		dma_sync_single_for_cpu(priv->dev, priv->cscn_dma,
+					DPAA2_CSCN_SIZE,
+					DMA_FROM_DEVICE);
+		if (unlikely(dpaa2_cscn_state_congested(priv->cscn_mem_aligned))) {
+			dev_dbg_ratelimited(dev, "Dropping request\n");
+			return -EBUSY;
+		}
+	}
+
+	dpaa2_fl_set_flc(&req->fd_flt[1], req->flc_dma);
+
+	req->fd_flt_dma = dma_map_single(dev, req->fd_flt, sizeof(req->fd_flt),
+					 DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, req->fd_flt_dma)) {
+		dev_err(dev, "DMA mapping error for QI enqueue request\n");
+		goto err_out;
+	}
+
+	memset(&fd, 0, sizeof(fd));
+	dpaa2_fd_set_format(&fd, dpaa2_fd_list);
+	dpaa2_fd_set_addr(&fd, req->fd_flt_dma);
+	dpaa2_fd_set_len(&fd, dpaa2_fl_get_len(&req->fd_flt[1]));
+	dpaa2_fd_set_flc(&fd, req->flc_dma);
+
+	/*
+	 * There is no guarantee that preemption is disabled here,
+	 * thus take action.
+	 */
+	preempt_disable();
+	id = smp_processor_id() % priv->dpseci_attr.num_tx_queues;
+	for (i = 0; i < (priv->dpseci_attr.num_tx_queues << 1); i++) {
+		err = dpaa2_io_service_enqueue_fq(NULL,
+						  priv->tx_queue_attr[id].fqid,
+						  &fd);
+		if (err != -EBUSY)
+			break;
+	}
+	preempt_enable();
+
+	if (unlikely(err)) {
+		dev_err(dev, "Error enqueuing frame: %d\n", err);
+		goto err_out;
+	}
+
+	return -EINPROGRESS;
+
+err_out:
+	dma_unmap_single(dev, req->fd_flt_dma, sizeof(req->fd_flt),
+			 DMA_BIDIRECTIONAL);
+	return -EIO;
+}
+EXPORT_SYMBOL(dpaa2_caam_enqueue);
+
+static const struct fsl_mc_device_id dpaa2_caam_match_id_table[] = {
+	{
+		.vendor = FSL_MC_VENDOR_FREESCALE,
+		.obj_type = "dpseci",
+	},
+	{ .vendor = 0x0 }
+};
+
+static struct fsl_mc_driver dpaa2_caam_driver = {
+	.driver = {
+		.name		= KBUILD_MODNAME,
+		.owner		= THIS_MODULE,
+	},
+	.probe		= dpaa2_caam_probe,
+	.remove		= dpaa2_caam_remove,
+	.match_id_table = dpaa2_caam_match_id_table
+};
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Freescale Semiconductor, Inc");
+MODULE_DESCRIPTION("Freescale DPAA2 CAAM Driver");
+
+module_fsl_mc_driver(dpaa2_caam_driver);
diff --git a/drivers/crypto/caam/caamalg_qi2.h b/drivers/crypto/caam/caamalg_qi2.h
new file mode 100644
index 000000000000..9823bdefd029
--- /dev/null
+++ b/drivers/crypto/caam/caamalg_qi2.h
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright 2015-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2018 NXP
+ */
+
+#ifndef _CAAMALG_QI2_H_
+#define _CAAMALG_QI2_H_
+
+#include <soc/fsl/dpaa2-io.h>
+#include <soc/fsl/dpaa2-fd.h>
+#include <linux/threads.h>
+#include "dpseci.h"
+#include "desc_constr.h"
+
+#define DPAA2_CAAM_STORE_SIZE	16
+/* NAPI weight *must* be a multiple of the store size. */
+#define DPAA2_CAAM_NAPI_WEIGHT	64
+
+/* The congestion entrance threshold was chosen so that on LS2088
+ * we support the maximum throughput for the available memory
+ */
+#define DPAA2_SEC_CONG_ENTRY_THRESH	(128 * 1024 * 1024)
+#define DPAA2_SEC_CONG_EXIT_THRESH	(DPAA2_SEC_CONG_ENTRY_THRESH * 9 / 10)
+
+/**
+ * dpaa2_caam_priv - driver private data
+ * @dpseci_id: DPSECI object unique ID
+ * @major_ver: DPSECI major version
+ * @minor_ver: DPSECI minor version
+ * @dpseci_attr: DPSECI attributes
+ * @sec_attr: SEC engine attributes
+ * @rx_queue_attr: array of Rx queue attributes
+ * @tx_queue_attr: array of Tx queue attributes
+ * @cscn_mem: pointer to memory region containing the congestion SCN
+ *	it's size is larger than to accommodate alignment
+ * @cscn_mem_aligned: pointer to congestion SCN; it is computed as
+ *	PTR_ALIGN(cscn_mem, DPAA2_CSCN_ALIGN)
+ * @cscn_dma: dma address used by the QMAN to write CSCN messages
+ * @dev: device associated with the DPSECI object
+ * @mc_io: pointer to MC portal's I/O object
+ * @domain: IOMMU domain
+ * @ppriv: per CPU pointers to privata data
+ */
+struct dpaa2_caam_priv {
+	int dpsec_id;
+
+	u16 major_ver;
+	u16 minor_ver;
+
+	struct dpseci_attr dpseci_attr;
+	struct dpseci_sec_attr sec_attr;
+	struct dpseci_rx_queue_attr rx_queue_attr[DPSECI_MAX_QUEUE_NUM];
+	struct dpseci_tx_queue_attr tx_queue_attr[DPSECI_MAX_QUEUE_NUM];
+	int num_pairs;
+
+	/* congestion */
+	void *cscn_mem;
+	void *cscn_mem_aligned;
+	dma_addr_t cscn_dma;
+
+	struct device *dev;
+	struct fsl_mc_io *mc_io;
+	struct iommu_domain *domain;
+
+	struct dpaa2_caam_priv_per_cpu __percpu *ppriv;
+};
+
+/**
+ * dpaa2_caam_priv_per_cpu - per CPU private data
+ * @napi: napi structure
+ * @net_dev: netdev used by napi
+ * @req_fqid: (virtual) request (Tx / enqueue) FQID
+ * @rsp_fqid: (virtual) response (Rx / dequeue) FQID
+ * @prio: internal queue number - index for dpaa2_caam_priv.*_queue_attr
+ * @nctx: notification context of response FQ
+ * @store: where dequeued frames are stored
+ * @priv: backpointer to dpaa2_caam_priv
+ */
+struct dpaa2_caam_priv_per_cpu {
+	struct napi_struct napi;
+	struct net_device net_dev;
+	int req_fqid;
+	int rsp_fqid;
+	int prio;
+	struct dpaa2_io_notification_ctx nctx;
+	struct dpaa2_io_store *store;
+	struct dpaa2_caam_priv *priv;
+};
+
+/*
+ * The CAAM QI hardware constructs a job descriptor which points
+ * to shared descriptor (as pointed by context_a of FQ to CAAM).
+ * When the job descriptor is executed by deco, the whole job
+ * descriptor together with shared descriptor gets loaded in
+ * deco buffer which is 64 words long (each 32-bit).
+ *
+ * The job descriptor constructed by QI hardware has layout:
+ *
+ *	HEADER		(1 word)
+ *	Shdesc ptr	(1 or 2 words)
+ *	SEQ_OUT_PTR	(1 word)
+ *	Out ptr		(1 or 2 words)
+ *	Out length	(1 word)
+ *	SEQ_IN_PTR	(1 word)
+ *	In ptr		(1 or 2 words)
+ *	In length	(1 word)
+ *
+ * The shdesc ptr is used to fetch shared descriptor contents
+ * into deco buffer.
+ *
+ * Apart from shdesc contents, the total number of words that
+ * get loaded in deco buffer are '8' or '11'. The remaining words
+ * in deco buffer can be used for storing shared descriptor.
+ */
+#define MAX_SDLEN	((CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN) / CAAM_CMD_SZ)
+
+/* Length of a single buffer in the QI driver memory cache */
+#define CAAM_QI_MEMCACHE_SIZE	512
+
+/*
+ * aead_edesc - s/w-extended aead descriptor
+ * @src_nents: number of segments in input scatterlist
+ * @dst_nents: number of segments in output scatterlist
+ * @iv_dma: dma address of iv for checking continuity and link table
+ * @qm_sg_bytes: length of dma mapped h/w link table
+ * @qm_sg_dma: bus physical mapped address of h/w link table
+ * @assoclen: associated data length, in CAAM endianness
+ * @assoclen_dma: bus physical mapped address of req->assoclen
+ * @sgt: the h/w link table, followed by IV
+ */
+struct aead_edesc {
+	int src_nents;
+	int dst_nents;
+	dma_addr_t iv_dma;
+	int qm_sg_bytes;
+	dma_addr_t qm_sg_dma;
+	unsigned int assoclen;
+	dma_addr_t assoclen_dma;
+	struct dpaa2_sg_entry sgt[0];
+};
+
+/*
+ * skcipher_edesc - s/w-extended skcipher descriptor
+ * @src_nents: number of segments in input scatterlist
+ * @dst_nents: number of segments in output scatterlist
+ * @iv_dma: dma address of iv for checking continuity and link table
+ * @qm_sg_bytes: length of dma mapped qm_sg space
+ * @qm_sg_dma: I/O virtual address of h/w link table
+ * @sgt: the h/w link table, followed by IV
+ */
+struct skcipher_edesc {
+	int src_nents;
+	int dst_nents;
+	dma_addr_t iv_dma;
+	int qm_sg_bytes;
+	dma_addr_t qm_sg_dma;
+	struct dpaa2_sg_entry sgt[0];
+};
+
+/*
+ * ahash_edesc - s/w-extended ahash descriptor
+ * @dst_dma: I/O virtual address of req->result
+ * @qm_sg_dma: I/O virtual address of h/w link table
+ * @src_nents: number of segments in input scatterlist
+ * @qm_sg_bytes: length of dma mapped qm_sg space
+ * @sgt: pointer to h/w link table
+ */
+struct ahash_edesc {
+	dma_addr_t dst_dma;
+	dma_addr_t qm_sg_dma;
+	int src_nents;
+	int qm_sg_bytes;
+	struct dpaa2_sg_entry sgt[0];
+};
+
+/**
+ * caam_flc - Flow Context (FLC)
+ * @flc: Flow Context options
+ * @sh_desc: Shared Descriptor
+ */
+struct caam_flc {
+	u32 flc[16];
+	u32 sh_desc[MAX_SDLEN];
+} ____cacheline_aligned;
+
+enum optype {
+	ENCRYPT = 0,
+	DECRYPT,
+	NUM_OP
+};
+
+/**
+ * caam_request - the request structure the driver application should fill while
+ *                submitting a job to driver.
+ * @fd_flt: Frame list table defining input and output
+ *          fd_flt[0] - FLE pointing to output buffer
+ *          fd_flt[1] - FLE pointing to input buffer
+ * @fd_flt_dma: DMA address for the frame list table
+ * @flc: Flow Context
+ * @flc_dma: I/O virtual address of Flow Context
+ * @cbk: Callback function to invoke when job is completed
+ * @ctx: arbit context attached with request by the application
+ * @edesc: extended descriptor; points to one of {skcipher,aead}_edesc
+ */
+struct caam_request {
+	struct dpaa2_fl_entry fd_flt[2];
+	dma_addr_t fd_flt_dma;
+	struct caam_flc *flc;
+	dma_addr_t flc_dma;
+	void (*cbk)(void *ctx, u32 err);
+	void *ctx;
+	void *edesc;
+};
+
+/**
+ * dpaa2_caam_enqueue() - enqueue a crypto request
+ * @dev: device associated with the DPSECI object
+ * @req: pointer to caam_request
+ */
+int dpaa2_caam_enqueue(struct device *dev, struct caam_request *req);
+
+#endif	/* _CAAMALG_QI2_H_ */
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 43975ab5f09c..46924affa0bd 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * caam - Freescale FSL CAAM support for ahash functions of crypto API
  *
@@ -62,6 +63,7 @@
 #include "error.h"
 #include "sg_sw_sec4.h"
 #include "key_gen.h"
+#include "caamhash_desc.h"
 
 #define CAAM_CRA_PRIORITY		3000
 
@@ -71,14 +73,6 @@
 #define CAAM_MAX_HASH_BLOCK_SIZE	SHA512_BLOCK_SIZE
 #define CAAM_MAX_HASH_DIGEST_SIZE	SHA512_DIGEST_SIZE
 
-/* length of descriptors text */
-#define DESC_AHASH_BASE			(3 * CAAM_CMD_SZ)
-#define DESC_AHASH_UPDATE_LEN		(6 * CAAM_CMD_SZ)
-#define DESC_AHASH_UPDATE_FIRST_LEN	(DESC_AHASH_BASE + 4 * CAAM_CMD_SZ)
-#define DESC_AHASH_FINAL_LEN		(DESC_AHASH_BASE + 5 * CAAM_CMD_SZ)
-#define DESC_AHASH_FINUP_LEN		(DESC_AHASH_BASE + 5 * CAAM_CMD_SZ)
-#define DESC_AHASH_DIGEST_LEN		(DESC_AHASH_BASE + 4 * CAAM_CMD_SZ)
-
 #define DESC_HASH_MAX_USED_BYTES	(DESC_AHASH_FINAL_LEN + \
 					 CAAM_MAX_HASH_KEY_SIZE)
 #define DESC_HASH_MAX_USED_LEN		(DESC_HASH_MAX_USED_BYTES / CAAM_CMD_SZ)
@@ -235,60 +229,6 @@ static inline int ctx_map_to_sec4_sg(struct device *jrdev,
 	return 0;
 }
 
-/*
- * For ahash update, final and finup (import_ctx = true)
- *     import context, read and write to seqout
- * For ahash firsts and digest (import_ctx = false)
- *     read and write to seqout
- */
-static inline void ahash_gen_sh_desc(u32 *desc, u32 state, int digestsize,
-				     struct caam_hash_ctx *ctx, bool import_ctx,
-				     int era)
-{
-	u32 op = ctx->adata.algtype;
-	u32 *skip_key_load;
-
-	init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-	/* Append key if it has been set; ahash update excluded */
-	if ((state != OP_ALG_AS_UPDATE) && (ctx->adata.keylen)) {
-		/* Skip key loading if already shared */
-		skip_key_load = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-					    JUMP_COND_SHRD);
-
-		if (era < 6)
-			append_key_as_imm(desc, ctx->key, ctx->adata.keylen_pad,
-					  ctx->adata.keylen, CLASS_2 |
-					  KEY_DEST_MDHA_SPLIT | KEY_ENC);
-		else
-			append_proto_dkp(desc, &ctx->adata);
-
-		set_jump_tgt_here(desc, skip_key_load);
-
-		op |= OP_ALG_AAI_HMAC_PRECOMP;
-	}
-
-	/* If needed, import context from software */
-	if (import_ctx)
-		append_seq_load(desc, ctx->ctx_len, LDST_CLASS_2_CCB |
-				LDST_SRCDST_BYTE_CONTEXT);
-
-	/* Class 2 operation */
-	append_operation(desc, op | state | OP_ALG_ENCRYPT);
-
-	/*
-	 * Load from buf and/or src and write to req->result or state->context
-	 * Calculate remaining bytes to read
-	 */
-	append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
-	/* Read remaining bytes */
-	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_LAST2 |
-			     FIFOLD_TYPE_MSG | KEY_VLF);
-	/* Store class2 context bytes */
-	append_seq_store(desc, digestsize, LDST_CLASS_2_CCB |
-			 LDST_SRCDST_BYTE_CONTEXT);
-}
-
 static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 {
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
@@ -301,8 +241,8 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 
 	/* ahash_update shared descriptor */
 	desc = ctx->sh_desc_update;
-	ahash_gen_sh_desc(desc, OP_ALG_AS_UPDATE, ctx->ctx_len, ctx, true,
-			  ctrlpriv->era);
+	cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_UPDATE, ctx->ctx_len,
+			  ctx->ctx_len, true, ctrlpriv->era);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_update_dma,
 				   desc_bytes(desc), ctx->dir);
 #ifdef DEBUG
@@ -313,8 +253,8 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 
 	/* ahash_update_first shared descriptor */
 	desc = ctx->sh_desc_update_first;
-	ahash_gen_sh_desc(desc, OP_ALG_AS_INIT, ctx->ctx_len, ctx, false,
-			  ctrlpriv->era);
+	cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_INIT, ctx->ctx_len,
+			  ctx->ctx_len, false, ctrlpriv->era);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_update_first_dma,
 				   desc_bytes(desc), ctx->dir);
 #ifdef DEBUG
@@ -325,8 +265,8 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 
 	/* ahash_final shared descriptor */
 	desc = ctx->sh_desc_fin;
-	ahash_gen_sh_desc(desc, OP_ALG_AS_FINALIZE, digestsize, ctx, true,
-			  ctrlpriv->era);
+	cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_FINALIZE, digestsize,
+			  ctx->ctx_len, true, ctrlpriv->era);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_fin_dma,
 				   desc_bytes(desc), ctx->dir);
 #ifdef DEBUG
@@ -337,8 +277,8 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 
 	/* ahash_digest shared descriptor */
 	desc = ctx->sh_desc_digest;
-	ahash_gen_sh_desc(desc, OP_ALG_AS_INITFINAL, digestsize, ctx, false,
-			  ctrlpriv->era);
+	cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_INITFINAL, digestsize,
+			  ctx->ctx_len, false, ctrlpriv->era);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_digest_dma,
 				   desc_bytes(desc), ctx->dir);
 #ifdef DEBUG
diff --git a/drivers/crypto/caam/caamhash_desc.c b/drivers/crypto/caam/caamhash_desc.c
new file mode 100644
index 000000000000..a12f7959a2c3
--- /dev/null
+++ b/drivers/crypto/caam/caamhash_desc.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * Shared descriptors for ahash algorithms
+ *
+ * Copyright 2017 NXP
+ */
+
+#include "compat.h"
+#include "desc_constr.h"
+#include "caamhash_desc.h"
+
+/**
+ * cnstr_shdsc_ahash - ahash shared descriptor
+ * @desc: pointer to buffer used for descriptor construction
+ * @adata: pointer to authentication transform definitions.
+ *         A split key is required for SEC Era < 6; the size of the split key
+ *         is specified in this case.
+ *         Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, SHA224,
+ *         SHA256, SHA384, SHA512}.
+ * @state: algorithm state OP_ALG_AS_{INIT, FINALIZE, INITFINALIZE, UPDATE}
+ * @digestsize: algorithm's digest size
+ * @ctx_len: size of Context Register
+ * @import_ctx: true if previous Context Register needs to be restored
+ *              must be true for ahash update and final
+ *              must be false for for ahash first and digest
+ * @era: SEC Era
+ */
+void cnstr_shdsc_ahash(u32 * const desc, struct alginfo *adata, u32 state,
+		       int digestsize, int ctx_len, bool import_ctx, int era)
+{
+	u32 op = adata->algtype;
+
+	init_sh_desc(desc, HDR_SHARE_SERIAL);
+
+	/* Append key if it has been set; ahash update excluded */
+	if (state != OP_ALG_AS_UPDATE && adata->keylen) {
+		u32 *skip_key_load;
+
+		/* Skip key loading if already shared */
+		skip_key_load = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+					    JUMP_COND_SHRD);
+
+		if (era < 6)
+			append_key_as_imm(desc, adata->key_virt,
+					  adata->keylen_pad,
+					  adata->keylen, CLASS_2 |
+					  KEY_DEST_MDHA_SPLIT | KEY_ENC);
+		else
+			append_proto_dkp(desc, adata);
+
+		set_jump_tgt_here(desc, skip_key_load);
+
+		op |= OP_ALG_AAI_HMAC_PRECOMP;
+	}
+
+	/* If needed, import context from software */
+	if (import_ctx)
+		append_seq_load(desc, ctx_len, LDST_CLASS_2_CCB |
+				LDST_SRCDST_BYTE_CONTEXT);
+
+	/* Class 2 operation */
+	append_operation(desc, op | state | OP_ALG_ENCRYPT);
+
+	/*
+	 * Load from buf and/or src and write to req->result or state->context
+	 * Calculate remaining bytes to read
+	 */
+	append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+	/* Read remaining bytes */
+	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_LAST2 |
+			     FIFOLD_TYPE_MSG | KEY_VLF);
+	/* Store class2 context bytes */
+	append_seq_store(desc, digestsize, LDST_CLASS_2_CCB |
+			 LDST_SRCDST_BYTE_CONTEXT);
+}
+EXPORT_SYMBOL(cnstr_shdsc_ahash);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("FSL CAAM ahash descriptors support");
+MODULE_AUTHOR("NXP Semiconductors");
diff --git a/drivers/crypto/caam/caamhash_desc.h b/drivers/crypto/caam/caamhash_desc.h
new file mode 100644
index 000000000000..631fc1ac312c
--- /dev/null
+++ b/drivers/crypto/caam/caamhash_desc.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Shared descriptors for ahash algorithms
+ *
+ * Copyright 2017 NXP
+ */
+
+#ifndef _CAAMHASH_DESC_H_
+#define _CAAMHASH_DESC_H_
+
+/* length of descriptors text */
+#define DESC_AHASH_BASE			(3 * CAAM_CMD_SZ)
+#define DESC_AHASH_UPDATE_LEN		(6 * CAAM_CMD_SZ)
+#define DESC_AHASH_UPDATE_FIRST_LEN	(DESC_AHASH_BASE + 4 * CAAM_CMD_SZ)
+#define DESC_AHASH_FINAL_LEN		(DESC_AHASH_BASE + 5 * CAAM_CMD_SZ)
+#define DESC_AHASH_DIGEST_LEN		(DESC_AHASH_BASE + 4 * CAAM_CMD_SZ)
+
+void cnstr_shdsc_ahash(u32 * const desc, struct alginfo *adata, u32 state,
+		       int digestsize, int ctx_len, bool import_ctx, int era);
+
+#endif /* _CAAMHASH_DESC_H_ */
diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index f26d62e5533a..4fc209cbbeab 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
 /*
  * caam - Freescale FSL CAAM support for Public Key Cryptography
  *
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index fde07d4ff019..4318b0aa6fb9 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * caam - Freescale FSL CAAM support for hw_random
  *
diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h
index 1c71e0cd5098..9604ff7a335e 100644
--- a/drivers/crypto/caam/compat.h
+++ b/drivers/crypto/caam/compat.h
@@ -17,6 +17,7 @@
 #include <linux/of_platform.h>
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
+#include <linux/iommu.h>
 #include <linux/spinlock.h>
 #include <linux/rtnetlink.h>
 #include <linux/in.h>
@@ -39,6 +40,7 @@
 #include <crypto/authenc.h>
 #include <crypto/akcipher.h>
 #include <crypto/scatterwalk.h>
+#include <crypto/skcipher.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/rsa.h>
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 538c01f428c1..3fc793193821 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /* * CAAM control-plane driver backend
  * Controller-level driver, kernel property detection, initialization
  *
diff --git a/drivers/crypto/caam/dpseci.c b/drivers/crypto/caam/dpseci.c
new file mode 100644
index 000000000000..8a68531ded0b
--- /dev/null
+++ b/drivers/crypto/caam/dpseci.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2018 NXP
+ */
+
+#include <linux/fsl/mc.h>
+#include "dpseci.h"
+#include "dpseci_cmd.h"
+
+/**
+ * dpseci_open() - Open a control session for the specified object
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @dpseci_id:	DPSECI unique ID
+ * @token:	Returned token; use in subsequent API calls
+ *
+ * This function can be used to open a control session for an already created
+ * object; an object may have been declared statically in the DPL
+ * or created dynamically.
+ * This function returns a unique authentication token, associated with the
+ * specific object ID and the specific MC portal; this token must be used in all
+ * subsequent commands for this specific object.
+ *
+ * Return:	'0' on success, error code otherwise
+ */
+int dpseci_open(struct fsl_mc_io *mc_io, u32 cmd_flags, int dpseci_id,
+		u16 *token)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpseci_cmd_open *cmd_params;
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSECI_CMDID_OPEN,
+					  cmd_flags,
+					  0);
+	cmd_params = (struct dpseci_cmd_open *)cmd.params;
+	cmd_params->dpseci_id = cpu_to_le32(dpseci_id);
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	*token = mc_cmd_hdr_read_token(&cmd);
+
+	return 0;
+}
+
+/**
+ * dpseci_close() - Close the control session of the object
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSECI object
+ *
+ * After this function is called, no further operations are allowed on the
+ * object without opening a new control session.
+ *
+ * Return:	'0' on success, error code otherwise
+ */
+int dpseci_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSECI_CMDID_CLOSE,
+					  cmd_flags,
+					  token);
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpseci_enable() - Enable the DPSECI, allow sending and receiving frames
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSECI object
+ *
+ * Return:	'0' on success, error code otherwise
+ */
+int dpseci_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSECI_CMDID_ENABLE,
+					  cmd_flags,
+					  token);
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpseci_disable() - Disable the DPSECI, stop sending and receiving frames
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSECI object
+ *
+ * Return:	'0' on success, error code otherwise
+ */
+int dpseci_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSECI_CMDID_DISABLE,
+					  cmd_flags,
+					  token);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpseci_is_enabled() - Check if the DPSECI is enabled.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSECI object
+ * @en:		Returns '1' if object is enabled; '0' otherwise
+ *
+ * Return:	'0' on success, error code otherwise
+ */
+int dpseci_is_enabled(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		      int *en)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpseci_rsp_is_enabled *rsp_params;
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSECI_CMDID_IS_ENABLED,
+					  cmd_flags,
+					  token);
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpseci_rsp_is_enabled *)cmd.params;
+	*en = dpseci_get_field(rsp_params->is_enabled, ENABLE);
+
+	return 0;
+}
+
+/**
+ * dpseci_get_attributes() - Retrieve DPSECI attributes
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSECI object
+ * @attr:	Returned object's attributes
+ *
+ * Return:	'0' on success, error code otherwise
+ */
+int dpseci_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  struct dpseci_attr *attr)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpseci_rsp_get_attributes *rsp_params;
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSECI_CMDID_GET_ATTR,
+					  cmd_flags,
+					  token);
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpseci_rsp_get_attributes *)cmd.params;
+	attr->id = le32_to_cpu(rsp_params->id);
+	attr->num_tx_queues = rsp_params->num_tx_queues;
+	attr->num_rx_queues = rsp_params->num_rx_queues;
+	attr->options = le32_to_cpu(rsp_params->options);
+
+	return 0;
+}
+
+/**
+ * dpseci_set_rx_queue() - Set Rx queue configuration
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSECI object
+ * @queue:	Select the queue relative to number of priorities configured at
+ *		DPSECI creation; use DPSECI_ALL_QUEUES to configure all
+ *		Rx queues identically.
+ * @cfg:	Rx queue configuration
+ *
+ * Return:	'0' on success, error code otherwise
+ */
+int dpseci_set_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 queue, const struct dpseci_rx_queue_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpseci_cmd_queue *cmd_params;
+
+	cmd.header = mc_encode_cmd_header(DPSECI_CMDID_SET_RX_QUEUE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpseci_cmd_queue *)cmd.params;
+	cmd_params->dest_id = cpu_to_le32(cfg->dest_cfg.dest_id);
+	cmd_params->priority = cfg->dest_cfg.priority;
+	cmd_params->queue = queue;
+	dpseci_set_field(cmd_params->dest_type, DEST_TYPE,
+			 cfg->dest_cfg.dest_type);
+	cmd_params->user_ctx = cpu_to_le64(cfg->user_ctx);
+	cmd_params->options = cpu_to_le32(cfg->options);
+	dpseci_set_field(cmd_params->order_preservation_en, ORDER_PRESERVATION,
+			 cfg->order_preservation_en);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpseci_get_rx_queue() - Retrieve Rx queue attributes
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSECI object
+ * @queue:	Select the queue relative to number of priorities configured at
+ *		DPSECI creation
+ * @attr:	Returned Rx queue attributes
+ *
+ * Return:	'0' on success, error code otherwise
+ */
+int dpseci_get_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 queue, struct dpseci_rx_queue_attr *attr)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpseci_cmd_queue *cmd_params;
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSECI_CMDID_GET_RX_QUEUE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpseci_cmd_queue *)cmd.params;
+	cmd_params->queue = queue;
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	attr->dest_cfg.dest_id = le32_to_cpu(cmd_params->dest_id);
+	attr->dest_cfg.priority = cmd_params->priority;
+	attr->dest_cfg.dest_type = dpseci_get_field(cmd_params->dest_type,
+						    DEST_TYPE);
+	attr->user_ctx = le64_to_cpu(cmd_params->user_ctx);
+	attr->fqid = le32_to_cpu(cmd_params->fqid);
+	attr->order_preservation_en =
+		dpseci_get_field(cmd_params->order_preservation_en,
+				 ORDER_PRESERVATION);
+
+	return 0;
+}
+
+/**
+ * dpseci_get_tx_queue() - Retrieve Tx queue attributes
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSECI object
+ * @queue:	Select the queue relative to number of priorities configured at
+ *		DPSECI creation
+ * @attr:	Returned Tx queue attributes
+ *
+ * Return:	'0' on success, error code otherwise
+ */
+int dpseci_get_tx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 queue, struct dpseci_tx_queue_attr *attr)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpseci_cmd_queue *cmd_params;
+	struct dpseci_rsp_get_tx_queue *rsp_params;
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSECI_CMDID_GET_TX_QUEUE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpseci_cmd_queue *)cmd.params;
+	cmd_params->queue = queue;
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpseci_rsp_get_tx_queue *)cmd.params;
+	attr->fqid = le32_to_cpu(rsp_params->fqid);
+	attr->priority = rsp_params->priority;
+
+	return 0;
+}
+
+/**
+ * dpseci_get_sec_attr() - Retrieve SEC accelerator attributes
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSECI object
+ * @attr:	Returned SEC attributes
+ *
+ * Return:	'0' on success, error code otherwise
+ */
+int dpseci_get_sec_attr(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			struct dpseci_sec_attr *attr)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpseci_rsp_get_sec_attr *rsp_params;
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSECI_CMDID_GET_SEC_ATTR,
+					  cmd_flags,
+					  token);
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpseci_rsp_get_sec_attr *)cmd.params;
+	attr->ip_id = le16_to_cpu(rsp_params->ip_id);
+	attr->major_rev = rsp_params->major_rev;
+	attr->minor_rev = rsp_params->minor_rev;
+	attr->era = rsp_params->era;
+	attr->deco_num = rsp_params->deco_num;
+	attr->zuc_auth_acc_num = rsp_params->zuc_auth_acc_num;
+	attr->zuc_enc_acc_num = rsp_params->zuc_enc_acc_num;
+	attr->snow_f8_acc_num = rsp_params->snow_f8_acc_num;
+	attr->snow_f9_acc_num = rsp_params->snow_f9_acc_num;
+	attr->crc_acc_num = rsp_params->crc_acc_num;
+	attr->pk_acc_num = rsp_params->pk_acc_num;
+	attr->kasumi_acc_num = rsp_params->kasumi_acc_num;
+	attr->rng_acc_num = rsp_params->rng_acc_num;
+	attr->md_acc_num = rsp_params->md_acc_num;
+	attr->arc4_acc_num = rsp_params->arc4_acc_num;
+	attr->des_acc_num = rsp_params->des_acc_num;
+	attr->aes_acc_num = rsp_params->aes_acc_num;
+	attr->ccha_acc_num = rsp_params->ccha_acc_num;
+	attr->ptha_acc_num = rsp_params->ptha_acc_num;
+
+	return 0;
+}
+
+/**
+ * dpseci_get_api_version() - Get Data Path SEC Interface API version
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @major_ver:	Major version of data path sec API
+ * @minor_ver:	Minor version of data path sec API
+ *
+ * Return:	'0' on success, error code otherwise
+ */
+int dpseci_get_api_version(struct fsl_mc_io *mc_io, u32 cmd_flags,
+			   u16 *major_ver, u16 *minor_ver)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpseci_rsp_get_api_version *rsp_params;
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSECI_CMDID_GET_API_VERSION,
+					  cmd_flags, 0);
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpseci_rsp_get_api_version *)cmd.params;
+	*major_ver = le16_to_cpu(rsp_params->major);
+	*minor_ver = le16_to_cpu(rsp_params->minor);
+
+	return 0;
+}
+
+/**
+ * dpseci_set_congestion_notification() - Set congestion group
+ *	notification configuration
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSECI object
+ * @cfg:	congestion notification configuration
+ *
+ * Return:	'0' on success, error code otherwise
+ */
+int dpseci_set_congestion_notification(struct fsl_mc_io *mc_io, u32 cmd_flags,
+	u16 token, const struct dpseci_congestion_notification_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpseci_cmd_congestion_notification *cmd_params;
+
+	cmd.header = mc_encode_cmd_header(
+			DPSECI_CMDID_SET_CONGESTION_NOTIFICATION,
+			cmd_flags,
+			token);
+	cmd_params = (struct dpseci_cmd_congestion_notification *)cmd.params;
+	cmd_params->dest_id = cpu_to_le32(cfg->dest_cfg.dest_id);
+	cmd_params->notification_mode = cpu_to_le16(cfg->notification_mode);
+	cmd_params->priority = cfg->dest_cfg.priority;
+	dpseci_set_field(cmd_params->options, CGN_DEST_TYPE,
+			 cfg->dest_cfg.dest_type);
+	dpseci_set_field(cmd_params->options, CGN_UNITS, cfg->units);
+	cmd_params->message_iova = cpu_to_le64(cfg->message_iova);
+	cmd_params->message_ctx = cpu_to_le64(cfg->message_ctx);
+	cmd_params->threshold_entry = cpu_to_le32(cfg->threshold_entry);
+	cmd_params->threshold_exit = cpu_to_le32(cfg->threshold_exit);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpseci_get_congestion_notification() - Get congestion group notification
+ *	configuration
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSECI object
+ * @cfg:	congestion notification configuration
+ *
+ * Return:	'0' on success, error code otherwise
+ */
+int dpseci_get_congestion_notification(struct fsl_mc_io *mc_io, u32 cmd_flags,
+	u16 token, struct dpseci_congestion_notification_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpseci_cmd_congestion_notification *rsp_params;
+	int err;
+
+	cmd.header = mc_encode_cmd_header(
+			DPSECI_CMDID_GET_CONGESTION_NOTIFICATION,
+			cmd_flags,
+			token);
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpseci_cmd_congestion_notification *)cmd.params;
+	cfg->dest_cfg.dest_id = le32_to_cpu(rsp_params->dest_id);
+	cfg->notification_mode = le16_to_cpu(rsp_params->notification_mode);
+	cfg->dest_cfg.priority = rsp_params->priority;
+	cfg->dest_cfg.dest_type = dpseci_get_field(rsp_params->options,
+						   CGN_DEST_TYPE);
+	cfg->units = dpseci_get_field(rsp_params->options, CGN_UNITS);
+	cfg->message_iova = le64_to_cpu(rsp_params->message_iova);
+	cfg->message_ctx = le64_to_cpu(rsp_params->message_ctx);
+	cfg->threshold_entry = le32_to_cpu(rsp_params->threshold_entry);
+	cfg->threshold_exit = le32_to_cpu(rsp_params->threshold_exit);
+
+	return 0;
+}
diff --git a/drivers/crypto/caam/dpseci.h b/drivers/crypto/caam/dpseci.h
new file mode 100644
index 000000000000..4550e134d166
--- /dev/null
+++ b/drivers/crypto/caam/dpseci.h
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2018 NXP
+ */
+#ifndef _DPSECI_H_
+#define _DPSECI_H_
+
+/*
+ * Data Path SEC Interface API
+ * Contains initialization APIs and runtime control APIs for DPSECI
+ */
+
+struct fsl_mc_io;
+
+/**
+ * General DPSECI macros
+ */
+
+/**
+ * Maximum number of Tx/Rx queues per DPSECI object
+ */
+#define DPSECI_MAX_QUEUE_NUM		16
+
+/**
+ * All queues considered; see dpseci_set_rx_queue()
+ */
+#define DPSECI_ALL_QUEUES	(u8)(-1)
+
+int dpseci_open(struct fsl_mc_io *mc_io, u32 cmd_flags, int dpseci_id,
+		u16 *token);
+
+int dpseci_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+
+/**
+ * Enable the Congestion Group support
+ */
+#define DPSECI_OPT_HAS_CG		0x000020
+
+/**
+ * struct dpseci_cfg - Structure representing DPSECI configuration
+ * @options: Any combination of the following flags:
+ *		DPSECI_OPT_HAS_CG
+ * @num_tx_queues: num of queues towards the SEC
+ * @num_rx_queues: num of queues back from the SEC
+ * @priorities: Priorities for the SEC hardware processing;
+ *		each place in the array is the priority of the tx queue
+ *		towards the SEC;
+ *		valid priorities are configured with values 1-8;
+ */
+struct dpseci_cfg {
+	u32 options;
+	u8 num_tx_queues;
+	u8 num_rx_queues;
+	u8 priorities[DPSECI_MAX_QUEUE_NUM];
+};
+
+int dpseci_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+
+int dpseci_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+
+int dpseci_is_enabled(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		      int *en);
+
+/**
+ * struct dpseci_attr - Structure representing DPSECI attributes
+ * @id: DPSECI object ID
+ * @num_tx_queues: number of queues towards the SEC
+ * @num_rx_queues: number of queues back from the SEC
+ * @options: any combination of the following flags:
+ *		DPSECI_OPT_HAS_CG
+ */
+struct dpseci_attr {
+	int id;
+	u8 num_tx_queues;
+	u8 num_rx_queues;
+	u32 options;
+};
+
+int dpseci_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  struct dpseci_attr *attr);
+
+/**
+ * enum dpseci_dest - DPSECI destination types
+ * @DPSECI_DEST_NONE: Unassigned destination; The queue is set in parked mode
+ *	and does not generate FQDAN notifications; user is expected to dequeue
+ *	from the queue based on polling or other user-defined method
+ * @DPSECI_DEST_DPIO: The queue is set in schedule mode and generates FQDAN
+ *	notifications to the specified DPIO; user is expected to dequeue from
+ *	the queue only after notification is received
+ * @DPSECI_DEST_DPCON: The queue is set in schedule mode and does not generate
+ *	FQDAN notifications, but is connected to the specified DPCON object;
+ *	user is expected to dequeue from the DPCON channel
+ */
+enum dpseci_dest {
+	DPSECI_DEST_NONE = 0,
+	DPSECI_DEST_DPIO,
+	DPSECI_DEST_DPCON
+};
+
+/**
+ * struct dpseci_dest_cfg - Structure representing DPSECI destination parameters
+ * @dest_type: Destination type
+ * @dest_id: Either DPIO ID or DPCON ID, depending on the destination type
+ * @priority: Priority selection within the DPIO or DPCON channel; valid values
+ *	are 0-1 or 0-7, depending on the number of priorities in that channel;
+ *	not relevant for 'DPSECI_DEST_NONE' option
+ */
+struct dpseci_dest_cfg {
+	enum dpseci_dest dest_type;
+	int dest_id;
+	u8 priority;
+};
+
+/**
+ * DPSECI queue modification options
+ */
+
+/**
+ * Select to modify the user's context associated with the queue
+ */
+#define DPSECI_QUEUE_OPT_USER_CTX		0x00000001
+
+/**
+ * Select to modify the queue's destination
+ */
+#define DPSECI_QUEUE_OPT_DEST			0x00000002
+
+/**
+ * Select to modify the queue's order preservation
+ */
+#define DPSECI_QUEUE_OPT_ORDER_PRESERVATION	0x00000004
+
+/**
+ * struct dpseci_rx_queue_cfg - DPSECI RX queue configuration
+ * @options: Flags representing the suggested modifications to the queue;
+ *	Use any combination of 'DPSECI_QUEUE_OPT_<X>' flags
+ * @order_preservation_en: order preservation configuration for the rx queue
+ * valid only if 'DPSECI_QUEUE_OPT_ORDER_PRESERVATION' is contained in 'options'
+ * @user_ctx: User context value provided in the frame descriptor of each
+ *	dequeued frame;	valid only if 'DPSECI_QUEUE_OPT_USER_CTX' is contained
+ *	in 'options'
+ * @dest_cfg: Queue destination parameters; valid only if
+ *	'DPSECI_QUEUE_OPT_DEST' is contained in 'options'
+ */
+struct dpseci_rx_queue_cfg {
+	u32 options;
+	int order_preservation_en;
+	u64 user_ctx;
+	struct dpseci_dest_cfg dest_cfg;
+};
+
+int dpseci_set_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 queue, const struct dpseci_rx_queue_cfg *cfg);
+
+/**
+ * struct dpseci_rx_queue_attr - Structure representing attributes of Rx queues
+ * @user_ctx: User context value provided in the frame descriptor of each
+ *	dequeued frame
+ * @order_preservation_en: Status of the order preservation configuration on the
+ *	queue
+ * @dest_cfg: Queue destination configuration
+ * @fqid: Virtual FQID value to be used for dequeue operations
+ */
+struct dpseci_rx_queue_attr {
+	u64 user_ctx;
+	int order_preservation_en;
+	struct dpseci_dest_cfg dest_cfg;
+	u32 fqid;
+};
+
+int dpseci_get_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 queue, struct dpseci_rx_queue_attr *attr);
+
+/**
+ * struct dpseci_tx_queue_attr - Structure representing attributes of Tx queues
+ * @fqid: Virtual FQID to be used for sending frames to SEC hardware
+ * @priority: SEC hardware processing priority for the queue
+ */
+struct dpseci_tx_queue_attr {
+	u32 fqid;
+	u8 priority;
+};
+
+int dpseci_get_tx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 queue, struct dpseci_tx_queue_attr *attr);
+
+/**
+ * struct dpseci_sec_attr - Structure representing attributes of the SEC
+ *	hardware accelerator
+ * @ip_id: ID for SEC
+ * @major_rev: Major revision number for SEC
+ * @minor_rev: Minor revision number for SEC
+ * @era: SEC Era
+ * @deco_num: The number of copies of the DECO that are implemented in this
+ *	version of SEC
+ * @zuc_auth_acc_num: The number of copies of ZUCA that are implemented in this
+ *	version of SEC
+ * @zuc_enc_acc_num: The number of copies of ZUCE that are implemented in this
+ *	version of SEC
+ * @snow_f8_acc_num: The number of copies of the SNOW-f8 module that are
+ *	implemented in this version of SEC
+ * @snow_f9_acc_num: The number of copies of the SNOW-f9 module that are
+ *	implemented in this version of SEC
+ * @crc_acc_num: The number of copies of the CRC module that are implemented in
+ *	this version of SEC
+ * @pk_acc_num:  The number of copies of the Public Key module that are
+ *	implemented in this version of SEC
+ * @kasumi_acc_num: The number of copies of the Kasumi module that are
+ *	implemented in this version of SEC
+ * @rng_acc_num: The number of copies of the Random Number Generator that are
+ *	implemented in this version of SEC
+ * @md_acc_num: The number of copies of the MDHA (Hashing module) that are
+ *	implemented in this version of SEC
+ * @arc4_acc_num: The number of copies of the ARC4 module that are implemented
+ *	in this version of SEC
+ * @des_acc_num: The number of copies of the DES module that are implemented in
+ *	this version of SEC
+ * @aes_acc_num: The number of copies of the AES module that are implemented in
+ *	this version of SEC
+ * @ccha_acc_num: The number of copies of the ChaCha20 module that are
+ *	implemented in this version of SEC.
+ * @ptha_acc_num: The number of copies of the Poly1305 module that are
+ *	implemented in this version of SEC.
+ **/
+struct dpseci_sec_attr {
+	u16 ip_id;
+	u8 major_rev;
+	u8 minor_rev;
+	u8 era;
+	u8 deco_num;
+	u8 zuc_auth_acc_num;
+	u8 zuc_enc_acc_num;
+	u8 snow_f8_acc_num;
+	u8 snow_f9_acc_num;
+	u8 crc_acc_num;
+	u8 pk_acc_num;
+	u8 kasumi_acc_num;
+	u8 rng_acc_num;
+	u8 md_acc_num;
+	u8 arc4_acc_num;
+	u8 des_acc_num;
+	u8 aes_acc_num;
+	u8 ccha_acc_num;
+	u8 ptha_acc_num;
+};
+
+int dpseci_get_sec_attr(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			struct dpseci_sec_attr *attr);
+
+int dpseci_get_api_version(struct fsl_mc_io *mc_io, u32 cmd_flags,
+			   u16 *major_ver, u16 *minor_ver);
+
+/**
+ * enum dpseci_congestion_unit - DPSECI congestion units
+ * @DPSECI_CONGESTION_UNIT_BYTES: bytes units
+ * @DPSECI_CONGESTION_UNIT_FRAMES: frames units
+ */
+enum dpseci_congestion_unit {
+	DPSECI_CONGESTION_UNIT_BYTES = 0,
+	DPSECI_CONGESTION_UNIT_FRAMES
+};
+
+/**
+ * CSCN message is written to message_iova once entering a
+ * congestion state (see 'threshold_entry')
+ */
+#define DPSECI_CGN_MODE_WRITE_MEM_ON_ENTER		0x00000001
+
+/**
+ * CSCN message is written to message_iova once exiting a
+ * congestion state (see 'threshold_exit')
+ */
+#define DPSECI_CGN_MODE_WRITE_MEM_ON_EXIT		0x00000002
+
+/**
+ * CSCN write will attempt to allocate into a cache (coherent write);
+ * valid only if 'DPSECI_CGN_MODE_WRITE_MEM_<X>' is selected
+ */
+#define DPSECI_CGN_MODE_COHERENT_WRITE			0x00000004
+
+/**
+ * if 'dpseci_dest_cfg.dest_type != DPSECI_DEST_NONE' CSCN message is sent to
+ * DPIO/DPCON's WQ channel once entering a congestion state
+ * (see 'threshold_entry')
+ */
+#define DPSECI_CGN_MODE_NOTIFY_DEST_ON_ENTER		0x00000008
+
+/**
+ * if 'dpseci_dest_cfg.dest_type != DPSECI_DEST_NONE' CSCN message is sent to
+ * DPIO/DPCON's WQ channel once exiting a congestion state
+ * (see 'threshold_exit')
+ */
+#define DPSECI_CGN_MODE_NOTIFY_DEST_ON_EXIT		0x00000010
+
+/**
+ * if 'dpseci_dest_cfg.dest_type != DPSECI_DEST_NONE' when the CSCN is written
+ * to the sw-portal's DQRR, the DQRI interrupt is asserted immediately
+ * (if enabled)
+ */
+#define DPSECI_CGN_MODE_INTR_COALESCING_DISABLED	0x00000020
+
+/**
+ * struct dpseci_congestion_notification_cfg - congestion notification
+ *	configuration
+ * @units: units type
+ * @threshold_entry: above this threshold we enter a congestion state.
+ *	set it to '0' to disable it
+ * @threshold_exit: below this threshold we exit the congestion state.
+ * @message_ctx: The context that will be part of the CSCN message
+ * @message_iova: I/O virtual address (must be in DMA-able memory),
+ *	must be 16B aligned;
+ * @dest_cfg: CSCN can be send to either DPIO or DPCON WQ channel
+ * @notification_mode: Mask of available options; use 'DPSECI_CGN_MODE_<X>'
+ *	values
+ */
+struct dpseci_congestion_notification_cfg {
+	enum dpseci_congestion_unit units;
+	u32 threshold_entry;
+	u32 threshold_exit;
+	u64 message_ctx;
+	u64 message_iova;
+	struct dpseci_dest_cfg dest_cfg;
+	u16 notification_mode;
+};
+
+int dpseci_set_congestion_notification(struct fsl_mc_io *mc_io, u32 cmd_flags,
+	u16 token, const struct dpseci_congestion_notification_cfg *cfg);
+
+int dpseci_get_congestion_notification(struct fsl_mc_io *mc_io, u32 cmd_flags,
+	u16 token, struct dpseci_congestion_notification_cfg *cfg);
+
+#endif /* _DPSECI_H_ */
diff --git a/drivers/crypto/caam/dpseci_cmd.h b/drivers/crypto/caam/dpseci_cmd.h
new file mode 100644
index 000000000000..6ab77ead6e3d
--- /dev/null
+++ b/drivers/crypto/caam/dpseci_cmd.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2018 NXP
+ */
+
+#ifndef _DPSECI_CMD_H_
+#define _DPSECI_CMD_H_
+
+/* DPSECI Version */
+#define DPSECI_VER_MAJOR				5
+#define DPSECI_VER_MINOR				3
+
+#define DPSECI_VER(maj, min)	(((maj) << 16) | (min))
+#define DPSECI_VERSION		DPSECI_VER(DPSECI_VER_MAJOR, DPSECI_VER_MINOR)
+
+/* Command versioning */
+#define DPSECI_CMD_BASE_VERSION		1
+#define DPSECI_CMD_BASE_VERSION_V2	2
+#define DPSECI_CMD_ID_OFFSET		4
+
+#define DPSECI_CMD_V1(id)	(((id) << DPSECI_CMD_ID_OFFSET) | \
+				 DPSECI_CMD_BASE_VERSION)
+
+#define DPSECI_CMD_V2(id)	(((id) << DPSECI_CMD_ID_OFFSET) | \
+				 DPSECI_CMD_BASE_VERSION_V2)
+
+/* Command IDs */
+#define DPSECI_CMDID_CLOSE				DPSECI_CMD_V1(0x800)
+#define DPSECI_CMDID_OPEN				DPSECI_CMD_V1(0x809)
+#define DPSECI_CMDID_GET_API_VERSION			DPSECI_CMD_V1(0xa09)
+
+#define DPSECI_CMDID_ENABLE				DPSECI_CMD_V1(0x002)
+#define DPSECI_CMDID_DISABLE				DPSECI_CMD_V1(0x003)
+#define DPSECI_CMDID_GET_ATTR				DPSECI_CMD_V1(0x004)
+#define DPSECI_CMDID_IS_ENABLED				DPSECI_CMD_V1(0x006)
+
+#define DPSECI_CMDID_SET_RX_QUEUE			DPSECI_CMD_V1(0x194)
+#define DPSECI_CMDID_GET_RX_QUEUE			DPSECI_CMD_V1(0x196)
+#define DPSECI_CMDID_GET_TX_QUEUE			DPSECI_CMD_V1(0x197)
+#define DPSECI_CMDID_GET_SEC_ATTR			DPSECI_CMD_V2(0x198)
+#define DPSECI_CMDID_SET_CONGESTION_NOTIFICATION	DPSECI_CMD_V1(0x170)
+#define DPSECI_CMDID_GET_CONGESTION_NOTIFICATION	DPSECI_CMD_V1(0x171)
+
+/* Macros for accessing command fields smaller than 1 byte */
+#define DPSECI_MASK(field)	\
+	GENMASK(DPSECI_##field##_SHIFT + DPSECI_##field##_SIZE - 1,	\
+		DPSECI_##field##_SHIFT)
+
+#define dpseci_set_field(var, field, val)	\
+	((var) |= (((val) << DPSECI_##field##_SHIFT) & DPSECI_MASK(field)))
+
+#define dpseci_get_field(var, field)	\
+	(((var) & DPSECI_MASK(field)) >> DPSECI_##field##_SHIFT)
+
+struct dpseci_cmd_open {
+	__le32 dpseci_id;
+};
+
+#define DPSECI_ENABLE_SHIFT	0
+#define DPSECI_ENABLE_SIZE	1
+
+struct dpseci_rsp_is_enabled {
+	u8 is_enabled;
+};
+
+struct dpseci_rsp_get_attributes {
+	__le32 id;
+	__le32 pad0;
+	u8 num_tx_queues;
+	u8 num_rx_queues;
+	u8 pad1[6];
+	__le32 options;
+};
+
+#define DPSECI_DEST_TYPE_SHIFT	0
+#define DPSECI_DEST_TYPE_SIZE	4
+
+#define DPSECI_ORDER_PRESERVATION_SHIFT	0
+#define DPSECI_ORDER_PRESERVATION_SIZE	1
+
+struct dpseci_cmd_queue {
+	__le32 dest_id;
+	u8 priority;
+	u8 queue;
+	u8 dest_type;
+	u8 pad;
+	__le64 user_ctx;
+	union {
+		__le32 options;
+		__le32 fqid;
+	};
+	u8 order_preservation_en;
+};
+
+struct dpseci_rsp_get_tx_queue {
+	__le32 pad;
+	__le32 fqid;
+	u8 priority;
+};
+
+struct dpseci_rsp_get_sec_attr {
+	__le16 ip_id;
+	u8 major_rev;
+	u8 minor_rev;
+	u8 era;
+	u8 pad0[3];
+	u8 deco_num;
+	u8 zuc_auth_acc_num;
+	u8 zuc_enc_acc_num;
+	u8 pad1;
+	u8 snow_f8_acc_num;
+	u8 snow_f9_acc_num;
+	u8 crc_acc_num;
+	u8 pad2;
+	u8 pk_acc_num;
+	u8 kasumi_acc_num;
+	u8 rng_acc_num;
+	u8 pad3;
+	u8 md_acc_num;
+	u8 arc4_acc_num;
+	u8 des_acc_num;
+	u8 aes_acc_num;
+	u8 ccha_acc_num;
+	u8 ptha_acc_num;
+};
+
+struct dpseci_rsp_get_api_version {
+	__le16 major;
+	__le16 minor;
+};
+
+#define DPSECI_CGN_DEST_TYPE_SHIFT	0
+#define DPSECI_CGN_DEST_TYPE_SIZE	4
+#define DPSECI_CGN_UNITS_SHIFT		4
+#define DPSECI_CGN_UNITS_SIZE		2
+
+struct dpseci_cmd_congestion_notification {
+	__le32 dest_id;
+	__le16 notification_mode;
+	u8 priority;
+	u8 options;
+	__le64 message_iova;
+	__le64 message_ctx;
+	__le32 threshold_entry;
+	__le32 threshold_exit;
+};
+
+#endif /* _DPSECI_CMD_H_ */
diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c
index 8da88beb1abb..7e8d690f2827 100644
--- a/drivers/crypto/caam/error.c
+++ b/drivers/crypto/caam/error.c
@@ -108,6 +108,54 @@ static const struct {
 	{ 0xF1, "3GPP HFN matches or exceeds the Threshold" },
 };
 
+static const struct {
+	u8 value;
+	const char *error_text;
+} qi_error_list[] = {
+	{ 0x1F, "Job terminated by FQ or ICID flush" },
+	{ 0x20, "FD format error"},
+	{ 0x21, "FD command format error"},
+	{ 0x23, "FL format error"},
+	{ 0x25, "CRJD specified in FD, but not enabled in FLC"},
+	{ 0x30, "Max. buffer size too small"},
+	{ 0x31, "DHR exceeds max. buffer size (allocate mode, S/G format)"},
+	{ 0x32, "SGT exceeds max. buffer size (allocate mode, S/G format"},
+	{ 0x33, "Size over/underflow (allocate mode)"},
+	{ 0x34, "Size over/underflow (reuse mode)"},
+	{ 0x35, "Length exceeds max. short length (allocate mode, S/G/ format)"},
+	{ 0x36, "Memory footprint exceeds max. value (allocate mode, S/G/ format)"},
+	{ 0x41, "SBC frame format not supported (allocate mode)"},
+	{ 0x42, "Pool 0 invalid / pool 1 size < pool 0 size (allocate mode)"},
+	{ 0x43, "Annotation output enabled but ASAR = 0 (allocate mode)"},
+	{ 0x44, "Unsupported or reserved frame format or SGHR = 1 (reuse mode)"},
+	{ 0x45, "DHR correction underflow (reuse mode, single buffer format)"},
+	{ 0x46, "Annotation length exceeds offset (reuse mode)"},
+	{ 0x48, "Annotation output enabled but ASA limited by ASAR (reuse mode)"},
+	{ 0x49, "Data offset correction exceeds input frame data length (reuse mode)"},
+	{ 0x4B, "Annotation output enabled but ASA cannote be expanded (frame list)"},
+	{ 0x51, "Unsupported IF reuse mode"},
+	{ 0x52, "Unsupported FL use mode"},
+	{ 0x53, "Unsupported RJD use mode"},
+	{ 0x54, "Unsupported inline descriptor use mode"},
+	{ 0xC0, "Table buffer pool 0 depletion"},
+	{ 0xC1, "Table buffer pool 1 depletion"},
+	{ 0xC2, "Data buffer pool 0 depletion, no OF allocated"},
+	{ 0xC3, "Data buffer pool 1 depletion, no OF allocated"},
+	{ 0xC4, "Data buffer pool 0 depletion, partial OF allocated"},
+	{ 0xC5, "Data buffer pool 1 depletion, partial OF allocated"},
+	{ 0xD0, "FLC read error"},
+	{ 0xD1, "FL read error"},
+	{ 0xD2, "FL write error"},
+	{ 0xD3, "OF SGT write error"},
+	{ 0xD4, "PTA read error"},
+	{ 0xD5, "PTA write error"},
+	{ 0xD6, "OF SGT F-bit write error"},
+	{ 0xD7, "ASA write error"},
+	{ 0xE1, "FLC[ICR]=0 ICID error"},
+	{ 0xE2, "FLC[ICR]=1 ICID error"},
+	{ 0xE4, "source of ICID flush not trusted (BDI = 0)"},
+};
+
 static const char * const cha_id_list[] = {
 	"",
 	"AES",
@@ -236,6 +284,27 @@ static void report_deco_status(struct device *jrdev, const u32 status,
 		status, error, idx_str, idx, err_str, err_err_code);
 }
 
+static void report_qi_status(struct device *qidev, const u32 status,
+			     const char *error)
+{
+	u8 err_id = status & JRSTA_QIERR_ERROR_MASK;
+	const char *err_str = "unidentified error value 0x";
+	char err_err_code[3] = { 0 };
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(qi_error_list); i++)
+		if (qi_error_list[i].value == err_id)
+			break;
+
+	if (i != ARRAY_SIZE(qi_error_list) && qi_error_list[i].error_text)
+		err_str = qi_error_list[i].error_text;
+	else
+		snprintf(err_err_code, sizeof(err_err_code), "%02x", err_id);
+
+	dev_err(qidev, "%08x: %s: %s%s\n",
+		status, error, err_str, err_err_code);
+}
+
 static void report_jr_status(struct device *jrdev, const u32 status,
 			     const char *error)
 {
@@ -250,7 +319,7 @@ static void report_cond_code_status(struct device *jrdev, const u32 status,
 		status, error, __func__);
 }
 
-void caam_jr_strstatus(struct device *jrdev, u32 status)
+void caam_strstatus(struct device *jrdev, u32 status, bool qi_v2)
 {
 	static const struct stat_src {
 		void (*report_ssed)(struct device *jrdev, const u32 status,
@@ -262,7 +331,7 @@ void caam_jr_strstatus(struct device *jrdev, u32 status)
 		{ report_ccb_status, "CCB" },
 		{ report_jump_status, "Jump" },
 		{ report_deco_status, "DECO" },
-		{ NULL, "Queue Manager Interface" },
+		{ report_qi_status, "Queue Manager Interface" },
 		{ report_jr_status, "Job Ring" },
 		{ report_cond_code_status, "Condition Code" },
 		{ NULL, NULL },
@@ -288,4 +357,8 @@ void caam_jr_strstatus(struct device *jrdev, u32 status)
 	else
 		dev_err(jrdev, "%d: unknown error source\n", ssrc);
 }
-EXPORT_SYMBOL(caam_jr_strstatus);
+EXPORT_SYMBOL(caam_strstatus);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("FSL CAAM error reporting");
+MODULE_AUTHOR("Freescale Semiconductor");
diff --git a/drivers/crypto/caam/error.h b/drivers/crypto/caam/error.h
index 5aa332bac4b0..67ea94079837 100644
--- a/drivers/crypto/caam/error.h
+++ b/drivers/crypto/caam/error.h
@@ -8,7 +8,11 @@
 #ifndef CAAM_ERROR_H
 #define CAAM_ERROR_H
 #define CAAM_ERROR_STR_MAX 302
-void caam_jr_strstatus(struct device *jrdev, u32 status);
+
+void caam_strstatus(struct device *dev, u32 status, bool qi_v2);
+
+#define caam_jr_strstatus(jrdev, status) caam_strstatus(jrdev, status, false)
+#define caam_qi2_strstatus(qidev, status) caam_strstatus(qidev, status, true)
 
 void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type,
 		  int rowsize, int groupsize, struct scatterlist *sg,
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index acdd72016ffe..d50085a03597 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * CAAM/SEC 4.x transport/backend driver
  * JobR backend functionality
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 67f7f8c42c93..b84e6c8b1e13 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -84,13 +84,6 @@ static u64 times_congested;
 #endif
 
 /*
- * CPU from where the module initialised. This is required because QMan driver
- * requires CGRs to be removed from same CPU from where they were originally
- * allocated.
- */
-static int mod_init_cpu;
-
-/*
  * This is a a cache of buffers, from which the users of CAAM QI driver
  * can allocate short (CAAM_QI_MEMCACHE_SIZE) buffers. It's faster than
  * doing malloc on the hotpath.
@@ -492,12 +485,11 @@ void caam_drv_ctx_rel(struct caam_drv_ctx *drv_ctx)
 }
 EXPORT_SYMBOL(caam_drv_ctx_rel);
 
-int caam_qi_shutdown(struct device *qidev)
+void caam_qi_shutdown(struct device *qidev)
 {
-	int i, ret;
+	int i;
 	struct caam_qi_priv *priv = dev_get_drvdata(qidev);
 	const cpumask_t *cpus = qman_affine_cpus();
-	struct cpumask old_cpumask = current->cpus_allowed;
 
 	for_each_cpu(i, cpus) {
 		struct napi_struct *irqtask;
@@ -510,26 +502,12 @@ int caam_qi_shutdown(struct device *qidev)
 			dev_err(qidev, "Rsp FQ kill failed, cpu: %d\n", i);
 	}
 
-	/*
-	 * QMan driver requires CGRs to be deleted from same CPU from where they
-	 * were instantiated. Hence we get the module removal execute from the
-	 * same CPU from where it was originally inserted.
-	 */
-	set_cpus_allowed_ptr(current, get_cpu_mask(mod_init_cpu));
-
-	ret = qman_delete_cgr(&priv->cgr);
-	if (ret)
-		dev_err(qidev, "Deletion of CGR failed: %d\n", ret);
-	else
-		qman_release_cgrid(priv->cgr.cgrid);
+	qman_delete_cgr_safe(&priv->cgr);
+	qman_release_cgrid(priv->cgr.cgrid);
 
 	kmem_cache_destroy(qi_cache);
 
-	/* Now that we're done with the CGRs, restore the cpus allowed mask */
-	set_cpus_allowed_ptr(current, &old_cpumask);
-
 	platform_device_unregister(priv->qi_pdev);
-	return ret;
 }
 
 static void cgr_cb(struct qman_portal *qm, struct qman_cgr *cgr, int congested)
@@ -718,22 +696,11 @@ int caam_qi_init(struct platform_device *caam_pdev)
 	struct device *ctrldev = &caam_pdev->dev, *qidev;
 	struct caam_drv_private *ctrlpriv;
 	const cpumask_t *cpus = qman_affine_cpus();
-	struct cpumask old_cpumask = current->cpus_allowed;
 	static struct platform_device_info qi_pdev_info = {
 		.name = "caam_qi",
 		.id = PLATFORM_DEVID_NONE
 	};
 
-	/*
-	 * QMAN requires CGRs to be removed from same CPU+portal from where it
-	 * was originally allocated. Hence we need to note down the
-	 * initialisation CPU and use the same CPU for module exit.
-	 * We select the first CPU to from the list of portal owning CPUs.
-	 * Then we pin module init to this CPU.
-	 */
-	mod_init_cpu = cpumask_first(cpus);
-	set_cpus_allowed_ptr(current, get_cpu_mask(mod_init_cpu));
-
 	qi_pdev_info.parent = ctrldev;
 	qi_pdev_info.dma_mask = dma_get_mask(ctrldev);
 	qi_pdev = platform_device_register_full(&qi_pdev_info);
@@ -795,8 +762,6 @@ int caam_qi_init(struct platform_device *caam_pdev)
 		return -ENOMEM;
 	}
 
-	/* Done with the CGRs; restore the cpus allowed mask */
-	set_cpus_allowed_ptr(current, &old_cpumask);
 #ifdef CONFIG_DEBUG_FS
 	debugfs_create_file("qi_congested", 0444, ctrlpriv->ctl,
 			    &times_congested, &caam_fops_u64_ro);
diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h
index 357b69f57072..f93c9c7ed430 100644
--- a/drivers/crypto/caam/qi.h
+++ b/drivers/crypto/caam/qi.h
@@ -62,7 +62,6 @@ typedef void (*caam_qi_cbk)(struct caam_drv_req *drv_req, u32 status);
 enum optype {
 	ENCRYPT,
 	DECRYPT,
-	GIVENCRYPT,
 	NUM_OP
 };
 
@@ -174,7 +173,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc);
 void caam_drv_ctx_rel(struct caam_drv_ctx *drv_ctx);
 
 int caam_qi_init(struct platform_device *pdev);
-int caam_qi_shutdown(struct device *dev);
+void caam_qi_shutdown(struct device *dev);
 
 /**
  * qi_cache_alloc - Allocate buffers from CAAM-QI cache
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index 4fb91ba39c36..457815f965c0 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -70,22 +70,22 @@
 extern bool caam_little_end;
 extern bool caam_imx;
 
-#define caam_to_cpu(len)				\
-static inline u##len caam##len ## _to_cpu(u##len val)	\
-{							\
-	if (caam_little_end)				\
-		return le##len ## _to_cpu(val);		\
-	else						\
-		return be##len ## _to_cpu(val);		\
+#define caam_to_cpu(len)						\
+static inline u##len caam##len ## _to_cpu(u##len val)			\
+{									\
+	if (caam_little_end)						\
+		return le##len ## _to_cpu((__force __le##len)val);	\
+	else								\
+		return be##len ## _to_cpu((__force __be##len)val);	\
 }
 
-#define cpu_to_caam(len)				\
-static inline u##len cpu_to_caam##len(u##len val)	\
-{							\
-	if (caam_little_end)				\
-		return cpu_to_le##len(val);		\
-	else						\
-		return cpu_to_be##len(val);		\
+#define cpu_to_caam(len)					\
+static inline u##len cpu_to_caam##len(u##len val)		\
+{								\
+	if (caam_little_end)					\
+		return (__force u##len)cpu_to_le##len(val);	\
+	else							\
+		return (__force u##len)cpu_to_be##len(val);	\
 }
 
 caam_to_cpu(16)
@@ -633,6 +633,8 @@ struct caam_job_ring {
 #define JRSTA_DECOERR_INVSIGN       0x86
 #define JRSTA_DECOERR_DSASIGN       0x87
 
+#define JRSTA_QIERR_ERROR_MASK      0x00ff
+
 #define JRSTA_CCBERR_JUMP           0x08000000
 #define JRSTA_CCBERR_INDEX_MASK     0xff00
 #define JRSTA_CCBERR_INDEX_SHIFT    8
diff --git a/drivers/crypto/caam/sg_sw_qm.h b/drivers/crypto/caam/sg_sw_qm.h
index d000b4df745f..b3e1aaaeffea 100644
--- a/drivers/crypto/caam/sg_sw_qm.h
+++ b/drivers/crypto/caam/sg_sw_qm.h
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
 /*
  * Copyright 2013-2016 Freescale Semiconductor, Inc.
  * Copyright 2016-2017 NXP
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in the
- *       documentation and/or other materials provided with the distribution.
- *     * Neither the name of Freescale Semiconductor nor the
- *       names of its contributors may be used to endorse or promote products
- *       derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #ifndef __SG_SW_QM_H
diff --git a/drivers/crypto/caam/sg_sw_qm2.h b/drivers/crypto/caam/sg_sw_qm2.h
index b5b4c12179df..c9378402a5f8 100644
--- a/drivers/crypto/caam/sg_sw_qm2.h
+++ b/drivers/crypto/caam/sg_sw_qm2.h
@@ -1,35 +1,7 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
 /*
  * Copyright 2015-2016 Freescale Semiconductor, Inc.
  * Copyright 2017 NXP
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *     * Redistributions of source code must retain the above copyright
- *	 notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *	 notice, this list of conditions and the following disclaimer in the
- *	 documentation and/or other materials provided with the distribution.
- *     * Neither the names of the above-listed copyright holders nor the
- *	 names of any contributors may be used to endorse or promote products
- *	 derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #ifndef _SG_SW_QM2_H_
diff --git a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
index b0ba4331944b..ca549c5dc08e 100644
--- a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
+++ b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
@@ -308,21 +308,11 @@ void do_request_cleanup(struct cpt_vf *cptvf,
 		}
 	}
 
-	if (info->scatter_components)
-		kzfree(info->scatter_components);
-
-	if (info->gather_components)
-		kzfree(info->gather_components);
-
-	if (info->out_buffer)
-		kzfree(info->out_buffer);
-
-	if (info->in_buffer)
-		kzfree(info->in_buffer);
-
-	if (info->completion_addr)
-		kzfree((void *)info->completion_addr);
-
+	kzfree(info->scatter_components);
+	kzfree(info->gather_components);
+	kzfree(info->out_buffer);
+	kzfree(info->in_buffer);
+	kzfree((void *)info->completion_addr);
 	kzfree(info);
 }
 
diff --git a/drivers/crypto/cavium/nitrox/Makefile b/drivers/crypto/cavium/nitrox/Makefile
index 45b7379e8e30..e12954791673 100644
--- a/drivers/crypto/cavium/nitrox/Makefile
+++ b/drivers/crypto/cavium/nitrox/Makefile
@@ -7,3 +7,6 @@ n5pf-objs := nitrox_main.o \
 	nitrox_hal.o \
 	nitrox_reqmgr.o \
 	nitrox_algs.o
+
+n5pf-$(CONFIG_PCI_IOV) += nitrox_sriov.o
+n5pf-$(CONFIG_DEBUG_FS) += nitrox_debugfs.o
diff --git a/drivers/crypto/cavium/nitrox/nitrox_common.h b/drivers/crypto/cavium/nitrox/nitrox_common.h
index 312f72801af6..863143a8336b 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_common.h
+++ b/drivers/crypto/cavium/nitrox/nitrox_common.h
@@ -12,32 +12,15 @@ void crypto_free_context(void *ctx);
 struct nitrox_device *nitrox_get_first_device(void);
 void nitrox_put_device(struct nitrox_device *ndev);
 
-void nitrox_pf_cleanup_isr(struct nitrox_device *ndev);
-int nitrox_pf_init_isr(struct nitrox_device *ndev);
-
 int nitrox_common_sw_init(struct nitrox_device *ndev);
 void nitrox_common_sw_cleanup(struct nitrox_device *ndev);
 
-void pkt_slc_resp_handler(unsigned long data);
+void pkt_slc_resp_tasklet(unsigned long data);
 int nitrox_process_se_request(struct nitrox_device *ndev,
 			      struct se_crypto_request *req,
 			      completion_t cb,
 			      struct skcipher_request *skreq);
 void backlog_qflush_work(struct work_struct *work);
 
-void nitrox_config_emu_unit(struct nitrox_device *ndev);
-void nitrox_config_pkt_input_rings(struct nitrox_device *ndev);
-void nitrox_config_pkt_solicit_ports(struct nitrox_device *ndev);
-void nitrox_config_vfmode(struct nitrox_device *ndev, int mode);
-void nitrox_config_nps_unit(struct nitrox_device *ndev);
-void nitrox_config_pom_unit(struct nitrox_device *ndev);
-void nitrox_config_rand_unit(struct nitrox_device *ndev);
-void nitrox_config_efl_unit(struct nitrox_device *ndev);
-void nitrox_config_bmi_unit(struct nitrox_device *ndev);
-void nitrox_config_bmo_unit(struct nitrox_device *ndev);
-void nitrox_config_lbc_unit(struct nitrox_device *ndev);
-void invalidate_lbc(struct nitrox_device *ndev);
-void enable_pkt_input_ring(struct nitrox_device *ndev, int ring);
-void enable_pkt_solicit_port(struct nitrox_device *ndev, int port);
 
 #endif /* __NITROX_COMMON_H */
diff --git a/drivers/crypto/cavium/nitrox/nitrox_csr.h b/drivers/crypto/cavium/nitrox/nitrox_csr.h
index 9dcb7fdbe0a7..1ad27b1a87c5 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_csr.h
+++ b/drivers/crypto/cavium/nitrox/nitrox_csr.h
@@ -7,9 +7,16 @@
 
 /* EMU clusters */
 #define NR_CLUSTERS		4
+/* Maximum cores per cluster,
+ * varies based on partname
+ */
 #define AE_CORES_PER_CLUSTER	20
 #define SE_CORES_PER_CLUSTER	16
 
+#define AE_MAX_CORES	(AE_CORES_PER_CLUSTER * NR_CLUSTERS)
+#define SE_MAX_CORES	(SE_CORES_PER_CLUSTER * NR_CLUSTERS)
+#define ZIP_MAX_CORES	5
+
 /* BIST registers */
 #define EMU_BIST_STATUSX(_i)	(0x1402700 + ((_i) * 0x40000))
 #define UCD_BIST_STATUS		0x12C0070
@@ -111,6 +118,9 @@
 #define LBC_ELM_VF65_128_INT		0x120C000
 #define LBC_ELM_VF65_128_INT_ENA_W1S	0x120F000
 
+#define RST_BOOT	0x10C1600
+#define FUS_DAT1	0x10C1408
+
 /* PEM registers */
 #define PEM0_INT 0x1080428
 
@@ -1082,4 +1092,105 @@ union lbc_inval_status {
 	} s;
 };
 
+/**
+ * struct rst_boot: RST Boot Register
+ * @jtcsrdis: when set, internal CSR access via JTAG TAP controller
+ *   is disabled
+ * @jt_tst_mode: JTAG test mode
+ * @io_supply: I/O power supply setting based on IO_VDD_SELECT pin:
+ *    0x1 = 1.8V
+ *    0x2 = 2.5V
+ *    0x4 = 3.3V
+ *    All other values are reserved
+ * @pnr_mul: clock multiplier
+ * @lboot: last boot cause mask, resets only with PLL_DC_OK
+ * @rboot: determines whether core 0 remains in reset after
+ *    chip cold or warm or soft reset
+ * @rboot_pin: read only access to REMOTE_BOOT pin
+ */
+union rst_boot {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_63 : 1;
+		u64 jtcsrdis : 1;
+		u64 raz_59_61 : 3;
+		u64 jt_tst_mode : 1;
+		u64 raz_40_57 : 18;
+		u64 io_supply : 3;
+		u64 raz_30_36 : 7;
+		u64 pnr_mul : 6;
+		u64 raz_12_23 : 12;
+		u64 lboot : 10;
+		u64 rboot : 1;
+		u64 rboot_pin : 1;
+#else
+		u64 rboot_pin : 1;
+		u64 rboot : 1;
+		u64 lboot : 10;
+		u64 raz_12_23 : 12;
+		u64 pnr_mul : 6;
+		u64 raz_30_36 : 7;
+		u64 io_supply : 3;
+		u64 raz_40_57 : 18;
+		u64 jt_tst_mode : 1;
+		u64 raz_59_61 : 3;
+		u64 jtcsrdis : 1;
+		u64 raz_63 : 1;
+#endif
+	};
+};
+
+/**
+ * struct fus_dat1: Fuse Data 1 Register
+ * @pll_mul: main clock PLL multiplier hardware limit
+ * @pll_half_dis: main clock PLL control
+ * @efus_lck: efuse lockdown
+ * @zip_info: ZIP information
+ * @bar2_sz_conf: when zero, BAR2 size conforms to
+ *    PCIe specification
+ * @efus_ign: efuse ignore
+ * @nozip: ZIP disable
+ * @pll_alt_matrix: select alternate PLL matrix
+ * @pll_bwadj_denom: select CLKF denominator for
+ *    BWADJ value
+ * @chip_id: chip ID
+ */
+union fus_dat1 {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_57_63 : 7;
+		u64 pll_mul : 3;
+		u64 pll_half_dis : 1;
+		u64 raz_43_52 : 10;
+		u64 efus_lck : 3;
+		u64 raz_26_39 : 14;
+		u64 zip_info : 5;
+		u64 bar2_sz_conf : 1;
+		u64 efus_ign : 1;
+		u64 nozip : 1;
+		u64 raz_11_17 : 7;
+		u64 pll_alt_matrix : 1;
+		u64 pll_bwadj_denom : 2;
+		u64 chip_id : 8;
+#else
+		u64 chip_id : 8;
+		u64 pll_bwadj_denom : 2;
+		u64 pll_alt_matrix : 1;
+		u64 raz_11_17 : 7;
+		u64 nozip : 1;
+		u64 efus_ign : 1;
+		u64 bar2_sz_conf : 1;
+		u64 zip_info : 5;
+		u64 raz_26_39 : 14;
+		u64 efus_lck : 3;
+		u64 raz_43_52 : 10;
+		u64 pll_half_dis : 1;
+		u64 pll_mul : 3;
+		u64 raz_57_63 : 7;
+#endif
+	};
+};
+
 #endif /* __NITROX_CSR_H */
diff --git a/drivers/crypto/cavium/nitrox/nitrox_debugfs.c b/drivers/crypto/cavium/nitrox/nitrox_debugfs.c
new file mode 100644
index 000000000000..5f3cd5fafe04
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/nitrox_debugfs.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+
+#include "nitrox_csr.h"
+#include "nitrox_dev.h"
+
+static int firmware_show(struct seq_file *s, void *v)
+{
+	struct nitrox_device *ndev = s->private;
+
+	seq_printf(s, "Version: %s\n", ndev->hw.fw_name);
+	return 0;
+}
+
+static int firmware_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, firmware_show, inode->i_private);
+}
+
+static const struct file_operations firmware_fops = {
+	.owner = THIS_MODULE,
+	.open = firmware_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int device_show(struct seq_file *s, void *v)
+{
+	struct nitrox_device *ndev = s->private;
+
+	seq_printf(s, "NITROX [%d]\n", ndev->idx);
+	seq_printf(s, "  Part Name: %s\n", ndev->hw.partname);
+	seq_printf(s, "  Frequency: %d MHz\n", ndev->hw.freq);
+	seq_printf(s, "  Device ID: 0x%0x\n", ndev->hw.device_id);
+	seq_printf(s, "  Revision ID: 0x%0x\n", ndev->hw.revision_id);
+	seq_printf(s, "  Cores: [AE=%u  SE=%u  ZIP=%u]\n",
+		   ndev->hw.ae_cores, ndev->hw.se_cores, ndev->hw.zip_cores);
+
+	return 0;
+}
+
+static int nitrox_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, device_show, inode->i_private);
+}
+
+static const struct file_operations nitrox_fops = {
+	.owner = THIS_MODULE,
+	.open = nitrox_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int stats_show(struct seq_file *s, void *v)
+{
+	struct nitrox_device *ndev = s->private;
+
+	seq_printf(s, "NITROX [%d] Request Statistics\n", ndev->idx);
+	seq_printf(s, "  Posted: %llu\n",
+		   (u64)atomic64_read(&ndev->stats.posted));
+	seq_printf(s, "  Completed: %llu\n",
+		   (u64)atomic64_read(&ndev->stats.completed));
+	seq_printf(s, "  Dropped: %llu\n",
+		   (u64)atomic64_read(&ndev->stats.dropped));
+
+	return 0;
+}
+
+static int nitrox_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, stats_show, inode->i_private);
+}
+
+static const struct file_operations nitrox_stats_fops = {
+	.owner = THIS_MODULE,
+	.open = nitrox_stats_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void nitrox_debugfs_exit(struct nitrox_device *ndev)
+{
+	debugfs_remove_recursive(ndev->debugfs_dir);
+	ndev->debugfs_dir = NULL;
+}
+
+int nitrox_debugfs_init(struct nitrox_device *ndev)
+{
+	struct dentry *dir, *f;
+
+	dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	if (!dir)
+		return -ENOMEM;
+
+	ndev->debugfs_dir = dir;
+	f = debugfs_create_file("firmware", 0400, dir, ndev, &firmware_fops);
+	if (!f)
+		goto err;
+	f = debugfs_create_file("device", 0400, dir, ndev, &nitrox_fops);
+	if (!f)
+		goto err;
+	f = debugfs_create_file("stats", 0400, dir, ndev, &nitrox_stats_fops);
+	if (!f)
+		goto err;
+
+	return 0;
+
+err:
+	nitrox_debugfs_exit(ndev);
+	return -ENODEV;
+}
diff --git a/drivers/crypto/cavium/nitrox/nitrox_dev.h b/drivers/crypto/cavium/nitrox/nitrox_dev.h
index af596455b420..283e252385fb 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_dev.h
+++ b/drivers/crypto/cavium/nitrox/nitrox_dev.h
@@ -5,92 +5,123 @@
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
+#include <linux/if.h>
 
 #define VERSION_LEN 32
 
+/**
+ * struct nitrox_cmdq - NITROX command queue
+ * @cmd_qlock: command queue lock
+ * @resp_qlock: response queue lock
+ * @backlog_qlock: backlog queue lock
+ * @ndev: NITROX device
+ * @response_head: submitted request list
+ * @backlog_head: backlog queue
+ * @dbell_csr_addr: doorbell register address for this queue
+ * @compl_cnt_csr_addr: completion count register address of the slc port
+ * @base: command queue base address
+ * @dma: dma address of the base
+ * @pending_count: request pending at device
+ * @backlog_count: backlog request count
+ * @write_idx: next write index for the command
+ * @instr_size: command size
+ * @qno: command queue number
+ * @qsize: command queue size
+ * @unalign_base: unaligned base address
+ * @unalign_dma: unaligned dma address
+ */
 struct nitrox_cmdq {
-	/* command queue lock */
-	spinlock_t cmdq_lock;
-	/* response list lock */
-	spinlock_t response_lock;
-	/* backlog list lock */
-	spinlock_t backlog_lock;
-
-	/* request submitted to chip, in progress */
+	spinlock_t cmd_qlock;
+	spinlock_t resp_qlock;
+	spinlock_t backlog_qlock;
+
+	struct nitrox_device *ndev;
 	struct list_head response_head;
-	/* hw queue full, hold in backlog list */
 	struct list_head backlog_head;
 
-	/* doorbell address */
 	u8 __iomem *dbell_csr_addr;
-	/* base address of the queue */
-	u8 *head;
+	u8 __iomem *compl_cnt_csr_addr;
+	u8 *base;
+	dma_addr_t dma;
 
-	struct nitrox_device *ndev;
-	/* flush pending backlog commands */
 	struct work_struct backlog_qflush;
 
-	/* requests posted waiting for completion */
 	atomic_t pending_count;
-	/* requests in backlog queues */
 	atomic_t backlog_count;
 
 	int write_idx;
-	/* command size 32B/64B */
 	u8 instr_size;
 	u8 qno;
 	u32 qsize;
 
-	/* unaligned addresses */
-	u8 *head_unaligned;
-	dma_addr_t dma_unaligned;
-	/* dma address of the base */
-	dma_addr_t dma;
+	u8 *unalign_base;
+	dma_addr_t unalign_dma;
 };
 
+/**
+ * struct nitrox_hw - NITROX hardware information
+ * @partname: partname ex: CNN55xxx-xxx
+ * @fw_name: firmware version
+ * @freq: NITROX frequency
+ * @vendor_id: vendor ID
+ * @device_id: device ID
+ * @revision_id: revision ID
+ * @se_cores: number of symmetric cores
+ * @ae_cores: number of asymmetric cores
+ * @zip_cores: number of zip cores
+ */
 struct nitrox_hw {
-	/* firmware version */
+	char partname[IFNAMSIZ * 2];
 	char fw_name[VERSION_LEN];
 
+	int freq;
 	u16 vendor_id;
 	u16 device_id;
 	u8 revision_id;
 
-	/* CNN55XX cores */
 	u8 se_cores;
 	u8 ae_cores;
 	u8 zip_cores;
 };
 
-#define MAX_MSIX_VECTOR_NAME	20
-/**
- * vectors for queues (64 AE, 64 SE and 64 ZIP) and
- * error condition/mailbox.
- */
-#define MAX_MSIX_VECTORS	192
-
-struct nitrox_msix {
-	struct msix_entry *entries;
-	char **names;
-	DECLARE_BITMAP(irqs, MAX_MSIX_VECTORS);
-	u32 nr_entries;
+struct nitrox_stats {
+	atomic64_t posted;
+	atomic64_t completed;
+	atomic64_t dropped;
 };
 
-struct bh_data {
-	/* slc port completion count address */
-	u8 __iomem *completion_cnt_csr_addr;
+#define IRQ_NAMESZ	32
+
+struct nitrox_q_vector {
+	char name[IRQ_NAMESZ];
+	bool valid;
+	int ring;
+	struct tasklet_struct resp_tasklet;
+	union {
+		struct nitrox_cmdq *cmdq;
+		struct nitrox_device *ndev;
+	};
+};
 
-	struct nitrox_cmdq *cmdq;
-	struct tasklet_struct resp_handler;
+/*
+ * NITROX Device states
+ */
+enum ndev_state {
+	__NDEV_NOT_READY,
+	__NDEV_READY,
+	__NDEV_IN_RESET,
 };
 
-struct nitrox_bh {
-	struct bh_data *slc;
+/* NITROX support modes for VF(s) */
+enum vf_mode {
+	__NDEV_MODE_PF,
+	__NDEV_MODE_VF16,
+	__NDEV_MODE_VF32,
+	__NDEV_MODE_VF64,
+	__NDEV_MODE_VF128,
 };
 
-/* NITROX-V driver state */
-#define NITROX_UCODE_LOADED	0
-#define NITROX_READY		1
+#define __NDEV_SRIOV_BIT 0
 
 /* command queue size */
 #define DEFAULT_CMD_QLEN 2048
@@ -98,7 +129,6 @@ struct nitrox_bh {
 #define CMD_TIMEOUT 2000
 
 #define DEV(ndev) ((struct device *)(&(ndev)->pdev->dev))
-#define PF_MODE 0
 
 #define NITROX_CSR_ADDR(ndev, offset) \
 	((ndev)->bar_addr + (offset))
@@ -108,17 +138,18 @@ struct nitrox_bh {
  * @list: pointer to linked list of devices
  * @bar_addr: iomap address
  * @pdev: PCI device information
- * @status: NITROX status
+ * @state: NITROX device state
+ * @flags: flags to indicate device the features
  * @timeout: Request timeout in jiffies
  * @refcnt: Device usage count
  * @idx: device index (0..N)
  * @node: NUMA node id attached
  * @qlen: Command queue length
  * @nr_queues: Number of command queues
+ * @mode: Device mode PF/VF
  * @ctx_pool: DMA pool for crypto context
- * @pkt_cmdqs: SE Command queues
- * @msix: MSI-X information
- * @bh: post processing work
+ * @pkt_inq: Packet input rings
+ * @qvec: MSI-X queue vectors information
  * @hw: hardware information
  * @debugfs_dir: debugfs directory
  */
@@ -128,7 +159,8 @@ struct nitrox_device {
 	u8 __iomem *bar_addr;
 	struct pci_dev *pdev;
 
-	unsigned long status;
+	atomic_t state;
+	unsigned long flags;
 	unsigned long timeout;
 	refcount_t refcnt;
 
@@ -136,13 +168,16 @@ struct nitrox_device {
 	int node;
 	u16 qlen;
 	u16 nr_queues;
+	int num_vfs;
+	enum vf_mode mode;
 
 	struct dma_pool *ctx_pool;
-	struct nitrox_cmdq *pkt_cmdqs;
+	struct nitrox_cmdq *pkt_inq;
 
-	struct nitrox_msix msix;
-	struct nitrox_bh bh;
+	struct nitrox_q_vector *qvec;
+	int num_vecs;
 
+	struct nitrox_stats stats;
 	struct nitrox_hw hw;
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	struct dentry *debugfs_dir;
@@ -173,9 +208,22 @@ static inline void nitrox_write_csr(struct nitrox_device *ndev, u64 offset,
 	writeq(value, (ndev->bar_addr + offset));
 }
 
-static inline int nitrox_ready(struct nitrox_device *ndev)
+static inline bool nitrox_ready(struct nitrox_device *ndev)
 {
-	return test_bit(NITROX_READY, &ndev->status);
+	return atomic_read(&ndev->state) == __NDEV_READY;
 }
 
+#ifdef CONFIG_DEBUG_FS
+int nitrox_debugfs_init(struct nitrox_device *ndev);
+void nitrox_debugfs_exit(struct nitrox_device *ndev);
+#else
+static inline int nitrox_debugfs_init(struct nitrox_device *ndev)
+{
+	return 0;
+}
+
+static inline void nitrox_debugfs_exit(struct nitrox_device *ndev)
+{ }
+#endif
+
 #endif /* __NITROX_DEV_H */
diff --git a/drivers/crypto/cavium/nitrox/nitrox_hal.c b/drivers/crypto/cavium/nitrox/nitrox_hal.c
index ab4ccf2f9e77..a9b82387cf53 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_hal.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_hal.c
@@ -4,6 +4,8 @@
 #include "nitrox_dev.h"
 #include "nitrox_csr.h"
 
+#define PLL_REF_CLK 50
+
 /**
  * emu_enable_cores - Enable EMU cluster cores.
  * @ndev: N5 device
@@ -117,7 +119,7 @@ void nitrox_config_pkt_input_rings(struct nitrox_device *ndev)
 	int i;
 
 	for (i = 0; i < ndev->nr_queues; i++) {
-		struct nitrox_cmdq *cmdq = &ndev->pkt_cmdqs[i];
+		struct nitrox_cmdq *cmdq = &ndev->pkt_inq[i];
 		union nps_pkt_in_instr_rsize pkt_in_rsize;
 		u64 offset;
 
@@ -256,7 +258,7 @@ void nitrox_config_nps_unit(struct nitrox_device *ndev)
 	/* disable ILK interface */
 	core_gbl_vfcfg.value = 0;
 	core_gbl_vfcfg.s.ilk_disable = 1;
-	core_gbl_vfcfg.s.cfg = PF_MODE;
+	core_gbl_vfcfg.s.cfg = __NDEV_MODE_PF;
 	nitrox_write_csr(ndev, NPS_CORE_GBL_VFCFG, core_gbl_vfcfg.value);
 	/* config input and solicit ports */
 	nitrox_config_pkt_input_rings(ndev);
@@ -400,3 +402,68 @@ void nitrox_config_lbc_unit(struct nitrox_device *ndev)
 	offset = LBC_ELM_VF65_128_INT_ENA_W1S;
 	nitrox_write_csr(ndev, offset, (~0ULL));
 }
+
+void config_nps_core_vfcfg_mode(struct nitrox_device *ndev, enum vf_mode mode)
+{
+	union nps_core_gbl_vfcfg vfcfg;
+
+	vfcfg.value = nitrox_read_csr(ndev, NPS_CORE_GBL_VFCFG);
+	vfcfg.s.cfg = mode & 0x7;
+
+	nitrox_write_csr(ndev, NPS_CORE_GBL_VFCFG, vfcfg.value);
+}
+
+void nitrox_get_hwinfo(struct nitrox_device *ndev)
+{
+	union emu_fuse_map emu_fuse;
+	union rst_boot rst_boot;
+	union fus_dat1 fus_dat1;
+	unsigned char name[IFNAMSIZ * 2] = {};
+	int i, dead_cores;
+	u64 offset;
+
+	/* get core frequency */
+	offset = RST_BOOT;
+	rst_boot.value = nitrox_read_csr(ndev, offset);
+	ndev->hw.freq = (rst_boot.pnr_mul + 3) * PLL_REF_CLK;
+
+	for (i = 0; i < NR_CLUSTERS; i++) {
+		offset = EMU_FUSE_MAPX(i);
+		emu_fuse.value = nitrox_read_csr(ndev, offset);
+		if (emu_fuse.s.valid) {
+			dead_cores = hweight32(emu_fuse.s.ae_fuse);
+			ndev->hw.ae_cores += AE_CORES_PER_CLUSTER - dead_cores;
+			dead_cores = hweight16(emu_fuse.s.se_fuse);
+			ndev->hw.se_cores += SE_CORES_PER_CLUSTER - dead_cores;
+		}
+	}
+	/* find zip hardware availability */
+	offset = FUS_DAT1;
+	fus_dat1.value = nitrox_read_csr(ndev, offset);
+	if (!fus_dat1.nozip) {
+		dead_cores = hweight8(fus_dat1.zip_info);
+		ndev->hw.zip_cores = ZIP_MAX_CORES - dead_cores;
+	}
+
+	/* determine the partname CNN55<cores>-<freq><pincount>-<rev>*/
+	if (ndev->hw.ae_cores == AE_MAX_CORES) {
+		switch (ndev->hw.se_cores) {
+		case SE_MAX_CORES:
+			i = snprintf(name, sizeof(name), "CNN5560");
+			break;
+		case 40:
+			i = snprintf(name, sizeof(name), "CNN5560s");
+			break;
+		}
+	} else if (ndev->hw.ae_cores == (AE_MAX_CORES / 2)) {
+		i = snprintf(name, sizeof(name), "CNN5530");
+	} else {
+		i = snprintf(name, sizeof(name), "CNN5560i");
+	}
+
+	snprintf(name + i, sizeof(name) - i, "-%3dBG676-1.%u",
+		 ndev->hw.freq, ndev->hw.revision_id);
+
+	/* copy partname */
+	strncpy(ndev->hw.partname, name, sizeof(ndev->hw.partname));
+}
diff --git a/drivers/crypto/cavium/nitrox/nitrox_hal.h b/drivers/crypto/cavium/nitrox/nitrox_hal.h
new file mode 100644
index 000000000000..489ee64c119e
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/nitrox_hal.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NITROX_HAL_H
+#define __NITROX_HAL_H
+
+#include "nitrox_dev.h"
+
+void nitrox_config_emu_unit(struct nitrox_device *ndev);
+void nitrox_config_pkt_input_rings(struct nitrox_device *ndev);
+void nitrox_config_pkt_solicit_ports(struct nitrox_device *ndev);
+void nitrox_config_nps_unit(struct nitrox_device *ndev);
+void nitrox_config_pom_unit(struct nitrox_device *ndev);
+void nitrox_config_rand_unit(struct nitrox_device *ndev);
+void nitrox_config_efl_unit(struct nitrox_device *ndev);
+void nitrox_config_bmi_unit(struct nitrox_device *ndev);
+void nitrox_config_bmo_unit(struct nitrox_device *ndev);
+void nitrox_config_lbc_unit(struct nitrox_device *ndev);
+void invalidate_lbc(struct nitrox_device *ndev);
+void enable_pkt_input_ring(struct nitrox_device *ndev, int ring);
+void enable_pkt_solicit_port(struct nitrox_device *ndev, int port);
+void config_nps_core_vfcfg_mode(struct nitrox_device *ndev, enum vf_mode mode);
+void nitrox_get_hwinfo(struct nitrox_device *ndev);
+
+#endif /* __NITROX_HAL_H */
diff --git a/drivers/crypto/cavium/nitrox/nitrox_isr.c b/drivers/crypto/cavium/nitrox/nitrox_isr.c
index ee0d70ba25d5..88a77b8fb3fb 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_isr.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_isr.c
@@ -6,9 +6,16 @@
 #include "nitrox_dev.h"
 #include "nitrox_csr.h"
 #include "nitrox_common.h"
+#include "nitrox_hal.h"
 
+/**
+ * One vector for each type of ring
+ *  - NPS packet ring, AQMQ ring and ZQMQ ring
+ */
 #define NR_RING_VECTORS 3
-#define NPS_CORE_INT_ACTIVE_ENTRY 192
+/* base entry for packet ring/port */
+#define PKT_RING_MSIX_BASE 0
+#define NON_RING_MSIX_BASE 192
 
 /**
  * nps_pkt_slc_isr - IRQ handler for NPS solicit port
@@ -17,13 +24,14 @@
  */
 static irqreturn_t nps_pkt_slc_isr(int irq, void *data)
 {
-	struct bh_data *slc = data;
-	union nps_pkt_slc_cnts pkt_slc_cnts;
+	struct nitrox_q_vector *qvec = data;
+	union nps_pkt_slc_cnts slc_cnts;
+	struct nitrox_cmdq *cmdq = qvec->cmdq;
 
-	pkt_slc_cnts.value = readq(slc->completion_cnt_csr_addr);
+	slc_cnts.value = readq(cmdq->compl_cnt_csr_addr);
 	/* New packet on SLC output port */
-	if (pkt_slc_cnts.s.slc_int)
-		tasklet_hi_schedule(&slc->resp_handler);
+	if (slc_cnts.s.slc_int)
+		tasklet_hi_schedule(&qvec->resp_tasklet);
 
 	return IRQ_HANDLED;
 }
@@ -190,165 +198,92 @@ static void clear_bmi_err_intr(struct nitrox_device *ndev)
 	dev_err_ratelimited(DEV(ndev), "BMI_INT  0x%016llx\n", value);
 }
 
+static void nps_core_int_tasklet(unsigned long data)
+{
+	struct nitrox_q_vector *qvec = (void *)(uintptr_t)(data);
+	struct nitrox_device *ndev = qvec->ndev;
+
+	/* if pf mode do queue recovery */
+	if (ndev->mode == __NDEV_MODE_PF) {
+	} else {
+		/**
+		 * if VF(s) enabled communicate the error information
+		 * to VF(s)
+		 */
+	}
+}
+
 /**
- * clear_nps_core_int_active - clear NPS_CORE_INT_ACTIVE interrupts
- * @ndev: NITROX device
+ * nps_core_int_isr - interrupt handler for NITROX errors and
+ *   mailbox communication
  */
-static void clear_nps_core_int_active(struct nitrox_device *ndev)
+static irqreturn_t nps_core_int_isr(int irq, void *data)
 {
-	union nps_core_int_active core_int_active;
+	struct nitrox_device *ndev = data;
+	union nps_core_int_active core_int;
 
-	core_int_active.value = nitrox_read_csr(ndev, NPS_CORE_INT_ACTIVE);
+	core_int.value = nitrox_read_csr(ndev, NPS_CORE_INT_ACTIVE);
 
-	if (core_int_active.s.nps_core)
+	if (core_int.s.nps_core)
 		clear_nps_core_err_intr(ndev);
 
-	if (core_int_active.s.nps_pkt)
+	if (core_int.s.nps_pkt)
 		clear_nps_pkt_err_intr(ndev);
 
-	if (core_int_active.s.pom)
+	if (core_int.s.pom)
 		clear_pom_err_intr(ndev);
 
-	if (core_int_active.s.pem)
+	if (core_int.s.pem)
 		clear_pem_err_intr(ndev);
 
-	if (core_int_active.s.lbc)
+	if (core_int.s.lbc)
 		clear_lbc_err_intr(ndev);
 
-	if (core_int_active.s.efl)
+	if (core_int.s.efl)
 		clear_efl_err_intr(ndev);
 
-	if (core_int_active.s.bmi)
+	if (core_int.s.bmi)
 		clear_bmi_err_intr(ndev);
 
 	/* If more work callback the ISR, set resend */
-	core_int_active.s.resend = 1;
-	nitrox_write_csr(ndev, NPS_CORE_INT_ACTIVE, core_int_active.value);
-}
-
-static irqreturn_t nps_core_int_isr(int irq, void *data)
-{
-	struct nitrox_device *ndev = data;
-
-	clear_nps_core_int_active(ndev);
+	core_int.s.resend = 1;
+	nitrox_write_csr(ndev, NPS_CORE_INT_ACTIVE, core_int.value);
 
 	return IRQ_HANDLED;
 }
 
-static int nitrox_enable_msix(struct nitrox_device *ndev)
+void nitrox_unregister_interrupts(struct nitrox_device *ndev)
 {
-	struct msix_entry *entries;
-	char **names;
-	int i, nr_entries, ret;
-
-	/*
-	 * PF MSI-X vectors
-	 *
-	 * Entry 0: NPS PKT ring 0
-	 * Entry 1: AQMQ ring 0
-	 * Entry 2: ZQM ring 0
-	 * Entry 3: NPS PKT ring 1
-	 * Entry 4: AQMQ ring 1
-	 * Entry 5: ZQM ring 1
-	 * ....
-	 * Entry 192: NPS_CORE_INT_ACTIVE
-	 */
-	nr_entries = (ndev->nr_queues * NR_RING_VECTORS) + 1;
-	entries = kcalloc_node(nr_entries, sizeof(struct msix_entry),
-			       GFP_KERNEL, ndev->node);
-	if (!entries)
-		return -ENOMEM;
-
-	names = kcalloc(nr_entries, sizeof(char *), GFP_KERNEL);
-	if (!names) {
-		kfree(entries);
-		return -ENOMEM;
-	}
-
-	/* fill entires */
-	for (i = 0; i < (nr_entries - 1); i++)
-		entries[i].entry = i;
-
-	entries[i].entry = NPS_CORE_INT_ACTIVE_ENTRY;
-
-	for (i = 0; i < nr_entries; i++) {
-		*(names + i) = kzalloc(MAX_MSIX_VECTOR_NAME, GFP_KERNEL);
-		if (!(*(names + i))) {
-			ret = -ENOMEM;
-			goto msix_fail;
-		}
-	}
-	ndev->msix.entries = entries;
-	ndev->msix.names = names;
-	ndev->msix.nr_entries = nr_entries;
-
-	ret = pci_enable_msix_exact(ndev->pdev, ndev->msix.entries,
-				    ndev->msix.nr_entries);
-	if (ret) {
-		dev_err(&ndev->pdev->dev, "Failed to enable MSI-X IRQ(s) %d\n",
-			ret);
-		goto msix_fail;
-	}
-	return 0;
-
-msix_fail:
-	for (i = 0; i < nr_entries; i++)
-		kfree(*(names + i));
-
-	kfree(entries);
-	kfree(names);
-	return ret;
-}
-
-static void nitrox_cleanup_pkt_slc_bh(struct nitrox_device *ndev)
-{
-	int i;
-
-	if (!ndev->bh.slc)
-		return;
-
-	for (i = 0; i < ndev->nr_queues; i++) {
-		struct bh_data *bh = &ndev->bh.slc[i];
-
-		tasklet_disable(&bh->resp_handler);
-		tasklet_kill(&bh->resp_handler);
-	}
-	kfree(ndev->bh.slc);
-	ndev->bh.slc = NULL;
-}
-
-static int nitrox_setup_pkt_slc_bh(struct nitrox_device *ndev)
-{
-	u32 size;
+	struct pci_dev *pdev = ndev->pdev;
 	int i;
 
-	size = ndev->nr_queues * sizeof(struct bh_data);
-	ndev->bh.slc = kzalloc(size, GFP_KERNEL);
-	if (!ndev->bh.slc)
-		return -ENOMEM;
+	for (i = 0; i < ndev->num_vecs; i++) {
+		struct nitrox_q_vector *qvec;
+		int vec;
 
-	for (i = 0; i < ndev->nr_queues; i++) {
-		struct bh_data *bh = &ndev->bh.slc[i];
-		u64 offset;
+		qvec = ndev->qvec + i;
+		if (!qvec->valid)
+			continue;
 
-		offset = NPS_PKT_SLC_CNTSX(i);
-		/* pre calculate completion count address */
-		bh->completion_cnt_csr_addr = NITROX_CSR_ADDR(ndev, offset);
-		bh->cmdq = &ndev->pkt_cmdqs[i];
+		/* get the vector number */
+		vec = pci_irq_vector(pdev, i);
+		irq_set_affinity_hint(vec, NULL);
+		free_irq(vec, qvec);
 
-		tasklet_init(&bh->resp_handler, pkt_slc_resp_handler,
-			     (unsigned long)bh);
+		tasklet_disable(&qvec->resp_tasklet);
+		tasklet_kill(&qvec->resp_tasklet);
+		qvec->valid = false;
 	}
-
-	return 0;
+	kfree(ndev->qvec);
+	pci_free_irq_vectors(pdev);
 }
 
-static int nitrox_request_irqs(struct nitrox_device *ndev)
+int nitrox_register_interrupts(struct nitrox_device *ndev)
 {
 	struct pci_dev *pdev = ndev->pdev;
-	struct msix_entry *msix_ent = ndev->msix.entries;
-	int nr_ring_vectors, i = 0, ring, cpu, ret;
-	char *name;
+	struct nitrox_q_vector *qvec;
+	int nr_vecs, vec, cpu;
+	int ret, i;
 
 	/*
 	 * PF MSI-X vectors
@@ -357,112 +292,76 @@ static int nitrox_request_irqs(struct nitrox_device *ndev)
 	 * Entry 1: AQMQ ring 0
 	 * Entry 2: ZQM ring 0
 	 * Entry 3: NPS PKT ring 1
+	 * Entry 4: AQMQ ring 1
+	 * Entry 5: ZQM ring 1
 	 * ....
 	 * Entry 192: NPS_CORE_INT_ACTIVE
 	 */
-	nr_ring_vectors = ndev->nr_queues * NR_RING_VECTORS;
-
-	/* request irq for pkt ring/ports only */
-	while (i < nr_ring_vectors) {
-		name = *(ndev->msix.names + i);
-		ring = (i / NR_RING_VECTORS);
-		snprintf(name, MAX_MSIX_VECTOR_NAME, "n5(%d)-slc-ring%d",
-			 ndev->idx, ring);
+	nr_vecs = pci_msix_vec_count(pdev);
 
-		ret = request_irq(msix_ent[i].vector, nps_pkt_slc_isr, 0,
-				  name, &ndev->bh.slc[ring]);
-		if (ret) {
-			dev_err(&pdev->dev, "failed to get irq %d for %s\n",
-				msix_ent[i].vector, name);
-			return ret;
-		}
-		cpu = ring % num_online_cpus();
-		irq_set_affinity_hint(msix_ent[i].vector, get_cpu_mask(cpu));
-
-		set_bit(i, ndev->msix.irqs);
-		i += NR_RING_VECTORS;
-	}
-
-	/* Request IRQ for NPS_CORE_INT_ACTIVE */
-	name = *(ndev->msix.names + i);
-	snprintf(name, MAX_MSIX_VECTOR_NAME, "n5(%d)-nps-core-int", ndev->idx);
-	ret = request_irq(msix_ent[i].vector, nps_core_int_isr, 0, name, ndev);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to get irq %d for %s\n",
-			msix_ent[i].vector, name);
+	/* Enable MSI-X */
+	ret = pci_alloc_irq_vectors(pdev, nr_vecs, nr_vecs, PCI_IRQ_MSIX);
+	if (ret < 0) {
+		dev_err(DEV(ndev), "msix vectors %d alloc failed\n", nr_vecs);
 		return ret;
 	}
-	set_bit(i, ndev->msix.irqs);
+	ndev->num_vecs = nr_vecs;
 
-	return 0;
-}
-
-static void nitrox_disable_msix(struct nitrox_device *ndev)
-{
-	struct msix_entry *msix_ent = ndev->msix.entries;
-	char **names = ndev->msix.names;
-	int i = 0, ring, nr_ring_vectors;
-
-	nr_ring_vectors = ndev->msix.nr_entries - 1;
-
-	/* clear pkt ring irqs */
-	while (i < nr_ring_vectors) {
-		if (test_and_clear_bit(i, ndev->msix.irqs)) {
-			ring = (i / NR_RING_VECTORS);
-			irq_set_affinity_hint(msix_ent[i].vector, NULL);
-			free_irq(msix_ent[i].vector, &ndev->bh.slc[ring]);
-		}
-		i += NR_RING_VECTORS;
+	ndev->qvec = kcalloc(nr_vecs, sizeof(*qvec), GFP_KERNEL);
+	if (!ndev->qvec) {
+		pci_free_irq_vectors(pdev);
+		return -ENOMEM;
 	}
-	irq_set_affinity_hint(msix_ent[i].vector, NULL);
-	free_irq(msix_ent[i].vector, ndev);
-	clear_bit(i, ndev->msix.irqs);
 
-	kfree(ndev->msix.entries);
-	for (i = 0; i < ndev->msix.nr_entries; i++)
-		kfree(*(names + i));
+	/* request irqs for packet rings/ports */
+	for (i = PKT_RING_MSIX_BASE; i < (nr_vecs - 1); i += NR_RING_VECTORS) {
+		qvec = &ndev->qvec[i];
 
-	kfree(names);
-	pci_disable_msix(ndev->pdev);
-}
-
-/**
- * nitrox_pf_cleanup_isr: Cleanup PF MSI-X and IRQ
- * @ndev: NITROX device
- */
-void nitrox_pf_cleanup_isr(struct nitrox_device *ndev)
-{
-	nitrox_disable_msix(ndev);
-	nitrox_cleanup_pkt_slc_bh(ndev);
-}
+		qvec->ring = i / NR_RING_VECTORS;
+		if (qvec->ring >= ndev->nr_queues)
+			break;
 
-/**
- * nitrox_init_isr - Initialize PF MSI-X vectors and IRQ
- * @ndev: NITROX device
- *
- * Return: 0 on success, a negative value on failure.
- */
-int nitrox_pf_init_isr(struct nitrox_device *ndev)
-{
-	int err;
+		snprintf(qvec->name, IRQ_NAMESZ, "nitrox-pkt%d", qvec->ring);
+		/* get the vector number */
+		vec = pci_irq_vector(pdev, i);
+		ret = request_irq(vec, nps_pkt_slc_isr, 0, qvec->name, qvec);
+		if (ret) {
+			dev_err(DEV(ndev), "irq failed for pkt ring/port%d\n",
+				qvec->ring);
+			goto irq_fail;
+		}
+		cpu = qvec->ring % num_online_cpus();
+		irq_set_affinity_hint(vec, get_cpu_mask(cpu));
 
-	err = nitrox_setup_pkt_slc_bh(ndev);
-	if (err)
-		return err;
+		tasklet_init(&qvec->resp_tasklet, pkt_slc_resp_tasklet,
+			     (unsigned long)qvec);
+		qvec->cmdq = &ndev->pkt_inq[qvec->ring];
+		qvec->valid = true;
+	}
 
-	err = nitrox_enable_msix(ndev);
-	if (err)
-		goto msix_fail;
+	/* request irqs for non ring vectors */
+	i = NON_RING_MSIX_BASE;
+	qvec = &ndev->qvec[i];
 
-	err = nitrox_request_irqs(ndev);
-	if (err)
+	snprintf(qvec->name, IRQ_NAMESZ, "nitrox-core-int%d", i);
+	/* get the vector number */
+	vec = pci_irq_vector(pdev, i);
+	ret = request_irq(vec, nps_core_int_isr, 0, qvec->name, qvec);
+	if (ret) {
+		dev_err(DEV(ndev), "irq failed for nitrox-core-int%d\n", i);
 		goto irq_fail;
+	}
+	cpu = num_online_cpus();
+	irq_set_affinity_hint(vec, get_cpu_mask(cpu));
+
+	tasklet_init(&qvec->resp_tasklet, nps_core_int_tasklet,
+		     (unsigned long)qvec);
+	qvec->ndev = ndev;
+	qvec->valid = true;
 
 	return 0;
 
 irq_fail:
-	nitrox_disable_msix(ndev);
-msix_fail:
-	nitrox_cleanup_pkt_slc_bh(ndev);
-	return err;
+	nitrox_unregister_interrupts(ndev);
+	return ret;
 }
diff --git a/drivers/crypto/cavium/nitrox/nitrox_isr.h b/drivers/crypto/cavium/nitrox/nitrox_isr.h
new file mode 100644
index 000000000000..63418a6cc52c
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/nitrox_isr.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NITROX_ISR_H
+#define __NITROX_ISR_H
+
+#include "nitrox_dev.h"
+
+int nitrox_register_interrupts(struct nitrox_device *ndev);
+void nitrox_unregister_interrupts(struct nitrox_device *ndev);
+
+#endif /* __NITROX_ISR_H */
diff --git a/drivers/crypto/cavium/nitrox/nitrox_lib.c b/drivers/crypto/cavium/nitrox/nitrox_lib.c
index 4d31df07777f..2260efa42308 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_lib.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_lib.c
@@ -17,30 +17,27 @@
 
 #define CRYPTO_CTX_SIZE	256
 
-/* command queue alignments */
-#define PKT_IN_ALIGN	16
+/* packet inuput ring alignments */
+#define PKTIN_Q_ALIGN_BYTES 16
 
-static int cmdq_common_init(struct nitrox_cmdq *cmdq)
+static int nitrox_cmdq_init(struct nitrox_cmdq *cmdq, int align_bytes)
 {
 	struct nitrox_device *ndev = cmdq->ndev;
-	u32 qsize;
-
-	qsize = (ndev->qlen) * cmdq->instr_size;
-	cmdq->head_unaligned = dma_zalloc_coherent(DEV(ndev),
-						   (qsize + PKT_IN_ALIGN),
-						   &cmdq->dma_unaligned,
-						   GFP_KERNEL);
-	if (!cmdq->head_unaligned)
+
+	cmdq->qsize = (ndev->qlen * cmdq->instr_size) + align_bytes;
+	cmdq->unalign_base = dma_zalloc_coherent(DEV(ndev), cmdq->qsize,
+						 &cmdq->unalign_dma,
+						 GFP_KERNEL);
+	if (!cmdq->unalign_base)
 		return -ENOMEM;
 
-	cmdq->head = PTR_ALIGN(cmdq->head_unaligned, PKT_IN_ALIGN);
-	cmdq->dma = PTR_ALIGN(cmdq->dma_unaligned, PKT_IN_ALIGN);
-	cmdq->qsize = (qsize + PKT_IN_ALIGN);
+	cmdq->dma = PTR_ALIGN(cmdq->unalign_dma, align_bytes);
+	cmdq->base = cmdq->unalign_base + (cmdq->dma - cmdq->unalign_dma);
 	cmdq->write_idx = 0;
 
-	spin_lock_init(&cmdq->response_lock);
-	spin_lock_init(&cmdq->cmdq_lock);
-	spin_lock_init(&cmdq->backlog_lock);
+	spin_lock_init(&cmdq->cmd_qlock);
+	spin_lock_init(&cmdq->resp_qlock);
+	spin_lock_init(&cmdq->backlog_qlock);
 
 	INIT_LIST_HEAD(&cmdq->response_head);
 	INIT_LIST_HEAD(&cmdq->backlog_head);
@@ -51,68 +48,83 @@ static int cmdq_common_init(struct nitrox_cmdq *cmdq)
 	return 0;
 }
 
-static void cmdq_common_cleanup(struct nitrox_cmdq *cmdq)
+static void nitrox_cmdq_reset(struct nitrox_cmdq *cmdq)
+{
+	cmdq->write_idx = 0;
+	atomic_set(&cmdq->pending_count, 0);
+	atomic_set(&cmdq->backlog_count, 0);
+}
+
+static void nitrox_cmdq_cleanup(struct nitrox_cmdq *cmdq)
 {
 	struct nitrox_device *ndev = cmdq->ndev;
 
+	if (!cmdq->unalign_base)
+		return;
+
 	cancel_work_sync(&cmdq->backlog_qflush);
 
 	dma_free_coherent(DEV(ndev), cmdq->qsize,
-			  cmdq->head_unaligned, cmdq->dma_unaligned);
-
-	atomic_set(&cmdq->pending_count, 0);
-	atomic_set(&cmdq->backlog_count, 0);
+			  cmdq->unalign_base, cmdq->unalign_dma);
+	nitrox_cmdq_reset(cmdq);
 
 	cmdq->dbell_csr_addr = NULL;
-	cmdq->head = NULL;
+	cmdq->compl_cnt_csr_addr = NULL;
+	cmdq->unalign_base = NULL;
+	cmdq->base = NULL;
+	cmdq->unalign_dma = 0;
 	cmdq->dma = 0;
 	cmdq->qsize = 0;
 	cmdq->instr_size = 0;
 }
 
-static void nitrox_cleanup_pkt_cmdqs(struct nitrox_device *ndev)
+static void nitrox_free_pktin_queues(struct nitrox_device *ndev)
 {
 	int i;
 
 	for (i = 0; i < ndev->nr_queues; i++) {
-		struct nitrox_cmdq *cmdq = &ndev->pkt_cmdqs[i];
+		struct nitrox_cmdq *cmdq = &ndev->pkt_inq[i];
 
-		cmdq_common_cleanup(cmdq);
+		nitrox_cmdq_cleanup(cmdq);
 	}
-	kfree(ndev->pkt_cmdqs);
-	ndev->pkt_cmdqs = NULL;
+	kfree(ndev->pkt_inq);
+	ndev->pkt_inq = NULL;
 }
 
-static int nitrox_init_pkt_cmdqs(struct nitrox_device *ndev)
+static int nitrox_alloc_pktin_queues(struct nitrox_device *ndev)
 {
-	int i, err, size;
+	int i, err;
 
-	size = ndev->nr_queues * sizeof(struct nitrox_cmdq);
-	ndev->pkt_cmdqs = kzalloc(size, GFP_KERNEL);
-	if (!ndev->pkt_cmdqs)
+	ndev->pkt_inq = kcalloc_node(ndev->nr_queues,
+				     sizeof(struct nitrox_cmdq),
+				     GFP_KERNEL, ndev->node);
+	if (!ndev->pkt_inq)
 		return -ENOMEM;
 
 	for (i = 0; i < ndev->nr_queues; i++) {
 		struct nitrox_cmdq *cmdq;
 		u64 offset;
 
-		cmdq = &ndev->pkt_cmdqs[i];
+		cmdq = &ndev->pkt_inq[i];
 		cmdq->ndev = ndev;
 		cmdq->qno = i;
 		cmdq->instr_size = sizeof(struct nps_pkt_instr);
 
+		/* packet input ring doorbell address */
 		offset = NPS_PKT_IN_INSTR_BAOFF_DBELLX(i);
-		/* SE ring doorbell address for this queue */
 		cmdq->dbell_csr_addr = NITROX_CSR_ADDR(ndev, offset);
+		/* packet solicit port completion count address */
+		offset = NPS_PKT_SLC_CNTSX(i);
+		cmdq->compl_cnt_csr_addr = NITROX_CSR_ADDR(ndev, offset);
 
-		err = cmdq_common_init(cmdq);
+		err = nitrox_cmdq_init(cmdq, PKTIN_Q_ALIGN_BYTES);
 		if (err)
-			goto pkt_cmdq_fail;
+			goto pktq_fail;
 	}
 	return 0;
 
-pkt_cmdq_fail:
-	nitrox_cleanup_pkt_cmdqs(ndev);
+pktq_fail:
+	nitrox_free_pktin_queues(ndev);
 	return err;
 }
 
@@ -122,7 +134,7 @@ static int create_crypto_dma_pool(struct nitrox_device *ndev)
 
 	/* Crypto context pool, 16 byte aligned */
 	size = CRYPTO_CTX_SIZE + sizeof(struct ctx_hdr);
-	ndev->ctx_pool = dma_pool_create("crypto-context",
+	ndev->ctx_pool = dma_pool_create("nitrox-context",
 					 DEV(ndev), size, 16, 0);
 	if (!ndev->ctx_pool)
 		return -ENOMEM;
@@ -149,7 +161,7 @@ void *crypto_alloc_context(struct nitrox_device *ndev)
 	void *vaddr;
 	dma_addr_t dma;
 
-	vaddr = dma_pool_alloc(ndev->ctx_pool, (GFP_KERNEL | __GFP_ZERO), &dma);
+	vaddr = dma_pool_zalloc(ndev->ctx_pool, GFP_KERNEL, &dma);
 	if (!vaddr)
 		return NULL;
 
@@ -194,7 +206,7 @@ int nitrox_common_sw_init(struct nitrox_device *ndev)
 	if (err)
 		return err;
 
-	err = nitrox_init_pkt_cmdqs(ndev);
+	err = nitrox_alloc_pktin_queues(ndev);
 	if (err)
 		destroy_crypto_dma_pool(ndev);
 
@@ -207,6 +219,6 @@ int nitrox_common_sw_init(struct nitrox_device *ndev)
  */
 void nitrox_common_sw_cleanup(struct nitrox_device *ndev)
 {
-	nitrox_cleanup_pkt_cmdqs(ndev);
+	nitrox_free_pktin_queues(ndev);
 	destroy_crypto_dma_pool(ndev);
 }
diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c
index fee7cb2ce747..6595c95af9f1 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_main.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_main.c
@@ -11,13 +11,15 @@
 #include "nitrox_dev.h"
 #include "nitrox_common.h"
 #include "nitrox_csr.h"
+#include "nitrox_hal.h"
+#include "nitrox_isr.h"
 
 #define CNN55XX_DEV_ID	0x12
 #define MAX_PF_QUEUES	64
 #define UCODE_HLEN 48
 #define SE_GROUP 0
 
-#define DRIVER_VERSION "1.0"
+#define DRIVER_VERSION "1.1"
 #define FW_DIR "cavium/"
 /* SE microcode */
 #define SE_FW	FW_DIR "cnn55xx_se.fw"
@@ -42,6 +44,15 @@ static unsigned int qlen = DEFAULT_CMD_QLEN;
 module_param(qlen, uint, 0644);
 MODULE_PARM_DESC(qlen, "Command queue length - default 2048");
 
+#ifdef CONFIG_PCI_IOV
+int nitrox_sriov_configure(struct pci_dev *pdev, int num_vfs);
+#else
+int nitrox_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+	return 0;
+}
+#endif
+
 /**
  * struct ucode - Firmware Header
  * @id: microcode ID
@@ -136,9 +147,6 @@ static int nitrox_load_fw(struct nitrox_device *ndev, const char *fw_name)
 	write_to_ucd_unit(ndev, ucode);
 	release_firmware(fw);
 
-	set_bit(NITROX_UCODE_LOADED, &ndev->status);
-	/* barrier to sync with other cpus */
-	smp_mb__after_atomic();
 	return 0;
 }
 
@@ -210,7 +218,7 @@ void nitrox_put_device(struct nitrox_device *ndev)
 	smp_mb__after_atomic();
 }
 
-static int nitrox_reset_device(struct pci_dev *pdev)
+static int nitrox_device_flr(struct pci_dev *pdev)
 {
 	int pos = 0;
 
@@ -220,15 +228,10 @@ static int nitrox_reset_device(struct pci_dev *pdev)
 		return -ENOMEM;
 	}
 
-	pos = pci_pcie_cap(pdev);
-	if (!pos)
-		return -ENOTTY;
+	/* check flr support */
+	if (pcie_has_flr(pdev))
+		pcie_flr(pdev);
 
-	if (!pci_wait_for_pending_transaction(pdev))
-		dev_err(&pdev->dev, "waiting for pending transaction\n");
-
-	pcie_capability_set_word(pdev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
-	msleep(100);
 	pci_restore_state(pdev);
 
 	return 0;
@@ -242,7 +245,7 @@ static int nitrox_pf_sw_init(struct nitrox_device *ndev)
 	if (err)
 		return err;
 
-	err = nitrox_pf_init_isr(ndev);
+	err = nitrox_register_interrupts(ndev);
 	if (err)
 		nitrox_common_sw_cleanup(ndev);
 
@@ -251,7 +254,7 @@ static int nitrox_pf_sw_init(struct nitrox_device *ndev)
 
 static void nitrox_pf_sw_cleanup(struct nitrox_device *ndev)
 {
-	nitrox_pf_cleanup_isr(ndev);
+	nitrox_unregister_interrupts(ndev);
 	nitrox_common_sw_cleanup(ndev);
 }
 
@@ -284,26 +287,6 @@ static int nitrox_bist_check(struct nitrox_device *ndev)
 	return 0;
 }
 
-static void nitrox_get_hwinfo(struct nitrox_device *ndev)
-{
-	union emu_fuse_map emu_fuse;
-	u64 offset;
-	int i;
-
-	for (i = 0; i < NR_CLUSTERS; i++) {
-		u8 dead_cores;
-
-		offset = EMU_FUSE_MAPX(i);
-		emu_fuse.value = nitrox_read_csr(ndev, offset);
-		if (emu_fuse.s.valid) {
-			dead_cores = hweight32(emu_fuse.s.ae_fuse);
-			ndev->hw.ae_cores += AE_CORES_PER_CLUSTER - dead_cores;
-			dead_cores = hweight16(emu_fuse.s.se_fuse);
-			ndev->hw.se_cores += SE_CORES_PER_CLUSTER - dead_cores;
-		}
-	}
-}
-
 static int nitrox_pf_hw_init(struct nitrox_device *ndev)
 {
 	int err;
@@ -336,135 +319,6 @@ static int nitrox_pf_hw_init(struct nitrox_device *ndev)
 	return 0;
 }
 
-#if IS_ENABLED(CONFIG_DEBUG_FS)
-static int registers_show(struct seq_file *s, void *v)
-{
-	struct nitrox_device *ndev = s->private;
-	u64 offset;
-
-	/* NPS DMA stats */
-	offset = NPS_STATS_PKT_DMA_RD_CNT;
-	seq_printf(s, "NPS_STATS_PKT_DMA_RD_CNT  0x%016llx\n",
-		   nitrox_read_csr(ndev, offset));
-	offset = NPS_STATS_PKT_DMA_WR_CNT;
-	seq_printf(s, "NPS_STATS_PKT_DMA_WR_CNT  0x%016llx\n",
-		   nitrox_read_csr(ndev, offset));
-
-	/* BMI/BMO stats */
-	offset = BMI_NPS_PKT_CNT;
-	seq_printf(s, "BMI_NPS_PKT_CNT  0x%016llx\n",
-		   nitrox_read_csr(ndev, offset));
-	offset = BMO_NPS_SLC_PKT_CNT;
-	seq_printf(s, "BMO_NPS_PKT_CNT  0x%016llx\n",
-		   nitrox_read_csr(ndev, offset));
-
-	return 0;
-}
-
-static int registers_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, registers_show, inode->i_private);
-}
-
-static const struct file_operations register_fops = {
-	.owner = THIS_MODULE,
-	.open = registers_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static int firmware_show(struct seq_file *s, void *v)
-{
-	struct nitrox_device *ndev = s->private;
-
-	seq_printf(s, "Version: %s\n", ndev->hw.fw_name);
-	return 0;
-}
-
-static int firmware_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, firmware_show, inode->i_private);
-}
-
-static const struct file_operations firmware_fops = {
-	.owner = THIS_MODULE,
-	.open = firmware_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static int nitrox_show(struct seq_file *s, void *v)
-{
-	struct nitrox_device *ndev = s->private;
-
-	seq_printf(s, "NITROX-5 [idx: %d]\n", ndev->idx);
-	seq_printf(s, "  Revision ID: 0x%0x\n", ndev->hw.revision_id);
-	seq_printf(s, "  Cores [AE: %u  SE: %u]\n",
-		   ndev->hw.ae_cores, ndev->hw.se_cores);
-	seq_printf(s, "  Number of Queues: %u\n", ndev->nr_queues);
-	seq_printf(s, "  Queue length: %u\n", ndev->qlen);
-	seq_printf(s, "  Node: %u\n", ndev->node);
-
-	return 0;
-}
-
-static int nitrox_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, nitrox_show, inode->i_private);
-}
-
-static const struct file_operations nitrox_fops = {
-	.owner = THIS_MODULE,
-	.open = nitrox_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static void nitrox_debugfs_exit(struct nitrox_device *ndev)
-{
-	debugfs_remove_recursive(ndev->debugfs_dir);
-	ndev->debugfs_dir = NULL;
-}
-
-static int nitrox_debugfs_init(struct nitrox_device *ndev)
-{
-	struct dentry *dir, *f;
-
-	dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
-	if (!dir)
-		return -ENOMEM;
-
-	ndev->debugfs_dir = dir;
-	f = debugfs_create_file("counters", 0400, dir, ndev, &register_fops);
-	if (!f)
-		goto err;
-	f = debugfs_create_file("firmware", 0400, dir, ndev, &firmware_fops);
-	if (!f)
-		goto err;
-	f = debugfs_create_file("nitrox", 0400, dir, ndev, &nitrox_fops);
-	if (!f)
-		goto err;
-
-	return 0;
-
-err:
-	nitrox_debugfs_exit(ndev);
-	return -ENODEV;
-}
-#else
-static int nitrox_debugfs_init(struct nitrox_device *ndev)
-{
-	return 0;
-}
-
-static void nitrox_debugfs_exit(struct nitrox_device *ndev)
-{
-}
-#endif
-
 /**
  * nitrox_probe - NITROX Initialization function.
  * @pdev: PCI device information struct
@@ -487,7 +341,7 @@ static int nitrox_probe(struct pci_dev *pdev,
 		return err;
 
 	/* do FLR */
-	err = nitrox_reset_device(pdev);
+	err = nitrox_device_flr(pdev);
 	if (err) {
 		dev_err(&pdev->dev, "FLR failed\n");
 		pci_disable_device(pdev);
@@ -555,7 +409,12 @@ static int nitrox_probe(struct pci_dev *pdev,
 	if (err)
 		goto pf_hw_fail;
 
-	set_bit(NITROX_READY, &ndev->status);
+	/* clear the statistics */
+	atomic64_set(&ndev->stats.posted, 0);
+	atomic64_set(&ndev->stats.completed, 0);
+	atomic64_set(&ndev->stats.dropped, 0);
+
+	atomic_set(&ndev->state, __NDEV_READY);
 	/* barrier to sync with other cpus */
 	smp_mb__after_atomic();
 
@@ -567,7 +426,7 @@ static int nitrox_probe(struct pci_dev *pdev,
 
 crypto_fail:
 	nitrox_debugfs_exit(ndev);
-	clear_bit(NITROX_READY, &ndev->status);
+	atomic_set(&ndev->state, __NDEV_NOT_READY);
 	/* barrier to sync with other cpus */
 	smp_mb__after_atomic();
 pf_hw_fail:
@@ -602,11 +461,16 @@ static void nitrox_remove(struct pci_dev *pdev)
 	dev_info(DEV(ndev), "Removing Device %x:%x\n",
 		 ndev->hw.vendor_id, ndev->hw.device_id);
 
-	clear_bit(NITROX_READY, &ndev->status);
+	atomic_set(&ndev->state, __NDEV_NOT_READY);
 	/* barrier to sync with other cpus */
 	smp_mb__after_atomic();
 
 	nitrox_remove_from_devlist(ndev);
+
+#ifdef CONFIG_PCI_IOV
+	/* disable SR-IOV */
+	nitrox_sriov_configure(pdev, 0);
+#endif
 	nitrox_crypto_unregister();
 	nitrox_debugfs_exit(ndev);
 	nitrox_pf_sw_cleanup(ndev);
@@ -632,6 +496,9 @@ static struct pci_driver nitrox_driver = {
 	.probe = nitrox_probe,
 	.remove	= nitrox_remove,
 	.shutdown = nitrox_shutdown,
+#ifdef CONFIG_PCI_IOV
+	.sriov_configure = nitrox_sriov_configure,
+#endif
 };
 
 module_pci_driver(nitrox_driver);
diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
index 4a362fc22f62..3987cd84c033 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
@@ -382,11 +382,11 @@ static inline void backlog_list_add(struct nitrox_softreq *sr,
 {
 	INIT_LIST_HEAD(&sr->backlog);
 
-	spin_lock_bh(&cmdq->backlog_lock);
+	spin_lock_bh(&cmdq->backlog_qlock);
 	list_add_tail(&sr->backlog, &cmdq->backlog_head);
 	atomic_inc(&cmdq->backlog_count);
 	atomic_set(&sr->status, REQ_BACKLOG);
-	spin_unlock_bh(&cmdq->backlog_lock);
+	spin_unlock_bh(&cmdq->backlog_qlock);
 }
 
 static inline void response_list_add(struct nitrox_softreq *sr,
@@ -394,17 +394,17 @@ static inline void response_list_add(struct nitrox_softreq *sr,
 {
 	INIT_LIST_HEAD(&sr->response);
 
-	spin_lock_bh(&cmdq->response_lock);
+	spin_lock_bh(&cmdq->resp_qlock);
 	list_add_tail(&sr->response, &cmdq->response_head);
-	spin_unlock_bh(&cmdq->response_lock);
+	spin_unlock_bh(&cmdq->resp_qlock);
 }
 
 static inline void response_list_del(struct nitrox_softreq *sr,
 				     struct nitrox_cmdq *cmdq)
 {
-	spin_lock_bh(&cmdq->response_lock);
+	spin_lock_bh(&cmdq->resp_qlock);
 	list_del(&sr->response);
-	spin_unlock_bh(&cmdq->response_lock);
+	spin_unlock_bh(&cmdq->resp_qlock);
 }
 
 static struct nitrox_softreq *
@@ -439,11 +439,11 @@ static void post_se_instr(struct nitrox_softreq *sr,
 	int idx;
 	u8 *ent;
 
-	spin_lock_bh(&cmdq->cmdq_lock);
+	spin_lock_bh(&cmdq->cmd_qlock);
 
 	idx = cmdq->write_idx;
 	/* copy the instruction */
-	ent = cmdq->head + (idx * cmdq->instr_size);
+	ent = cmdq->base + (idx * cmdq->instr_size);
 	memcpy(ent, &sr->instr, cmdq->instr_size);
 
 	atomic_set(&sr->status, REQ_POSTED);
@@ -459,7 +459,10 @@ static void post_se_instr(struct nitrox_softreq *sr,
 
 	cmdq->write_idx = incr_index(idx, 1, ndev->qlen);
 
-	spin_unlock_bh(&cmdq->cmdq_lock);
+	spin_unlock_bh(&cmdq->cmd_qlock);
+
+	/* increment the posted command count */
+	atomic64_inc(&ndev->stats.posted);
 }
 
 static int post_backlog_cmds(struct nitrox_cmdq *cmdq)
@@ -471,7 +474,7 @@ static int post_backlog_cmds(struct nitrox_cmdq *cmdq)
 	if (!atomic_read(&cmdq->backlog_count))
 		return 0;
 
-	spin_lock_bh(&cmdq->backlog_lock);
+	spin_lock_bh(&cmdq->backlog_qlock);
 
 	list_for_each_entry_safe(sr, tmp, &cmdq->backlog_head, backlog) {
 		struct skcipher_request *skreq;
@@ -494,7 +497,7 @@ static int post_backlog_cmds(struct nitrox_cmdq *cmdq)
 		/* backlog requests are posted, wakeup with -EINPROGRESS */
 		skcipher_request_complete(skreq, -EINPROGRESS);
 	}
-	spin_unlock_bh(&cmdq->backlog_lock);
+	spin_unlock_bh(&cmdq->backlog_qlock);
 
 	return ret;
 }
@@ -508,8 +511,11 @@ static int nitrox_enqueue_request(struct nitrox_softreq *sr)
 	post_backlog_cmds(cmdq);
 
 	if (unlikely(cmdq_full(cmdq, ndev->qlen))) {
-		if (!(sr->flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+		if (!(sr->flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
+			/* increment drop count */
+			atomic64_inc(&ndev->stats.dropped);
 			return -ENOSPC;
+		}
 		/* add to backlog list */
 		backlog_list_add(sr, cmdq);
 		return -EBUSY;
@@ -572,7 +578,7 @@ int nitrox_process_se_request(struct nitrox_device *ndev,
 	/* select the queue */
 	qno = smp_processor_id() % ndev->nr_queues;
 
-	sr->cmdq = &ndev->pkt_cmdqs[qno];
+	sr->cmdq = &ndev->pkt_inq[qno];
 
 	/*
 	 * 64-Byte Instruction Format
@@ -694,6 +700,7 @@ static void process_response_list(struct nitrox_cmdq *cmdq)
 					    READ_ONCE(sr->resp.orh));
 		}
 		atomic_dec(&cmdq->pending_count);
+		atomic64_inc(&ndev->stats.completed);
 		/* sync with other cpus */
 		smp_mb__after_atomic();
 		/* remove from response list */
@@ -714,18 +721,18 @@ static void process_response_list(struct nitrox_cmdq *cmdq)
 }
 
 /**
- * pkt_slc_resp_handler - post processing of SE responses
+ * pkt_slc_resp_tasklet - post processing of SE responses
  */
-void pkt_slc_resp_handler(unsigned long data)
+void pkt_slc_resp_tasklet(unsigned long data)
 {
-	struct bh_data *bh = (void *)(uintptr_t)(data);
-	struct nitrox_cmdq *cmdq = bh->cmdq;
-	union nps_pkt_slc_cnts pkt_slc_cnts;
+	struct nitrox_q_vector *qvec = (void *)(uintptr_t)(data);
+	struct nitrox_cmdq *cmdq = qvec->cmdq;
+	union nps_pkt_slc_cnts slc_cnts;
 
 	/* read completion count */
-	pkt_slc_cnts.value = readq(bh->completion_cnt_csr_addr);
+	slc_cnts.value = readq(cmdq->compl_cnt_csr_addr);
 	/* resend the interrupt if more work to do */
-	pkt_slc_cnts.s.resend = 1;
+	slc_cnts.s.resend = 1;
 
 	process_response_list(cmdq);
 
@@ -733,7 +740,7 @@ void pkt_slc_resp_handler(unsigned long data)
 	 * clear the interrupt with resend bit enabled,
 	 * MSI-X interrupt generates if Completion count > Threshold
 	 */
-	writeq(pkt_slc_cnts.value, bh->completion_cnt_csr_addr);
+	writeq(slc_cnts.value, cmdq->compl_cnt_csr_addr);
 	/* order the writes */
 	mmiowb();
 
diff --git a/drivers/crypto/cavium/nitrox/nitrox_sriov.c b/drivers/crypto/cavium/nitrox/nitrox_sriov.c
new file mode 100644
index 000000000000..30c0aa874583
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/nitrox_sriov.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "nitrox_dev.h"
+#include "nitrox_hal.h"
+#include "nitrox_common.h"
+#include "nitrox_isr.h"
+
+static inline bool num_vfs_valid(int num_vfs)
+{
+	bool valid = false;
+
+	switch (num_vfs) {
+	case 16:
+	case 32:
+	case 64:
+	case 128:
+		valid = true;
+		break;
+	}
+
+	return valid;
+}
+
+static inline enum vf_mode num_vfs_to_mode(int num_vfs)
+{
+	enum vf_mode mode = 0;
+
+	switch (num_vfs) {
+	case 0:
+		mode = __NDEV_MODE_PF;
+		break;
+	case 16:
+		mode = __NDEV_MODE_VF16;
+		break;
+	case 32:
+		mode = __NDEV_MODE_VF32;
+		break;
+	case 64:
+		mode = __NDEV_MODE_VF64;
+		break;
+	case 128:
+		mode = __NDEV_MODE_VF128;
+		break;
+	}
+
+	return mode;
+}
+
+static void pf_sriov_cleanup(struct nitrox_device *ndev)
+{
+	 /* PF has no queues in SR-IOV mode */
+	atomic_set(&ndev->state, __NDEV_NOT_READY);
+	/* unregister crypto algorithms */
+	nitrox_crypto_unregister();
+
+	/* cleanup PF resources */
+	nitrox_unregister_interrupts(ndev);
+	nitrox_common_sw_cleanup(ndev);
+}
+
+static int pf_sriov_init(struct nitrox_device *ndev)
+{
+	int err;
+
+	/* allocate resources for PF */
+	err = nitrox_common_sw_init(ndev);
+	if (err)
+		return err;
+
+	err = nitrox_register_interrupts(ndev);
+	if (err) {
+		nitrox_common_sw_cleanup(ndev);
+		return err;
+	}
+
+	/* configure the packet queues */
+	nitrox_config_pkt_input_rings(ndev);
+	nitrox_config_pkt_solicit_ports(ndev);
+
+	/* set device to ready state */
+	atomic_set(&ndev->state, __NDEV_READY);
+
+	/* register crypto algorithms */
+	return nitrox_crypto_register();
+}
+
+static int nitrox_sriov_enable(struct pci_dev *pdev, int num_vfs)
+{
+	struct nitrox_device *ndev = pci_get_drvdata(pdev);
+	int err;
+
+	if (!num_vfs_valid(num_vfs)) {
+		dev_err(DEV(ndev), "Invalid num_vfs %d\n", num_vfs);
+		return -EINVAL;
+	}
+
+	if (pci_num_vf(pdev) == num_vfs)
+		return num_vfs;
+
+	err = pci_enable_sriov(pdev, num_vfs);
+	if (err) {
+		dev_err(DEV(ndev), "failed to enable PCI sriov %d\n", err);
+		return err;
+	}
+	dev_info(DEV(ndev), "Enabled VF(s) %d\n", num_vfs);
+
+	ndev->num_vfs = num_vfs;
+	ndev->mode = num_vfs_to_mode(num_vfs);
+	/* set bit in flags */
+	set_bit(__NDEV_SRIOV_BIT, &ndev->flags);
+
+	/* cleanup PF resources */
+	pf_sriov_cleanup(ndev);
+
+	config_nps_core_vfcfg_mode(ndev, ndev->mode);
+
+	return num_vfs;
+}
+
+static int nitrox_sriov_disable(struct pci_dev *pdev)
+{
+	struct nitrox_device *ndev = pci_get_drvdata(pdev);
+
+	if (!test_bit(__NDEV_SRIOV_BIT, &ndev->flags))
+		return 0;
+
+	if (pci_vfs_assigned(pdev)) {
+		dev_warn(DEV(ndev), "VFs are attached to VM. Can't disable SR-IOV\n");
+		return -EPERM;
+	}
+	pci_disable_sriov(pdev);
+	/* clear bit in flags */
+	clear_bit(__NDEV_SRIOV_BIT, &ndev->flags);
+
+	ndev->num_vfs = 0;
+	ndev->mode = __NDEV_MODE_PF;
+
+	config_nps_core_vfcfg_mode(ndev, ndev->mode);
+
+	return pf_sriov_init(ndev);
+}
+
+int nitrox_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+	if (!num_vfs)
+		return nitrox_sriov_disable(pdev);
+
+	return nitrox_sriov_enable(pdev, num_vfs);
+}
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
index 94b5bcf5b628..ca4630b8395f 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -102,7 +102,7 @@ static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	ctx->u.aes.key_len = key_len / 2;
 	sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
 
-	return crypto_skcipher_setkey(ctx->u.aes.tfm_skcipher, key, key_len);
+	return crypto_sync_skcipher_setkey(ctx->u.aes.tfm_skcipher, key, key_len);
 }
 
 static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
@@ -151,12 +151,13 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 	    (ctx->u.aes.key_len != AES_KEYSIZE_256))
 		fallback = 1;
 	if (fallback) {
-		SKCIPHER_REQUEST_ON_STACK(subreq, ctx->u.aes.tfm_skcipher);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(subreq,
+					       ctx->u.aes.tfm_skcipher);
 
 		/* Use the fallback to process the request for any
 		 * unsupported unit sizes or key sizes
 		 */
-		skcipher_request_set_tfm(subreq, ctx->u.aes.tfm_skcipher);
+		skcipher_request_set_sync_tfm(subreq, ctx->u.aes.tfm_skcipher);
 		skcipher_request_set_callback(subreq, req->base.flags,
 					      NULL, NULL);
 		skcipher_request_set_crypt(subreq, req->src, req->dst,
@@ -203,12 +204,12 @@ static int ccp_aes_xts_decrypt(struct ablkcipher_request *req)
 static int ccp_aes_xts_cra_init(struct crypto_tfm *tfm)
 {
 	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct crypto_skcipher *fallback_tfm;
+	struct crypto_sync_skcipher *fallback_tfm;
 
 	ctx->complete = ccp_aes_xts_complete;
 	ctx->u.aes.key_len = 0;
 
-	fallback_tfm = crypto_alloc_skcipher("xts(aes)", 0,
+	fallback_tfm = crypto_alloc_sync_skcipher("xts(aes)", 0,
 					     CRYPTO_ALG_ASYNC |
 					     CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(fallback_tfm)) {
@@ -226,7 +227,7 @@ static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm)
 {
 	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	crypto_free_skcipher(ctx->u.aes.tfm_skcipher);
+	crypto_free_sync_skcipher(ctx->u.aes.tfm_skcipher);
 }
 
 static int ccp_register_aes_xts_alg(struct list_head *head,
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
index b9fd090c46c2..28819e11db96 100644
--- a/drivers/crypto/ccp/ccp-crypto.h
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -88,7 +88,7 @@ static inline struct ccp_crypto_ahash_alg *
 /***** AES related defines *****/
 struct ccp_aes_ctx {
 	/* Fallback cipher for XTS with unsupported unit sizes */
-	struct crypto_skcipher *tfm_skcipher;
+	struct crypto_sync_skcipher *tfm_skcipher;
 
 	/* Cipher used to generate CMAC K1/K2 keys */
 	struct crypto_cipher *tfm_cipher;
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index 72790d88236d..d64a78ccc03e 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -31,8 +31,9 @@
 		((psp_master->api_major) >= _maj &&	\
 		 (psp_master->api_minor) >= _min)
 
-#define DEVICE_NAME	"sev"
-#define SEV_FW_FILE	"amd/sev.fw"
+#define DEVICE_NAME		"sev"
+#define SEV_FW_FILE		"amd/sev.fw"
+#define SEV_FW_NAME_SIZE	64
 
 static DEFINE_MUTEX(sev_cmd_mutex);
 static struct sev_misc_dev *misc_dev;
@@ -423,7 +424,7 @@ EXPORT_SYMBOL_GPL(psp_copy_user_blob);
 static int sev_get_api_version(void)
 {
 	struct sev_user_data_status *status;
-	int error, ret;
+	int error = 0, ret;
 
 	status = &psp_master->status_cmd_buf;
 	ret = sev_platform_status(status, &error);
@@ -440,6 +441,41 @@ static int sev_get_api_version(void)
 	return 0;
 }
 
+static int sev_get_firmware(struct device *dev,
+			    const struct firmware **firmware)
+{
+	char fw_name_specific[SEV_FW_NAME_SIZE];
+	char fw_name_subset[SEV_FW_NAME_SIZE];
+
+	snprintf(fw_name_specific, sizeof(fw_name_specific),
+		 "amd/amd_sev_fam%.2xh_model%.2xh.sbin",
+		 boot_cpu_data.x86, boot_cpu_data.x86_model);
+
+	snprintf(fw_name_subset, sizeof(fw_name_subset),
+		 "amd/amd_sev_fam%.2xh_model%.1xxh.sbin",
+		 boot_cpu_data.x86, (boot_cpu_data.x86_model & 0xf0) >> 4);
+
+	/* Check for SEV FW for a particular model.
+	 * Ex. amd_sev_fam17h_model00h.sbin for Family 17h Model 00h
+	 *
+	 * or
+	 *
+	 * Check for SEV FW common to a subset of models.
+	 * Ex. amd_sev_fam17h_model0xh.sbin for
+	 *     Family 17h Model 00h -- Family 17h Model 0Fh
+	 *
+	 * or
+	 *
+	 * Fall-back to using generic name: sev.fw
+	 */
+	if ((firmware_request_nowarn(firmware, fw_name_specific, dev) >= 0) ||
+	    (firmware_request_nowarn(firmware, fw_name_subset, dev) >= 0) ||
+	    (firmware_request_nowarn(firmware, SEV_FW_FILE, dev) >= 0))
+		return 0;
+
+	return -ENOENT;
+}
+
 /* Don't fail if SEV FW couldn't be updated. Continue with existing SEV FW */
 static int sev_update_firmware(struct device *dev)
 {
@@ -449,9 +485,10 @@ static int sev_update_firmware(struct device *dev)
 	struct page *p;
 	u64 data_size;
 
-	ret = request_firmware(&firmware, SEV_FW_FILE, dev);
-	if (ret < 0)
+	if (sev_get_firmware(dev, &firmware) == -ENOENT) {
+		dev_dbg(dev, "No SEV firmware file present\n");
 		return -1;
+	}
 
 	/*
 	 * SEV FW expects the physical address given to it to be 32
diff --git a/drivers/crypto/ccp/sp-platform.c b/drivers/crypto/ccp/sp-platform.c
index 71734f254fd1..b75dc7db2d4a 100644
--- a/drivers/crypto/ccp/sp-platform.c
+++ b/drivers/crypto/ccp/sp-platform.c
@@ -33,8 +33,31 @@ struct sp_platform {
 	unsigned int irq_count;
 };
 
-static const struct acpi_device_id sp_acpi_match[];
-static const struct of_device_id sp_of_match[];
+static const struct sp_dev_vdata dev_vdata[] = {
+	{
+		.bar = 0,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv3_platform,
+#endif
+	},
+};
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id sp_acpi_match[] = {
+	{ "AMDI0C00", (kernel_ulong_t)&dev_vdata[0] },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, sp_acpi_match);
+#endif
+
+#ifdef CONFIG_OF
+static const struct of_device_id sp_of_match[] = {
+	{ .compatible = "amd,ccp-seattle-v1a",
+	  .data = (const void *)&dev_vdata[0] },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, sp_of_match);
+#endif
 
 static struct sp_dev_vdata *sp_get_of_version(struct platform_device *pdev)
 {
@@ -201,32 +224,6 @@ static int sp_platform_resume(struct platform_device *pdev)
 }
 #endif
 
-static const struct sp_dev_vdata dev_vdata[] = {
-	{
-		.bar = 0,
-#ifdef CONFIG_CRYPTO_DEV_SP_CCP
-		.ccp_vdata = &ccpv3_platform,
-#endif
-	},
-};
-
-#ifdef CONFIG_ACPI
-static const struct acpi_device_id sp_acpi_match[] = {
-	{ "AMDI0C00", (kernel_ulong_t)&dev_vdata[0] },
-	{ },
-};
-MODULE_DEVICE_TABLE(acpi, sp_acpi_match);
-#endif
-
-#ifdef CONFIG_OF
-static const struct of_device_id sp_of_match[] = {
-	{ .compatible = "amd,ccp-seattle-v1a",
-	  .data = (const void *)&dev_vdata[0] },
-	{ },
-};
-MODULE_DEVICE_TABLE(of, sp_of_match);
-#endif
-
 static struct platform_driver sp_platform_driver = {
 	.driver = {
 		.name = "ccp",
diff --git a/drivers/crypto/ccree/cc_hw_queue_defs.h b/drivers/crypto/ccree/cc_hw_queue_defs.h
index a091ae57f902..45985b955d2c 100644
--- a/drivers/crypto/ccree/cc_hw_queue_defs.h
+++ b/drivers/crypto/ccree/cc_hw_queue_defs.h
@@ -449,8 +449,7 @@ static inline void set_flow_mode(struct cc_hw_desc *pdesc,
  * @pdesc: pointer HW descriptor struct
  * @mode:  Any one of the modes defined in [CC7x-DESC]
  */
-static inline void set_cipher_mode(struct cc_hw_desc *pdesc,
-				   enum drv_cipher_mode mode)
+static inline void set_cipher_mode(struct cc_hw_desc *pdesc, int mode)
 {
 	pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_MODE, mode);
 }
@@ -461,8 +460,7 @@ static inline void set_cipher_mode(struct cc_hw_desc *pdesc,
  * @pdesc: pointer HW descriptor struct
  * @mode: Any one of the modes defined in [CC7x-DESC]
  */
-static inline void set_cipher_config0(struct cc_hw_desc *pdesc,
-				      enum drv_crypto_direction mode)
+static inline void set_cipher_config0(struct cc_hw_desc *pdesc, int mode)
 {
 	pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_CONF0, mode);
 }
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 010bbf607797..db203f8be429 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -673,7 +673,7 @@ static int chcr_sg_ent_in_wr(struct scatterlist *src,
 	return min(srclen, dstlen);
 }
 
-static int chcr_cipher_fallback(struct crypto_skcipher *cipher,
+static int chcr_cipher_fallback(struct crypto_sync_skcipher *cipher,
 				u32 flags,
 				struct scatterlist *src,
 				struct scatterlist *dst,
@@ -683,9 +683,9 @@ static int chcr_cipher_fallback(struct crypto_skcipher *cipher,
 {
 	int err;
 
-	SKCIPHER_REQUEST_ON_STACK(subreq, cipher);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, cipher);
 
-	skcipher_request_set_tfm(subreq, cipher);
+	skcipher_request_set_sync_tfm(subreq, cipher);
 	skcipher_request_set_callback(subreq, flags, NULL, NULL);
 	skcipher_request_set_crypt(subreq, src, dst,
 				   nbytes, iv);
@@ -856,13 +856,14 @@ static int chcr_cipher_fallback_setkey(struct crypto_ablkcipher *cipher,
 	struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(cipher));
 	int err = 0;
 
-	crypto_skcipher_clear_flags(ablkctx->sw_cipher, CRYPTO_TFM_REQ_MASK);
-	crypto_skcipher_set_flags(ablkctx->sw_cipher, cipher->base.crt_flags &
-				  CRYPTO_TFM_REQ_MASK);
-	err = crypto_skcipher_setkey(ablkctx->sw_cipher, key, keylen);
+	crypto_sync_skcipher_clear_flags(ablkctx->sw_cipher,
+				CRYPTO_TFM_REQ_MASK);
+	crypto_sync_skcipher_set_flags(ablkctx->sw_cipher,
+				cipher->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+	err = crypto_sync_skcipher_setkey(ablkctx->sw_cipher, key, keylen);
 	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
 	tfm->crt_flags |=
-		crypto_skcipher_get_flags(ablkctx->sw_cipher) &
+		crypto_sync_skcipher_get_flags(ablkctx->sw_cipher) &
 		CRYPTO_TFM_RES_MASK;
 	return err;
 }
@@ -1337,8 +1338,7 @@ static int chcr_device_init(struct chcr_context *ctx)
 		}
 		ctx->dev = u_ctx->dev;
 		adap = padap(ctx->dev);
-		ntxq = min_not_zero((unsigned int)u_ctx->lldi.nrxq,
-				    adap->vres.ncrypto_fc);
+		ntxq = u_ctx->lldi.ntxq;
 		rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan;
 		txq_perchan = ntxq / u_ctx->lldi.nchan;
 		spin_lock(&ctx->dev->lock_chcr_dev);
@@ -1369,8 +1369,8 @@ static int chcr_cra_init(struct crypto_tfm *tfm)
 	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
 	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 
-	ablkctx->sw_cipher = crypto_alloc_skcipher(alg->cra_name, 0,
-				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	ablkctx->sw_cipher = crypto_alloc_sync_skcipher(alg->cra_name, 0,
+				CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(ablkctx->sw_cipher)) {
 		pr_err("failed to allocate fallback for %s\n", alg->cra_name);
 		return PTR_ERR(ablkctx->sw_cipher);
@@ -1399,8 +1399,8 @@ static int chcr_rfc3686_init(struct crypto_tfm *tfm)
 	/*RFC3686 initialises IV counter value to 1, rfc3686(ctr(aes))
 	 * cannot be used as fallback in chcr_handle_cipher_response
 	 */
-	ablkctx->sw_cipher = crypto_alloc_skcipher("ctr(aes)", 0,
-				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	ablkctx->sw_cipher = crypto_alloc_sync_skcipher("ctr(aes)", 0,
+				CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(ablkctx->sw_cipher)) {
 		pr_err("failed to allocate fallback for %s\n", alg->cra_name);
 		return PTR_ERR(ablkctx->sw_cipher);
@@ -1415,7 +1415,7 @@ static void chcr_cra_exit(struct crypto_tfm *tfm)
 	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
 	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 
-	crypto_free_skcipher(ablkctx->sw_cipher);
+	crypto_free_sync_skcipher(ablkctx->sw_cipher);
 	if (ablkctx->aes_generic)
 		crypto_free_cipher(ablkctx->aes_generic);
 }
diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c
index 62249d4ed373..2c472e3c6aeb 100644
--- a/drivers/crypto/chelsio/chcr_core.c
+++ b/drivers/crypto/chelsio/chcr_core.c
@@ -43,7 +43,7 @@ static chcr_handler_func work_handlers[NUM_CPL_CMDS] = {
 static struct cxgb4_uld_info chcr_uld_info = {
 	.name = DRV_MODULE_NAME,
 	.nrxq = MAX_ULD_QSETS,
-	.ntxq = MAX_ULD_QSETS,
+	/* Max ntxq will be derived from fw config file*/
 	.rxq_size = 1024,
 	.add = chcr_uld_add,
 	.state_change = chcr_uld_state_change,
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index 0d2c70c344f3..d37ef41f9ebe 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -170,7 +170,7 @@ static inline struct chcr_context *h_ctx(struct crypto_ahash *tfm)
 }
 
 struct ablk_ctx {
-	struct crypto_skcipher *sw_cipher;
+	struct crypto_sync_skcipher *sw_cipher;
 	struct crypto_cipher *aes_generic;
 	__be32 key_ctx_hdr;
 	unsigned int enckey_len;
diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c
index 0997e166ea57..20209e29f814 100644
--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
@@ -234,8 +234,7 @@ static void chtls_send_reset(struct sock *sk, int mode, struct sk_buff *skb)
 
 	return;
 out:
-	if (skb)
-		kfree_skb(skb);
+	kfree_skb(skb);
 }
 
 static void release_tcp_port(struct sock *sk)
@@ -406,12 +405,10 @@ static int wait_for_states(struct sock *sk, unsigned int states)
 
 int chtls_disconnect(struct sock *sk, int flags)
 {
-	struct chtls_sock *csk;
 	struct tcp_sock *tp;
 	int err;
 
 	tp = tcp_sk(sk);
-	csk = rcu_dereference_sk_user_data(sk);
 	chtls_purge_recv_queue(sk);
 	chtls_purge_receive_queue(sk);
 	chtls_purge_write_queue(sk);
@@ -1014,7 +1011,6 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
 				    const struct cpl_pass_accept_req *req,
 				    struct chtls_dev *cdev)
 {
-	const struct tcphdr *tcph;
 	struct inet_sock *newinet;
 	const struct iphdr *iph;
 	struct net_device *ndev;
@@ -1036,7 +1032,6 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
 	if (!dst)
 		goto free_sk;
 
-	tcph = (struct tcphdr *)(iph + 1);
 	n = dst_neigh_lookup(dst, &iph->saddr);
 	if (!n)
 		goto free_sk;
diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c
index f59b044ebd25..f472c51abe56 100644
--- a/drivers/crypto/chelsio/chtls/chtls_main.c
+++ b/drivers/crypto/chelsio/chtls/chtls_main.c
@@ -272,8 +272,7 @@ static void chtls_free_uld(struct chtls_dev *cdev)
 	for (i = 0; i < (1 << RSPQ_HASH_BITS); i++)
 		kfree_skb(cdev->rspq_skb_cache[i]);
 	kfree(cdev->lldi);
-	if (cdev->askb)
-		kfree_skb(cdev->askb);
+	kfree_skb(cdev->askb);
 	kfree(cdev);
 }
 
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index 56bd28174f52..4e6ff32f8a7e 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -28,9 +28,24 @@
 
 #define DCP_MAX_CHANS	4
 #define DCP_BUF_SZ	PAGE_SIZE
+#define DCP_SHA_PAY_SZ  64
 
 #define DCP_ALIGNMENT	64
 
+/*
+ * Null hashes to align with hw behavior on imx6sl and ull
+ * these are flipped for consistency with hw output
+ */
+static const uint8_t sha1_null_hash[] =
+	"\x09\x07\xd8\xaf\x90\x18\x60\x95\xef\xbf"
+	"\x55\x32\x0d\x4b\x6b\x5e\xee\xa3\x39\xda";
+
+static const uint8_t sha256_null_hash[] =
+	"\x55\xb8\x52\x78\x1b\x99\x95\xa4"
+	"\x4c\x93\x9b\x64\xe4\x41\xae\x27"
+	"\x24\xb9\x6f\x99\xc8\xf4\xfb\x9a"
+	"\x14\x1c\xfc\x98\x42\xc4\xb0\xe3";
+
 /* DCP DMA descriptor. */
 struct dcp_dma_desc {
 	uint32_t	next_cmd_addr;
@@ -48,6 +63,7 @@ struct dcp_coherent_block {
 	uint8_t			aes_in_buf[DCP_BUF_SZ];
 	uint8_t			aes_out_buf[DCP_BUF_SZ];
 	uint8_t			sha_in_buf[DCP_BUF_SZ];
+	uint8_t			sha_out_buf[DCP_SHA_PAY_SZ];
 
 	uint8_t			aes_key[2 * AES_KEYSIZE_128];
 
@@ -84,7 +100,7 @@ struct dcp_async_ctx {
 	unsigned int			hot:1;
 
 	/* Crypto-specific context */
-	struct crypto_skcipher		*fallback;
+	struct crypto_sync_skcipher	*fallback;
 	unsigned int			key_len;
 	uint8_t				key[AES_KEYSIZE_128];
 };
@@ -99,6 +115,11 @@ struct dcp_sha_req_ctx {
 	unsigned int	fini:1;
 };
 
+struct dcp_export_state {
+	struct dcp_sha_req_ctx req_ctx;
+	struct dcp_async_ctx async_ctx;
+};
+
 /*
  * There can even be only one instance of the MXS DCP due to the
  * design of Linux Crypto API.
@@ -209,6 +230,12 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
 	dma_addr_t dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf,
 					     DCP_BUF_SZ, DMA_FROM_DEVICE);
 
+	if (actx->fill % AES_BLOCK_SIZE) {
+		dev_err(sdcp->dev, "Invalid block size!\n");
+		ret = -EINVAL;
+		goto aes_done_run;
+	}
+
 	/* Fill in the DMA descriptor. */
 	desc->control0 = MXS_DCP_CONTROL0_DECR_SEMAPHORE |
 		    MXS_DCP_CONTROL0_INTERRUPT |
@@ -238,6 +265,7 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
 
 	ret = mxs_dcp_start_dma(actx);
 
+aes_done_run:
 	dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128,
 			 DMA_TO_DEVICE);
 	dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
@@ -264,13 +292,15 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
 
 	uint8_t *out_tmp, *src_buf, *dst_buf = NULL;
 	uint32_t dst_off = 0;
+	uint32_t last_out_len = 0;
 
 	uint8_t *key = sdcp->coh->aes_key;
 
 	int ret = 0;
 	int split = 0;
-	unsigned int i, len, clen, rem = 0;
+	unsigned int i, len, clen, rem = 0, tlen = 0;
 	int init = 0;
+	bool limit_hit = false;
 
 	actx->fill = 0;
 
@@ -289,6 +319,11 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
 	for_each_sg(req->src, src, nents, i) {
 		src_buf = sg_virt(src);
 		len = sg_dma_len(src);
+		tlen += len;
+		limit_hit = tlen > req->nbytes;
+
+		if (limit_hit)
+			len = req->nbytes - (tlen - len);
 
 		do {
 			if (actx->fill + len > out_off)
@@ -305,13 +340,15 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
 			 * If we filled the buffer or this is the last SG,
 			 * submit the buffer.
 			 */
-			if (actx->fill == out_off || sg_is_last(src)) {
+			if (actx->fill == out_off || sg_is_last(src) ||
+				limit_hit) {
 				ret = mxs_dcp_run_aes(actx, req, init);
 				if (ret)
 					return ret;
 				init = 0;
 
 				out_tmp = out_buf;
+				last_out_len = actx->fill;
 				while (dst && actx->fill) {
 					if (!split) {
 						dst_buf = sg_virt(dst);
@@ -334,6 +371,19 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
 				}
 			}
 		} while (len);
+
+		if (limit_hit)
+			break;
+	}
+
+	/* Copy the IV for CBC for chaining */
+	if (!rctx->ecb) {
+		if (rctx->enc)
+			memcpy(req->info, out_buf+(last_out_len-AES_BLOCK_SIZE),
+				AES_BLOCK_SIZE);
+		else
+			memcpy(req->info, in_buf+(last_out_len-AES_BLOCK_SIZE),
+				AES_BLOCK_SIZE);
 	}
 
 	return ret;
@@ -380,10 +430,10 @@ static int mxs_dcp_block_fallback(struct ablkcipher_request *req, int enc)
 {
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct dcp_async_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
 	int ret;
 
-	skcipher_request_set_tfm(subreq, ctx->fallback);
+	skcipher_request_set_sync_tfm(subreq, ctx->fallback);
 	skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL);
 	skcipher_request_set_crypt(subreq, req->src, req->dst,
 				   req->nbytes, req->info);
@@ -464,16 +514,16 @@ static int mxs_dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	 * but is supported by in-kernel software implementation, we use
 	 * software fallback.
 	 */
-	crypto_skcipher_clear_flags(actx->fallback, CRYPTO_TFM_REQ_MASK);
-	crypto_skcipher_set_flags(actx->fallback,
+	crypto_sync_skcipher_clear_flags(actx->fallback, CRYPTO_TFM_REQ_MASK);
+	crypto_sync_skcipher_set_flags(actx->fallback,
 				  tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
 
-	ret = crypto_skcipher_setkey(actx->fallback, key, len);
+	ret = crypto_sync_skcipher_setkey(actx->fallback, key, len);
 	if (!ret)
 		return 0;
 
 	tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK;
-	tfm->base.crt_flags |= crypto_skcipher_get_flags(actx->fallback) &
+	tfm->base.crt_flags |= crypto_sync_skcipher_get_flags(actx->fallback) &
 			       CRYPTO_TFM_RES_MASK;
 
 	return ret;
@@ -482,11 +532,10 @@ static int mxs_dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 static int mxs_dcp_aes_fallback_init(struct crypto_tfm *tfm)
 {
 	const char *name = crypto_tfm_alg_name(tfm);
-	const uint32_t flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK;
 	struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm);
-	struct crypto_skcipher *blk;
+	struct crypto_sync_skcipher *blk;
 
-	blk = crypto_alloc_skcipher(name, 0, flags);
+	blk = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(blk))
 		return PTR_ERR(blk);
 
@@ -499,7 +548,7 @@ static void mxs_dcp_aes_fallback_exit(struct crypto_tfm *tfm)
 {
 	struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm);
 
-	crypto_free_skcipher(actx->fallback);
+	crypto_free_sync_skcipher(actx->fallback);
 }
 
 /*
@@ -513,8 +562,6 @@ static int mxs_dcp_run_sha(struct ahash_request *req)
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm);
 	struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req);
-	struct hash_alg_common *halg = crypto_hash_alg_common(tfm);
-
 	struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
 
 	dma_addr_t digest_phys = 0;
@@ -536,10 +583,23 @@ static int mxs_dcp_run_sha(struct ahash_request *req)
 	desc->payload = 0;
 	desc->status = 0;
 
+	/*
+	 * Align driver with hw behavior when generating null hashes
+	 */
+	if (rctx->init && rctx->fini && desc->size == 0) {
+		struct hash_alg_common *halg = crypto_hash_alg_common(tfm);
+		const uint8_t *sha_buf =
+			(actx->alg == MXS_DCP_CONTROL1_HASH_SELECT_SHA1) ?
+			sha1_null_hash : sha256_null_hash;
+		memcpy(sdcp->coh->sha_out_buf, sha_buf, halg->digestsize);
+		ret = 0;
+		goto done_run;
+	}
+
 	/* Set HASH_TERM bit for last transfer block. */
 	if (rctx->fini) {
-		digest_phys = dma_map_single(sdcp->dev, req->result,
-					     halg->digestsize, DMA_FROM_DEVICE);
+		digest_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_out_buf,
+					     DCP_SHA_PAY_SZ, DMA_FROM_DEVICE);
 		desc->control0 |= MXS_DCP_CONTROL0_HASH_TERM;
 		desc->payload = digest_phys;
 	}
@@ -547,9 +607,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req)
 	ret = mxs_dcp_start_dma(actx);
 
 	if (rctx->fini)
-		dma_unmap_single(sdcp->dev, digest_phys, halg->digestsize,
+		dma_unmap_single(sdcp->dev, digest_phys, DCP_SHA_PAY_SZ,
 				 DMA_FROM_DEVICE);
 
+done_run:
 	dma_unmap_single(sdcp->dev, buf_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
 
 	return ret;
@@ -567,6 +628,7 @@ static int dcp_sha_req_to_buf(struct crypto_async_request *arq)
 	const int nents = sg_nents(req->src);
 
 	uint8_t *in_buf = sdcp->coh->sha_in_buf;
+	uint8_t *out_buf = sdcp->coh->sha_out_buf;
 
 	uint8_t *src_buf;
 
@@ -621,11 +683,9 @@ static int dcp_sha_req_to_buf(struct crypto_async_request *arq)
 
 		actx->fill = 0;
 
-		/* For some reason, the result is flipped. */
-		for (i = 0; i < halg->digestsize / 2; i++) {
-			swap(req->result[i],
-			     req->result[halg->digestsize - i - 1]);
-		}
+		/* For some reason the result is flipped */
+		for (i = 0; i < halg->digestsize; i++)
+			req->result[i] = out_buf[halg->digestsize - i - 1];
 	}
 
 	return 0;
@@ -766,14 +826,32 @@ static int dcp_sha_digest(struct ahash_request *req)
 	return dcp_sha_finup(req);
 }
 
-static int dcp_sha_noimport(struct ahash_request *req, const void *in)
+static int dcp_sha_import(struct ahash_request *req, const void *in)
 {
-	return -ENOSYS;
+	struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm);
+	const struct dcp_export_state *export = in;
+
+	memset(rctx, 0, sizeof(struct dcp_sha_req_ctx));
+	memset(actx, 0, sizeof(struct dcp_async_ctx));
+	memcpy(rctx, &export->req_ctx, sizeof(struct dcp_sha_req_ctx));
+	memcpy(actx, &export->async_ctx, sizeof(struct dcp_async_ctx));
+
+	return 0;
 }
 
-static int dcp_sha_noexport(struct ahash_request *req, void *out)
+static int dcp_sha_export(struct ahash_request *req, void *out)
 {
-	return -ENOSYS;
+	struct dcp_sha_req_ctx *rctx_state = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct dcp_async_ctx *actx_state = crypto_ahash_ctx(tfm);
+	struct dcp_export_state *export = out;
+
+	memcpy(&export->req_ctx, rctx_state, sizeof(struct dcp_sha_req_ctx));
+	memcpy(&export->async_ctx, actx_state, sizeof(struct dcp_async_ctx));
+
+	return 0;
 }
 
 static int dcp_sha_cra_init(struct crypto_tfm *tfm)
@@ -846,10 +924,11 @@ static struct ahash_alg dcp_sha1_alg = {
 	.final	= dcp_sha_final,
 	.finup	= dcp_sha_finup,
 	.digest	= dcp_sha_digest,
-	.import = dcp_sha_noimport,
-	.export = dcp_sha_noexport,
+	.import = dcp_sha_import,
+	.export = dcp_sha_export,
 	.halg	= {
 		.digestsize	= SHA1_DIGEST_SIZE,
+		.statesize	= sizeof(struct dcp_export_state),
 		.base		= {
 			.cra_name		= "sha1",
 			.cra_driver_name	= "sha1-dcp",
@@ -872,10 +951,11 @@ static struct ahash_alg dcp_sha256_alg = {
 	.final	= dcp_sha_final,
 	.finup	= dcp_sha_finup,
 	.digest	= dcp_sha_digest,
-	.import = dcp_sha_noimport,
-	.export = dcp_sha_noexport,
+	.import = dcp_sha_import,
+	.export = dcp_sha_export,
 	.halg	= {
 		.digestsize	= SHA256_DIGEST_SIZE,
+		.statesize	= sizeof(struct dcp_export_state),
 		.base		= {
 			.cra_name		= "sha256",
 			.cra_driver_name	= "sha256-dcp",
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index 9019f6b67986..a553ffddb11b 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -522,9 +522,9 @@ static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
 		  !!(mode & FLAGS_CBC));
 
 	if (req->nbytes < aes_fallback_sz) {
-		SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
 
-		skcipher_request_set_tfm(subreq, ctx->fallback);
+		skcipher_request_set_sync_tfm(subreq, ctx->fallback);
 		skcipher_request_set_callback(subreq, req->base.flags, NULL,
 					      NULL);
 		skcipher_request_set_crypt(subreq, req->src, req->dst,
@@ -564,11 +564,11 @@ static int omap_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	memcpy(ctx->key, key, keylen);
 	ctx->keylen = keylen;
 
-	crypto_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK);
-	crypto_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags &
+	crypto_sync_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK);
+	crypto_sync_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags &
 						 CRYPTO_TFM_REQ_MASK);
 
-	ret = crypto_skcipher_setkey(ctx->fallback, key, keylen);
+	ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
 	if (!ret)
 		return 0;
 
@@ -613,11 +613,10 @@ static int omap_aes_crypt_req(struct crypto_engine *engine,
 static int omap_aes_cra_init(struct crypto_tfm *tfm)
 {
 	const char *name = crypto_tfm_alg_name(tfm);
-	const u32 flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK;
 	struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct crypto_skcipher *blk;
+	struct crypto_sync_skcipher *blk;
 
-	blk = crypto_alloc_skcipher(name, 0, flags);
+	blk = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(blk))
 		return PTR_ERR(blk);
 
@@ -667,7 +666,7 @@ static void omap_aes_cra_exit(struct crypto_tfm *tfm)
 	struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	if (ctx->fallback)
-		crypto_free_skcipher(ctx->fallback);
+		crypto_free_sync_skcipher(ctx->fallback);
 
 	ctx->fallback = NULL;
 }
diff --git a/drivers/crypto/omap-aes.h b/drivers/crypto/omap-aes.h
index fc3b46a85809..7e02920ef6f8 100644
--- a/drivers/crypto/omap-aes.h
+++ b/drivers/crypto/omap-aes.h
@@ -101,7 +101,7 @@ struct omap_aes_ctx {
 	int		keylen;
 	u32		key[AES_KEYSIZE_256 / sizeof(u32)];
 	u8		nonce[4];
-	struct crypto_skcipher	*fallback;
+	struct crypto_sync_skcipher	*fallback;
 	struct crypto_skcipher	*ctr;
 };
 
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c
index 321d5e2ac833..a28f1d18fe01 100644
--- a/drivers/crypto/picoxcell_crypto.c
+++ b/drivers/crypto/picoxcell_crypto.c
@@ -171,7 +171,7 @@ struct spacc_ablk_ctx {
 	 * The fallback cipher. If the operation can't be done in hardware,
 	 * fallback to a software version.
 	 */
-	struct crypto_skcipher		*sw_cipher;
+	struct crypto_sync_skcipher	*sw_cipher;
 };
 
 /* AEAD cipher context. */
@@ -799,17 +799,17 @@ static int spacc_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 		 * Set the fallback transform to use the same request flags as
 		 * the hardware transform.
 		 */
-		crypto_skcipher_clear_flags(ctx->sw_cipher,
+		crypto_sync_skcipher_clear_flags(ctx->sw_cipher,
 					    CRYPTO_TFM_REQ_MASK);
-		crypto_skcipher_set_flags(ctx->sw_cipher,
+		crypto_sync_skcipher_set_flags(ctx->sw_cipher,
 					  cipher->base.crt_flags &
 					  CRYPTO_TFM_REQ_MASK);
 
-		err = crypto_skcipher_setkey(ctx->sw_cipher, key, len);
+		err = crypto_sync_skcipher_setkey(ctx->sw_cipher, key, len);
 
 		tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
 		tfm->crt_flags |=
-			crypto_skcipher_get_flags(ctx->sw_cipher) &
+			crypto_sync_skcipher_get_flags(ctx->sw_cipher) &
 			CRYPTO_TFM_RES_MASK;
 
 		if (err)
@@ -914,7 +914,7 @@ static int spacc_ablk_do_fallback(struct ablkcipher_request *req,
 	struct crypto_tfm *old_tfm =
 	    crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
 	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(old_tfm);
-	SKCIPHER_REQUEST_ON_STACK(subreq, ctx->sw_cipher);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->sw_cipher);
 	int err;
 
 	/*
@@ -922,7 +922,7 @@ static int spacc_ablk_do_fallback(struct ablkcipher_request *req,
 	 * the ciphering has completed, put the old transform back into the
 	 * request.
 	 */
-	skcipher_request_set_tfm(subreq, ctx->sw_cipher);
+	skcipher_request_set_sync_tfm(subreq, ctx->sw_cipher);
 	skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL);
 	skcipher_request_set_crypt(subreq, req->src, req->dst,
 				   req->nbytes, req->info);
@@ -1020,9 +1020,8 @@ static int spacc_ablk_cra_init(struct crypto_tfm *tfm)
 	ctx->generic.flags = spacc_alg->type;
 	ctx->generic.engine = engine;
 	if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) {
-		ctx->sw_cipher = crypto_alloc_skcipher(
-			alg->cra_name, 0, CRYPTO_ALG_ASYNC |
-					  CRYPTO_ALG_NEED_FALLBACK);
+		ctx->sw_cipher = crypto_alloc_sync_skcipher(
+			alg->cra_name, 0, CRYPTO_ALG_NEED_FALLBACK);
 		if (IS_ERR(ctx->sw_cipher)) {
 			dev_warn(engine->dev, "failed to allocate fallback for %s\n",
 				 alg->cra_name);
@@ -1041,7 +1040,7 @@ static void spacc_ablk_cra_exit(struct crypto_tfm *tfm)
 {
 	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	crypto_free_skcipher(ctx->sw_cipher);
+	crypto_free_sync_skcipher(ctx->sw_cipher);
 }
 
 static int spacc_ablk_encrypt(struct ablkcipher_request *req)
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index 1138e41d6805..d2698299896f 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -113,6 +113,13 @@ struct qat_alg_aead_ctx {
 	struct crypto_shash *hash_tfm;
 	enum icp_qat_hw_auth_algo qat_hash_alg;
 	struct qat_crypto_instance *inst;
+	union {
+		struct sha1_state sha1;
+		struct sha256_state sha256;
+		struct sha512_state sha512;
+	};
+	char ipad[SHA512_BLOCK_SIZE]; /* sufficient for SHA-1/SHA-256 as well */
+	char opad[SHA512_BLOCK_SIZE];
 };
 
 struct qat_alg_ablkcipher_ctx {
@@ -148,37 +155,32 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash,
 				  unsigned int auth_keylen)
 {
 	SHASH_DESC_ON_STACK(shash, ctx->hash_tfm);
-	struct sha1_state sha1;
-	struct sha256_state sha256;
-	struct sha512_state sha512;
 	int block_size = crypto_shash_blocksize(ctx->hash_tfm);
 	int digest_size = crypto_shash_digestsize(ctx->hash_tfm);
-	char ipad[block_size];
-	char opad[block_size];
 	__be32 *hash_state_out;
 	__be64 *hash512_state_out;
 	int i, offset;
 
-	memset(ipad, 0, block_size);
-	memset(opad, 0, block_size);
+	memset(ctx->ipad, 0, block_size);
+	memset(ctx->opad, 0, block_size);
 	shash->tfm = ctx->hash_tfm;
 	shash->flags = 0x0;
 
 	if (auth_keylen > block_size) {
 		int ret = crypto_shash_digest(shash, auth_key,
-					      auth_keylen, ipad);
+					      auth_keylen, ctx->ipad);
 		if (ret)
 			return ret;
 
-		memcpy(opad, ipad, digest_size);
+		memcpy(ctx->opad, ctx->ipad, digest_size);
 	} else {
-		memcpy(ipad, auth_key, auth_keylen);
-		memcpy(opad, auth_key, auth_keylen);
+		memcpy(ctx->ipad, auth_key, auth_keylen);
+		memcpy(ctx->opad, auth_key, auth_keylen);
 	}
 
 	for (i = 0; i < block_size; i++) {
-		char *ipad_ptr = ipad + i;
-		char *opad_ptr = opad + i;
+		char *ipad_ptr = ctx->ipad + i;
+		char *opad_ptr = ctx->opad + i;
 		*ipad_ptr ^= HMAC_IPAD_VALUE;
 		*opad_ptr ^= HMAC_OPAD_VALUE;
 	}
@@ -186,7 +188,7 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash,
 	if (crypto_shash_init(shash))
 		return -EFAULT;
 
-	if (crypto_shash_update(shash, ipad, block_size))
+	if (crypto_shash_update(shash, ctx->ipad, block_size))
 		return -EFAULT;
 
 	hash_state_out = (__be32 *)hash->sha.state1;
@@ -194,22 +196,22 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash,
 
 	switch (ctx->qat_hash_alg) {
 	case ICP_QAT_HW_AUTH_ALGO_SHA1:
-		if (crypto_shash_export(shash, &sha1))
+		if (crypto_shash_export(shash, &ctx->sha1))
 			return -EFAULT;
 		for (i = 0; i < digest_size >> 2; i++, hash_state_out++)
-			*hash_state_out = cpu_to_be32(*(sha1.state + i));
+			*hash_state_out = cpu_to_be32(ctx->sha1.state[i]);
 		break;
 	case ICP_QAT_HW_AUTH_ALGO_SHA256:
-		if (crypto_shash_export(shash, &sha256))
+		if (crypto_shash_export(shash, &ctx->sha256))
 			return -EFAULT;
 		for (i = 0; i < digest_size >> 2; i++, hash_state_out++)
-			*hash_state_out = cpu_to_be32(*(sha256.state + i));
+			*hash_state_out = cpu_to_be32(ctx->sha256.state[i]);
 		break;
 	case ICP_QAT_HW_AUTH_ALGO_SHA512:
-		if (crypto_shash_export(shash, &sha512))
+		if (crypto_shash_export(shash, &ctx->sha512))
 			return -EFAULT;
 		for (i = 0; i < digest_size >> 3; i++, hash512_state_out++)
-			*hash512_state_out = cpu_to_be64(*(sha512.state + i));
+			*hash512_state_out = cpu_to_be64(ctx->sha512.state[i]);
 		break;
 	default:
 		return -EFAULT;
@@ -218,7 +220,7 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash,
 	if (crypto_shash_init(shash))
 		return -EFAULT;
 
-	if (crypto_shash_update(shash, opad, block_size))
+	if (crypto_shash_update(shash, ctx->opad, block_size))
 		return -EFAULT;
 
 	offset = round_up(qat_get_inter_state_size(ctx->qat_hash_alg), 8);
@@ -227,28 +229,28 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash,
 
 	switch (ctx->qat_hash_alg) {
 	case ICP_QAT_HW_AUTH_ALGO_SHA1:
-		if (crypto_shash_export(shash, &sha1))
+		if (crypto_shash_export(shash, &ctx->sha1))
 			return -EFAULT;
 		for (i = 0; i < digest_size >> 2; i++, hash_state_out++)
-			*hash_state_out = cpu_to_be32(*(sha1.state + i));
+			*hash_state_out = cpu_to_be32(ctx->sha1.state[i]);
 		break;
 	case ICP_QAT_HW_AUTH_ALGO_SHA256:
-		if (crypto_shash_export(shash, &sha256))
+		if (crypto_shash_export(shash, &ctx->sha256))
 			return -EFAULT;
 		for (i = 0; i < digest_size >> 2; i++, hash_state_out++)
-			*hash_state_out = cpu_to_be32(*(sha256.state + i));
+			*hash_state_out = cpu_to_be32(ctx->sha256.state[i]);
 		break;
 	case ICP_QAT_HW_AUTH_ALGO_SHA512:
-		if (crypto_shash_export(shash, &sha512))
+		if (crypto_shash_export(shash, &ctx->sha512))
 			return -EFAULT;
 		for (i = 0; i < digest_size >> 3; i++, hash512_state_out++)
-			*hash512_state_out = cpu_to_be64(*(sha512.state + i));
+			*hash512_state_out = cpu_to_be64(ctx->sha512.state[i]);
 		break;
 	default:
 		return -EFAULT;
 	}
-	memzero_explicit(ipad, block_size);
-	memzero_explicit(opad, block_size);
+	memzero_explicit(ctx->ipad, block_size);
+	memzero_explicit(ctx->opad, block_size);
 	return 0;
 }
 
diff --git a/drivers/crypto/qce/ablkcipher.c b/drivers/crypto/qce/ablkcipher.c
index ea4d96bf47e8..585e1cab9ae3 100644
--- a/drivers/crypto/qce/ablkcipher.c
+++ b/drivers/crypto/qce/ablkcipher.c
@@ -189,7 +189,7 @@ static int qce_ablkcipher_setkey(struct crypto_ablkcipher *ablk, const u8 *key,
 	memcpy(ctx->enc_key, key, keylen);
 	return 0;
 fallback:
-	ret = crypto_skcipher_setkey(ctx->fallback, key, keylen);
+	ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
 	if (!ret)
 		ctx->enc_keylen = keylen;
 	return ret;
@@ -212,9 +212,9 @@ static int qce_ablkcipher_crypt(struct ablkcipher_request *req, int encrypt)
 
 	if (IS_AES(rctx->flags) && ctx->enc_keylen != AES_KEYSIZE_128 &&
 	    ctx->enc_keylen != AES_KEYSIZE_256) {
-		SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
 
-		skcipher_request_set_tfm(subreq, ctx->fallback);
+		skcipher_request_set_sync_tfm(subreq, ctx->fallback);
 		skcipher_request_set_callback(subreq, req->base.flags,
 					      NULL, NULL);
 		skcipher_request_set_crypt(subreq, req->src, req->dst,
@@ -245,9 +245,8 @@ static int qce_ablkcipher_init(struct crypto_tfm *tfm)
 	memset(ctx, 0, sizeof(*ctx));
 	tfm->crt_ablkcipher.reqsize = sizeof(struct qce_cipher_reqctx);
 
-	ctx->fallback = crypto_alloc_skcipher(crypto_tfm_alg_name(tfm), 0,
-					      CRYPTO_ALG_ASYNC |
-					      CRYPTO_ALG_NEED_FALLBACK);
+	ctx->fallback = crypto_alloc_sync_skcipher(crypto_tfm_alg_name(tfm),
+						   0, CRYPTO_ALG_NEED_FALLBACK);
 	return PTR_ERR_OR_ZERO(ctx->fallback);
 }
 
@@ -255,7 +254,7 @@ static void qce_ablkcipher_exit(struct crypto_tfm *tfm)
 {
 	struct qce_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	crypto_free_skcipher(ctx->fallback);
+	crypto_free_sync_skcipher(ctx->fallback);
 }
 
 struct qce_ablkcipher_def {
diff --git a/drivers/crypto/qce/cipher.h b/drivers/crypto/qce/cipher.h
index 2b0278bb6e92..ee055bfe98a0 100644
--- a/drivers/crypto/qce/cipher.h
+++ b/drivers/crypto/qce/cipher.h
@@ -22,7 +22,7 @@
 struct qce_cipher_ctx {
 	u8 enc_key[QCE_MAX_KEY_SIZE];
 	unsigned int enc_keylen;
-	struct crypto_skcipher *fallback;
+	struct crypto_sync_skcipher *fallback;
 };
 
 /**
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index faa282074e5a..0064be0e3941 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -249,8 +249,8 @@ struct s5p_aes_reqctx {
 struct s5p_aes_ctx {
 	struct s5p_aes_dev		*dev;
 
-	uint8_t				aes_key[AES_MAX_KEY_SIZE];
-	uint8_t				nonce[CTR_RFC3686_NONCE_SIZE];
+	u8				aes_key[AES_MAX_KEY_SIZE];
+	u8				nonce[CTR_RFC3686_NONCE_SIZE];
 	int				keylen;
 };
 
@@ -475,9 +475,9 @@ static void s5p_sg_done(struct s5p_aes_dev *dev)
 }
 
 /* Calls the completion. Cannot be called with dev->lock hold. */
-static void s5p_aes_complete(struct s5p_aes_dev *dev, int err)
+static void s5p_aes_complete(struct ablkcipher_request *req, int err)
 {
-	dev->req->base.complete(&dev->req->base, err);
+	req->base.complete(&req->base, err);
 }
 
 static void s5p_unset_outdata(struct s5p_aes_dev *dev)
@@ -491,7 +491,7 @@ static void s5p_unset_indata(struct s5p_aes_dev *dev)
 }
 
 static int s5p_make_sg_cpy(struct s5p_aes_dev *dev, struct scatterlist *src,
-			    struct scatterlist **dst)
+			   struct scatterlist **dst)
 {
 	void *pages;
 	int len;
@@ -518,46 +518,28 @@ static int s5p_make_sg_cpy(struct s5p_aes_dev *dev, struct scatterlist *src,
 
 static int s5p_set_outdata(struct s5p_aes_dev *dev, struct scatterlist *sg)
 {
-	int err;
-
-	if (!sg->length) {
-		err = -EINVAL;
-		goto exit;
-	}
+	if (!sg->length)
+		return -EINVAL;
 
-	err = dma_map_sg(dev->dev, sg, 1, DMA_FROM_DEVICE);
-	if (!err) {
-		err = -ENOMEM;
-		goto exit;
-	}
+	if (!dma_map_sg(dev->dev, sg, 1, DMA_FROM_DEVICE))
+		return -ENOMEM;
 
 	dev->sg_dst = sg;
-	err = 0;
 
-exit:
-	return err;
+	return 0;
 }
 
 static int s5p_set_indata(struct s5p_aes_dev *dev, struct scatterlist *sg)
 {
-	int err;
-
-	if (!sg->length) {
-		err = -EINVAL;
-		goto exit;
-	}
+	if (!sg->length)
+		return -EINVAL;
 
-	err = dma_map_sg(dev->dev, sg, 1, DMA_TO_DEVICE);
-	if (!err) {
-		err = -ENOMEM;
-		goto exit;
-	}
+	if (!dma_map_sg(dev->dev, sg, 1, DMA_TO_DEVICE))
+		return -ENOMEM;
 
 	dev->sg_src = sg;
-	err = 0;
 
-exit:
-	return err;
+	return 0;
 }
 
 /*
@@ -655,14 +637,14 @@ static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id)
 {
 	struct platform_device *pdev = dev_id;
 	struct s5p_aes_dev *dev = platform_get_drvdata(pdev);
+	struct ablkcipher_request *req;
 	int err_dma_tx = 0;
 	int err_dma_rx = 0;
 	int err_dma_hx = 0;
 	bool tx_end = false;
 	bool hx_end = false;
 	unsigned long flags;
-	uint32_t status;
-	u32 st_bits;
+	u32 status, st_bits;
 	int err;
 
 	spin_lock_irqsave(&dev->lock, flags);
@@ -727,7 +709,7 @@ static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id)
 
 		spin_unlock_irqrestore(&dev->lock, flags);
 
-		s5p_aes_complete(dev, 0);
+		s5p_aes_complete(dev->req, 0);
 		/* Device is still busy */
 		tasklet_schedule(&dev->tasklet);
 	} else {
@@ -752,11 +734,12 @@ static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id)
 error:
 	s5p_sg_done(dev);
 	dev->busy = false;
+	req = dev->req;
 	if (err_dma_hx == 1)
 		s5p_set_dma_hashdata(dev, dev->hash_sg_iter);
 
 	spin_unlock_irqrestore(&dev->lock, flags);
-	s5p_aes_complete(dev, err);
+	s5p_aes_complete(req, err);
 
 hash_irq_end:
 	/*
@@ -1830,7 +1813,7 @@ static struct ahash_alg algs_sha1_md5_sha256[] = {
 };
 
 static void s5p_set_aes(struct s5p_aes_dev *dev,
-			const uint8_t *key, const uint8_t *iv,
+			const u8 *key, const u8 *iv, const u8 *ctr,
 			unsigned int keylen)
 {
 	void __iomem *keystart;
@@ -1838,6 +1821,9 @@ static void s5p_set_aes(struct s5p_aes_dev *dev,
 	if (iv)
 		memcpy_toio(dev->aes_ioaddr + SSS_REG_AES_IV_DATA(0), iv, 0x10);
 
+	if (ctr)
+		memcpy_toio(dev->aes_ioaddr + SSS_REG_AES_CNT_DATA(0), ctr, 0x10);
+
 	if (keylen == AES_KEYSIZE_256)
 		keystart = dev->aes_ioaddr + SSS_REG_AES_KEY_DATA(0);
 	else if (keylen == AES_KEYSIZE_192)
@@ -1887,7 +1873,7 @@ static int s5p_set_indata_start(struct s5p_aes_dev *dev,
 }
 
 static int s5p_set_outdata_start(struct s5p_aes_dev *dev,
-				struct ablkcipher_request *req)
+				 struct ablkcipher_request *req)
 {
 	struct scatterlist *sg;
 	int err;
@@ -1916,11 +1902,12 @@ static int s5p_set_outdata_start(struct s5p_aes_dev *dev,
 static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode)
 {
 	struct ablkcipher_request *req = dev->req;
-	uint32_t aes_control;
+	u32 aes_control;
 	unsigned long flags;
 	int err;
-	u8 *iv;
+	u8 *iv, *ctr;
 
+	/* This sets bit [13:12] to 00, which selects 128-bit counter */
 	aes_control = SSS_AES_KEY_CHANGE_MODE;
 	if (mode & FLAGS_AES_DECRYPT)
 		aes_control |= SSS_AES_MODE_DECRYPT;
@@ -1928,11 +1915,14 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode)
 	if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CBC) {
 		aes_control |= SSS_AES_CHAIN_MODE_CBC;
 		iv = req->info;
+		ctr = NULL;
 	} else if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CTR) {
 		aes_control |= SSS_AES_CHAIN_MODE_CTR;
-		iv = req->info;
+		iv = NULL;
+		ctr = req->info;
 	} else {
 		iv = NULL; /* AES_ECB */
+		ctr = NULL;
 	}
 
 	if (dev->ctx->keylen == AES_KEYSIZE_192)
@@ -1964,7 +1954,7 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode)
 		goto outdata_error;
 
 	SSS_AES_WRITE(dev, AES_CONTROL, aes_control);
-	s5p_set_aes(dev, dev->ctx->aes_key, iv, dev->ctx->keylen);
+	s5p_set_aes(dev, dev->ctx->aes_key, iv, ctr, dev->ctx->keylen);
 
 	s5p_set_dma_indata(dev,  dev->sg_src);
 	s5p_set_dma_outdata(dev, dev->sg_dst);
@@ -1983,7 +1973,7 @@ indata_error:
 	s5p_sg_done(dev);
 	dev->busy = false;
 	spin_unlock_irqrestore(&dev->lock, flags);
-	s5p_aes_complete(dev, err);
+	s5p_aes_complete(req, err);
 }
 
 static void s5p_tasklet_cb(unsigned long data)
@@ -2024,7 +2014,7 @@ static int s5p_aes_handle_req(struct s5p_aes_dev *dev,
 	err = ablkcipher_enqueue_request(&dev->queue, req);
 	if (dev->busy) {
 		spin_unlock_irqrestore(&dev->lock, flags);
-		goto exit;
+		return err;
 	}
 	dev->busy = true;
 
@@ -2032,7 +2022,6 @@ static int s5p_aes_handle_req(struct s5p_aes_dev *dev,
 
 	tasklet_schedule(&dev->tasklet);
 
-exit:
 	return err;
 }
 
@@ -2043,7 +2032,8 @@ static int s5p_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
 	struct s5p_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct s5p_aes_dev *dev = ctx->dev;
 
-	if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) {
+	if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE) &&
+			((mode & FLAGS_AES_MODE_MASK) != FLAGS_AES_CTR)) {
 		dev_err(dev->dev, "request size is not exact amount of AES blocks\n");
 		return -EINVAL;
 	}
@@ -2054,7 +2044,7 @@ static int s5p_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
 }
 
 static int s5p_aes_setkey(struct crypto_ablkcipher *cipher,
-			  const uint8_t *key, unsigned int keylen)
+			  const u8 *key, unsigned int keylen)
 {
 	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
 	struct s5p_aes_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -2090,6 +2080,11 @@ static int s5p_aes_cbc_decrypt(struct ablkcipher_request *req)
 	return s5p_aes_crypt(req, FLAGS_AES_DECRYPT | FLAGS_AES_CBC);
 }
 
+static int s5p_aes_ctr_crypt(struct ablkcipher_request *req)
+{
+	return s5p_aes_crypt(req, FLAGS_AES_CTR);
+}
+
 static int s5p_aes_cra_init(struct crypto_tfm *tfm)
 {
 	struct s5p_aes_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -2144,6 +2139,28 @@ static struct crypto_alg algs[] = {
 			.decrypt	= s5p_aes_cbc_decrypt,
 		}
 	},
+	{
+		.cra_name		= "ctr(aes)",
+		.cra_driver_name	= "ctr-aes-s5p",
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+					  CRYPTO_ALG_ASYNC |
+					  CRYPTO_ALG_KERN_DRIVER_ONLY,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct s5p_aes_ctx),
+		.cra_alignmask		= 0x0f,
+		.cra_type		= &crypto_ablkcipher_type,
+		.cra_module		= THIS_MODULE,
+		.cra_init		= s5p_aes_cra_init,
+		.cra_u.ablkcipher = {
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
+			.setkey		= s5p_aes_setkey,
+			.encrypt	= s5p_aes_ctr_crypt,
+			.decrypt	= s5p_aes_ctr_crypt,
+		}
+	},
 };
 
 static int s5p_aes_probe(struct platform_device *pdev)
diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index e7540a5b8197..bbf166a97ad3 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -149,7 +149,7 @@ struct sahara_ctx {
 	/* AES-specific context */
 	int keylen;
 	u8 key[AES_KEYSIZE_128];
-	struct crypto_skcipher *fallback;
+	struct crypto_sync_skcipher *fallback;
 };
 
 struct sahara_aes_reqctx {
@@ -621,14 +621,14 @@ static int sahara_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	/*
 	 * The requested key size is not supported by HW, do a fallback.
 	 */
-	crypto_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK);
-	crypto_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags &
+	crypto_sync_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK);
+	crypto_sync_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags &
 						 CRYPTO_TFM_REQ_MASK);
 
-	ret = crypto_skcipher_setkey(ctx->fallback, key, keylen);
+	ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
 
 	tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK;
-	tfm->base.crt_flags |= crypto_skcipher_get_flags(ctx->fallback) &
+	tfm->base.crt_flags |= crypto_sync_skcipher_get_flags(ctx->fallback) &
 			       CRYPTO_TFM_RES_MASK;
 	return ret;
 }
@@ -666,9 +666,9 @@ static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req)
 	int err;
 
 	if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
-		SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
 
-		skcipher_request_set_tfm(subreq, ctx->fallback);
+		skcipher_request_set_sync_tfm(subreq, ctx->fallback);
 		skcipher_request_set_callback(subreq, req->base.flags,
 					      NULL, NULL);
 		skcipher_request_set_crypt(subreq, req->src, req->dst,
@@ -688,9 +688,9 @@ static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req)
 	int err;
 
 	if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
-		SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
 
-		skcipher_request_set_tfm(subreq, ctx->fallback);
+		skcipher_request_set_sync_tfm(subreq, ctx->fallback);
 		skcipher_request_set_callback(subreq, req->base.flags,
 					      NULL, NULL);
 		skcipher_request_set_crypt(subreq, req->src, req->dst,
@@ -710,9 +710,9 @@ static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req)
 	int err;
 
 	if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
-		SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
 
-		skcipher_request_set_tfm(subreq, ctx->fallback);
+		skcipher_request_set_sync_tfm(subreq, ctx->fallback);
 		skcipher_request_set_callback(subreq, req->base.flags,
 					      NULL, NULL);
 		skcipher_request_set_crypt(subreq, req->src, req->dst,
@@ -732,9 +732,9 @@ static int sahara_aes_cbc_decrypt(struct ablkcipher_request *req)
 	int err;
 
 	if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
-		SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
 
-		skcipher_request_set_tfm(subreq, ctx->fallback);
+		skcipher_request_set_sync_tfm(subreq, ctx->fallback);
 		skcipher_request_set_callback(subreq, req->base.flags,
 					      NULL, NULL);
 		skcipher_request_set_crypt(subreq, req->src, req->dst,
@@ -752,8 +752,7 @@ static int sahara_aes_cra_init(struct crypto_tfm *tfm)
 	const char *name = crypto_tfm_alg_name(tfm);
 	struct sahara_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	ctx->fallback = crypto_alloc_skcipher(name, 0,
-					      CRYPTO_ALG_ASYNC |
+	ctx->fallback = crypto_alloc_sync_skcipher(name, 0,
 					      CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(ctx->fallback)) {
 		pr_err("Error allocating fallback algo %s\n", name);
@@ -769,7 +768,7 @@ static void sahara_aes_cra_exit(struct crypto_tfm *tfm)
 {
 	struct sahara_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	crypto_free_skcipher(ctx->fallback);
+	crypto_free_sync_skcipher(ctx->fallback);
 }
 
 static u32 sahara_sha_init_hdr(struct sahara_dev *dev,
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
index b71895871be3..c5c5ff82b52e 100644
--- a/drivers/crypto/vmx/aes_cbc.c
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -32,7 +32,7 @@
 #include "aesp8-ppc.h"
 
 struct p8_aes_cbc_ctx {
-	struct crypto_skcipher *fallback;
+	struct crypto_sync_skcipher *fallback;
 	struct aes_key enc_key;
 	struct aes_key dec_key;
 };
@@ -40,11 +40,11 @@ struct p8_aes_cbc_ctx {
 static int p8_aes_cbc_init(struct crypto_tfm *tfm)
 {
 	const char *alg = crypto_tfm_alg_name(tfm);
-	struct crypto_skcipher *fallback;
+	struct crypto_sync_skcipher *fallback;
 	struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	fallback = crypto_alloc_skcipher(alg, 0,
-			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	fallback = crypto_alloc_sync_skcipher(alg, 0,
+					      CRYPTO_ALG_NEED_FALLBACK);
 
 	if (IS_ERR(fallback)) {
 		printk(KERN_ERR
@@ -53,7 +53,7 @@ static int p8_aes_cbc_init(struct crypto_tfm *tfm)
 		return PTR_ERR(fallback);
 	}
 
-	crypto_skcipher_set_flags(
+	crypto_sync_skcipher_set_flags(
 		fallback,
 		crypto_skcipher_get_flags((struct crypto_skcipher *)tfm));
 	ctx->fallback = fallback;
@@ -66,7 +66,7 @@ static void p8_aes_cbc_exit(struct crypto_tfm *tfm)
 	struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	if (ctx->fallback) {
-		crypto_free_skcipher(ctx->fallback);
+		crypto_free_sync_skcipher(ctx->fallback);
 		ctx->fallback = NULL;
 	}
 }
@@ -86,7 +86,7 @@ static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key,
 	pagefault_enable();
 	preempt_enable();
 
-	ret += crypto_skcipher_setkey(ctx->fallback, key, keylen);
+	ret += crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
 	return ret;
 }
 
@@ -100,8 +100,8 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc,
 		crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm));
 
 	if (in_interrupt()) {
-		SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback);
-		skcipher_request_set_tfm(req, ctx->fallback);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback);
+		skcipher_request_set_sync_tfm(req, ctx->fallback);
 		skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 		skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 		ret = crypto_skcipher_encrypt(req);
@@ -139,8 +139,8 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc,
 		crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm));
 
 	if (in_interrupt()) {
-		SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback);
-		skcipher_request_set_tfm(req, ctx->fallback);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback);
+		skcipher_request_set_sync_tfm(req, ctx->fallback);
 		skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 		skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 		ret = crypto_skcipher_decrypt(req);
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
index cd777c75291d..8a2fe092cb8e 100644
--- a/drivers/crypto/vmx/aes_ctr.c
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -32,18 +32,18 @@
 #include "aesp8-ppc.h"
 
 struct p8_aes_ctr_ctx {
-	struct crypto_skcipher *fallback;
+	struct crypto_sync_skcipher *fallback;
 	struct aes_key enc_key;
 };
 
 static int p8_aes_ctr_init(struct crypto_tfm *tfm)
 {
 	const char *alg = crypto_tfm_alg_name(tfm);
-	struct crypto_skcipher *fallback;
+	struct crypto_sync_skcipher *fallback;
 	struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	fallback = crypto_alloc_skcipher(alg, 0,
-			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	fallback = crypto_alloc_sync_skcipher(alg, 0,
+					      CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(fallback)) {
 		printk(KERN_ERR
 		       "Failed to allocate transformation for '%s': %ld\n",
@@ -51,7 +51,7 @@ static int p8_aes_ctr_init(struct crypto_tfm *tfm)
 		return PTR_ERR(fallback);
 	}
 
-	crypto_skcipher_set_flags(
+	crypto_sync_skcipher_set_flags(
 		fallback,
 		crypto_skcipher_get_flags((struct crypto_skcipher *)tfm));
 	ctx->fallback = fallback;
@@ -64,7 +64,7 @@ static void p8_aes_ctr_exit(struct crypto_tfm *tfm)
 	struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	if (ctx->fallback) {
-		crypto_free_skcipher(ctx->fallback);
+		crypto_free_sync_skcipher(ctx->fallback);
 		ctx->fallback = NULL;
 	}
 }
@@ -83,7 +83,7 @@ static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key,
 	pagefault_enable();
 	preempt_enable();
 
-	ret += crypto_skcipher_setkey(ctx->fallback, key, keylen);
+	ret += crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
 	return ret;
 }
 
@@ -119,8 +119,8 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc,
 		crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm));
 
 	if (in_interrupt()) {
-		SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback);
-		skcipher_request_set_tfm(req, ctx->fallback);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback);
+		skcipher_request_set_sync_tfm(req, ctx->fallback);
 		skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 		skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 		ret = crypto_skcipher_encrypt(req);
diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c
index e9954a7d4694..ecd64e5cc5bb 100644
--- a/drivers/crypto/vmx/aes_xts.c
+++ b/drivers/crypto/vmx/aes_xts.c
@@ -33,7 +33,7 @@
 #include "aesp8-ppc.h"
 
 struct p8_aes_xts_ctx {
-	struct crypto_skcipher *fallback;
+	struct crypto_sync_skcipher *fallback;
 	struct aes_key enc_key;
 	struct aes_key dec_key;
 	struct aes_key tweak_key;
@@ -42,11 +42,11 @@ struct p8_aes_xts_ctx {
 static int p8_aes_xts_init(struct crypto_tfm *tfm)
 {
 	const char *alg = crypto_tfm_alg_name(tfm);
-	struct crypto_skcipher *fallback;
+	struct crypto_sync_skcipher *fallback;
 	struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	fallback = crypto_alloc_skcipher(alg, 0,
-			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	fallback = crypto_alloc_sync_skcipher(alg, 0,
+					      CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(fallback)) {
 		printk(KERN_ERR
 			"Failed to allocate transformation for '%s': %ld\n",
@@ -54,7 +54,7 @@ static int p8_aes_xts_init(struct crypto_tfm *tfm)
 		return PTR_ERR(fallback);
 	}
 
-	crypto_skcipher_set_flags(
+	crypto_sync_skcipher_set_flags(
 		fallback,
 		crypto_skcipher_get_flags((struct crypto_skcipher *)tfm));
 	ctx->fallback = fallback;
@@ -67,7 +67,7 @@ static void p8_aes_xts_exit(struct crypto_tfm *tfm)
 	struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	if (ctx->fallback) {
-		crypto_free_skcipher(ctx->fallback);
+		crypto_free_sync_skcipher(ctx->fallback);
 		ctx->fallback = NULL;
 	}
 }
@@ -92,7 +92,7 @@ static int p8_aes_xts_setkey(struct crypto_tfm *tfm, const u8 *key,
 	pagefault_enable();
 	preempt_enable();
 
-	ret += crypto_skcipher_setkey(ctx->fallback, key, keylen);
+	ret += crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
 	return ret;
 }
 
@@ -109,8 +109,8 @@ static int p8_aes_xts_crypt(struct blkcipher_desc *desc,
 		crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm));
 
 	if (in_interrupt()) {
-		SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback);
-		skcipher_request_set_tfm(req, ctx->fallback);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback);
+		skcipher_request_set_sync_tfm(req, ctx->fallback);
 		skcipher_request_set_callback(req, desc->flags, NULL, NULL);
 		skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
 		ret = enc? crypto_skcipher_encrypt(req) : crypto_skcipher_decrypt(req);
diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
index 2c98e020df05..3c0881ac9880 100644
--- a/drivers/edac/cpc925_edac.c
+++ b/drivers/edac/cpc925_edac.c
@@ -593,8 +593,7 @@ static void cpc925_mc_check(struct mem_ctl_info *mci)
 /******************** CPU err device********************************/
 static u32 cpc925_cpu_mask_disabled(void)
 {
-	struct device_node *cpus;
-	struct device_node *cpunode = NULL;
+	struct device_node *cpunode;
 	static u32 mask = 0;
 
 	/* use cached value if available */
@@ -603,20 +602,8 @@ static u32 cpc925_cpu_mask_disabled(void)
 
 	mask = APIMASK_ADI0 | APIMASK_ADI1;
 
-	cpus = of_find_node_by_path("/cpus");
-	if (cpus == NULL) {
-		cpc925_printk(KERN_DEBUG, "No /cpus node !\n");
-		return 0;
-	}
-
-	while ((cpunode = of_get_next_child(cpus, cpunode)) != NULL) {
+	for_each_of_cpu_node(cpunode) {
 		const u32 *reg = of_get_property(cpunode, "reg", NULL);
-
-		if (strcmp(cpunode->type, "cpu")) {
-			cpc925_printk(KERN_ERR, "Not a cpu node in /cpus: %s\n", cpunode->name);
-			continue;
-		}
-
 		if (reg == NULL || *reg > 2) {
 			cpc925_printk(KERN_ERR, "Bad reg value at %pOF\n", cpunode);
 			continue;
@@ -633,9 +620,6 @@ static u32 cpc925_cpu_mask_disabled(void)
 				"Assuming PI id is equal to CPU MPIC id!\n");
 	}
 
-	of_node_put(cpunode);
-	of_node_put(cpus);
-
 	return mask;
 }
 
diff --git a/drivers/extcon/extcon-intel-cht-wc.c b/drivers/extcon/extcon-intel-cht-wc.c
index 5e1dd2772278..5ef215297101 100644
--- a/drivers/extcon/extcon-intel-cht-wc.c
+++ b/drivers/extcon/extcon-intel-cht-wc.c
@@ -1,18 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Extcon charger detection driver for Intel Cherrytrail Whiskey Cove PMIC
  * Copyright (C) 2017 Hans de Goede <hdegoede@redhat.com>
  *
  * Based on various non upstream patches to support the CHT Whiskey Cove PMIC:
  * Copyright (C) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #include <linux/extcon-provider.h>
@@ -32,10 +24,10 @@
 #define CHT_WC_CHGRCTRL0_EMRGCHREN	BIT(1)
 #define CHT_WC_CHGRCTRL0_EXTCHRDIS	BIT(2)
 #define CHT_WC_CHGRCTRL0_SWCONTROL	BIT(3)
-#define CHT_WC_CHGRCTRL0_TTLCK_MASK	BIT(4)
-#define CHT_WC_CHGRCTRL0_CCSM_OFF_MASK	BIT(5)
-#define CHT_WC_CHGRCTRL0_DBPOFF_MASK	BIT(6)
-#define CHT_WC_CHGRCTRL0_WDT_NOKICK	BIT(7)
+#define CHT_WC_CHGRCTRL0_TTLCK		BIT(4)
+#define CHT_WC_CHGRCTRL0_CCSM_OFF	BIT(5)
+#define CHT_WC_CHGRCTRL0_DBPOFF		BIT(6)
+#define CHT_WC_CHGRCTRL0_CHR_WDT_NOKICK	BIT(7)
 
 #define CHT_WC_CHGRCTRL1		0x5e17
 
@@ -52,7 +44,7 @@
 #define CHT_WC_USBSRC_TYPE_ACA		4
 #define CHT_WC_USBSRC_TYPE_SE1		5
 #define CHT_WC_USBSRC_TYPE_MHL		6
-#define CHT_WC_USBSRC_TYPE_FLOAT_DP_DN	7
+#define CHT_WC_USBSRC_TYPE_FLOATING	7
 #define CHT_WC_USBSRC_TYPE_OTHER	8
 #define CHT_WC_USBSRC_TYPE_DCP_EXTPHY	9
 
@@ -61,9 +53,12 @@
 #define CHT_WC_PWRSRC_STS		0x6e1e
 #define CHT_WC_PWRSRC_VBUS		BIT(0)
 #define CHT_WC_PWRSRC_DC		BIT(1)
-#define CHT_WC_PWRSRC_BAT		BIT(2)
-#define CHT_WC_PWRSRC_ID_GND		BIT(3)
-#define CHT_WC_PWRSRC_ID_FLOAT		BIT(4)
+#define CHT_WC_PWRSRC_BATT		BIT(2)
+#define CHT_WC_PWRSRC_USBID_MASK	GENMASK(4, 3)
+#define CHT_WC_PWRSRC_USBID_SHIFT	3
+#define CHT_WC_PWRSRC_RID_ACA		0
+#define CHT_WC_PWRSRC_RID_GND		1
+#define CHT_WC_PWRSRC_RID_FLOAT		2
 
 #define CHT_WC_VBUS_GPIO_CTLO		0x6e2d
 #define CHT_WC_VBUS_GPIO_CTLO_OUTPUT	BIT(0)
@@ -104,16 +99,20 @@ struct cht_wc_extcon_data {
 
 static int cht_wc_extcon_get_id(struct cht_wc_extcon_data *ext, int pwrsrc_sts)
 {
-	if (pwrsrc_sts & CHT_WC_PWRSRC_ID_GND)
+	switch ((pwrsrc_sts & CHT_WC_PWRSRC_USBID_MASK) >> CHT_WC_PWRSRC_USBID_SHIFT) {
+	case CHT_WC_PWRSRC_RID_GND:
 		return USB_ID_GND;
-	if (pwrsrc_sts & CHT_WC_PWRSRC_ID_FLOAT)
+	case CHT_WC_PWRSRC_RID_FLOAT:
 		return USB_ID_FLOAT;
-
-	/*
-	 * Once we have iio support for the gpadc we should read the USBID
-	 * gpadc channel here and determine ACA role based on that.
-	 */
-	return USB_ID_FLOAT;
+	case CHT_WC_PWRSRC_RID_ACA:
+	default:
+		/*
+		 * Once we have IIO support for the GPADC we should read
+		 * the USBID GPADC channel here and determine ACA role
+		 * based on that.
+		 */
+		return USB_ID_FLOAT;
+	}
 }
 
 static int cht_wc_extcon_get_charger(struct cht_wc_extcon_data *ext,
@@ -156,9 +155,9 @@ static int cht_wc_extcon_get_charger(struct cht_wc_extcon_data *ext,
 		dev_warn(ext->dev,
 			"Unhandled charger type %d, defaulting to SDP\n",
 			 ret);
-		/* Fall through, treat as SDP */
+		return EXTCON_CHG_USB_SDP;
 	case CHT_WC_USBSRC_TYPE_SDP:
-	case CHT_WC_USBSRC_TYPE_FLOAT_DP_DN:
+	case CHT_WC_USBSRC_TYPE_FLOATING:
 	case CHT_WC_USBSRC_TYPE_OTHER:
 		return EXTCON_CHG_USB_SDP;
 	case CHT_WC_USBSRC_TYPE_CDP:
@@ -279,7 +278,7 @@ static int cht_wc_extcon_sw_control(struct cht_wc_extcon_data *ext, bool enable)
 {
 	int ret, mask, val;
 
-	mask = CHT_WC_CHGRCTRL0_SWCONTROL | CHT_WC_CHGRCTRL0_CCSM_OFF_MASK;
+	mask = CHT_WC_CHGRCTRL0_SWCONTROL | CHT_WC_CHGRCTRL0_CCSM_OFF;
 	val = enable ? mask : 0;
 	ret = regmap_update_bits(ext->regmap, CHT_WC_CHGRCTRL0, mask, val);
 	if (ret)
@@ -292,6 +291,7 @@ static int cht_wc_extcon_probe(struct platform_device *pdev)
 {
 	struct intel_soc_pmic *pmic = dev_get_drvdata(pdev->dev.parent);
 	struct cht_wc_extcon_data *ext;
+	unsigned long mask = ~(CHT_WC_PWRSRC_VBUS | CHT_WC_PWRSRC_USBID_MASK);
 	int irq, ret;
 
 	irq = platform_get_irq(pdev, 0);
@@ -352,9 +352,7 @@ static int cht_wc_extcon_probe(struct platform_device *pdev)
 	}
 
 	/* Unmask irqs */
-	ret = regmap_write(ext->regmap, CHT_WC_PWRSRC_IRQ_MASK,
-			   (int)~(CHT_WC_PWRSRC_VBUS | CHT_WC_PWRSRC_ID_GND |
-				  CHT_WC_PWRSRC_ID_FLOAT));
+	ret = regmap_write(ext->regmap, CHT_WC_PWRSRC_IRQ_MASK, mask);
 	if (ret) {
 		dev_err(ext->dev, "Error writing irq-mask: %d\n", ret);
 		goto disable_sw_control;
diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c
index fd24debe58a3..80c9abcc3f97 100644
--- a/drivers/extcon/extcon-intel-int3496.c
+++ b/drivers/extcon/extcon-intel-int3496.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Intel INT3496 ACPI device extcon driver
  *
@@ -7,15 +8,6 @@
  *
  * Copyright (c) 2014, Intel Corporation.
  * Author: David Cohen <david.a.cohen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/acpi.h>
@@ -192,4 +184,4 @@ module_platform_driver(int3496_driver);
 
 MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>");
 MODULE_DESCRIPTION("Intel INT3496 ACPI device extcon driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/extcon/extcon-max14577.c b/drivers/extcon/extcon-max14577.c
index b871836da8a4..22d2feb1f8bc 100644
--- a/drivers/extcon/extcon-max14577.c
+++ b/drivers/extcon/extcon-max14577.c
@@ -1,20 +1,10 @@
-/*
- * extcon-max14577.c - MAX14577/77836 extcon driver to support MUIC
- *
- * Copyright (C) 2013,2014 Samsung Electronics
- * Chanwoo Choi <cw00.choi@samsung.com>
- * Krzysztof Kozlowski <krzk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// extcon-max14577.c - MAX14577/77836 extcon driver to support MUIC
+//
+// Copyright (C) 2013,2014 Samsung Electronics
+// Chanwoo Choi <cw00.choi@samsung.com>
+// Krzysztof Kozlowski <krzk@kernel.org>
 
 #include <linux/kernel.h>
 #include <linux/module.h>
diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c
index 227651ff9666..a79537ebb671 100644
--- a/drivers/extcon/extcon-max77693.c
+++ b/drivers/extcon/extcon-max77693.c
@@ -1,19 +1,9 @@
-/*
- * extcon-max77693.c - MAX77693 extcon driver to support MAX77693 MUIC
- *
- * Copyright (C) 2012 Samsung Electrnoics
- * Chanwoo Choi <cw00.choi@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// extcon-max77693.c - MAX77693 extcon driver to support MAX77693 MUIC
+//
+// Copyright (C) 2012 Samsung Electrnoics
+// Chanwoo Choi <cw00.choi@samsung.com>
 
 #include <linux/kernel.h>
 #include <linux/module.h>
diff --git a/drivers/extcon/extcon-max77843.c b/drivers/extcon/extcon-max77843.c
index c9fcd6cd41cb..b98cbd0362f5 100644
--- a/drivers/extcon/extcon-max77843.c
+++ b/drivers/extcon/extcon-max77843.c
@@ -1,15 +1,10 @@
-/*
- * extcon-max77843.c - Maxim MAX77843 extcon driver to support
- *			MUIC(Micro USB Interface Controller)
- *
- * Copyright (C) 2015 Samsung Electronics
- * Author: Jaewon Kim <jaewon02.kim@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// extcon-max77843.c - Maxim MAX77843 extcon driver to support
+//			MUIC(Micro USB Interface Controller)
+//
+// Copyright (C) 2015 Samsung Electronics
+// Author: Jaewon Kim <jaewon02.kim@samsung.com>
 
 #include <linux/extcon-provider.h>
 #include <linux/i2c.h>
diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c
index 9f30f4929b72..bdabb2479e0d 100644
--- a/drivers/extcon/extcon-max8997.c
+++ b/drivers/extcon/extcon-max8997.c
@@ -1,19 +1,9 @@
-/*
- * extcon-max8997.c - MAX8997 extcon driver to support MAX8997 MUIC
- *
- *  Copyright (C) 2012 Samsung Electronics
- *  Donggeun Kim <dg77.kim@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// extcon-max8997.c - MAX8997 extcon driver to support MAX8997 MUIC
+//
+//  Copyright (C) 2012 Samsung Electronics
+//  Donggeun Kim <dg77.kim@samsung.com>
 
 #include <linux/kernel.h>
 #include <linux/module.h>
diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c
index b9d27c8fe57e..5ab0498be652 100644
--- a/drivers/extcon/extcon.c
+++ b/drivers/extcon/extcon.c
@@ -628,7 +628,7 @@ int extcon_get_property(struct extcon_dev *edev, unsigned int id,
 	unsigned long flags;
 	int index, ret = 0;
 
-	*prop_val = (union extcon_property_value)(0);
+	*prop_val = (union extcon_property_value){0};
 
 	if (!edev)
 		return -EINVAL;
@@ -1123,7 +1123,6 @@ int extcon_dev_register(struct extcon_dev *edev)
 			(unsigned long)atomic_inc_return(&edev_no));
 
 	if (edev->max_supported) {
-		char buf[10];
 		char *str;
 		struct extcon_cable *cable;
 
@@ -1137,9 +1136,7 @@ int extcon_dev_register(struct extcon_dev *edev)
 		for (index = 0; index < edev->max_supported; index++) {
 			cable = &edev->cables[index];
 
-			snprintf(buf, 10, "cable.%d", index);
-			str = kzalloc(strlen(buf) + 1,
-				      GFP_KERNEL);
+			str = kasprintf(GFP_KERNEL, "cable.%d", index);
 			if (!str) {
 				for (index--; index >= 0; index--) {
 					cable = &edev->cables[index];
@@ -1149,7 +1146,6 @@ int extcon_dev_register(struct extcon_dev *edev)
 
 				goto err_alloc_cables;
 			}
-			strcpy(str, buf);
 
 			cable->edev = edev;
 			cable->cable_index = index;
@@ -1172,7 +1168,6 @@ int extcon_dev_register(struct extcon_dev *edev)
 	}
 
 	if (edev->max_supported && edev->mutually_exclusive) {
-		char buf[80];
 		char *name;
 
 		/* Count the size of mutually_exclusive array */
@@ -1197,9 +1192,8 @@ int extcon_dev_register(struct extcon_dev *edev)
 		}
 
 		for (index = 0; edev->mutually_exclusive[index]; index++) {
-			sprintf(buf, "0x%x", edev->mutually_exclusive[index]);
-			name = kzalloc(strlen(buf) + 1,
-				       GFP_KERNEL);
+			name = kasprintf(GFP_KERNEL, "0x%x",
+					 edev->mutually_exclusive[index]);
 			if (!name) {
 				for (index--; index >= 0; index--) {
 					kfree(edev->d_attrs_muex[index].attr.
@@ -1210,7 +1204,6 @@ int extcon_dev_register(struct extcon_dev *edev)
 				ret = -ENOMEM;
 				goto err_muex;
 			}
-			strcpy(name, buf);
 			sysfs_attr_init(&edev->d_attrs_muex[index].attr);
 			edev->d_attrs_muex[index].attr.name = name;
 			edev->d_attrs_muex[index].attr.mode = 0000;
diff --git a/drivers/firmware/google/Kconfig b/drivers/firmware/google/Kconfig
index a456a000048b..91a0404affe2 100644
--- a/drivers/firmware/google/Kconfig
+++ b/drivers/firmware/google/Kconfig
@@ -10,37 +10,31 @@ if GOOGLE_FIRMWARE
 
 config GOOGLE_SMI
 	tristate "SMI interface for Google platforms"
-	depends on X86 && ACPI && DMI && EFI
-	select EFI_VARS
+	depends on X86 && ACPI && DMI
 	help
 	  Say Y here if you want to enable SMI callbacks for Google
 	  platforms.  This provides an interface for writing to and
-	  clearing the EFI event log and reading and writing NVRAM
+	  clearing the event log.  If EFI_VARS is also enabled this
+	  driver provides an interface for reading and writing NVRAM
 	  variables.
 
 config GOOGLE_COREBOOT_TABLE
-	tristate
-	depends on GOOGLE_COREBOOT_TABLE_ACPI || GOOGLE_COREBOOT_TABLE_OF
-
-config GOOGLE_COREBOOT_TABLE_ACPI
-	tristate "Coreboot Table Access - ACPI"
-	depends on ACPI
-	select GOOGLE_COREBOOT_TABLE
+	tristate "Coreboot Table Access"
+	depends on ACPI || OF
 	help
 	  This option enables the coreboot_table module, which provides other
-	  firmware modules to access to the coreboot table. The coreboot table
-	  pointer is accessed through the ACPI "GOOGCB00" object.
+	  firmware modules access to the coreboot table. The coreboot table
+	  pointer is accessed through the ACPI "GOOGCB00" object or the
+	  device tree node /firmware/coreboot.
 	  If unsure say N.
 
+config GOOGLE_COREBOOT_TABLE_ACPI
+	tristate
+	select GOOGLE_COREBOOT_TABLE
+
 config GOOGLE_COREBOOT_TABLE_OF
-	tristate "Coreboot Table Access - Device Tree"
-	depends on OF
+	tristate
 	select GOOGLE_COREBOOT_TABLE
-	help
-	  This option enable the coreboot_table module, which provide other
-	  firmware modules to access coreboot table. The coreboot table pointer
-	  is accessed through the device tree node /firmware/coreboot.
-	  If unsure say N.
 
 config GOOGLE_MEMCONSOLE
 	tristate
diff --git a/drivers/firmware/google/Makefile b/drivers/firmware/google/Makefile
index d0b3fba96194..d17caded5d88 100644
--- a/drivers/firmware/google/Makefile
+++ b/drivers/firmware/google/Makefile
@@ -2,8 +2,6 @@
 
 obj-$(CONFIG_GOOGLE_SMI)		+= gsmi.o
 obj-$(CONFIG_GOOGLE_COREBOOT_TABLE)        += coreboot_table.o
-obj-$(CONFIG_GOOGLE_COREBOOT_TABLE_ACPI)   += coreboot_table-acpi.o
-obj-$(CONFIG_GOOGLE_COREBOOT_TABLE_OF)     += coreboot_table-of.o
 obj-$(CONFIG_GOOGLE_FRAMEBUFFER_COREBOOT)  += framebuffer-coreboot.o
 obj-$(CONFIG_GOOGLE_MEMCONSOLE)            += memconsole.o
 obj-$(CONFIG_GOOGLE_MEMCONSOLE_COREBOOT)   += memconsole-coreboot.o
diff --git a/drivers/firmware/google/coreboot_table-acpi.c b/drivers/firmware/google/coreboot_table-acpi.c
deleted file mode 100644
index 77197fe3d42f..000000000000
--- a/drivers/firmware/google/coreboot_table-acpi.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * coreboot_table-acpi.c
- *
- * Using ACPI to locate Coreboot table and provide coreboot table access.
- *
- * Copyright 2017 Google Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License v2.0 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/acpi.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-
-#include "coreboot_table.h"
-
-static int coreboot_table_acpi_probe(struct platform_device *pdev)
-{
-	phys_addr_t phyaddr;
-	resource_size_t len;
-	struct coreboot_table_header __iomem *header = NULL;
-	struct resource *res;
-	void __iomem *ptr = NULL;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -EINVAL;
-
-	len = resource_size(res);
-	if (!res->start || !len)
-		return -EINVAL;
-
-	phyaddr = res->start;
-	header = ioremap_cache(phyaddr, sizeof(*header));
-	if (header == NULL)
-		return -ENOMEM;
-
-	ptr = ioremap_cache(phyaddr,
-			    header->header_bytes + header->table_bytes);
-	iounmap(header);
-	if (!ptr)
-		return -ENOMEM;
-
-	return coreboot_table_init(&pdev->dev, ptr);
-}
-
-static int coreboot_table_acpi_remove(struct platform_device *pdev)
-{
-	return coreboot_table_exit();
-}
-
-static const struct acpi_device_id cros_coreboot_acpi_match[] = {
-	{ "GOOGCB00", 0 },
-	{ "BOOT0000", 0 },
-	{ }
-};
-MODULE_DEVICE_TABLE(acpi, cros_coreboot_acpi_match);
-
-static struct platform_driver coreboot_table_acpi_driver = {
-	.probe = coreboot_table_acpi_probe,
-	.remove = coreboot_table_acpi_remove,
-	.driver = {
-		.name = "coreboot_table_acpi",
-		.acpi_match_table = ACPI_PTR(cros_coreboot_acpi_match),
-	},
-};
-
-static int __init coreboot_table_acpi_init(void)
-{
-	return platform_driver_register(&coreboot_table_acpi_driver);
-}
-
-module_init(coreboot_table_acpi_init);
-
-MODULE_AUTHOR("Google, Inc.");
-MODULE_LICENSE("GPL");
diff --git a/drivers/firmware/google/coreboot_table-of.c b/drivers/firmware/google/coreboot_table-of.c
deleted file mode 100644
index f15bf404c579..000000000000
--- a/drivers/firmware/google/coreboot_table-of.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * coreboot_table-of.c
- *
- * Coreboot table access through open firmware.
- *
- * Copyright 2017 Google Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License v2.0 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/device.h>
-#include <linux/io.h>
-#include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
-#include <linux/platform_device.h>
-
-#include "coreboot_table.h"
-
-static int coreboot_table_of_probe(struct platform_device *pdev)
-{
-	struct device_node *fw_dn = pdev->dev.of_node;
-	void __iomem *ptr;
-
-	ptr = of_iomap(fw_dn, 0);
-	of_node_put(fw_dn);
-	if (!ptr)
-		return -ENOMEM;
-
-	return coreboot_table_init(&pdev->dev, ptr);
-}
-
-static int coreboot_table_of_remove(struct platform_device *pdev)
-{
-	return coreboot_table_exit();
-}
-
-static const struct of_device_id coreboot_of_match[] = {
-	{ .compatible = "coreboot" },
-	{},
-};
-
-static struct platform_driver coreboot_table_of_driver = {
-	.probe = coreboot_table_of_probe,
-	.remove = coreboot_table_of_remove,
-	.driver = {
-		.name = "coreboot_table_of",
-		.of_match_table = coreboot_of_match,
-	},
-};
-
-static int __init platform_coreboot_table_of_init(void)
-{
-	struct platform_device *pdev;
-	struct device_node *of_node;
-
-	/* Limit device creation to the presence of /firmware/coreboot node */
-	of_node = of_find_node_by_path("/firmware/coreboot");
-	if (!of_node)
-		return -ENODEV;
-
-	if (!of_match_node(coreboot_of_match, of_node))
-		return -ENODEV;
-
-	pdev = of_platform_device_create(of_node, "coreboot_table_of", NULL);
-	if (!pdev)
-		return -ENODEV;
-
-	return platform_driver_register(&coreboot_table_of_driver);
-}
-
-module_init(platform_coreboot_table_of_init);
-
-MODULE_AUTHOR("Google, Inc.");
-MODULE_LICENSE("GPL");
diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c
index 19db5709ae28..078d3bbe632f 100644
--- a/drivers/firmware/google/coreboot_table.c
+++ b/drivers/firmware/google/coreboot_table.c
@@ -16,12 +16,15 @@
  * GNU General Public License for more details.
  */
 
+#include <linux/acpi.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 
 #include "coreboot_table.h"
@@ -29,8 +32,6 @@
 #define CB_DEV(d) container_of(d, struct coreboot_device, dev)
 #define CB_DRV(d) container_of(d, struct coreboot_driver, drv)
 
-static struct coreboot_table_header __iomem *ptr_header;
-
 static int coreboot_bus_match(struct device *dev, struct device_driver *drv)
 {
 	struct coreboot_device *device = CB_DEV(dev);
@@ -70,12 +71,6 @@ static struct bus_type coreboot_bus_type = {
 	.remove		= coreboot_bus_remove,
 };
 
-static int __init coreboot_bus_init(void)
-{
-	return bus_register(&coreboot_bus_type);
-}
-module_init(coreboot_bus_init);
-
 static void coreboot_device_release(struct device *dev)
 {
 	struct coreboot_device *device = CB_DEV(dev);
@@ -97,62 +92,117 @@ void coreboot_driver_unregister(struct coreboot_driver *driver)
 }
 EXPORT_SYMBOL(coreboot_driver_unregister);
 
-int coreboot_table_init(struct device *dev, void __iomem *ptr)
+static int coreboot_table_populate(struct device *dev, void *ptr)
 {
 	int i, ret;
 	void *ptr_entry;
 	struct coreboot_device *device;
-	struct coreboot_table_entry entry;
-	struct coreboot_table_header header;
-
-	ptr_header = ptr;
-	memcpy_fromio(&header, ptr_header, sizeof(header));
-
-	if (strncmp(header.signature, "LBIO", sizeof(header.signature))) {
-		pr_warn("coreboot_table: coreboot table missing or corrupt!\n");
-		return -ENODEV;
-	}
+	struct coreboot_table_entry *entry;
+	struct coreboot_table_header *header = ptr;
 
-	ptr_entry = (void *)ptr_header + header.header_bytes;
-	for (i = 0; i < header.table_entries; i++) {
-		memcpy_fromio(&entry, ptr_entry, sizeof(entry));
+	ptr_entry = ptr + header->header_bytes;
+	for (i = 0; i < header->table_entries; i++) {
+		entry = ptr_entry;
 
-		device = kzalloc(sizeof(struct device) + entry.size, GFP_KERNEL);
-		if (!device) {
-			ret = -ENOMEM;
-			break;
-		}
+		device = kzalloc(sizeof(struct device) + entry->size, GFP_KERNEL);
+		if (!device)
+			return -ENOMEM;
 
 		dev_set_name(&device->dev, "coreboot%d", i);
 		device->dev.parent = dev;
 		device->dev.bus = &coreboot_bus_type;
 		device->dev.release = coreboot_device_release;
-		memcpy_fromio(&device->entry, ptr_entry, entry.size);
+		memcpy(&device->entry, ptr_entry, entry->size);
 
 		ret = device_register(&device->dev);
 		if (ret) {
 			put_device(&device->dev);
-			break;
+			return ret;
 		}
 
-		ptr_entry += entry.size;
+		ptr_entry += entry->size;
 	}
 
-	return ret;
+	return 0;
 }
-EXPORT_SYMBOL(coreboot_table_init);
 
-int coreboot_table_exit(void)
+static int coreboot_table_probe(struct platform_device *pdev)
 {
-	if (ptr_header) {
-		bus_unregister(&coreboot_bus_type);
-		iounmap(ptr_header);
-		ptr_header = NULL;
+	resource_size_t len;
+	struct coreboot_table_header *header;
+	struct resource *res;
+	struct device *dev = &pdev->dev;
+	void *ptr;
+	int ret;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
+
+	len = resource_size(res);
+	if (!res->start || !len)
+		return -EINVAL;
+
+	/* Check just the header first to make sure things are sane */
+	header = memremap(res->start, sizeof(*header), MEMREMAP_WB);
+	if (!header)
+		return -ENOMEM;
+
+	len = header->header_bytes + header->table_bytes;
+	ret = strncmp(header->signature, "LBIO", sizeof(header->signature));
+	memunmap(header);
+	if (ret) {
+		dev_warn(dev, "coreboot table missing or corrupt!\n");
+		return -ENODEV;
 	}
 
+	ptr = memremap(res->start, len, MEMREMAP_WB);
+	if (!ptr)
+		return -ENOMEM;
+
+	ret = bus_register(&coreboot_bus_type);
+	if (!ret) {
+		ret = coreboot_table_populate(dev, ptr);
+		if (ret)
+			bus_unregister(&coreboot_bus_type);
+	}
+	memunmap(ptr);
+
+	return ret;
+}
+
+static int coreboot_table_remove(struct platform_device *pdev)
+{
+	bus_unregister(&coreboot_bus_type);
 	return 0;
 }
-EXPORT_SYMBOL(coreboot_table_exit);
 
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id cros_coreboot_acpi_match[] = {
+	{ "GOOGCB00", 0 },
+	{ "BOOT0000", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, cros_coreboot_acpi_match);
+#endif
+
+#ifdef CONFIG_OF
+static const struct of_device_id coreboot_of_match[] = {
+	{ .compatible = "coreboot" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, coreboot_of_match);
+#endif
+
+static struct platform_driver coreboot_table_driver = {
+	.probe = coreboot_table_probe,
+	.remove = coreboot_table_remove,
+	.driver = {
+		.name = "coreboot_table",
+		.acpi_match_table = ACPI_PTR(cros_coreboot_acpi_match),
+		.of_match_table = of_match_ptr(coreboot_of_match),
+	},
+};
+module_platform_driver(coreboot_table_driver);
 MODULE_AUTHOR("Google, Inc.");
 MODULE_LICENSE("GPL");
diff --git a/drivers/firmware/google/coreboot_table.h b/drivers/firmware/google/coreboot_table.h
index 8ad95a94481b..71a9de6b15fa 100644
--- a/drivers/firmware/google/coreboot_table.h
+++ b/drivers/firmware/google/coreboot_table.h
@@ -91,10 +91,4 @@ int coreboot_driver_register(struct coreboot_driver *driver);
 /* Unregister a driver that uses the data from a coreboot table. */
 void coreboot_driver_unregister(struct coreboot_driver *driver);
 
-/* Initialize coreboot table module given a pointer to iomem */
-int coreboot_table_init(struct device *dev, void __iomem *ptr);
-
-/* Cleanup coreboot table module */
-int coreboot_table_exit(void);
-
 #endif /* __COREBOOT_TABLE_H */
diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c
index c8f169bf2e27..82ce1e6d261e 100644
--- a/drivers/firmware/google/gsmi.c
+++ b/drivers/firmware/google/gsmi.c
@@ -29,6 +29,7 @@
 #include <linux/efi.h>
 #include <linux/module.h>
 #include <linux/ucs2_string.h>
+#include <linux/suspend.h>
 
 #define GSMI_SHUTDOWN_CLEAN	0	/* Clean Shutdown */
 /* TODO(mikew@google.com): Tie in HARDLOCKUP_DETECTOR with NMIWDT */
@@ -70,6 +71,8 @@
 #define GSMI_CMD_SET_NVRAM_VAR		0x03
 #define GSMI_CMD_SET_EVENT_LOG		0x08
 #define GSMI_CMD_CLEAR_EVENT_LOG	0x09
+#define GSMI_CMD_LOG_S0IX_SUSPEND	0x0a
+#define GSMI_CMD_LOG_S0IX_RESUME	0x0b
 #define GSMI_CMD_CLEAR_CONFIG		0x20
 #define GSMI_CMD_HANDSHAKE_TYPE		0xC1
 
@@ -84,7 +87,7 @@ struct gsmi_buf {
 	u32 address;			/* physical address of buffer */
 };
 
-struct gsmi_device {
+static struct gsmi_device {
 	struct platform_device *pdev;	/* platform device */
 	struct gsmi_buf *name_buf;	/* variable name buffer */
 	struct gsmi_buf *data_buf;	/* generic data buffer */
@@ -122,7 +125,6 @@ struct gsmi_log_entry_type_1 {
 	u32	instance;
 } __packed;
 
-
 /*
  * Some platforms don't have explicit SMI handshake
  * and need to wait for SMI to complete.
@@ -133,6 +135,15 @@ module_param(spincount, uint, 0600);
 MODULE_PARM_DESC(spincount,
 	"The number of loop iterations to use when using the spin handshake.");
 
+/*
+ * Platforms might not support S0ix logging in their GSMI handlers. In order to
+ * avoid any side-effects of generating an SMI for S0ix logging, use the S0ix
+ * related GSMI commands only for those platforms that explicitly enable this
+ * option.
+ */
+static bool s0ix_logging_enable;
+module_param(s0ix_logging_enable, bool, 0600);
+
 static struct gsmi_buf *gsmi_buf_alloc(void)
 {
 	struct gsmi_buf *smibuf;
@@ -289,6 +300,10 @@ static int gsmi_exec(u8 func, u8 sub)
 	return rc;
 }
 
+#ifdef CONFIG_EFI_VARS
+
+static struct efivars efivars;
+
 static efi_status_t gsmi_get_variable(efi_char16_t *name,
 				      efi_guid_t *vendor, u32 *attr,
 				      unsigned long *data_size,
@@ -466,6 +481,8 @@ static const struct efivar_operations efivar_ops = {
 	.get_next_variable = gsmi_get_next_variable,
 };
 
+#endif /* CONFIG_EFI_VARS */
+
 static ssize_t eventlog_write(struct file *filp, struct kobject *kobj,
 			       struct bin_attribute *bin_attr,
 			       char *buf, loff_t pos, size_t count)
@@ -480,11 +497,10 @@ static ssize_t eventlog_write(struct file *filp, struct kobject *kobj,
 	if (count < sizeof(u32))
 		return -EINVAL;
 	param.type = *(u32 *)buf;
-	count -= sizeof(u32);
 	buf += sizeof(u32);
 
 	/* The remaining buffer is the data payload */
-	if (count > gsmi_dev.data_buf->length)
+	if ((count - sizeof(u32)) > gsmi_dev.data_buf->length)
 		return -EINVAL;
 	param.data_len = count - sizeof(u32);
 
@@ -504,7 +520,7 @@ static ssize_t eventlog_write(struct file *filp, struct kobject *kobj,
 
 	spin_unlock_irqrestore(&gsmi_dev.lock, flags);
 
-	return rc;
+	return (rc == 0) ? count : rc;
 
 }
 
@@ -716,6 +732,12 @@ static const struct dmi_system_id gsmi_dmi_table[] __initconst = {
 			DMI_MATCH(DMI_BOARD_VENDOR, "Google, Inc."),
 		},
 	},
+	{
+		.ident = "Coreboot Firmware",
+		.matches = {
+			DMI_MATCH(DMI_BIOS_VENDOR, "coreboot"),
+		},
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(dmi, gsmi_dmi_table);
@@ -762,7 +784,6 @@ static __init int gsmi_system_valid(void)
 }
 
 static struct kobject *gsmi_kobj;
-static struct efivars efivars;
 
 static const struct platform_device_info gsmi_dev_info = {
 	.name		= "gsmi",
@@ -771,6 +792,78 @@ static const struct platform_device_info gsmi_dev_info = {
 	.dma_mask	= DMA_BIT_MASK(32),
 };
 
+#ifdef CONFIG_PM
+static void gsmi_log_s0ix_info(u8 cmd)
+{
+	unsigned long flags;
+
+	/*
+	 * If platform has not enabled S0ix logging, then no action is
+	 * necessary.
+	 */
+	if (!s0ix_logging_enable)
+		return;
+
+	spin_lock_irqsave(&gsmi_dev.lock, flags);
+
+	memset(gsmi_dev.param_buf->start, 0, gsmi_dev.param_buf->length);
+
+	gsmi_exec(GSMI_CALLBACK, cmd);
+
+	spin_unlock_irqrestore(&gsmi_dev.lock, flags);
+}
+
+static int gsmi_log_s0ix_suspend(struct device *dev)
+{
+	/*
+	 * If system is not suspending via firmware using the standard ACPI Sx
+	 * types, then make a GSMI call to log the suspend info.
+	 */
+	if (!pm_suspend_via_firmware())
+		gsmi_log_s0ix_info(GSMI_CMD_LOG_S0IX_SUSPEND);
+
+	/*
+	 * Always return success, since we do not want suspend
+	 * to fail just because of logging failure.
+	 */
+	return 0;
+}
+
+static int gsmi_log_s0ix_resume(struct device *dev)
+{
+	/*
+	 * If system did not resume via firmware, then make a GSMI call to log
+	 * the resume info and wake source.
+	 */
+	if (!pm_resume_via_firmware())
+		gsmi_log_s0ix_info(GSMI_CMD_LOG_S0IX_RESUME);
+
+	/*
+	 * Always return success, since we do not want resume
+	 * to fail just because of logging failure.
+	 */
+	return 0;
+}
+
+static const struct dev_pm_ops gsmi_pm_ops = {
+	.suspend_noirq = gsmi_log_s0ix_suspend,
+	.resume_noirq = gsmi_log_s0ix_resume,
+};
+
+static int gsmi_platform_driver_probe(struct platform_device *dev)
+{
+	return 0;
+}
+
+static struct platform_driver gsmi_driver_info = {
+	.driver = {
+		.name = "gsmi",
+		.pm = &gsmi_pm_ops,
+	},
+	.probe = gsmi_platform_driver_probe,
+};
+#endif
+
 static __init int gsmi_init(void)
 {
 	unsigned long flags;
@@ -782,6 +875,14 @@ static __init int gsmi_init(void)
 
 	gsmi_dev.smi_cmd = acpi_gbl_FADT.smi_command;
 
+#ifdef CONFIG_PM
+	ret = platform_driver_register(&gsmi_driver_info);
+	if (unlikely(ret)) {
+		printk(KERN_ERR "gsmi: unable to register platform driver\n");
+		return ret;
+	}
+#endif
+
 	/* register device */
 	gsmi_dev.pdev = platform_device_register_full(&gsmi_dev_info);
 	if (IS_ERR(gsmi_dev.pdev)) {
@@ -886,11 +987,14 @@ static __init int gsmi_init(void)
 		goto out_remove_bin_file;
 	}
 
+#ifdef CONFIG_EFI_VARS
 	ret = efivars_register(&efivars, &efivar_ops, gsmi_kobj);
 	if (ret) {
 		printk(KERN_INFO "gsmi: Failed to register efivars\n");
-		goto out_remove_sysfs_files;
+		sysfs_remove_files(gsmi_kobj, gsmi_attrs);
+		goto out_remove_bin_file;
 	}
+#endif
 
 	register_reboot_notifier(&gsmi_reboot_notifier);
 	register_die_notifier(&gsmi_die_notifier);
@@ -901,8 +1005,6 @@ static __init int gsmi_init(void)
 
 	return 0;
 
-out_remove_sysfs_files:
-	sysfs_remove_files(gsmi_kobj, gsmi_attrs);
 out_remove_bin_file:
 	sysfs_remove_bin_file(gsmi_kobj, &eventlog_bin_attr);
 out_err:
@@ -922,7 +1024,9 @@ static void __exit gsmi_exit(void)
 	unregister_die_notifier(&gsmi_die_notifier);
 	atomic_notifier_chain_unregister(&panic_notifier_list,
 					 &gsmi_panic_notifier);
+#ifdef CONFIG_EFI_VARS
 	efivars_unregister(&efivars);
+#endif
 
 	sysfs_remove_files(gsmi_kobj, gsmi_attrs);
 	sysfs_remove_bin_file(gsmi_kobj, &eventlog_bin_attr);
diff --git a/drivers/firmware/google/vpd.c b/drivers/firmware/google/vpd.c
index 1aa67bb5d8c0..c0c0b4e4e281 100644
--- a/drivers/firmware/google/vpd.c
+++ b/drivers/firmware/google/vpd.c
@@ -198,7 +198,7 @@ static int vpd_section_init(const char *name, struct vpd_section *sec,
 
 	sec->name = name;
 
-	/* We want to export the raw partion with name ${name}_raw */
+	/* We want to export the raw partition with name ${name}_raw */
 	sec->raw_name = kasprintf(GFP_KERNEL, "%s_raw", name);
 	if (!sec->raw_name) {
 		err = -ENOMEM;
diff --git a/drivers/firmware/scpi_pm_domain.c b/drivers/firmware/scpi_pm_domain.c
index f395dec27113..390aa13391e4 100644
--- a/drivers/firmware/scpi_pm_domain.c
+++ b/drivers/firmware/scpi_pm_domain.c
@@ -121,7 +121,7 @@ static int scpi_pm_domain_probe(struct platform_device *pdev)
 
 		scpi_pd->domain = i;
 		scpi_pd->ops = scpi_ops;
-		sprintf(scpi_pd->name, "%s.%d", np->name, i);
+		sprintf(scpi_pd->name, "%pOFn.%d", np, i);
 		scpi_pd->genpd.name = scpi_pd->name;
 		scpi_pd->genpd.power_off = scpi_pd_power_off;
 		scpi_pd->genpd.power_on = scpi_pd_power_on;
diff --git a/drivers/fpga/altera-cvp.c b/drivers/fpga/altera-cvp.c
index 7fa793672a7a..610a1558e0ed 100644
--- a/drivers/fpga/altera-cvp.c
+++ b/drivers/fpga/altera-cvp.c
@@ -453,8 +453,8 @@ static int altera_cvp_probe(struct pci_dev *pdev,
 	snprintf(conf->mgr_name, sizeof(conf->mgr_name), "%s @%s",
 		 ALTERA_CVP_MGR_NAME, pci_name(pdev));
 
-	mgr = fpga_mgr_create(&pdev->dev, conf->mgr_name,
-			      &altera_cvp_ops, conf);
+	mgr = devm_fpga_mgr_create(&pdev->dev, conf->mgr_name,
+				   &altera_cvp_ops, conf);
 	if (!mgr) {
 		ret = -ENOMEM;
 		goto err_unmap;
@@ -463,10 +463,8 @@ static int altera_cvp_probe(struct pci_dev *pdev,
 	pci_set_drvdata(pdev, mgr);
 
 	ret = fpga_mgr_register(mgr);
-	if (ret) {
-		fpga_mgr_free(mgr);
+	if (ret)
 		goto err_unmap;
-	}
 
 	ret = driver_create_file(&altera_cvp_driver.driver,
 				 &driver_attr_chkcfg);
diff --git a/drivers/fpga/altera-fpga2sdram.c b/drivers/fpga/altera-fpga2sdram.c
index 23660ccd634b..a78e49c63c64 100644
--- a/drivers/fpga/altera-fpga2sdram.c
+++ b/drivers/fpga/altera-fpga2sdram.c
@@ -121,18 +121,16 @@ static int alt_fpga_bridge_probe(struct platform_device *pdev)
 	/* Get f2s bridge configuration saved in handoff register */
 	regmap_read(sysmgr, SYSMGR_ISWGRP_HANDOFF3, &priv->mask);
 
-	br = fpga_bridge_create(dev, F2S_BRIDGE_NAME,
-				&altera_fpga2sdram_br_ops, priv);
+	br = devm_fpga_bridge_create(dev, F2S_BRIDGE_NAME,
+				     &altera_fpga2sdram_br_ops, priv);
 	if (!br)
 		return -ENOMEM;
 
 	platform_set_drvdata(pdev, br);
 
 	ret = fpga_bridge_register(br);
-	if (ret) {
-		fpga_bridge_free(br);
+	if (ret)
 		return ret;
-	}
 
 	dev_info(dev, "driver initialized with handoff %08x\n", priv->mask);
 
diff --git a/drivers/fpga/altera-freeze-bridge.c b/drivers/fpga/altera-freeze-bridge.c
index ffd586c48ecf..dd58c4aea92e 100644
--- a/drivers/fpga/altera-freeze-bridge.c
+++ b/drivers/fpga/altera-freeze-bridge.c
@@ -213,7 +213,6 @@ static int altera_freeze_br_probe(struct platform_device *pdev)
 	struct fpga_bridge *br;
 	struct resource *res;
 	u32 status, revision;
-	int ret;
 
 	if (!np)
 		return -ENODEV;
@@ -245,20 +244,14 @@ static int altera_freeze_br_probe(struct platform_device *pdev)
 
 	priv->base_addr = base_addr;
 
-	br = fpga_bridge_create(dev, FREEZE_BRIDGE_NAME,
-				&altera_freeze_br_br_ops, priv);
+	br = devm_fpga_bridge_create(dev, FREEZE_BRIDGE_NAME,
+				     &altera_freeze_br_br_ops, priv);
 	if (!br)
 		return -ENOMEM;
 
 	platform_set_drvdata(pdev, br);
 
-	ret = fpga_bridge_register(br);
-	if (ret) {
-		fpga_bridge_free(br);
-		return ret;
-	}
-
-	return 0;
+	return fpga_bridge_register(br);
 }
 
 static int altera_freeze_br_remove(struct platform_device *pdev)
diff --git a/drivers/fpga/altera-hps2fpga.c b/drivers/fpga/altera-hps2fpga.c
index a974d3f60321..77b95f251821 100644
--- a/drivers/fpga/altera-hps2fpga.c
+++ b/drivers/fpga/altera-hps2fpga.c
@@ -180,7 +180,8 @@ static int alt_fpga_bridge_probe(struct platform_device *pdev)
 		}
 	}
 
-	br = fpga_bridge_create(dev, priv->name, &altera_hps2fpga_br_ops, priv);
+	br = devm_fpga_bridge_create(dev, priv->name,
+				     &altera_hps2fpga_br_ops, priv);
 	if (!br) {
 		ret = -ENOMEM;
 		goto err;
@@ -190,12 +191,10 @@ static int alt_fpga_bridge_probe(struct platform_device *pdev)
 
 	ret = fpga_bridge_register(br);
 	if (ret)
-		goto err_free;
+		goto err;
 
 	return 0;
 
-err_free:
-	fpga_bridge_free(br);
 err:
 	clk_disable_unprepare(priv->clk);
 
diff --git a/drivers/fpga/altera-pr-ip-core.c b/drivers/fpga/altera-pr-ip-core.c
index 65e0b6a2c031..a7a3bf0b5202 100644
--- a/drivers/fpga/altera-pr-ip-core.c
+++ b/drivers/fpga/altera-pr-ip-core.c
@@ -177,7 +177,6 @@ int alt_pr_register(struct device *dev, void __iomem *reg_base)
 {
 	struct alt_pr_priv *priv;
 	struct fpga_manager *mgr;
-	int ret;
 	u32 val;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -192,17 +191,13 @@ int alt_pr_register(struct device *dev, void __iomem *reg_base)
 		(val & ALT_PR_CSR_STATUS_MSK) >> ALT_PR_CSR_STATUS_SFT,
 		(int)(val & ALT_PR_CSR_PR_START));
 
-	mgr = fpga_mgr_create(dev, dev_name(dev), &alt_pr_ops, priv);
+	mgr = devm_fpga_mgr_create(dev, dev_name(dev), &alt_pr_ops, priv);
 	if (!mgr)
 		return -ENOMEM;
 
 	dev_set_drvdata(dev, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 EXPORT_SYMBOL_GPL(alt_pr_register);
 
diff --git a/drivers/fpga/altera-ps-spi.c b/drivers/fpga/altera-ps-spi.c
index 24b25c626036..33aafda50af5 100644
--- a/drivers/fpga/altera-ps-spi.c
+++ b/drivers/fpga/altera-ps-spi.c
@@ -239,7 +239,6 @@ static int altera_ps_probe(struct spi_device *spi)
 	struct altera_ps_conf *conf;
 	const struct of_device_id *of_id;
 	struct fpga_manager *mgr;
-	int ret;
 
 	conf = devm_kzalloc(&spi->dev, sizeof(*conf), GFP_KERNEL);
 	if (!conf)
@@ -275,18 +274,14 @@ static int altera_ps_probe(struct spi_device *spi)
 	snprintf(conf->mgr_name, sizeof(conf->mgr_name), "%s %s",
 		 dev_driver_string(&spi->dev), dev_name(&spi->dev));
 
-	mgr = fpga_mgr_create(&spi->dev, conf->mgr_name,
-			      &altera_ps_ops, conf);
+	mgr = devm_fpga_mgr_create(&spi->dev, conf->mgr_name,
+				   &altera_ps_ops, conf);
 	if (!mgr)
 		return -ENOMEM;
 
 	spi_set_drvdata(spi, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 
 static int altera_ps_remove(struct spi_device *spi)
diff --git a/drivers/fpga/dfl-afu-dma-region.c b/drivers/fpga/dfl-afu-dma-region.c
index 0e81d33af856..025aba3ea76c 100644
--- a/drivers/fpga/dfl-afu-dma-region.c
+++ b/drivers/fpga/dfl-afu-dma-region.c
@@ -70,7 +70,7 @@ static int afu_dma_adjust_locked_vm(struct device *dev, long npages, bool incr)
 	dev_dbg(dev, "[%d] RLIMIT_MEMLOCK %c%ld %ld/%ld%s\n", current->pid,
 		incr ? '+' : '-', npages << PAGE_SHIFT,
 		current->mm->locked_vm << PAGE_SHIFT, rlimit(RLIMIT_MEMLOCK),
-		ret ? "- execeeded" : "");
+		ret ? "- exceeded" : "");
 
 	up_write(&current->mm->mmap_sem);
 
diff --git a/drivers/fpga/dfl-fme-br.c b/drivers/fpga/dfl-fme-br.c
index 7cc041def8b3..3ff9f3a687ce 100644
--- a/drivers/fpga/dfl-fme-br.c
+++ b/drivers/fpga/dfl-fme-br.c
@@ -61,7 +61,6 @@ static int fme_br_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct fme_br_priv *priv;
 	struct fpga_bridge *br;
-	int ret;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -69,18 +68,14 @@ static int fme_br_probe(struct platform_device *pdev)
 
 	priv->pdata = dev_get_platdata(dev);
 
-	br = fpga_bridge_create(dev, "DFL FPGA FME Bridge",
-				&fme_bridge_ops, priv);
+	br = devm_fpga_bridge_create(dev, "DFL FPGA FME Bridge",
+				     &fme_bridge_ops, priv);
 	if (!br)
 		return -ENOMEM;
 
 	platform_set_drvdata(pdev, br);
 
-	ret = fpga_bridge_register(br);
-	if (ret)
-		fpga_bridge_free(br);
-
-	return ret;
+	return fpga_bridge_register(br);
 }
 
 static int fme_br_remove(struct platform_device *pdev)
diff --git a/drivers/fpga/dfl-fme-mgr.c b/drivers/fpga/dfl-fme-mgr.c
index b5ef405b6d88..76f37709dd1a 100644
--- a/drivers/fpga/dfl-fme-mgr.c
+++ b/drivers/fpga/dfl-fme-mgr.c
@@ -201,7 +201,7 @@ static int fme_mgr_write(struct fpga_manager *mgr,
 		}
 
 		if (count < 4) {
-			dev_err(dev, "Invaild PR bitstream size\n");
+			dev_err(dev, "Invalid PR bitstream size\n");
 			return -EINVAL;
 		}
 
@@ -287,7 +287,6 @@ static int fme_mgr_probe(struct platform_device *pdev)
 	struct fme_mgr_priv *priv;
 	struct fpga_manager *mgr;
 	struct resource *res;
-	int ret;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -309,19 +308,15 @@ static int fme_mgr_probe(struct platform_device *pdev)
 
 	fme_mgr_get_compat_id(priv->ioaddr, compat_id);
 
-	mgr = fpga_mgr_create(dev, "DFL FME FPGA Manager",
-			      &fme_mgr_ops, priv);
+	mgr = devm_fpga_mgr_create(dev, "DFL FME FPGA Manager",
+				   &fme_mgr_ops, priv);
 	if (!mgr)
 		return -ENOMEM;
 
 	mgr->compat_id = compat_id;
 	platform_set_drvdata(pdev, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 
 static int fme_mgr_remove(struct platform_device *pdev)
diff --git a/drivers/fpga/dfl-fme-region.c b/drivers/fpga/dfl-fme-region.c
index 51a5ac2293a7..ec134ec93f08 100644
--- a/drivers/fpga/dfl-fme-region.c
+++ b/drivers/fpga/dfl-fme-region.c
@@ -39,7 +39,7 @@ static int fme_region_probe(struct platform_device *pdev)
 	if (IS_ERR(mgr))
 		return -EPROBE_DEFER;
 
-	region = fpga_region_create(dev, mgr, fme_region_get_bridges);
+	region = devm_fpga_region_create(dev, mgr, fme_region_get_bridges);
 	if (!region) {
 		ret = -ENOMEM;
 		goto eprobe_mgr_put;
@@ -51,14 +51,12 @@ static int fme_region_probe(struct platform_device *pdev)
 
 	ret = fpga_region_register(region);
 	if (ret)
-		goto region_free;
+		goto eprobe_mgr_put;
 
 	dev_dbg(dev, "DFL FME FPGA Region probed\n");
 
 	return 0;
 
-region_free:
-	fpga_region_free(region);
 eprobe_mgr_put:
 	fpga_mgr_put(mgr);
 	return ret;
diff --git a/drivers/fpga/dfl.c b/drivers/fpga/dfl.c
index a9b521bccb06..2c09e502e721 100644
--- a/drivers/fpga/dfl.c
+++ b/drivers/fpga/dfl.c
@@ -899,7 +899,7 @@ dfl_fpga_feature_devs_enumerate(struct dfl_fpga_enum_info *info)
 	if (!cdev)
 		return ERR_PTR(-ENOMEM);
 
-	cdev->region = fpga_region_create(info->dev, NULL, NULL);
+	cdev->region = devm_fpga_region_create(info->dev, NULL, NULL);
 	if (!cdev->region) {
 		ret = -ENOMEM;
 		goto free_cdev_exit;
@@ -911,7 +911,7 @@ dfl_fpga_feature_devs_enumerate(struct dfl_fpga_enum_info *info)
 
 	ret = fpga_region_register(cdev->region);
 	if (ret)
-		goto free_region_exit;
+		goto free_cdev_exit;
 
 	/* create and init build info for enumeration */
 	binfo = devm_kzalloc(info->dev, sizeof(*binfo), GFP_KERNEL);
@@ -942,8 +942,6 @@ dfl_fpga_feature_devs_enumerate(struct dfl_fpga_enum_info *info)
 
 unregister_region_exit:
 	fpga_region_unregister(cdev->region);
-free_region_exit:
-	fpga_region_free(cdev->region);
 free_cdev_exit:
 	devm_kfree(info->dev, cdev);
 	return ERR_PTR(ret);
diff --git a/drivers/fpga/fpga-bridge.c b/drivers/fpga/fpga-bridge.c
index c983dac97501..80bd8f1b2aa6 100644
--- a/drivers/fpga/fpga-bridge.c
+++ b/drivers/fpga/fpga-bridge.c
@@ -324,6 +324,9 @@ ATTRIBUTE_GROUPS(fpga_bridge);
  * @br_ops:	pointer to structure of fpga bridge ops
  * @priv:	FPGA bridge private data
  *
+ * The caller of this function is responsible for freeing the bridge with
+ * fpga_bridge_free().  Using devm_fpga_bridge_create() instead is recommended.
+ *
  * Return: struct fpga_bridge or NULL
  */
 struct fpga_bridge *fpga_bridge_create(struct device *dev, const char *name,
@@ -378,8 +381,8 @@ error_kfree:
 EXPORT_SYMBOL_GPL(fpga_bridge_create);
 
 /**
- * fpga_bridge_free - free a fpga bridge and its id
- * @bridge:	FPGA bridge struct created by fpga_bridge_create
+ * fpga_bridge_free - free a fpga bridge created by fpga_bridge_create()
+ * @bridge:	FPGA bridge struct
  */
 void fpga_bridge_free(struct fpga_bridge *bridge)
 {
@@ -388,9 +391,56 @@ void fpga_bridge_free(struct fpga_bridge *bridge)
 }
 EXPORT_SYMBOL_GPL(fpga_bridge_free);
 
+static void devm_fpga_bridge_release(struct device *dev, void *res)
+{
+	struct fpga_bridge *bridge = *(struct fpga_bridge **)res;
+
+	fpga_bridge_free(bridge);
+}
+
 /**
- * fpga_bridge_register - register a fpga bridge
- * @bridge:	FPGA bridge struct created by fpga_bridge_create
+ * devm_fpga_bridge_create - create and init a managed struct fpga_bridge
+ * @dev:	FPGA bridge device from pdev
+ * @name:	FPGA bridge name
+ * @br_ops:	pointer to structure of fpga bridge ops
+ * @priv:	FPGA bridge private data
+ *
+ * This function is intended for use in a FPGA bridge driver's probe function.
+ * After the bridge driver creates the struct with devm_fpga_bridge_create(), it
+ * should register the bridge with fpga_bridge_register().  The bridge driver's
+ * remove function should call fpga_bridge_unregister().  The bridge struct
+ * allocated with this function will be freed automatically on driver detach.
+ * This includes the case of a probe function returning error before calling
+ * fpga_bridge_register(), the struct will still get cleaned up.
+ *
+ *  Return: struct fpga_bridge or NULL
+ */
+struct fpga_bridge
+*devm_fpga_bridge_create(struct device *dev, const char *name,
+			 const struct fpga_bridge_ops *br_ops, void *priv)
+{
+	struct fpga_bridge **ptr, *bridge;
+
+	ptr = devres_alloc(devm_fpga_bridge_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	bridge = fpga_bridge_create(dev, name, br_ops, priv);
+	if (!bridge) {
+		devres_free(ptr);
+	} else {
+		*ptr = bridge;
+		devres_add(dev, ptr);
+	}
+
+	return bridge;
+}
+EXPORT_SYMBOL_GPL(devm_fpga_bridge_create);
+
+/**
+ * fpga_bridge_register - register a FPGA bridge
+ *
+ * @bridge: FPGA bridge struct
  *
  * Return: 0 for success, error code otherwise.
  */
@@ -412,8 +462,11 @@ int fpga_bridge_register(struct fpga_bridge *bridge)
 EXPORT_SYMBOL_GPL(fpga_bridge_register);
 
 /**
- * fpga_bridge_unregister - unregister and free a fpga bridge
- * @bridge:	FPGA bridge struct created by fpga_bridge_create
+ * fpga_bridge_unregister - unregister a FPGA bridge
+ *
+ * @bridge: FPGA bridge struct
+ *
+ * This function is intended for use in a FPGA bridge driver's remove function.
  */
 void fpga_bridge_unregister(struct fpga_bridge *bridge)
 {
@@ -430,9 +483,6 @@ EXPORT_SYMBOL_GPL(fpga_bridge_unregister);
 
 static void fpga_bridge_dev_release(struct device *dev)
 {
-	struct fpga_bridge *bridge = to_fpga_bridge(dev);
-
-	fpga_bridge_free(bridge);
 }
 
 static int __init fpga_bridge_dev_init(void)
diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c
index a41b07e37884..c3866816456a 100644
--- a/drivers/fpga/fpga-mgr.c
+++ b/drivers/fpga/fpga-mgr.c
@@ -558,6 +558,9 @@ EXPORT_SYMBOL_GPL(fpga_mgr_unlock);
  * @mops:	pointer to structure of fpga manager ops
  * @priv:	fpga manager private data
  *
+ * The caller of this function is responsible for freeing the struct with
+ * fpga_mgr_free().  Using devm_fpga_mgr_create() instead is recommended.
+ *
  * Return: pointer to struct fpga_manager or NULL
  */
 struct fpga_manager *fpga_mgr_create(struct device *dev, const char *name,
@@ -618,8 +621,8 @@ error_kfree:
 EXPORT_SYMBOL_GPL(fpga_mgr_create);
 
 /**
- * fpga_mgr_free - deallocate a FPGA manager
- * @mgr:	fpga manager struct created by fpga_mgr_create
+ * fpga_mgr_free - free a FPGA manager created with fpga_mgr_create()
+ * @mgr:	fpga manager struct
  */
 void fpga_mgr_free(struct fpga_manager *mgr)
 {
@@ -628,9 +631,55 @@ void fpga_mgr_free(struct fpga_manager *mgr)
 }
 EXPORT_SYMBOL_GPL(fpga_mgr_free);
 
+static void devm_fpga_mgr_release(struct device *dev, void *res)
+{
+	struct fpga_manager *mgr = *(struct fpga_manager **)res;
+
+	fpga_mgr_free(mgr);
+}
+
+/**
+ * devm_fpga_mgr_create - create and initialize a managed FPGA manager struct
+ * @dev:	fpga manager device from pdev
+ * @name:	fpga manager name
+ * @mops:	pointer to structure of fpga manager ops
+ * @priv:	fpga manager private data
+ *
+ * This function is intended for use in a FPGA manager driver's probe function.
+ * After the manager driver creates the manager struct with
+ * devm_fpga_mgr_create(), it should register it with fpga_mgr_register().  The
+ * manager driver's remove function should call fpga_mgr_unregister().  The
+ * manager struct allocated with this function will be freed automatically on
+ * driver detach.  This includes the case of a probe function returning error
+ * before calling fpga_mgr_register(), the struct will still get cleaned up.
+ *
+ * Return: pointer to struct fpga_manager or NULL
+ */
+struct fpga_manager *devm_fpga_mgr_create(struct device *dev, const char *name,
+					  const struct fpga_manager_ops *mops,
+					  void *priv)
+{
+	struct fpga_manager **ptr, *mgr;
+
+	ptr = devres_alloc(devm_fpga_mgr_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	mgr = fpga_mgr_create(dev, name, mops, priv);
+	if (!mgr) {
+		devres_free(ptr);
+	} else {
+		*ptr = mgr;
+		devres_add(dev, ptr);
+	}
+
+	return mgr;
+}
+EXPORT_SYMBOL_GPL(devm_fpga_mgr_create);
+
 /**
  * fpga_mgr_register - register a FPGA manager
- * @mgr:	fpga manager struct created by fpga_mgr_create
+ * @mgr: fpga manager struct
  *
  * Return: 0 on success, negative error code otherwise.
  */
@@ -661,8 +710,10 @@ error_device:
 EXPORT_SYMBOL_GPL(fpga_mgr_register);
 
 /**
- * fpga_mgr_unregister - unregister and free a FPGA manager
- * @mgr:	fpga manager struct
+ * fpga_mgr_unregister - unregister a FPGA manager
+ * @mgr: fpga manager struct
+ *
+ * This function is intended for use in a FPGA manager driver's remove function.
  */
 void fpga_mgr_unregister(struct fpga_manager *mgr)
 {
@@ -681,9 +732,6 @@ EXPORT_SYMBOL_GPL(fpga_mgr_unregister);
 
 static void fpga_mgr_dev_release(struct device *dev)
 {
-	struct fpga_manager *mgr = to_fpga_manager(dev);
-
-	fpga_mgr_free(mgr);
 }
 
 static int __init fpga_mgr_class_init(void)
diff --git a/drivers/fpga/fpga-region.c b/drivers/fpga/fpga-region.c
index 0d65220d5ec5..bde5a9d460c5 100644
--- a/drivers/fpga/fpga-region.c
+++ b/drivers/fpga/fpga-region.c
@@ -185,6 +185,10 @@ ATTRIBUTE_GROUPS(fpga_region);
  * @mgr: manager that programs this region
  * @get_bridges: optional function to get bridges to a list
  *
+ * The caller of this function is responsible for freeing the resulting region
+ * struct with fpga_region_free().  Using devm_fpga_region_create() instead is
+ * recommended.
+ *
  * Return: struct fpga_region or NULL
  */
 struct fpga_region
@@ -230,8 +234,8 @@ err_free:
 EXPORT_SYMBOL_GPL(fpga_region_create);
 
 /**
- * fpga_region_free - free a struct fpga_region
- * @region: FPGA region created by fpga_region_create
+ * fpga_region_free - free a FPGA region created by fpga_region_create()
+ * @region: FPGA region
  */
 void fpga_region_free(struct fpga_region *region)
 {
@@ -240,21 +244,69 @@ void fpga_region_free(struct fpga_region *region)
 }
 EXPORT_SYMBOL_GPL(fpga_region_free);
 
+static void devm_fpga_region_release(struct device *dev, void *res)
+{
+	struct fpga_region *region = *(struct fpga_region **)res;
+
+	fpga_region_free(region);
+}
+
+/**
+ * devm_fpga_region_create - create and initialize a managed FPGA region struct
+ * @dev: device parent
+ * @mgr: manager that programs this region
+ * @get_bridges: optional function to get bridges to a list
+ *
+ * This function is intended for use in a FPGA region driver's probe function.
+ * After the region driver creates the region struct with
+ * devm_fpga_region_create(), it should register it with fpga_region_register().
+ * The region driver's remove function should call fpga_region_unregister().
+ * The region struct allocated with this function will be freed automatically on
+ * driver detach.  This includes the case of a probe function returning error
+ * before calling fpga_region_register(), the struct will still get cleaned up.
+ *
+ * Return: struct fpga_region or NULL
+ */
+struct fpga_region
+*devm_fpga_region_create(struct device *dev,
+			 struct fpga_manager *mgr,
+			 int (*get_bridges)(struct fpga_region *))
+{
+	struct fpga_region **ptr, *region;
+
+	ptr = devres_alloc(devm_fpga_region_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	region = fpga_region_create(dev, mgr, get_bridges);
+	if (!region) {
+		devres_free(ptr);
+	} else {
+		*ptr = region;
+		devres_add(dev, ptr);
+	}
+
+	return region;
+}
+EXPORT_SYMBOL_GPL(devm_fpga_region_create);
+
 /**
  * fpga_region_register - register a FPGA region
- * @region: FPGA region created by fpga_region_create
+ * @region: FPGA region
+ *
  * Return: 0 or -errno
  */
 int fpga_region_register(struct fpga_region *region)
 {
 	return device_add(&region->dev);
-
 }
 EXPORT_SYMBOL_GPL(fpga_region_register);
 
 /**
- * fpga_region_unregister - unregister and free a FPGA region
+ * fpga_region_unregister - unregister a FPGA region
  * @region: FPGA region
+ *
+ * This function is intended for use in a FPGA region driver's remove function.
  */
 void fpga_region_unregister(struct fpga_region *region)
 {
@@ -264,9 +316,6 @@ EXPORT_SYMBOL_GPL(fpga_region_unregister);
 
 static void fpga_region_dev_release(struct device *dev)
 {
-	struct fpga_region *region = to_fpga_region(dev);
-
-	fpga_region_free(region);
 }
 
 /**
diff --git a/drivers/fpga/ice40-spi.c b/drivers/fpga/ice40-spi.c
index 5981c7ee7a7d..6154661b8f76 100644
--- a/drivers/fpga/ice40-spi.c
+++ b/drivers/fpga/ice40-spi.c
@@ -175,18 +175,14 @@ static int ice40_fpga_probe(struct spi_device *spi)
 		return ret;
 	}
 
-	mgr = fpga_mgr_create(dev, "Lattice iCE40 FPGA Manager",
-			      &ice40_fpga_ops, priv);
+	mgr = devm_fpga_mgr_create(dev, "Lattice iCE40 FPGA Manager",
+				   &ice40_fpga_ops, priv);
 	if (!mgr)
 		return -ENOMEM;
 
 	spi_set_drvdata(spi, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 
 static int ice40_fpga_remove(struct spi_device *spi)
diff --git a/drivers/fpga/machxo2-spi.c b/drivers/fpga/machxo2-spi.c
index a582e0000c97..4d8a87641587 100644
--- a/drivers/fpga/machxo2-spi.c
+++ b/drivers/fpga/machxo2-spi.c
@@ -356,25 +356,20 @@ static int machxo2_spi_probe(struct spi_device *spi)
 {
 	struct device *dev = &spi->dev;
 	struct fpga_manager *mgr;
-	int ret;
 
 	if (spi->max_speed_hz > MACHXO2_MAX_SPEED) {
 		dev_err(dev, "Speed is too high\n");
 		return -EINVAL;
 	}
 
-	mgr = fpga_mgr_create(dev, "Lattice MachXO2 SPI FPGA Manager",
-			      &machxo2_ops, spi);
+	mgr = devm_fpga_mgr_create(dev, "Lattice MachXO2 SPI FPGA Manager",
+				   &machxo2_ops, spi);
 	if (!mgr)
 		return -ENOMEM;
 
 	spi_set_drvdata(spi, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 
 static int machxo2_spi_remove(struct spi_device *spi)
diff --git a/drivers/fpga/of-fpga-region.c b/drivers/fpga/of-fpga-region.c
index 052a1342ab7e..122286fd255a 100644
--- a/drivers/fpga/of-fpga-region.c
+++ b/drivers/fpga/of-fpga-region.c
@@ -410,7 +410,7 @@ static int of_fpga_region_probe(struct platform_device *pdev)
 	if (IS_ERR(mgr))
 		return -EPROBE_DEFER;
 
-	region = fpga_region_create(dev, mgr, of_fpga_region_get_bridges);
+	region = devm_fpga_region_create(dev, mgr, of_fpga_region_get_bridges);
 	if (!region) {
 		ret = -ENOMEM;
 		goto eprobe_mgr_put;
@@ -418,7 +418,7 @@ static int of_fpga_region_probe(struct platform_device *pdev)
 
 	ret = fpga_region_register(region);
 	if (ret)
-		goto eprobe_free;
+		goto eprobe_mgr_put;
 
 	of_platform_populate(np, fpga_region_of_match, NULL, &region->dev);
 	dev_set_drvdata(dev, region);
@@ -427,8 +427,6 @@ static int of_fpga_region_probe(struct platform_device *pdev)
 
 	return 0;
 
-eprobe_free:
-	fpga_region_free(region);
 eprobe_mgr_put:
 	fpga_mgr_put(mgr);
 	return ret;
diff --git a/drivers/fpga/socfpga-a10.c b/drivers/fpga/socfpga-a10.c
index be30c48eb6e4..573d88bdf730 100644
--- a/drivers/fpga/socfpga-a10.c
+++ b/drivers/fpga/socfpga-a10.c
@@ -508,8 +508,8 @@ static int socfpga_a10_fpga_probe(struct platform_device *pdev)
 		return -EBUSY;
 	}
 
-	mgr = fpga_mgr_create(dev, "SoCFPGA Arria10 FPGA Manager",
-			      &socfpga_a10_fpga_mgr_ops, priv);
+	mgr = devm_fpga_mgr_create(dev, "SoCFPGA Arria10 FPGA Manager",
+				   &socfpga_a10_fpga_mgr_ops, priv);
 	if (!mgr)
 		return -ENOMEM;
 
@@ -517,7 +517,6 @@ static int socfpga_a10_fpga_probe(struct platform_device *pdev)
 
 	ret = fpga_mgr_register(mgr);
 	if (ret) {
-		fpga_mgr_free(mgr);
 		clk_disable_unprepare(priv->clk);
 		return ret;
 	}
diff --git a/drivers/fpga/socfpga.c b/drivers/fpga/socfpga.c
index 959d71f26896..4a8a2fcd4e6c 100644
--- a/drivers/fpga/socfpga.c
+++ b/drivers/fpga/socfpga.c
@@ -571,18 +571,14 @@ static int socfpga_fpga_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	mgr = fpga_mgr_create(dev, "Altera SOCFPGA FPGA Manager",
-			      &socfpga_fpga_ops, priv);
+	mgr = devm_fpga_mgr_create(dev, "Altera SOCFPGA FPGA Manager",
+				   &socfpga_fpga_ops, priv);
 	if (!mgr)
 		return -ENOMEM;
 
 	platform_set_drvdata(pdev, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 
 static int socfpga_fpga_remove(struct platform_device *pdev)
diff --git a/drivers/fpga/ts73xx-fpga.c b/drivers/fpga/ts73xx-fpga.c
index 08efd1895b1b..dc22a5842609 100644
--- a/drivers/fpga/ts73xx-fpga.c
+++ b/drivers/fpga/ts73xx-fpga.c
@@ -118,7 +118,6 @@ static int ts73xx_fpga_probe(struct platform_device *pdev)
 	struct ts73xx_fpga_priv *priv;
 	struct fpga_manager *mgr;
 	struct resource *res;
-	int ret;
 
 	priv = devm_kzalloc(kdev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -133,18 +132,14 @@ static int ts73xx_fpga_probe(struct platform_device *pdev)
 		return PTR_ERR(priv->io_base);
 	}
 
-	mgr = fpga_mgr_create(kdev, "TS-73xx FPGA Manager",
-			      &ts73xx_fpga_ops, priv);
+	mgr = devm_fpga_mgr_create(kdev, "TS-73xx FPGA Manager",
+				   &ts73xx_fpga_ops, priv);
 	if (!mgr)
 		return -ENOMEM;
 
 	platform_set_drvdata(pdev, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 
 static int ts73xx_fpga_remove(struct platform_device *pdev)
diff --git a/drivers/fpga/xilinx-pr-decoupler.c b/drivers/fpga/xilinx-pr-decoupler.c
index 07ba1539e82c..641036135207 100644
--- a/drivers/fpga/xilinx-pr-decoupler.c
+++ b/drivers/fpga/xilinx-pr-decoupler.c
@@ -121,8 +121,8 @@ static int xlnx_pr_decoupler_probe(struct platform_device *pdev)
 
 	clk_disable(priv->clk);
 
-	br = fpga_bridge_create(&pdev->dev, "Xilinx PR Decoupler",
-				&xlnx_pr_decoupler_br_ops, priv);
+	br = devm_fpga_bridge_create(&pdev->dev, "Xilinx PR Decoupler",
+				     &xlnx_pr_decoupler_br_ops, priv);
 	if (!br) {
 		err = -ENOMEM;
 		goto err_clk;
diff --git a/drivers/fpga/xilinx-spi.c b/drivers/fpga/xilinx-spi.c
index 8d1945966533..469486be20c4 100644
--- a/drivers/fpga/xilinx-spi.c
+++ b/drivers/fpga/xilinx-spi.c
@@ -144,7 +144,6 @@ static int xilinx_spi_probe(struct spi_device *spi)
 {
 	struct xilinx_spi_conf *conf;
 	struct fpga_manager *mgr;
-	int ret;
 
 	conf = devm_kzalloc(&spi->dev, sizeof(*conf), GFP_KERNEL);
 	if (!conf)
@@ -167,18 +166,15 @@ static int xilinx_spi_probe(struct spi_device *spi)
 		return PTR_ERR(conf->done);
 	}
 
-	mgr = fpga_mgr_create(&spi->dev, "Xilinx Slave Serial FPGA Manager",
-			      &xilinx_spi_ops, conf);
+	mgr = devm_fpga_mgr_create(&spi->dev,
+				   "Xilinx Slave Serial FPGA Manager",
+				   &xilinx_spi_ops, conf);
 	if (!mgr)
 		return -ENOMEM;
 
 	spi_set_drvdata(spi, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 
 static int xilinx_spi_remove(struct spi_device *spi)
diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c
index 3110e00121ca..bb82efeebb9d 100644
--- a/drivers/fpga/zynq-fpga.c
+++ b/drivers/fpga/zynq-fpga.c
@@ -614,8 +614,8 @@ static int zynq_fpga_probe(struct platform_device *pdev)
 
 	clk_disable(priv->clk);
 
-	mgr = fpga_mgr_create(dev, "Xilinx Zynq FPGA Manager",
-			      &zynq_fpga_ops, priv);
+	mgr = devm_fpga_mgr_create(dev, "Xilinx Zynq FPGA Manager",
+				   &zynq_fpga_ops, priv);
 	if (!mgr)
 		return -ENOMEM;
 
@@ -624,7 +624,6 @@ static int zynq_fpga_probe(struct platform_device *pdev)
 	err = fpga_mgr_register(mgr);
 	if (err) {
 		dev_err(dev, "unable to register FPGA manager\n");
-		fpga_mgr_free(mgr);
 		clk_unprepare(priv->clk);
 		return err;
 	}
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c
index 2d45d1dd9554..643f5edd68fe 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c
@@ -1446,8 +1446,7 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi,
 	}
 
 	/* The CEC module handles HDMI hotplug detection */
-	cec_np = of_find_compatible_node(np->parent, NULL,
-					 "mediatek,mt8173-cec");
+	cec_np = of_get_compatible_child(np->parent, "mediatek,mt8173-cec");
 	if (!cec_np) {
 		dev_err(dev, "Failed to find CEC node\n");
 		return -EINVAL;
@@ -1457,8 +1456,10 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi,
 	if (!cec_pdev) {
 		dev_err(hdmi->dev, "Waiting for CEC device %pOF\n",
 			cec_np);
+		of_node_put(cec_np);
 		return -EPROBE_DEFER;
 	}
+	of_node_put(cec_np);
 	hdmi->cec_dev = &cec_pdev->dev;
 
 	/*
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index da1363a0c54d..93d70f4a2154 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -633,8 +633,7 @@ static int adreno_get_legacy_pwrlevels(struct device *dev)
 	struct device_node *child, *node;
 	int ret;
 
-	node = of_find_compatible_node(dev->of_node, NULL,
-		"qcom,gpu-pwrlevels");
+	node = of_get_compatible_child(dev->of_node, "qcom,gpu-pwrlevels");
 	if (!node) {
 		dev_err(dev, "Could not find the GPU powerlevels\n");
 		return -ENXIO;
@@ -655,6 +654,8 @@ static int adreno_get_legacy_pwrlevels(struct device *dev)
 			dev_pm_opp_add(dev, val, 0);
 	}
 
+	of_node_put(node);
+
 	return 0;
 }
 
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 741857d80da1..de8193f3b838 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -79,85 +79,96 @@ void vmbus_setevent(struct vmbus_channel *channel)
 }
 EXPORT_SYMBOL_GPL(vmbus_setevent);
 
-/*
- * vmbus_open - Open the specified channel.
- */
-int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
-		     u32 recv_ringbuffer_size, void *userdata, u32 userdatalen,
-		     void (*onchannelcallback)(void *context), void *context)
+/* vmbus_free_ring - drop mapping of ring buffer */
+void vmbus_free_ring(struct vmbus_channel *channel)
+{
+	hv_ringbuffer_cleanup(&channel->outbound);
+	hv_ringbuffer_cleanup(&channel->inbound);
+
+	if (channel->ringbuffer_page) {
+		__free_pages(channel->ringbuffer_page,
+			     get_order(channel->ringbuffer_pagecount
+				       << PAGE_SHIFT));
+		channel->ringbuffer_page = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(vmbus_free_ring);
+
+/* vmbus_alloc_ring - allocate and map pages for ring buffer */
+int vmbus_alloc_ring(struct vmbus_channel *newchannel,
+		     u32 send_size, u32 recv_size)
+{
+	struct page *page;
+	int order;
+
+	if (send_size % PAGE_SIZE || recv_size % PAGE_SIZE)
+		return -EINVAL;
+
+	/* Allocate the ring buffer */
+	order = get_order(send_size + recv_size);
+	page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
+				GFP_KERNEL|__GFP_ZERO, order);
+
+	if (!page)
+		page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order);
+
+	if (!page)
+		return -ENOMEM;
+
+	newchannel->ringbuffer_page = page;
+	newchannel->ringbuffer_pagecount = (send_size + recv_size) >> PAGE_SHIFT;
+	newchannel->ringbuffer_send_offset = send_size >> PAGE_SHIFT;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vmbus_alloc_ring);
+
+static int __vmbus_open(struct vmbus_channel *newchannel,
+		       void *userdata, u32 userdatalen,
+		       void (*onchannelcallback)(void *context), void *context)
 {
 	struct vmbus_channel_open_channel *open_msg;
 	struct vmbus_channel_msginfo *open_info = NULL;
+	struct page *page = newchannel->ringbuffer_page;
+	u32 send_pages, recv_pages;
 	unsigned long flags;
-	int ret, err = 0;
-	struct page *page;
+	int err;
 
-	if (send_ringbuffer_size % PAGE_SIZE ||
-	    recv_ringbuffer_size % PAGE_SIZE)
+	if (userdatalen > MAX_USER_DEFINED_BYTES)
 		return -EINVAL;
 
+	send_pages = newchannel->ringbuffer_send_offset;
+	recv_pages = newchannel->ringbuffer_pagecount - send_pages;
+
 	spin_lock_irqsave(&newchannel->lock, flags);
-	if (newchannel->state == CHANNEL_OPEN_STATE) {
-		newchannel->state = CHANNEL_OPENING_STATE;
-	} else {
+	if (newchannel->state != CHANNEL_OPEN_STATE) {
 		spin_unlock_irqrestore(&newchannel->lock, flags);
 		return -EINVAL;
 	}
 	spin_unlock_irqrestore(&newchannel->lock, flags);
 
+	newchannel->state = CHANNEL_OPENING_STATE;
 	newchannel->onchannel_callback = onchannelcallback;
 	newchannel->channel_callback_context = context;
 
-	/* Allocate the ring buffer */
-	page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
-				GFP_KERNEL|__GFP_ZERO,
-				get_order(send_ringbuffer_size +
-				recv_ringbuffer_size));
-
-	if (!page)
-		page = alloc_pages(GFP_KERNEL|__GFP_ZERO,
-				   get_order(send_ringbuffer_size +
-					     recv_ringbuffer_size));
-
-	if (!page) {
-		err = -ENOMEM;
-		goto error_set_chnstate;
-	}
-
-	newchannel->ringbuffer_pages = page_address(page);
-	newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
-					   recv_ringbuffer_size) >> PAGE_SHIFT;
-
-	ret = hv_ringbuffer_init(&newchannel->outbound, page,
-				 send_ringbuffer_size >> PAGE_SHIFT);
-
-	if (ret != 0) {
-		err = ret;
-		goto error_free_pages;
-	}
-
-	ret = hv_ringbuffer_init(&newchannel->inbound,
-				 &page[send_ringbuffer_size >> PAGE_SHIFT],
-				 recv_ringbuffer_size >> PAGE_SHIFT);
-	if (ret != 0) {
-		err = ret;
-		goto error_free_pages;
-	}
+	err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages);
+	if (err)
+		goto error_clean_ring;
 
+	err = hv_ringbuffer_init(&newchannel->inbound,
+				 &page[send_pages], recv_pages);
+	if (err)
+		goto error_clean_ring;
 
 	/* Establish the gpadl for the ring buffer */
 	newchannel->ringbuffer_gpadlhandle = 0;
 
-	ret = vmbus_establish_gpadl(newchannel,
-				    page_address(page),
-				    send_ringbuffer_size +
-				    recv_ringbuffer_size,
+	err = vmbus_establish_gpadl(newchannel,
+				    page_address(newchannel->ringbuffer_page),
+				    (send_pages + recv_pages) << PAGE_SHIFT,
 				    &newchannel->ringbuffer_gpadlhandle);
-
-	if (ret != 0) {
-		err = ret;
-		goto error_free_pages;
-	}
+	if (err)
+		goto error_clean_ring;
 
 	/* Create and init the channel open message */
 	open_info = kmalloc(sizeof(*open_info) +
@@ -176,15 +187,9 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 	open_msg->openid = newchannel->offermsg.child_relid;
 	open_msg->child_relid = newchannel->offermsg.child_relid;
 	open_msg->ringbuffer_gpadlhandle = newchannel->ringbuffer_gpadlhandle;
-	open_msg->downstream_ringbuffer_pageoffset = send_ringbuffer_size >>
-						  PAGE_SHIFT;
+	open_msg->downstream_ringbuffer_pageoffset = newchannel->ringbuffer_send_offset;
 	open_msg->target_vp = newchannel->target_vp;
 
-	if (userdatalen > MAX_USER_DEFINED_BYTES) {
-		err = -EINVAL;
-		goto error_free_gpadl;
-	}
-
 	if (userdatalen)
 		memcpy(open_msg->userdata, userdata, userdatalen);
 
@@ -195,18 +200,16 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 
 	if (newchannel->rescind) {
 		err = -ENODEV;
-		goto error_free_gpadl;
+		goto error_free_info;
 	}
 
-	ret = vmbus_post_msg(open_msg,
+	err = vmbus_post_msg(open_msg,
 			     sizeof(struct vmbus_channel_open_channel), true);
 
-	trace_vmbus_open(open_msg, ret);
+	trace_vmbus_open(open_msg, err);
 
-	if (ret != 0) {
-		err = ret;
+	if (err != 0)
 		goto error_clean_msglist;
-	}
 
 	wait_for_completion(&open_info->waitevent);
 
@@ -216,12 +219,12 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 
 	if (newchannel->rescind) {
 		err = -ENODEV;
-		goto error_free_gpadl;
+		goto error_free_info;
 	}
 
 	if (open_info->response.open_result.status) {
 		err = -EAGAIN;
-		goto error_free_gpadl;
+		goto error_free_info;
 	}
 
 	newchannel->state = CHANNEL_OPENED_STATE;
@@ -232,19 +235,50 @@ error_clean_msglist:
 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 	list_del(&open_info->msglistentry);
 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
-
+error_free_info:
+	kfree(open_info);
 error_free_gpadl:
 	vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
-	kfree(open_info);
-error_free_pages:
+	newchannel->ringbuffer_gpadlhandle = 0;
+error_clean_ring:
 	hv_ringbuffer_cleanup(&newchannel->outbound);
 	hv_ringbuffer_cleanup(&newchannel->inbound);
-	__free_pages(page,
-		     get_order(send_ringbuffer_size + recv_ringbuffer_size));
-error_set_chnstate:
 	newchannel->state = CHANNEL_OPEN_STATE;
 	return err;
 }
+
+/*
+ * vmbus_connect_ring - Open the channel but reuse ring buffer
+ */
+int vmbus_connect_ring(struct vmbus_channel *newchannel,
+		       void (*onchannelcallback)(void *context), void *context)
+{
+	return  __vmbus_open(newchannel, NULL, 0, onchannelcallback, context);
+}
+EXPORT_SYMBOL_GPL(vmbus_connect_ring);
+
+/*
+ * vmbus_open - Open the specified channel.
+ */
+int vmbus_open(struct vmbus_channel *newchannel,
+	       u32 send_ringbuffer_size, u32 recv_ringbuffer_size,
+	       void *userdata, u32 userdatalen,
+	       void (*onchannelcallback)(void *context), void *context)
+{
+	int err;
+
+	err = vmbus_alloc_ring(newchannel, send_ringbuffer_size,
+			       recv_ringbuffer_size);
+	if (err)
+		return err;
+
+	err = __vmbus_open(newchannel, userdata, userdatalen,
+			   onchannelcallback, context);
+	if (err)
+		vmbus_free_ring(newchannel);
+
+	return err;
+}
 EXPORT_SYMBOL_GPL(vmbus_open);
 
 /* Used for Hyper-V Socket: a guest client's connect() to the host */
@@ -612,10 +646,8 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 	 * in Hyper-V Manager), the driver's remove() invokes vmbus_close():
 	 * here we should skip most of the below cleanup work.
 	 */
-	if (channel->state != CHANNEL_OPENED_STATE) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (channel->state != CHANNEL_OPENED_STATE)
+		return -EINVAL;
 
 	channel->state = CHANNEL_OPEN_STATE;
 
@@ -637,11 +669,10 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 		 * If we failed to post the close msg,
 		 * it is perhaps better to leak memory.
 		 */
-		goto out;
 	}
 
 	/* Tear down the gpadl for the channel's ring buffer */
-	if (channel->ringbuffer_gpadlhandle) {
+	else if (channel->ringbuffer_gpadlhandle) {
 		ret = vmbus_teardown_gpadl(channel,
 					   channel->ringbuffer_gpadlhandle);
 		if (ret) {
@@ -650,74 +681,78 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 			 * If we failed to teardown gpadl,
 			 * it is perhaps better to leak memory.
 			 */
-			goto out;
 		}
-	}
-
-	/* Cleanup the ring buffers for this channel */
-	hv_ringbuffer_cleanup(&channel->outbound);
-	hv_ringbuffer_cleanup(&channel->inbound);
 
-	free_pages((unsigned long)channel->ringbuffer_pages,
-		get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
+		channel->ringbuffer_gpadlhandle = 0;
+	}
 
-out:
 	return ret;
 }
 
-/*
- * vmbus_close - Close the specified channel
- */
-void vmbus_close(struct vmbus_channel *channel)
+/* disconnect ring - close all channels */
+int vmbus_disconnect_ring(struct vmbus_channel *channel)
 {
-	struct list_head *cur, *tmp;
-	struct vmbus_channel *cur_channel;
+	struct vmbus_channel *cur_channel, *tmp;
+	unsigned long flags;
+	LIST_HEAD(list);
+	int ret;
 
-	if (channel->primary_channel != NULL) {
-		/*
-		 * We will only close sub-channels when
-		 * the primary is closed.
-		 */
-		return;
-	}
-	/*
-	 * Close all the sub-channels first and then close the
-	 * primary channel.
-	 */
-	list_for_each_safe(cur, tmp, &channel->sc_list) {
-		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
-		if (cur_channel->rescind) {
+	if (channel->primary_channel != NULL)
+		return -EINVAL;
+
+	/* Snapshot the list of subchannels */
+	spin_lock_irqsave(&channel->lock, flags);
+	list_splice_init(&channel->sc_list, &list);
+	channel->num_sc = 0;
+	spin_unlock_irqrestore(&channel->lock, flags);
+
+	list_for_each_entry_safe(cur_channel, tmp, &list, sc_list) {
+		if (cur_channel->rescind)
 			wait_for_completion(&cur_channel->rescind_event);
-			mutex_lock(&vmbus_connection.channel_mutex);
-			vmbus_close_internal(cur_channel);
-			hv_process_channel_removal(
-					   cur_channel->offermsg.child_relid);
-		} else {
-			mutex_lock(&vmbus_connection.channel_mutex);
-			vmbus_close_internal(cur_channel);
+
+		mutex_lock(&vmbus_connection.channel_mutex);
+		if (vmbus_close_internal(cur_channel) == 0) {
+			vmbus_free_ring(cur_channel);
+
+			if (cur_channel->rescind)
+				hv_process_channel_removal(cur_channel);
 		}
 		mutex_unlock(&vmbus_connection.channel_mutex);
 	}
+
 	/*
 	 * Now close the primary.
 	 */
 	mutex_lock(&vmbus_connection.channel_mutex);
-	vmbus_close_internal(channel);
+	ret = vmbus_close_internal(channel);
 	mutex_unlock(&vmbus_connection.channel_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vmbus_disconnect_ring);
+
+/*
+ * vmbus_close - Close the specified channel
+ */
+void vmbus_close(struct vmbus_channel *channel)
+{
+	if (vmbus_disconnect_ring(channel) == 0)
+		vmbus_free_ring(channel);
 }
 EXPORT_SYMBOL_GPL(vmbus_close);
 
 /**
  * vmbus_sendpacket() - Send the specified buffer on the given channel
- * @channel: Pointer to vmbus_channel structure.
- * @buffer: Pointer to the buffer you want to receive the data into.
- * @bufferlen: Maximum size of what the the buffer will hold
+ * @channel: Pointer to vmbus_channel structure
+ * @buffer: Pointer to the buffer you want to send the data from.
+ * @bufferlen: Maximum size of what the buffer holds.
  * @requestid: Identifier of the request
- * @type: Type of packet that is being send e.g. negotiate, time
- * packet etc.
+ * @type: Type of packet that is being sent e.g. negotiate, time
+ *	  packet etc.
+ * @flags: 0 or VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
  *
- * Sends data in @buffer directly to hyper-v via the vmbus
- * This will send the data unparsed to hyper-v.
+ * Sends data in @buffer directly to Hyper-V via the vmbus.
+ * This will send the data unparsed to Hyper-V.
  *
  * Mainly used by Hyper-V drivers.
  */
@@ -850,12 +885,13 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
 EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);
 
 /**
- * vmbus_recvpacket() - Retrieve the user packet on the specified channel
- * @channel: Pointer to vmbus_channel structure.
+ * __vmbus_recvpacket() - Retrieve the user packet on the specified channel
+ * @channel: Pointer to vmbus_channel structure
  * @buffer: Pointer to the buffer you want to receive the data into.
- * @bufferlen: Maximum size of what the the buffer will hold
- * @buffer_actual_len: The actual size of the data after it was received
+ * @bufferlen: Maximum size of what the buffer can hold.
+ * @buffer_actual_len: The actual size of the data after it was received.
  * @requestid: Identifier of the request
+ * @raw: true means keep the vmpacket_descriptor header in the received data.
  *
  * Receives directly from the hyper-v vmbus and puts the data it received
  * into Buffer. This will receive the data unparsed from hyper-v.
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 0f0e091c117c..6277597d3d58 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -198,24 +198,19 @@ static u16 hv_get_dev_type(const struct vmbus_channel *channel)
 }
 
 /**
- * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
+ * vmbus_prep_negotiate_resp() - Create default response for Negotiate message
  * @icmsghdrp: Pointer to msg header structure
- * @icmsg_negotiate: Pointer to negotiate message structure
  * @buf: Raw buffer channel data
+ * @fw_version: The framework versions we can support.
+ * @fw_vercnt: The size of @fw_version.
+ * @srv_version: The service versions we can support.
+ * @srv_vercnt: The size of @srv_version.
+ * @nego_fw_version: The selected framework version.
+ * @nego_srv_version: The selected service version.
  *
- * @icmsghdrp is of type &struct icmsg_hdr.
- * Set up and fill in default negotiate response message.
- *
- * The fw_version and fw_vercnt specifies the framework version that
- * we can support.
- *
- * The srv_version and srv_vercnt specifies the service
- * versions we can support.
- *
- * Versions are given in decreasing order.
- *
- * nego_fw_version and nego_srv_version store the selected protocol versions.
+ * Note: Versions are given in decreasing order.
  *
+ * Set up and fill in default negotiate response message.
  * Mainly used by Hyper-V drivers.
  */
 bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
@@ -385,21 +380,14 @@ static void vmbus_release_relid(u32 relid)
 	trace_vmbus_release_relid(&msg, ret);
 }
 
-void hv_process_channel_removal(u32 relid)
+void hv_process_channel_removal(struct vmbus_channel *channel)
 {
+	struct vmbus_channel *primary_channel;
 	unsigned long flags;
-	struct vmbus_channel *primary_channel, *channel;
 
 	BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
-
-	/*
-	 * Make sure channel is valid as we may have raced.
-	 */
-	channel = relid2channel(relid);
-	if (!channel)
-		return;
-
 	BUG_ON(!channel->rescind);
+
 	if (channel->target_cpu != get_cpu()) {
 		put_cpu();
 		smp_call_function_single(channel->target_cpu,
@@ -429,7 +417,7 @@ void hv_process_channel_removal(u32 relid)
 		cpumask_clear_cpu(channel->target_cpu,
 				  &primary_channel->alloced_cpus_in_node);
 
-	vmbus_release_relid(relid);
+	vmbus_release_relid(channel->offermsg.child_relid);
 
 	free_channel(channel);
 }
@@ -606,16 +594,18 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 	bool perf_chn = vmbus_devs[dev_type].perf_device;
 	struct vmbus_channel *primary = channel->primary_channel;
 	int next_node;
-	struct cpumask available_mask;
+	cpumask_var_t available_mask;
 	struct cpumask *alloced_mask;
 
 	if ((vmbus_proto_version == VERSION_WS2008) ||
-	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
+	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn) ||
+	    !alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
 		/*
 		 * Prior to win8, all channel interrupts are
 		 * delivered on cpu 0.
 		 * Also if the channel is not a performance critical
 		 * channel, bind it to cpu 0.
+		 * In case alloc_cpumask_var() fails, bind it to cpu 0.
 		 */
 		channel->numa_node = 0;
 		channel->target_cpu = 0;
@@ -653,7 +643,7 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 		cpumask_clear(alloced_mask);
 	}
 
-	cpumask_xor(&available_mask, alloced_mask,
+	cpumask_xor(available_mask, alloced_mask,
 		    cpumask_of_node(primary->numa_node));
 
 	cur_cpu = -1;
@@ -671,10 +661,10 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 	}
 
 	while (true) {
-		cur_cpu = cpumask_next(cur_cpu, &available_mask);
+		cur_cpu = cpumask_next(cur_cpu, available_mask);
 		if (cur_cpu >= nr_cpu_ids) {
 			cur_cpu = -1;
-			cpumask_copy(&available_mask,
+			cpumask_copy(available_mask,
 				     cpumask_of_node(primary->numa_node));
 			continue;
 		}
@@ -704,6 +694,8 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 
 	channel->target_cpu = cur_cpu;
 	channel->target_vp = hv_cpu_number_to_vp_number(cur_cpu);
+
+	free_cpumask_var(available_mask);
 }
 
 static void vmbus_wait_for_unload(void)
@@ -943,7 +935,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 			 * The channel is currently not open;
 			 * it is safe for us to cleanup the channel.
 			 */
-			hv_process_channel_removal(rescind->child_relid);
+			hv_process_channel_removal(channel);
 		} else {
 			complete(&channel->rescind_event);
 		}
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 748a1c4172a6..332d7c34be5c 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -189,6 +189,17 @@ static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu)
 int hv_synic_alloc(void)
 {
 	int cpu;
+	struct hv_per_cpu_context *hv_cpu;
+
+	/*
+	 * First, zero all per-cpu memory areas so hv_synic_free() can
+	 * detect what memory has been allocated and cleanup properly
+	 * after any failures.
+	 */
+	for_each_present_cpu(cpu) {
+		hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
+		memset(hv_cpu, 0, sizeof(*hv_cpu));
+	}
 
 	hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
 					 GFP_KERNEL);
@@ -198,10 +209,8 @@ int hv_synic_alloc(void)
 	}
 
 	for_each_present_cpu(cpu) {
-		struct hv_per_cpu_context *hv_cpu
-			= per_cpu_ptr(hv_context.cpu_context, cpu);
+		hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
 
-		memset(hv_cpu, 0, sizeof(*hv_cpu));
 		tasklet_init(&hv_cpu->msg_dpc,
 			     vmbus_on_msg_dpc, (unsigned long) hv_cpu);
 
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index b1b788082793..41631512ae97 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -689,7 +689,7 @@ static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
 	__online_page_increment_counters(pg);
 	__online_page_free(pg);
 
-	WARN_ON_ONCE(!spin_is_locked(&dm_device.ha_lock));
+	lockdep_assert_held(&dm_device.ha_lock);
 	dm_device.num_pages_onlined++;
 }
 
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index 5eed1e7da15c..a7513a8a8e37 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -353,7 +353,6 @@ static void process_ib_ipinfo(void *in_msg, void *out_msg, int op)
 
 		out->body.kvp_ip_val.dhcp_enabled = in->kvp_ip_val.dhcp_enabled;
 
-	default:
 		utf16s_to_utf8s((wchar_t *)in->kvp_ip_val.adapter_id,
 				MAX_ADAPTER_ID_SIZE,
 				UTF16_LITTLE_ENDIAN,
@@ -406,7 +405,7 @@ kvp_send_key(struct work_struct *dummy)
 		process_ib_ipinfo(in_msg, message, KVP_OP_SET_IP_INFO);
 		break;
 	case KVP_OP_GET_IP_INFO:
-		process_ib_ipinfo(in_msg, message, KVP_OP_GET_IP_INFO);
+		/* We only need to pass on message->kvp_hdr.operation.  */
 		break;
 	case KVP_OP_SET:
 		switch (in_msg->body.kvp_set.data.value_type) {
@@ -421,7 +420,7 @@ kvp_send_key(struct work_struct *dummy)
 				UTF16_LITTLE_ENDIAN,
 				message->body.kvp_set.data.value,
 				HV_KVP_EXCHANGE_MAX_VALUE_SIZE - 1) + 1;
-				break;
+			break;
 
 		case REG_U32:
 			/*
@@ -446,6 +445,9 @@ kvp_send_key(struct work_struct *dummy)
 			break;
 
 		}
+
+		break;
+
 	case KVP_OP_GET:
 		message->body.kvp_set.data.key_size =
 			utf16s_to_utf8s(
@@ -454,7 +456,7 @@ kvp_send_key(struct work_struct *dummy)
 			UTF16_LITTLE_ENDIAN,
 			message->body.kvp_set.data.key,
 			HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1) + 1;
-			break;
+		break;
 
 	case KVP_OP_DELETE:
 		message->body.kvp_delete.key_size =
@@ -464,12 +466,12 @@ kvp_send_key(struct work_struct *dummy)
 			UTF16_LITTLE_ENDIAN,
 			message->body.kvp_delete.key,
 			HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1) + 1;
-			break;
+		break;
 
 	case KVP_OP_ENUMERATE:
 		message->body.kvp_enum_data.index =
 			in_msg->body.kvp_enum_data.index;
-			break;
+		break;
 	}
 
 	kvp_transaction.state = HVUTIL_USERSPACE_REQ;
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 3e90eb91db45..64d0c85d5161 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -241,6 +241,7 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
 void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
 {
 	vunmap(ring_info->ring_buffer);
+	ring_info->ring_buffer = NULL;
 }
 
 /* Write to the ring buffer. */
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index c71cc857b649..283d184280af 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -498,6 +498,54 @@ static ssize_t device_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(device);
 
+static ssize_t driver_override_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct hv_device *hv_dev = device_to_hv_device(dev);
+	char *driver_override, *old, *cp;
+
+	/* We need to keep extra room for a newline */
+	if (count >= (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	driver_override = kstrndup(buf, count, GFP_KERNEL);
+	if (!driver_override)
+		return -ENOMEM;
+
+	cp = strchr(driver_override, '\n');
+	if (cp)
+		*cp = '\0';
+
+	device_lock(dev);
+	old = hv_dev->driver_override;
+	if (strlen(driver_override)) {
+		hv_dev->driver_override = driver_override;
+	} else {
+		kfree(driver_override);
+		hv_dev->driver_override = NULL;
+	}
+	device_unlock(dev);
+
+	kfree(old);
+
+	return count;
+}
+
+static ssize_t driver_override_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct hv_device *hv_dev = device_to_hv_device(dev);
+	ssize_t len;
+
+	device_lock(dev);
+	len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
+	device_unlock(dev);
+
+	return len;
+}
+static DEVICE_ATTR_RW(driver_override);
+
 /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
 static struct attribute *vmbus_dev_attrs[] = {
 	&dev_attr_id.attr,
@@ -528,6 +576,7 @@ static struct attribute *vmbus_dev_attrs[] = {
 	&dev_attr_channel_vp_mapping.attr,
 	&dev_attr_vendor.attr,
 	&dev_attr_device.attr,
+	&dev_attr_driver_override.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(vmbus_dev);
@@ -563,17 +612,26 @@ static inline bool is_null_guid(const uuid_le *guid)
 	return true;
 }
 
-/*
- * Return a matching hv_vmbus_device_id pointer.
- * If there is no match, return NULL.
- */
-static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
-					const uuid_le *guid)
+static const struct hv_vmbus_device_id *
+hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const uuid_le *guid)
+
+{
+	if (id == NULL)
+		return NULL; /* empty device table */
+
+	for (; !is_null_guid(&id->guid); id++)
+		if (!uuid_le_cmp(id->guid, *guid))
+			return id;
+
+	return NULL;
+}
+
+static const struct hv_vmbus_device_id *
+hv_vmbus_dynid_match(struct hv_driver *drv, const uuid_le *guid)
 {
 	const struct hv_vmbus_device_id *id = NULL;
 	struct vmbus_dynid *dynid;
 
-	/* Look at the dynamic ids first, before the static ones */
 	spin_lock(&drv->dynids.lock);
 	list_for_each_entry(dynid, &drv->dynids.list, node) {
 		if (!uuid_le_cmp(dynid->id.guid, *guid)) {
@@ -583,18 +641,37 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
 	}
 	spin_unlock(&drv->dynids.lock);
 
-	if (id)
-		return id;
+	return id;
+}
 
-	id = drv->id_table;
-	if (id == NULL)
-		return NULL; /* empty device table */
+static const struct hv_vmbus_device_id vmbus_device_null = {
+	.guid = NULL_UUID_LE,
+};
 
-	for (; !is_null_guid(&id->guid); id++)
-		if (!uuid_le_cmp(id->guid, *guid))
-			return id;
+/*
+ * Return a matching hv_vmbus_device_id pointer.
+ * If there is no match, return NULL.
+ */
+static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
+							struct hv_device *dev)
+{
+	const uuid_le *guid = &dev->dev_type;
+	const struct hv_vmbus_device_id *id;
 
-	return NULL;
+	/* When driver_override is set, only bind to the matching driver */
+	if (dev->driver_override && strcmp(dev->driver_override, drv->name))
+		return NULL;
+
+	/* Look at the dynamic ids first, before the static ones */
+	id = hv_vmbus_dynid_match(drv, guid);
+	if (!id)
+		id = hv_vmbus_dev_match(drv->id_table, guid);
+
+	/* driver_override will always match, send a dummy id */
+	if (!id && dev->driver_override)
+		id = &vmbus_device_null;
+
+	return id;
 }
 
 /* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */
@@ -643,7 +720,7 @@ static ssize_t new_id_store(struct device_driver *driver, const char *buf,
 	if (retval)
 		return retval;
 
-	if (hv_vmbus_get_id(drv, &guid))
+	if (hv_vmbus_dynid_match(drv, &guid))
 		return -EEXIST;
 
 	retval = vmbus_add_dynid(drv, &guid);
@@ -708,7 +785,7 @@ static int vmbus_match(struct device *device, struct device_driver *driver)
 	if (is_hvsock_channel(hv_dev->channel))
 		return drv->hvsock;
 
-	if (hv_vmbus_get_id(drv, &hv_dev->dev_type))
+	if (hv_vmbus_get_id(drv, hv_dev))
 		return 1;
 
 	return 0;
@@ -725,7 +802,7 @@ static int vmbus_probe(struct device *child_device)
 	struct hv_device *dev = device_to_hv_device(child_device);
 	const struct hv_vmbus_device_id *dev_id;
 
-	dev_id = hv_vmbus_get_id(drv, &dev->dev_type);
+	dev_id = hv_vmbus_get_id(drv, dev);
 	if (drv->probe) {
 		ret = drv->probe(dev, dev_id);
 		if (ret != 0)
@@ -787,10 +864,9 @@ static void vmbus_device_release(struct device *device)
 	struct vmbus_channel *channel = hv_dev->channel;
 
 	mutex_lock(&vmbus_connection.channel_mutex);
-	hv_process_channel_removal(channel->offermsg.child_relid);
+	hv_process_channel_removal(channel);
 	mutex_unlock(&vmbus_connection.channel_mutex);
 	kfree(hv_dev);
-
 }
 
 /* The one and only one */
diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c
index ff94e58845b7..170fbb66bda2 100644
--- a/drivers/hwtracing/coresight/coresight-catu.c
+++ b/drivers/hwtracing/coresight/coresight-catu.c
@@ -406,6 +406,7 @@ static inline int catu_wait_for_ready(struct catu_drvdata *drvdata)
 
 static int catu_enable_hw(struct catu_drvdata *drvdata, void *data)
 {
+	int rc;
 	u32 control, mode;
 	struct etr_buf *etr_buf = data;
 
@@ -418,6 +419,10 @@ static int catu_enable_hw(struct catu_drvdata *drvdata, void *data)
 		return -EBUSY;
 	}
 
+	rc = coresight_claim_device_unlocked(drvdata->base);
+	if (rc)
+		return rc;
+
 	control |= BIT(CATU_CONTROL_ENABLE);
 
 	if (etr_buf && etr_buf->mode == ETR_MODE_CATU) {
@@ -459,6 +464,7 @@ static int catu_disable_hw(struct catu_drvdata *drvdata)
 	int rc = 0;
 
 	catu_write_control(drvdata, 0);
+	coresight_disclaim_device_unlocked(drvdata->base);
 	if (catu_wait_for_ready(drvdata)) {
 		dev_info(drvdata->dev, "Timeout while waiting for READY\n");
 		rc = -EAGAIN;
diff --git a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c
index f6d0571ab9dd..299667b887fc 100644
--- a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c
+++ b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c
@@ -34,48 +34,87 @@ struct replicator_state {
 	struct coresight_device	*csdev;
 };
 
+/*
+ * replicator_reset : Reset the replicator configuration to sane values.
+ */
+static void replicator_reset(struct replicator_state *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	if (!coresight_claim_device_unlocked(drvdata->base)) {
+		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0);
+		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1);
+		coresight_disclaim_device_unlocked(drvdata->base);
+	}
+
+	CS_LOCK(drvdata->base);
+}
+
 static int replicator_enable(struct coresight_device *csdev, int inport,
 			      int outport)
 {
+	int rc = 0;
+	u32 reg;
 	struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent);
 
+	switch (outport) {
+	case 0:
+		reg = REPLICATOR_IDFILTER0;
+		break;
+	case 1:
+		reg = REPLICATOR_IDFILTER1;
+		break;
+	default:
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
 	CS_UNLOCK(drvdata->base);
 
-	/*
-	 * Ensure that the other port is disabled
-	 * 0x00 - passing through the replicator unimpeded
-	 * 0xff - disable (or impede) the flow of ATB data
-	 */
-	if (outport == 0) {
-		writel_relaxed(0x00, drvdata->base + REPLICATOR_IDFILTER0);
-		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1);
-	} else {
-		writel_relaxed(0x00, drvdata->base + REPLICATOR_IDFILTER1);
-		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0);
+	if ((readl_relaxed(drvdata->base + REPLICATOR_IDFILTER0) == 0xff) &&
+	    (readl_relaxed(drvdata->base + REPLICATOR_IDFILTER1) == 0xff))
+		rc = coresight_claim_device_unlocked(drvdata->base);
+
+	/* Ensure that the outport is enabled. */
+	if (!rc) {
+		writel_relaxed(0x00, drvdata->base + reg);
+		dev_dbg(drvdata->dev, "REPLICATOR enabled\n");
 	}
 
 	CS_LOCK(drvdata->base);
 
-	dev_info(drvdata->dev, "REPLICATOR enabled\n");
-	return 0;
+	return rc;
 }
 
 static void replicator_disable(struct coresight_device *csdev, int inport,
 				int outport)
 {
+	u32 reg;
 	struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent);
 
+	switch (outport) {
+	case 0:
+		reg = REPLICATOR_IDFILTER0;
+		break;
+	case 1:
+		reg = REPLICATOR_IDFILTER1;
+		break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+
 	CS_UNLOCK(drvdata->base);
 
 	/* disable the flow of ATB data through port */
-	if (outport == 0)
-		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0);
-	else
-		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1);
+	writel_relaxed(0xff, drvdata->base + reg);
 
+	if ((readl_relaxed(drvdata->base + REPLICATOR_IDFILTER0) == 0xff) &&
+	    (readl_relaxed(drvdata->base + REPLICATOR_IDFILTER1) == 0xff))
+		coresight_disclaim_device_unlocked(drvdata->base);
 	CS_LOCK(drvdata->base);
 
-	dev_info(drvdata->dev, "REPLICATOR disabled\n");
+	dev_dbg(drvdata->dev, "REPLICATOR disabled\n");
 }
 
 static const struct coresight_ops_link replicator_link_ops = {
@@ -156,7 +195,11 @@ static int replicator_probe(struct amba_device *adev, const struct amba_id *id)
 	desc.groups = replicator_groups;
 	drvdata->csdev = coresight_register(&desc);
 
-	return PTR_ERR_OR_ZERO(drvdata->csdev);
+	if (!IS_ERR(drvdata->csdev)) {
+		replicator_reset(drvdata);
+		return 0;
+	}
+	return PTR_ERR(drvdata->csdev);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
index 306119eaf16a..824be0c5f592 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -5,7 +5,6 @@
  * Description: CoreSight Embedded Trace Buffer driver
  */
 
-#include <asm/local.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -28,6 +27,7 @@
 
 
 #include "coresight-priv.h"
+#include "coresight-etm-perf.h"
 
 #define ETB_RAM_DEPTH_REG	0x004
 #define ETB_STATUS_REG		0x00c
@@ -71,8 +71,8 @@
  * @miscdev:	specifics to handle "/dev/xyz.etb" entry.
  * @spinlock:	only one at a time pls.
  * @reading:	synchronise user space access to etb buffer.
- * @mode:	this ETB is being used.
  * @buf:	area of memory where ETB buffer content gets sent.
+ * @mode:	this ETB is being used.
  * @buffer_depth: size of @buf.
  * @trigger_cntr: amount of words to store after a trigger.
  */
@@ -84,12 +84,15 @@ struct etb_drvdata {
 	struct miscdevice	miscdev;
 	spinlock_t		spinlock;
 	local_t			reading;
-	local_t			mode;
 	u8			*buf;
+	u32			mode;
 	u32			buffer_depth;
 	u32			trigger_cntr;
 };
 
+static int etb_set_buffer(struct coresight_device *csdev,
+			  struct perf_output_handle *handle);
+
 static unsigned int etb_get_buffer_depth(struct etb_drvdata *drvdata)
 {
 	u32 depth = 0;
@@ -103,7 +106,7 @@ static unsigned int etb_get_buffer_depth(struct etb_drvdata *drvdata)
 	return depth;
 }
 
-static void etb_enable_hw(struct etb_drvdata *drvdata)
+static void __etb_enable_hw(struct etb_drvdata *drvdata)
 {
 	int i;
 	u32 depth;
@@ -131,32 +134,92 @@ static void etb_enable_hw(struct etb_drvdata *drvdata)
 	CS_LOCK(drvdata->base);
 }
 
-static int etb_enable(struct coresight_device *csdev, u32 mode)
+static int etb_enable_hw(struct etb_drvdata *drvdata)
+{
+	__etb_enable_hw(drvdata);
+	return 0;
+}
+
+static int etb_enable_sysfs(struct coresight_device *csdev)
 {
-	u32 val;
+	int ret = 0;
 	unsigned long flags;
 	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
-	val = local_cmpxchg(&drvdata->mode,
-			    CS_MODE_DISABLED, mode);
-	/*
-	 * When accessing from Perf, a HW buffer can be handled
-	 * by a single trace entity.  In sysFS mode many tracers
-	 * can be logging to the same HW buffer.
-	 */
-	if (val == CS_MODE_PERF)
-		return -EBUSY;
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* Don't messup with perf sessions. */
+	if (drvdata->mode == CS_MODE_PERF) {
+		ret = -EBUSY;
+		goto out;
+	}
 
 	/* Nothing to do, the tracer is already enabled. */
-	if (val == CS_MODE_SYSFS)
+	if (drvdata->mode == CS_MODE_SYSFS)
 		goto out;
 
-	spin_lock_irqsave(&drvdata->spinlock, flags);
-	etb_enable_hw(drvdata);
+	ret = etb_enable_hw(drvdata);
+	if (!ret)
+		drvdata->mode = CS_MODE_SYSFS;
+
+out:
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	return ret;
+}
+
+static int etb_enable_perf(struct coresight_device *csdev, void *data)
+{
+	int ret = 0;
+	unsigned long flags;
+	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* No need to continue if the component is already in use. */
+	if (drvdata->mode != CS_MODE_DISABLED) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/*
+	 * We don't have an internal state to clean up if we fail to setup
+	 * the perf buffer. So we can perform the step before we turn the
+	 * ETB on and leave without cleaning up.
+	 */
+	ret = etb_set_buffer(csdev, (struct perf_output_handle *)data);
+	if (ret)
+		goto out;
+
+	ret = etb_enable_hw(drvdata);
+	if (!ret)
+		drvdata->mode = CS_MODE_PERF;
 
 out:
-	dev_info(drvdata->dev, "ETB enabled\n");
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	return ret;
+}
+
+static int etb_enable(struct coresight_device *csdev, u32 mode, void *data)
+{
+	int ret;
+	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	switch (mode) {
+	case CS_MODE_SYSFS:
+		ret = etb_enable_sysfs(csdev);
+		break;
+	case CS_MODE_PERF:
+		ret = etb_enable_perf(csdev, data);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret)
+		return ret;
+
+	dev_dbg(drvdata->dev, "ETB enabled\n");
 	return 0;
 }
 
@@ -256,13 +319,16 @@ static void etb_disable(struct coresight_device *csdev)
 	unsigned long flags;
 
 	spin_lock_irqsave(&drvdata->spinlock, flags);
-	etb_disable_hw(drvdata);
-	etb_dump_hw(drvdata);
-	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
-	local_set(&drvdata->mode, CS_MODE_DISABLED);
+	/* Disable the ETB only if it needs to */
+	if (drvdata->mode != CS_MODE_DISABLED) {
+		etb_disable_hw(drvdata);
+		etb_dump_hw(drvdata);
+		drvdata->mode = CS_MODE_DISABLED;
+	}
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
-	dev_info(drvdata->dev, "ETB disabled\n");
+	dev_dbg(drvdata->dev, "ETB disabled\n");
 }
 
 static void *etb_alloc_buffer(struct coresight_device *csdev, int cpu,
@@ -294,12 +360,14 @@ static void etb_free_buffer(void *config)
 }
 
 static int etb_set_buffer(struct coresight_device *csdev,
-			  struct perf_output_handle *handle,
-			  void *sink_config)
+			  struct perf_output_handle *handle)
 {
 	int ret = 0;
 	unsigned long head;
-	struct cs_buffers *buf = sink_config;
+	struct cs_buffers *buf = etm_perf_sink_config(handle);
+
+	if (!buf)
+		return -EINVAL;
 
 	/* wrap head around to the amount of space we have */
 	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
@@ -315,37 +383,7 @@ static int etb_set_buffer(struct coresight_device *csdev,
 	return ret;
 }
 
-static unsigned long etb_reset_buffer(struct coresight_device *csdev,
-				      struct perf_output_handle *handle,
-				      void *sink_config)
-{
-	unsigned long size = 0;
-	struct cs_buffers *buf = sink_config;
-
-	if (buf) {
-		/*
-		 * In snapshot mode ->data_size holds the new address of the
-		 * ring buffer's head.  The size itself is the whole address
-		 * range since we want the latest information.
-		 */
-		if (buf->snapshot)
-			handle->head = local_xchg(&buf->data_size,
-						  buf->nr_pages << PAGE_SHIFT);
-
-		/*
-		 * Tell the tracer PMU how much we got in this run and if
-		 * something went wrong along the way.  Nobody else can use
-		 * this cs_buffers instance until we are done.  As such
-		 * resetting parameters here and squaring off with the ring
-		 * buffer API in the tracer PMU is fine.
-		 */
-		size = local_xchg(&buf->data_size, 0);
-	}
-
-	return size;
-}
-
-static void etb_update_buffer(struct coresight_device *csdev,
+static unsigned long etb_update_buffer(struct coresight_device *csdev,
 			      struct perf_output_handle *handle,
 			      void *sink_config)
 {
@@ -354,13 +392,13 @@ static void etb_update_buffer(struct coresight_device *csdev,
 	u8 *buf_ptr;
 	const u32 *barrier;
 	u32 read_ptr, write_ptr, capacity;
-	u32 status, read_data, to_read;
-	unsigned long offset;
+	u32 status, read_data;
+	unsigned long offset, to_read;
 	struct cs_buffers *buf = sink_config;
 	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
 	if (!buf)
-		return;
+		return 0;
 
 	capacity = drvdata->buffer_depth * ETB_FRAME_SIZE_WORDS;
 
@@ -465,18 +503,17 @@ static void etb_update_buffer(struct coresight_device *csdev,
 	writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER);
 
 	/*
-	 * In snapshot mode all we have to do is communicate to
-	 * perf_aux_output_end() the address of the current head.  In full
-	 * trace mode the same function expects a size to move rb->aux_head
-	 * forward.
+	 * In snapshot mode we have to update the handle->head to point
+	 * to the new location.
 	 */
-	if (buf->snapshot)
-		local_set(&buf->data_size, (cur * PAGE_SIZE) + offset);
-	else
-		local_add(to_read, &buf->data_size);
-
+	if (buf->snapshot) {
+		handle->head = (cur * PAGE_SIZE) + offset;
+		to_read = buf->nr_pages << PAGE_SHIFT;
+	}
 	etb_enable_hw(drvdata);
 	CS_LOCK(drvdata->base);
+
+	return to_read;
 }
 
 static const struct coresight_ops_sink etb_sink_ops = {
@@ -484,8 +521,6 @@ static const struct coresight_ops_sink etb_sink_ops = {
 	.disable	= etb_disable,
 	.alloc_buffer	= etb_alloc_buffer,
 	.free_buffer	= etb_free_buffer,
-	.set_buffer	= etb_set_buffer,
-	.reset_buffer	= etb_reset_buffer,
 	.update_buffer	= etb_update_buffer,
 };
 
@@ -498,14 +533,14 @@ static void etb_dump(struct etb_drvdata *drvdata)
 	unsigned long flags;
 
 	spin_lock_irqsave(&drvdata->spinlock, flags);
-	if (local_read(&drvdata->mode) == CS_MODE_SYSFS) {
+	if (drvdata->mode == CS_MODE_SYSFS) {
 		etb_disable_hw(drvdata);
 		etb_dump_hw(drvdata);
 		etb_enable_hw(drvdata);
 	}
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
-	dev_info(drvdata->dev, "ETB dumped\n");
+	dev_dbg(drvdata->dev, "ETB dumped\n");
 }
 
 static int etb_open(struct inode *inode, struct file *file)
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 677695635211..abe8249b893b 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -12,6 +12,7 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/perf_event.h>
+#include <linux/percpu-defs.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
@@ -22,20 +23,6 @@
 static struct pmu etm_pmu;
 static bool etm_perf_up;
 
-/**
- * struct etm_event_data - Coresight specifics associated to an event
- * @work:		Handle to free allocated memory outside IRQ context.
- * @mask:		Hold the CPU(s) this event was set for.
- * @snk_config:		The sink configuration.
- * @path:		An array of path, each slot for one CPU.
- */
-struct etm_event_data {
-	struct work_struct work;
-	cpumask_t mask;
-	void *snk_config;
-	struct list_head **path;
-};
-
 static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle);
 static DEFINE_PER_CPU(struct coresight_device *, csdev_src);
 
@@ -61,6 +48,18 @@ static const struct attribute_group *etm_pmu_attr_groups[] = {
 	NULL,
 };
 
+static inline struct list_head **
+etm_event_cpu_path_ptr(struct etm_event_data *data, int cpu)
+{
+	return per_cpu_ptr(data->path, cpu);
+}
+
+static inline struct list_head *
+etm_event_cpu_path(struct etm_event_data *data, int cpu)
+{
+	return *etm_event_cpu_path_ptr(data, cpu);
+}
+
 static void etm_event_read(struct perf_event *event) {}
 
 static int etm_addr_filters_alloc(struct perf_event *event)
@@ -114,29 +113,30 @@ static void free_event_data(struct work_struct *work)
 
 	event_data = container_of(work, struct etm_event_data, work);
 	mask = &event_data->mask;
-	/*
-	 * First deal with the sink configuration.  See comment in
-	 * etm_setup_aux() about why we take the first available path.
-	 */
-	if (event_data->snk_config) {
+
+	/* Free the sink buffers, if there are any */
+	if (event_data->snk_config && !WARN_ON(cpumask_empty(mask))) {
 		cpu = cpumask_first(mask);
-		sink = coresight_get_sink(event_data->path[cpu]);
+		sink = coresight_get_sink(etm_event_cpu_path(event_data, cpu));
 		if (sink_ops(sink)->free_buffer)
 			sink_ops(sink)->free_buffer(event_data->snk_config);
 	}
 
 	for_each_cpu(cpu, mask) {
-		if (!(IS_ERR_OR_NULL(event_data->path[cpu])))
-			coresight_release_path(event_data->path[cpu]);
+		struct list_head **ppath;
+
+		ppath = etm_event_cpu_path_ptr(event_data, cpu);
+		if (!(IS_ERR_OR_NULL(*ppath)))
+			coresight_release_path(*ppath);
+		*ppath = NULL;
 	}
 
-	kfree(event_data->path);
+	free_percpu(event_data->path);
 	kfree(event_data);
 }
 
 static void *alloc_event_data(int cpu)
 {
-	int size;
 	cpumask_t *mask;
 	struct etm_event_data *event_data;
 
@@ -145,16 +145,12 @@ static void *alloc_event_data(int cpu)
 	if (!event_data)
 		return NULL;
 
-	/* Make sure nothing disappears under us */
-	get_online_cpus();
-	size = num_online_cpus();
 
 	mask = &event_data->mask;
 	if (cpu != -1)
 		cpumask_set_cpu(cpu, mask);
 	else
-		cpumask_copy(mask, cpu_online_mask);
-	put_online_cpus();
+		cpumask_copy(mask, cpu_present_mask);
 
 	/*
 	 * Each CPU has a single path between source and destination.  As such
@@ -164,8 +160,8 @@ static void *alloc_event_data(int cpu)
 	 * unused memory when dealing with single CPU trace scenarios is small
 	 * compared to the cost of searching through an optimized array.
 	 */
-	event_data->path = kcalloc(size,
-				   sizeof(struct list_head *), GFP_KERNEL);
+	event_data->path = alloc_percpu(struct list_head *);
+
 	if (!event_data->path) {
 		kfree(event_data);
 		return NULL;
@@ -206,34 +202,53 @@ static void *etm_setup_aux(int event_cpu, void **pages,
 	 * on the cmd line.  As such the "enable_sink" flag in sysFS is reset.
 	 */
 	sink = coresight_get_enabled_sink(true);
-	if (!sink)
+	if (!sink || !sink_ops(sink)->alloc_buffer)
 		goto err;
 
 	mask = &event_data->mask;
 
-	/* Setup the path for each CPU in a trace session */
+	/*
+	 * Setup the path for each CPU in a trace session. We try to build
+	 * trace path for each CPU in the mask. If we don't find an ETM
+	 * for the CPU or fail to build a path, we clear the CPU from the
+	 * mask and continue with the rest. If ever we try to trace on those
+	 * CPUs, we can handle it and fail the session.
+	 */
 	for_each_cpu(cpu, mask) {
+		struct list_head *path;
 		struct coresight_device *csdev;
 
 		csdev = per_cpu(csdev_src, cpu);
-		if (!csdev)
-			goto err;
+		/*
+		 * If there is no ETM associated with this CPU clear it from
+		 * the mask and continue with the rest. If ever we try to trace
+		 * on this CPU, we handle it accordingly.
+		 */
+		if (!csdev) {
+			cpumask_clear_cpu(cpu, mask);
+			continue;
+		}
 
 		/*
 		 * Building a path doesn't enable it, it simply builds a
 		 * list of devices from source to sink that can be
 		 * referenced later when the path is actually needed.
 		 */
-		event_data->path[cpu] = coresight_build_path(csdev, sink);
-		if (IS_ERR(event_data->path[cpu]))
-			goto err;
+		path = coresight_build_path(csdev, sink);
+		if (IS_ERR(path)) {
+			cpumask_clear_cpu(cpu, mask);
+			continue;
+		}
+
+		*etm_event_cpu_path_ptr(event_data, cpu) = path;
 	}
 
-	if (!sink_ops(sink)->alloc_buffer)
+	/* If we don't have any CPUs ready for tracing, abort */
+	cpu = cpumask_first(mask);
+	if (cpu >= nr_cpu_ids)
 		goto err;
 
-	cpu = cpumask_first(mask);
-	/* Get the AUX specific data from the sink buffer */
+	/* Allocate the sink buffer for this session */
 	event_data->snk_config =
 			sink_ops(sink)->alloc_buffer(sink, cpu, pages,
 						     nr_pages, overwrite);
@@ -255,6 +270,7 @@ static void etm_event_start(struct perf_event *event, int flags)
 	struct etm_event_data *event_data;
 	struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle);
 	struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
+	struct list_head *path;
 
 	if (!csdev)
 		goto fail;
@@ -267,18 +283,14 @@ static void etm_event_start(struct perf_event *event, int flags)
 	if (!event_data)
 		goto fail;
 
+	path = etm_event_cpu_path(event_data, cpu);
 	/* We need a sink, no need to continue without one */
-	sink = coresight_get_sink(event_data->path[cpu]);
-	if (WARN_ON_ONCE(!sink || !sink_ops(sink)->set_buffer))
-		goto fail_end_stop;
-
-	/* Configure the sink */
-	if (sink_ops(sink)->set_buffer(sink, handle,
-				       event_data->snk_config))
+	sink = coresight_get_sink(path);
+	if (WARN_ON_ONCE(!sink))
 		goto fail_end_stop;
 
 	/* Nothing will happen without a path */
-	if (coresight_enable_path(event_data->path[cpu], CS_MODE_PERF))
+	if (coresight_enable_path(path, CS_MODE_PERF, handle))
 		goto fail_end_stop;
 
 	/* Tell the perf core the event is alive */
@@ -286,11 +298,13 @@ static void etm_event_start(struct perf_event *event, int flags)
 
 	/* Finally enable the tracer */
 	if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF))
-		goto fail_end_stop;
+		goto fail_disable_path;
 
 out:
 	return;
 
+fail_disable_path:
+	coresight_disable_path(path);
 fail_end_stop:
 	perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 	perf_aux_output_end(handle, 0);
@@ -306,6 +320,7 @@ static void etm_event_stop(struct perf_event *event, int mode)
 	struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
 	struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle);
 	struct etm_event_data *event_data = perf_get_aux(handle);
+	struct list_head *path;
 
 	if (event->hw.state == PERF_HES_STOPPED)
 		return;
@@ -313,7 +328,11 @@ static void etm_event_stop(struct perf_event *event, int mode)
 	if (!csdev)
 		return;
 
-	sink = coresight_get_sink(event_data->path[cpu]);
+	path = etm_event_cpu_path(event_data, cpu);
+	if (!path)
+		return;
+
+	sink = coresight_get_sink(path);
 	if (!sink)
 		return;
 
@@ -331,20 +350,13 @@ static void etm_event_stop(struct perf_event *event, int mode)
 		if (!sink_ops(sink)->update_buffer)
 			return;
 
-		sink_ops(sink)->update_buffer(sink, handle,
+		size = sink_ops(sink)->update_buffer(sink, handle,
 					      event_data->snk_config);
-
-		if (!sink_ops(sink)->reset_buffer)
-			return;
-
-		size = sink_ops(sink)->reset_buffer(sink, handle,
-						    event_data->snk_config);
-
 		perf_aux_output_end(handle, size);
 	}
 
 	/* Disabling the path make its elements available to other sessions */
-	coresight_disable_path(event_data->path[cpu]);
+	coresight_disable_path(path);
 }
 
 static int etm_event_add(struct perf_event *event, int mode)
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h
index 4197df4faf5e..da7d9336a15c 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.h
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.h
@@ -7,6 +7,7 @@
 #ifndef _CORESIGHT_ETM_PERF_H
 #define _CORESIGHT_ETM_PERF_H
 
+#include <linux/percpu-defs.h>
 #include "coresight-priv.h"
 
 struct coresight_device;
@@ -42,14 +43,39 @@ struct etm_filters {
 	bool			ssstatus;
 };
 
+/**
+ * struct etm_event_data - Coresight specifics associated to an event
+ * @work:		Handle to free allocated memory outside IRQ context.
+ * @mask:		Hold the CPU(s) this event was set for.
+ * @snk_config:		The sink configuration.
+ * @path:		An array of path, each slot for one CPU.
+ */
+struct etm_event_data {
+	struct work_struct work;
+	cpumask_t mask;
+	void *snk_config;
+	struct list_head * __percpu *path;
+};
 
 #ifdef CONFIG_CORESIGHT
 int etm_perf_symlink(struct coresight_device *csdev, bool link);
+static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
+{
+	struct etm_event_data *data = perf_get_aux(handle);
 
+	if (data)
+		return data->snk_config;
+	return NULL;
+}
 #else
 static inline int etm_perf_symlink(struct coresight_device *csdev, bool link)
 { return -EINVAL; }
 
+static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_CORESIGHT */
 
 #endif
diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c
index 7c74263c333d..fd5c4cca7db5 100644
--- a/drivers/hwtracing/coresight/coresight-etm3x.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x.c
@@ -355,11 +355,10 @@ static int etm_parse_event_config(struct etm_drvdata *drvdata,
 	return 0;
 }
 
-static void etm_enable_hw(void *info)
+static int etm_enable_hw(struct etm_drvdata *drvdata)
 {
-	int i;
+	int i, rc;
 	u32 etmcr;
-	struct etm_drvdata *drvdata = info;
 	struct etm_config *config = &drvdata->config;
 
 	CS_UNLOCK(drvdata->base);
@@ -370,6 +369,9 @@ static void etm_enable_hw(void *info)
 	etm_set_pwrup(drvdata);
 	/* Make sure all registers are accessible */
 	etm_os_unlock(drvdata);
+	rc = coresight_claim_device_unlocked(drvdata->base);
+	if (rc)
+		goto done;
 
 	etm_set_prog(drvdata);
 
@@ -418,9 +420,29 @@ static void etm_enable_hw(void *info)
 	etm_writel(drvdata, 0x0, ETMVMIDCVR);
 
 	etm_clr_prog(drvdata);
+
+done:
+	if (rc)
+		etm_set_pwrdwn(drvdata);
 	CS_LOCK(drvdata->base);
 
-	dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu);
+	dev_dbg(drvdata->dev, "cpu: %d enable smp call done: %d\n",
+		drvdata->cpu, rc);
+	return rc;
+}
+
+struct etm_enable_arg {
+	struct etm_drvdata *drvdata;
+	int rc;
+};
+
+static void etm_enable_hw_smp_call(void *info)
+{
+	struct etm_enable_arg *arg = info;
+
+	if (WARN_ON(!arg))
+		return;
+	arg->rc = etm_enable_hw(arg->drvdata);
 }
 
 static int etm_cpu_id(struct coresight_device *csdev)
@@ -475,14 +497,13 @@ static int etm_enable_perf(struct coresight_device *csdev,
 	/* Configure the tracer based on the session's specifics */
 	etm_parse_event_config(drvdata, event);
 	/* And enable it */
-	etm_enable_hw(drvdata);
-
-	return 0;
+	return etm_enable_hw(drvdata);
 }
 
 static int etm_enable_sysfs(struct coresight_device *csdev)
 {
 	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct etm_enable_arg arg = { 0 };
 	int ret;
 
 	spin_lock(&drvdata->spinlock);
@@ -492,20 +513,21 @@ static int etm_enable_sysfs(struct coresight_device *csdev)
 	 * hw configuration will take place on the local CPU during bring up.
 	 */
 	if (cpu_online(drvdata->cpu)) {
+		arg.drvdata = drvdata;
 		ret = smp_call_function_single(drvdata->cpu,
-					       etm_enable_hw, drvdata, 1);
-		if (ret)
-			goto err;
+					       etm_enable_hw_smp_call, &arg, 1);
+		if (!ret)
+			ret = arg.rc;
+		if (!ret)
+			drvdata->sticky_enable = true;
+	} else {
+		ret = -ENODEV;
 	}
 
-	drvdata->sticky_enable = true;
 	spin_unlock(&drvdata->spinlock);
 
-	dev_info(drvdata->dev, "ETM tracing enabled\n");
-	return 0;
-
-err:
-	spin_unlock(&drvdata->spinlock);
+	if (!ret)
+		dev_dbg(drvdata->dev, "ETM tracing enabled\n");
 	return ret;
 }
 
@@ -555,6 +577,8 @@ static void etm_disable_hw(void *info)
 	for (i = 0; i < drvdata->nr_cntr; i++)
 		config->cntr_val[i] = etm_readl(drvdata, ETMCNTVRn(i));
 
+	coresight_disclaim_device_unlocked(drvdata->base);
+
 	etm_set_pwrdwn(drvdata);
 	CS_LOCK(drvdata->base);
 
@@ -604,7 +628,7 @@ static void etm_disable_sysfs(struct coresight_device *csdev)
 	spin_unlock(&drvdata->spinlock);
 	cpus_read_unlock();
 
-	dev_info(drvdata->dev, "ETM tracing disabled\n");
+	dev_dbg(drvdata->dev, "ETM tracing disabled\n");
 }
 
 static void etm_disable(struct coresight_device *csdev,
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
index 1d94ebec027b..53e2fb6e86f6 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x.c
@@ -28,6 +28,7 @@
 #include <linux/pm_runtime.h>
 #include <asm/sections.h>
 #include <asm/local.h>
+#include <asm/virt.h>
 
 #include "coresight-etm4x.h"
 #include "coresight-etm-perf.h"
@@ -77,16 +78,24 @@ static int etm4_trace_id(struct coresight_device *csdev)
 	return drvdata->trcid;
 }
 
-static void etm4_enable_hw(void *info)
+struct etm4_enable_arg {
+	struct etmv4_drvdata *drvdata;
+	int rc;
+};
+
+static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
 {
-	int i;
-	struct etmv4_drvdata *drvdata = info;
+	int i, rc;
 	struct etmv4_config *config = &drvdata->config;
 
 	CS_UNLOCK(drvdata->base);
 
 	etm4_os_unlock(drvdata);
 
+	rc = coresight_claim_device_unlocked(drvdata->base);
+	if (rc)
+		goto done;
+
 	/* Disable the trace unit before programming trace registers */
 	writel_relaxed(0, drvdata->base + TRCPRGCTLR);
 
@@ -174,9 +183,21 @@ static void etm4_enable_hw(void *info)
 		dev_err(drvdata->dev,
 			"timeout while waiting for Idle Trace Status\n");
 
+done:
 	CS_LOCK(drvdata->base);
 
-	dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu);
+	dev_dbg(drvdata->dev, "cpu: %d enable smp call done: %d\n",
+		drvdata->cpu, rc);
+	return rc;
+}
+
+static void etm4_enable_hw_smp_call(void *info)
+{
+	struct etm4_enable_arg *arg = info;
+
+	if (WARN_ON(!arg))
+		return;
+	arg->rc = etm4_enable_hw(arg->drvdata);
 }
 
 static int etm4_parse_event_config(struct etmv4_drvdata *drvdata,
@@ -242,7 +263,7 @@ static int etm4_enable_perf(struct coresight_device *csdev,
 	if (ret)
 		goto out;
 	/* And enable it */
-	etm4_enable_hw(drvdata);
+	ret = etm4_enable_hw(drvdata);
 
 out:
 	return ret;
@@ -251,6 +272,7 @@ out:
 static int etm4_enable_sysfs(struct coresight_device *csdev)
 {
 	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct etm4_enable_arg arg = { 0 };
 	int ret;
 
 	spin_lock(&drvdata->spinlock);
@@ -259,19 +281,17 @@ static int etm4_enable_sysfs(struct coresight_device *csdev)
 	 * Executing etm4_enable_hw on the cpu whose ETM is being enabled
 	 * ensures that register writes occur when cpu is powered.
 	 */
+	arg.drvdata = drvdata;
 	ret = smp_call_function_single(drvdata->cpu,
-				       etm4_enable_hw, drvdata, 1);
-	if (ret)
-		goto err;
-
-	drvdata->sticky_enable = true;
+				       etm4_enable_hw_smp_call, &arg, 1);
+	if (!ret)
+		ret = arg.rc;
+	if (!ret)
+		drvdata->sticky_enable = true;
 	spin_unlock(&drvdata->spinlock);
 
-	dev_info(drvdata->dev, "ETM tracing enabled\n");
-	return 0;
-
-err:
-	spin_unlock(&drvdata->spinlock);
+	if (!ret)
+		dev_dbg(drvdata->dev, "ETM tracing enabled\n");
 	return ret;
 }
 
@@ -328,6 +348,8 @@ static void etm4_disable_hw(void *info)
 	isb();
 	writel_relaxed(control, drvdata->base + TRCPRGCTLR);
 
+	coresight_disclaim_device_unlocked(drvdata->base);
+
 	CS_LOCK(drvdata->base);
 
 	dev_dbg(drvdata->dev, "cpu: %d disable smp call done\n", drvdata->cpu);
@@ -380,7 +402,7 @@ static void etm4_disable_sysfs(struct coresight_device *csdev)
 	spin_unlock(&drvdata->spinlock);
 	cpus_read_unlock();
 
-	dev_info(drvdata->dev, "ETM tracing disabled\n");
+	dev_dbg(drvdata->dev, "ETM tracing disabled\n");
 }
 
 static void etm4_disable(struct coresight_device *csdev,
@@ -605,7 +627,7 @@ static void etm4_set_default_config(struct etmv4_config *config)
 	config->vinst_ctrl |= BIT(0);
 }
 
-static u64 etm4_get_access_type(struct etmv4_config *config)
+static u64 etm4_get_ns_access_type(struct etmv4_config *config)
 {
 	u64 access_type = 0;
 
@@ -616,17 +638,26 @@ static u64 etm4_get_access_type(struct etmv4_config *config)
 	 *   Bit[13] Exception level 1 - OS
 	 *   Bit[14] Exception level 2 - Hypervisor
 	 *   Bit[15] Never implemented
-	 *
-	 * Always stay away from hypervisor mode.
 	 */
-	access_type = ETM_EXLEVEL_NS_HYP;
-
-	if (config->mode & ETM_MODE_EXCL_KERN)
-		access_type |= ETM_EXLEVEL_NS_OS;
+	if (!is_kernel_in_hyp_mode()) {
+		/* Stay away from hypervisor mode for non-VHE */
+		access_type =  ETM_EXLEVEL_NS_HYP;
+		if (config->mode & ETM_MODE_EXCL_KERN)
+			access_type |= ETM_EXLEVEL_NS_OS;
+	} else if (config->mode & ETM_MODE_EXCL_KERN) {
+		access_type = ETM_EXLEVEL_NS_HYP;
+	}
 
 	if (config->mode & ETM_MODE_EXCL_USER)
 		access_type |= ETM_EXLEVEL_NS_APP;
 
+	return access_type;
+}
+
+static u64 etm4_get_access_type(struct etmv4_config *config)
+{
+	u64 access_type = etm4_get_ns_access_type(config);
+
 	/*
 	 * EXLEVEL_S, bits[11:8], don't trace anything happening
 	 * in secure state.
@@ -880,20 +911,10 @@ void etm4_config_trace_mode(struct etmv4_config *config)
 
 	addr_acc = config->addr_acc[ETM_DEFAULT_ADDR_COMP];
 	/* clear default config */
-	addr_acc &= ~(ETM_EXLEVEL_NS_APP | ETM_EXLEVEL_NS_OS);
+	addr_acc &= ~(ETM_EXLEVEL_NS_APP | ETM_EXLEVEL_NS_OS |
+		      ETM_EXLEVEL_NS_HYP);
 
-	/*
-	 * EXLEVEL_NS, bits[15:12]
-	 * The Exception levels are:
-	 *   Bit[12] Exception level 0 - Application
-	 *   Bit[13] Exception level 1 - OS
-	 *   Bit[14] Exception level 2 - Hypervisor
-	 *   Bit[15] Never implemented
-	 */
-	if (mode & ETM_MODE_EXCL_KERN)
-		addr_acc |= ETM_EXLEVEL_NS_OS;
-	else
-		addr_acc |= ETM_EXLEVEL_NS_APP;
+	addr_acc |= etm4_get_ns_access_type(config);
 
 	config->addr_acc[ETM_DEFAULT_ADDR_COMP] = addr_acc;
 	config->addr_acc[ETM_DEFAULT_ADDR_COMP + 1] = addr_acc;
diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c
index 448145a36675..927925151509 100644
--- a/drivers/hwtracing/coresight/coresight-funnel.c
+++ b/drivers/hwtracing/coresight/coresight-funnel.c
@@ -25,6 +25,7 @@
 #define FUNNEL_HOLDTIME_MASK	0xf00
 #define FUNNEL_HOLDTIME_SHFT	0x8
 #define FUNNEL_HOLDTIME		(0x7 << FUNNEL_HOLDTIME_SHFT)
+#define FUNNEL_ENSx_MASK	0xff
 
 /**
  * struct funnel_drvdata - specifics associated to a funnel component
@@ -42,31 +43,42 @@ struct funnel_drvdata {
 	unsigned long		priority;
 };
 
-static void funnel_enable_hw(struct funnel_drvdata *drvdata, int port)
+static int funnel_enable_hw(struct funnel_drvdata *drvdata, int port)
 {
 	u32 functl;
+	int rc = 0;
 
 	CS_UNLOCK(drvdata->base);
 
 	functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL);
+	/* Claim the device only when we enable the first slave */
+	if (!(functl & FUNNEL_ENSx_MASK)) {
+		rc = coresight_claim_device_unlocked(drvdata->base);
+		if (rc)
+			goto done;
+	}
+
 	functl &= ~FUNNEL_HOLDTIME_MASK;
 	functl |= FUNNEL_HOLDTIME;
 	functl |= (1 << port);
 	writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL);
 	writel_relaxed(drvdata->priority, drvdata->base + FUNNEL_PRICTL);
-
+done:
 	CS_LOCK(drvdata->base);
+	return rc;
 }
 
 static int funnel_enable(struct coresight_device *csdev, int inport,
 			 int outport)
 {
+	int rc;
 	struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
-	funnel_enable_hw(drvdata, inport);
+	rc = funnel_enable_hw(drvdata, inport);
 
-	dev_info(drvdata->dev, "FUNNEL inport %d enabled\n", inport);
-	return 0;
+	if (!rc)
+		dev_dbg(drvdata->dev, "FUNNEL inport %d enabled\n", inport);
+	return rc;
 }
 
 static void funnel_disable_hw(struct funnel_drvdata *drvdata, int inport)
@@ -79,6 +91,10 @@ static void funnel_disable_hw(struct funnel_drvdata *drvdata, int inport)
 	functl &= ~(1 << inport);
 	writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL);
 
+	/* Disclaim the device if none of the slaves are now active */
+	if (!(functl & FUNNEL_ENSx_MASK))
+		coresight_disclaim_device_unlocked(drvdata->base);
+
 	CS_LOCK(drvdata->base);
 }
 
@@ -89,7 +105,7 @@ static void funnel_disable(struct coresight_device *csdev, int inport,
 
 	funnel_disable_hw(drvdata, inport);
 
-	dev_info(drvdata->dev, "FUNNEL inport %d disabled\n", inport);
+	dev_dbg(drvdata->dev, "FUNNEL inport %d disabled\n", inport);
 }
 
 static const struct coresight_ops_link funnel_link_ops = {
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
index 1a6cf3589866..579f34943bf1 100644
--- a/drivers/hwtracing/coresight/coresight-priv.h
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -25,6 +25,13 @@
 #define CORESIGHT_DEVID		0xfc8
 #define CORESIGHT_DEVTYPE	0xfcc
 
+
+/*
+ * Coresight device CLAIM protocol.
+ * See PSCI - ARM DEN 0022D, Section: 6.8.1 Debug and Trace save and restore.
+ */
+#define CORESIGHT_CLAIM_SELF_HOSTED	BIT(1)
+
 #define TIMEOUT_US		100
 #define BMVAL(val, lsb, msb)	((val & GENMASK(msb, lsb)) >> lsb)
 
@@ -137,7 +144,7 @@ static inline void coresight_write_reg_pair(void __iomem *addr, u64 val,
 }
 
 void coresight_disable_path(struct list_head *path);
-int coresight_enable_path(struct list_head *path, u32 mode);
+int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data);
 struct coresight_device *coresight_get_sink(struct list_head *path);
 struct coresight_device *coresight_get_enabled_sink(bool reset);
 struct list_head *coresight_build_path(struct coresight_device *csdev,
diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c
index 8d2eaaab6c2f..feac98315471 100644
--- a/drivers/hwtracing/coresight/coresight-replicator.c
+++ b/drivers/hwtracing/coresight/coresight-replicator.c
@@ -35,7 +35,7 @@ static int replicator_enable(struct coresight_device *csdev, int inport,
 {
 	struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
-	dev_info(drvdata->dev, "REPLICATOR enabled\n");
+	dev_dbg(drvdata->dev, "REPLICATOR enabled\n");
 	return 0;
 }
 
@@ -44,7 +44,7 @@ static void replicator_disable(struct coresight_device *csdev, int inport,
 {
 	struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
-	dev_info(drvdata->dev, "REPLICATOR disabled\n");
+	dev_dbg(drvdata->dev, "REPLICATOR disabled\n");
 }
 
 static const struct coresight_ops_link replicator_link_ops = {
diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c
index c46c70aec1d5..35d6f9709274 100644
--- a/drivers/hwtracing/coresight/coresight-stm.c
+++ b/drivers/hwtracing/coresight/coresight-stm.c
@@ -211,7 +211,7 @@ static int stm_enable(struct coresight_device *csdev,
 	stm_enable_hw(drvdata);
 	spin_unlock(&drvdata->spinlock);
 
-	dev_info(drvdata->dev, "STM tracing enabled\n");
+	dev_dbg(drvdata->dev, "STM tracing enabled\n");
 	return 0;
 }
 
@@ -274,7 +274,7 @@ static void stm_disable(struct coresight_device *csdev,
 		pm_runtime_put(drvdata->dev);
 
 		local_set(&drvdata->mode, CS_MODE_DISABLED);
-		dev_info(drvdata->dev, "STM tracing disabled\n");
+		dev_dbg(drvdata->dev, "STM tracing disabled\n");
 	}
 }
 
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
index 0549249f4b39..53fc83b72a49 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
@@ -10,8 +10,12 @@
 #include <linux/slab.h>
 #include "coresight-priv.h"
 #include "coresight-tmc.h"
+#include "coresight-etm-perf.h"
 
-static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
+static int tmc_set_etf_buffer(struct coresight_device *csdev,
+			      struct perf_output_handle *handle);
+
+static void __tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
 {
 	CS_UNLOCK(drvdata->base);
 
@@ -30,33 +34,41 @@ static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
 	CS_LOCK(drvdata->base);
 }
 
+static int tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
+{
+	int rc = coresight_claim_device(drvdata->base);
+
+	if (rc)
+		return rc;
+
+	__tmc_etb_enable_hw(drvdata);
+	return 0;
+}
+
 static void tmc_etb_dump_hw(struct tmc_drvdata *drvdata)
 {
 	char *bufp;
 	u32 read_data, lost;
-	int i;
 
 	/* Check if the buffer wrapped around. */
 	lost = readl_relaxed(drvdata->base + TMC_STS) & TMC_STS_FULL;
 	bufp = drvdata->buf;
 	drvdata->len = 0;
 	while (1) {
-		for (i = 0; i < drvdata->memwidth; i++) {
-			read_data = readl_relaxed(drvdata->base + TMC_RRD);
-			if (read_data == 0xFFFFFFFF)
-				goto done;
-			memcpy(bufp, &read_data, 4);
-			bufp += 4;
-			drvdata->len += 4;
-		}
+		read_data = readl_relaxed(drvdata->base + TMC_RRD);
+		if (read_data == 0xFFFFFFFF)
+			break;
+		memcpy(bufp, &read_data, 4);
+		bufp += 4;
+		drvdata->len += 4;
 	}
-done:
+
 	if (lost)
 		coresight_insert_barrier_packet(drvdata->buf);
 	return;
 }
 
-static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
+static void __tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
 {
 	CS_UNLOCK(drvdata->base);
 
@@ -72,7 +84,13 @@ static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
 	CS_LOCK(drvdata->base);
 }
 
-static void tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
+static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
+{
+	coresight_disclaim_device(drvdata);
+	__tmc_etb_disable_hw(drvdata);
+}
+
+static void __tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
 {
 	CS_UNLOCK(drvdata->base);
 
@@ -88,13 +106,24 @@ static void tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
 	CS_LOCK(drvdata->base);
 }
 
+static int tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
+{
+	int rc = coresight_claim_device(drvdata->base);
+
+	if (rc)
+		return rc;
+
+	__tmc_etf_enable_hw(drvdata);
+	return 0;
+}
+
 static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
 {
 	CS_UNLOCK(drvdata->base);
 
 	tmc_flush_and_stop(drvdata);
 	tmc_disable_hw(drvdata);
-
+	coresight_disclaim_device_unlocked(drvdata->base);
 	CS_LOCK(drvdata->base);
 }
 
@@ -170,8 +199,12 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev)
 		drvdata->buf = buf;
 	}
 
-	drvdata->mode = CS_MODE_SYSFS;
-	tmc_etb_enable_hw(drvdata);
+	ret = tmc_etb_enable_hw(drvdata);
+	if (!ret)
+		drvdata->mode = CS_MODE_SYSFS;
+	else
+		/* Free up the buffer if we failed to enable */
+		used = false;
 out:
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
@@ -182,37 +215,40 @@ out:
 	return ret;
 }
 
-static int tmc_enable_etf_sink_perf(struct coresight_device *csdev)
+static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, void *data)
 {
 	int ret = 0;
 	unsigned long flags;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct perf_output_handle *handle = data;
 
 	spin_lock_irqsave(&drvdata->spinlock, flags);
-	if (drvdata->reading) {
+	do {
 		ret = -EINVAL;
-		goto out;
-	}
-
-	/*
-	 * In Perf mode there can be only one writer per sink.  There
-	 * is also no need to continue if the ETB/ETR is already operated
-	 * from sysFS.
-	 */
-	if (drvdata->mode != CS_MODE_DISABLED) {
-		ret = -EINVAL;
-		goto out;
-	}
+		if (drvdata->reading)
+			break;
+		/*
+		 * In Perf mode there can be only one writer per sink.  There
+		 * is also no need to continue if the ETB/ETF is already
+		 * operated from sysFS.
+		 */
+		if (drvdata->mode != CS_MODE_DISABLED)
+			break;
 
-	drvdata->mode = CS_MODE_PERF;
-	tmc_etb_enable_hw(drvdata);
-out:
+		ret = tmc_set_etf_buffer(csdev, handle);
+		if (ret)
+			break;
+		ret  = tmc_etb_enable_hw(drvdata);
+		if (!ret)
+			drvdata->mode = CS_MODE_PERF;
+	} while (0);
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
 	return ret;
 }
 
-static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode)
+static int tmc_enable_etf_sink(struct coresight_device *csdev,
+			       u32 mode, void *data)
 {
 	int ret;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -222,7 +258,7 @@ static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode)
 		ret = tmc_enable_etf_sink_sysfs(csdev);
 		break;
 	case CS_MODE_PERF:
-		ret = tmc_enable_etf_sink_perf(csdev);
+		ret = tmc_enable_etf_sink_perf(csdev, data);
 		break;
 	/* We shouldn't be here */
 	default:
@@ -233,7 +269,7 @@ static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode)
 	if (ret)
 		return ret;
 
-	dev_info(drvdata->dev, "TMC-ETB/ETF enabled\n");
+	dev_dbg(drvdata->dev, "TMC-ETB/ETF enabled\n");
 	return 0;
 }
 
@@ -256,12 +292,13 @@ static void tmc_disable_etf_sink(struct coresight_device *csdev)
 
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
-	dev_info(drvdata->dev, "TMC-ETB/ETF disabled\n");
+	dev_dbg(drvdata->dev, "TMC-ETB/ETF disabled\n");
 }
 
 static int tmc_enable_etf_link(struct coresight_device *csdev,
 			       int inport, int outport)
 {
+	int ret;
 	unsigned long flags;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
@@ -271,12 +308,14 @@ static int tmc_enable_etf_link(struct coresight_device *csdev,
 		return -EBUSY;
 	}
 
-	tmc_etf_enable_hw(drvdata);
-	drvdata->mode = CS_MODE_SYSFS;
+	ret = tmc_etf_enable_hw(drvdata);
+	if (!ret)
+		drvdata->mode = CS_MODE_SYSFS;
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
-	dev_info(drvdata->dev, "TMC-ETF enabled\n");
-	return 0;
+	if (!ret)
+		dev_dbg(drvdata->dev, "TMC-ETF enabled\n");
+	return ret;
 }
 
 static void tmc_disable_etf_link(struct coresight_device *csdev,
@@ -295,7 +334,7 @@ static void tmc_disable_etf_link(struct coresight_device *csdev,
 	drvdata->mode = CS_MODE_DISABLED;
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
-	dev_info(drvdata->dev, "TMC-ETF disabled\n");
+	dev_dbg(drvdata->dev, "TMC-ETF disabled\n");
 }
 
 static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, int cpu,
@@ -328,12 +367,14 @@ static void tmc_free_etf_buffer(void *config)
 }
 
 static int tmc_set_etf_buffer(struct coresight_device *csdev,
-			      struct perf_output_handle *handle,
-			      void *sink_config)
+			      struct perf_output_handle *handle)
 {
 	int ret = 0;
 	unsigned long head;
-	struct cs_buffers *buf = sink_config;
+	struct cs_buffers *buf = etm_perf_sink_config(handle);
+
+	if (!buf)
+		return -EINVAL;
 
 	/* wrap head around to the amount of space we have */
 	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
@@ -349,36 +390,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev,
 	return ret;
 }
 
-static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev,
-					  struct perf_output_handle *handle,
-					  void *sink_config)
-{
-	long size = 0;
-	struct cs_buffers *buf = sink_config;
-
-	if (buf) {
-		/*
-		 * In snapshot mode ->data_size holds the new address of the
-		 * ring buffer's head.  The size itself is the whole address
-		 * range since we want the latest information.
-		 */
-		if (buf->snapshot)
-			handle->head = local_xchg(&buf->data_size,
-						  buf->nr_pages << PAGE_SHIFT);
-		/*
-		 * Tell the tracer PMU how much we got in this run and if
-		 * something went wrong along the way.  Nobody else can use
-		 * this cs_buffers instance until we are done.  As such
-		 * resetting parameters here and squaring off with the ring
-		 * buffer API in the tracer PMU is fine.
-		 */
-		size = local_xchg(&buf->data_size, 0);
-	}
-
-	return size;
-}
-
-static void tmc_update_etf_buffer(struct coresight_device *csdev,
+static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev,
 				  struct perf_output_handle *handle,
 				  void *sink_config)
 {
@@ -387,17 +399,17 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
 	const u32 *barrier;
 	u32 *buf_ptr;
 	u64 read_ptr, write_ptr;
-	u32 status, to_read;
-	unsigned long offset;
+	u32 status;
+	unsigned long offset, to_read;
 	struct cs_buffers *buf = sink_config;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
 	if (!buf)
-		return;
+		return 0;
 
 	/* This shouldn't happen */
 	if (WARN_ON_ONCE(drvdata->mode != CS_MODE_PERF))
-		return;
+		return 0;
 
 	CS_UNLOCK(drvdata->base);
 
@@ -438,10 +450,10 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
 		case TMC_MEM_INTF_WIDTH_32BITS:
 		case TMC_MEM_INTF_WIDTH_64BITS:
 		case TMC_MEM_INTF_WIDTH_128BITS:
-			mask = GENMASK(31, 5);
+			mask = GENMASK(31, 4);
 			break;
 		case TMC_MEM_INTF_WIDTH_256BITS:
-			mask = GENMASK(31, 6);
+			mask = GENMASK(31, 5);
 			break;
 		}
 
@@ -486,18 +498,14 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
 		}
 	}
 
-	/*
-	 * In snapshot mode all we have to do is communicate to
-	 * perf_aux_output_end() the address of the current head.  In full
-	 * trace mode the same function expects a size to move rb->aux_head
-	 * forward.
-	 */
-	if (buf->snapshot)
-		local_set(&buf->data_size, (cur * PAGE_SIZE) + offset);
-	else
-		local_add(to_read, &buf->data_size);
-
+	/* In snapshot mode we have to update the head */
+	if (buf->snapshot) {
+		handle->head = (cur * PAGE_SIZE) + offset;
+		to_read = buf->nr_pages << PAGE_SHIFT;
+	}
 	CS_LOCK(drvdata->base);
+
+	return to_read;
 }
 
 static const struct coresight_ops_sink tmc_etf_sink_ops = {
@@ -505,8 +513,6 @@ static const struct coresight_ops_sink tmc_etf_sink_ops = {
 	.disable	= tmc_disable_etf_sink,
 	.alloc_buffer	= tmc_alloc_etf_buffer,
 	.free_buffer	= tmc_free_etf_buffer,
-	.set_buffer	= tmc_set_etf_buffer,
-	.reset_buffer	= tmc_reset_etf_buffer,
 	.update_buffer	= tmc_update_etf_buffer,
 };
 
@@ -563,7 +569,7 @@ int tmc_read_prepare_etb(struct tmc_drvdata *drvdata)
 
 	/* Disable the TMC if need be */
 	if (drvdata->mode == CS_MODE_SYSFS)
-		tmc_etb_disable_hw(drvdata);
+		__tmc_etb_disable_hw(drvdata);
 
 	drvdata->reading = true;
 out:
@@ -603,7 +609,7 @@ int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata)
 		 * can't be NULL.
 		 */
 		memset(drvdata->buf, 0, drvdata->size);
-		tmc_etb_enable_hw(drvdata);
+		__tmc_etb_enable_hw(drvdata);
 	} else {
 		/*
 		 * The ETB/ETF is not tracing and the buffer was just read.
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 2eda5de304c2..f684283890d3 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -10,6 +10,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include "coresight-catu.h"
+#include "coresight-etm-perf.h"
 #include "coresight-priv.h"
 #include "coresight-tmc.h"
 
@@ -21,6 +22,28 @@ struct etr_flat_buf {
 };
 
 /*
+ * etr_perf_buffer - Perf buffer used for ETR
+ * @etr_buf		- Actual buffer used by the ETR
+ * @snaphost		- Perf session mode
+ * @head		- handle->head at the beginning of the session.
+ * @nr_pages		- Number of pages in the ring buffer.
+ * @pages		- Array of Pages in the ring buffer.
+ */
+struct etr_perf_buffer {
+	struct etr_buf		*etr_buf;
+	bool			snapshot;
+	unsigned long		head;
+	int			nr_pages;
+	void			**pages;
+};
+
+/* Convert the perf index to an offset within the ETR buffer */
+#define PERF_IDX2OFF(idx, buf)	((idx) % ((buf)->nr_pages << PAGE_SHIFT))
+
+/* Lower limit for ETR hardware buffer */
+#define TMC_ETR_PERF_MIN_BUF_SIZE	SZ_1M
+
+/*
  * The TMC ETR SG has a page size of 4K. The SG table contains pointers
  * to 4KB buffers. However, the OS may use a PAGE_SIZE different from
  * 4K (i.e, 16KB or 64KB). This implies that a single OS page could
@@ -536,7 +559,7 @@ tmc_init_etr_sg_table(struct device *dev, int node,
 	sg_table = tmc_alloc_sg_table(dev, node, nr_tpages, nr_dpages, pages);
 	if (IS_ERR(sg_table)) {
 		kfree(etr_table);
-		return ERR_PTR(PTR_ERR(sg_table));
+		return ERR_CAST(sg_table);
 	}
 
 	etr_table->sg_table = sg_table;
@@ -728,12 +751,14 @@ tmc_etr_get_catu_device(struct tmc_drvdata *drvdata)
 	return NULL;
 }
 
-static inline void tmc_etr_enable_catu(struct tmc_drvdata *drvdata)
+static inline int tmc_etr_enable_catu(struct tmc_drvdata *drvdata,
+				      struct etr_buf *etr_buf)
 {
 	struct coresight_device *catu = tmc_etr_get_catu_device(drvdata);
 
 	if (catu && helper_ops(catu)->enable)
-		helper_ops(catu)->enable(catu, drvdata->etr_buf);
+		return helper_ops(catu)->enable(catu, etr_buf);
+	return 0;
 }
 
 static inline void tmc_etr_disable_catu(struct tmc_drvdata *drvdata)
@@ -895,17 +920,11 @@ static void tmc_sync_etr_buf(struct tmc_drvdata *drvdata)
 		tmc_etr_buf_insert_barrier_packet(etr_buf, etr_buf->offset);
 }
 
-static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
+static void __tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
 {
 	u32 axictl, sts;
 	struct etr_buf *etr_buf = drvdata->etr_buf;
 
-	/*
-	 * If this ETR is connected to a CATU, enable it before we turn
-	 * this on
-	 */
-	tmc_etr_enable_catu(drvdata);
-
 	CS_UNLOCK(drvdata->base);
 
 	/* Wait for TMCSReady bit to be set */
@@ -924,11 +943,8 @@ static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
 		axictl |= TMC_AXICTL_ARCACHE_OS;
 	}
 
-	if (etr_buf->mode == ETR_MODE_ETR_SG) {
-		if (WARN_ON(!tmc_etr_has_cap(drvdata, TMC_ETR_SG)))
-			return;
+	if (etr_buf->mode == ETR_MODE_ETR_SG)
 		axictl |= TMC_AXICTL_SCT_GAT_MODE;
-	}
 
 	writel_relaxed(axictl, drvdata->base + TMC_AXICTL);
 	tmc_write_dba(drvdata, etr_buf->hwaddr);
@@ -954,19 +970,54 @@ static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
 	CS_LOCK(drvdata->base);
 }
 
+static int tmc_etr_enable_hw(struct tmc_drvdata *drvdata,
+			     struct etr_buf *etr_buf)
+{
+	int rc;
+
+	/* Callers should provide an appropriate buffer for use */
+	if (WARN_ON(!etr_buf))
+		return -EINVAL;
+
+	if ((etr_buf->mode == ETR_MODE_ETR_SG) &&
+	    WARN_ON(!tmc_etr_has_cap(drvdata, TMC_ETR_SG)))
+		return -EINVAL;
+
+	if (WARN_ON(drvdata->etr_buf))
+		return -EBUSY;
+
+	/*
+	 * If this ETR is connected to a CATU, enable it before we turn
+	 * this on.
+	 */
+	rc = tmc_etr_enable_catu(drvdata, etr_buf);
+	if (rc)
+		return rc;
+	rc = coresight_claim_device(drvdata->base);
+	if (!rc) {
+		drvdata->etr_buf = etr_buf;
+		__tmc_etr_enable_hw(drvdata);
+	}
+
+	return rc;
+}
+
 /*
  * Return the available trace data in the buffer (starts at etr_buf->offset,
  * limited by etr_buf->len) from @pos, with a maximum limit of @len,
  * also updating the @bufpp on where to find it. Since the trace data
  * starts at anywhere in the buffer, depending on the RRP, we adjust the
  * @len returned to handle buffer wrapping around.
+ *
+ * We are protected here by drvdata->reading != 0, which ensures the
+ * sysfs_buf stays alive.
  */
 ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata,
 				loff_t pos, size_t len, char **bufpp)
 {
 	s64 offset;
 	ssize_t actual = len;
-	struct etr_buf *etr_buf = drvdata->etr_buf;
+	struct etr_buf *etr_buf = drvdata->sysfs_buf;
 
 	if (pos + actual > etr_buf->len)
 		actual = etr_buf->len - pos;
@@ -996,10 +1047,17 @@ tmc_etr_free_sysfs_buf(struct etr_buf *buf)
 
 static void tmc_etr_sync_sysfs_buf(struct tmc_drvdata *drvdata)
 {
-	tmc_sync_etr_buf(drvdata);
+	struct etr_buf *etr_buf = drvdata->etr_buf;
+
+	if (WARN_ON(drvdata->sysfs_buf != etr_buf)) {
+		tmc_etr_free_sysfs_buf(drvdata->sysfs_buf);
+		drvdata->sysfs_buf = NULL;
+	} else {
+		tmc_sync_etr_buf(drvdata);
+	}
 }
 
-static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
+static void __tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
 {
 	CS_UNLOCK(drvdata->base);
 
@@ -1015,8 +1073,16 @@ static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
 
 	CS_LOCK(drvdata->base);
 
+}
+
+static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
+{
+	__tmc_etr_disable_hw(drvdata);
 	/* Disable CATU device if this ETR is connected to one */
 	tmc_etr_disable_catu(drvdata);
+	coresight_disclaim_device(drvdata->base);
+	/* Reset the ETR buf used by hardware */
+	drvdata->etr_buf = NULL;
 }
 
 static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
@@ -1024,7 +1090,7 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
 	int ret = 0;
 	unsigned long flags;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
-	struct etr_buf *new_buf = NULL, *free_buf = NULL;
+	struct etr_buf *sysfs_buf = NULL, *new_buf = NULL, *free_buf = NULL;
 
 	/*
 	 * If we are enabling the ETR from disabled state, we need to make
@@ -1035,7 +1101,8 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
 	 * with the lock released.
 	 */
 	spin_lock_irqsave(&drvdata->spinlock, flags);
-	if (!drvdata->etr_buf || (drvdata->etr_buf->size != drvdata->size)) {
+	sysfs_buf = READ_ONCE(drvdata->sysfs_buf);
+	if (!sysfs_buf || (sysfs_buf->size != drvdata->size)) {
 		spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
 		/* Allocate memory with the locks released */
@@ -1064,14 +1131,15 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
 	 * If we don't have a buffer or it doesn't match the requested size,
 	 * use the buffer allocated above. Otherwise reuse the existing buffer.
 	 */
-	if (!drvdata->etr_buf ||
-	    (new_buf && drvdata->etr_buf->size != new_buf->size)) {
-		free_buf = drvdata->etr_buf;
-		drvdata->etr_buf = new_buf;
+	sysfs_buf = READ_ONCE(drvdata->sysfs_buf);
+	if (!sysfs_buf || (new_buf && sysfs_buf->size != new_buf->size)) {
+		free_buf = sysfs_buf;
+		drvdata->sysfs_buf = new_buf;
 	}
 
-	drvdata->mode = CS_MODE_SYSFS;
-	tmc_etr_enable_hw(drvdata);
+	ret = tmc_etr_enable_hw(drvdata, drvdata->sysfs_buf);
+	if (!ret)
+		drvdata->mode = CS_MODE_SYSFS;
 out:
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
@@ -1080,24 +1148,244 @@ out:
 		tmc_etr_free_sysfs_buf(free_buf);
 
 	if (!ret)
-		dev_info(drvdata->dev, "TMC-ETR enabled\n");
+		dev_dbg(drvdata->dev, "TMC-ETR enabled\n");
 
 	return ret;
 }
 
-static int tmc_enable_etr_sink_perf(struct coresight_device *csdev)
+/*
+ * tmc_etr_setup_perf_buf: Allocate ETR buffer for use by perf.
+ * The size of the hardware buffer is dependent on the size configured
+ * via sysfs and the perf ring buffer size. We prefer to allocate the
+ * largest possible size, scaling down the size by half until it
+ * reaches a minimum limit (1M), beyond which we give up.
+ */
+static struct etr_perf_buffer *
+tmc_etr_setup_perf_buf(struct tmc_drvdata *drvdata, int node, int nr_pages,
+		       void **pages, bool snapshot)
 {
-	/* We don't support perf mode yet ! */
-	return -EINVAL;
+	struct etr_buf *etr_buf;
+	struct etr_perf_buffer *etr_perf;
+	unsigned long size;
+
+	etr_perf = kzalloc_node(sizeof(*etr_perf), GFP_KERNEL, node);
+	if (!etr_perf)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * Try to match the perf ring buffer size if it is larger
+	 * than the size requested via sysfs.
+	 */
+	if ((nr_pages << PAGE_SHIFT) > drvdata->size) {
+		etr_buf = tmc_alloc_etr_buf(drvdata, (nr_pages << PAGE_SHIFT),
+					    0, node, NULL);
+		if (!IS_ERR(etr_buf))
+			goto done;
+	}
+
+	/*
+	 * Else switch to configured size for this ETR
+	 * and scale down until we hit the minimum limit.
+	 */
+	size = drvdata->size;
+	do {
+		etr_buf = tmc_alloc_etr_buf(drvdata, size, 0, node, NULL);
+		if (!IS_ERR(etr_buf))
+			goto done;
+		size /= 2;
+	} while (size >= TMC_ETR_PERF_MIN_BUF_SIZE);
+
+	kfree(etr_perf);
+	return ERR_PTR(-ENOMEM);
+
+done:
+	etr_perf->etr_buf = etr_buf;
+	return etr_perf;
 }
 
-static int tmc_enable_etr_sink(struct coresight_device *csdev, u32 mode)
+
+static void *tmc_alloc_etr_buffer(struct coresight_device *csdev,
+				  int cpu, void **pages, int nr_pages,
+				  bool snapshot)
+{
+	struct etr_perf_buffer *etr_perf;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	if (cpu == -1)
+		cpu = smp_processor_id();
+
+	etr_perf = tmc_etr_setup_perf_buf(drvdata, cpu_to_node(cpu),
+					  nr_pages, pages, snapshot);
+	if (IS_ERR(etr_perf)) {
+		dev_dbg(drvdata->dev, "Unable to allocate ETR buffer\n");
+		return NULL;
+	}
+
+	etr_perf->snapshot = snapshot;
+	etr_perf->nr_pages = nr_pages;
+	etr_perf->pages = pages;
+
+	return etr_perf;
+}
+
+static void tmc_free_etr_buffer(void *config)
+{
+	struct etr_perf_buffer *etr_perf = config;
+
+	if (etr_perf->etr_buf)
+		tmc_free_etr_buf(etr_perf->etr_buf);
+	kfree(etr_perf);
+}
+
+/*
+ * tmc_etr_sync_perf_buffer: Copy the actual trace data from the hardware
+ * buffer to the perf ring buffer.
+ */
+static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf)
+{
+	long bytes, to_copy;
+	long pg_idx, pg_offset, src_offset;
+	unsigned long head = etr_perf->head;
+	char **dst_pages, *src_buf;
+	struct etr_buf *etr_buf = etr_perf->etr_buf;
+
+	head = etr_perf->head;
+	pg_idx = head >> PAGE_SHIFT;
+	pg_offset = head & (PAGE_SIZE - 1);
+	dst_pages = (char **)etr_perf->pages;
+	src_offset = etr_buf->offset;
+	to_copy = etr_buf->len;
+
+	while (to_copy > 0) {
+		/*
+		 * In one iteration, we can copy minimum of :
+		 *  1) what is available in the source buffer,
+		 *  2) what is available in the source buffer, before it
+		 *     wraps around.
+		 *  3) what is available in the destination page.
+		 * in one iteration.
+		 */
+		bytes = tmc_etr_buf_get_data(etr_buf, src_offset, to_copy,
+					     &src_buf);
+		if (WARN_ON_ONCE(bytes <= 0))
+			break;
+		bytes = min(bytes, (long)(PAGE_SIZE - pg_offset));
+
+		memcpy(dst_pages[pg_idx] + pg_offset, src_buf, bytes);
+
+		to_copy -= bytes;
+
+		/* Move destination pointers */
+		pg_offset += bytes;
+		if (pg_offset == PAGE_SIZE) {
+			pg_offset = 0;
+			if (++pg_idx == etr_perf->nr_pages)
+				pg_idx = 0;
+		}
+
+		/* Move source pointers */
+		src_offset += bytes;
+		if (src_offset >= etr_buf->size)
+			src_offset -= etr_buf->size;
+	}
+}
+
+/*
+ * tmc_update_etr_buffer : Update the perf ring buffer with the
+ * available trace data. We use software double buffering at the moment.
+ *
+ * TODO: Add support for reusing the perf ring buffer.
+ */
+static unsigned long
+tmc_update_etr_buffer(struct coresight_device *csdev,
+		      struct perf_output_handle *handle,
+		      void *config)
+{
+	bool lost = false;
+	unsigned long flags, size = 0;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct etr_perf_buffer *etr_perf = config;
+	struct etr_buf *etr_buf = etr_perf->etr_buf;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (WARN_ON(drvdata->perf_data != etr_perf)) {
+		lost = true;
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		goto out;
+	}
+
+	CS_UNLOCK(drvdata->base);
+
+	tmc_flush_and_stop(drvdata);
+	tmc_sync_etr_buf(drvdata);
+
+	CS_LOCK(drvdata->base);
+	/* Reset perf specific data */
+	drvdata->perf_data = NULL;
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	size = etr_buf->len;
+	tmc_etr_sync_perf_buffer(etr_perf);
+
+	/*
+	 * Update handle->head in snapshot mode. Also update the size to the
+	 * hardware buffer size if there was an overflow.
+	 */
+	if (etr_perf->snapshot) {
+		handle->head += size;
+		if (etr_buf->full)
+			size = etr_buf->size;
+	}
+
+	lost |= etr_buf->full;
+out:
+	if (lost)
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+	return size;
+}
+
+static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
+{
+	int rc = 0;
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct perf_output_handle *handle = data;
+	struct etr_perf_buffer *etr_perf = etm_perf_sink_config(handle);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	/*
+	 * There can be only one writer per sink in perf mode. If the sink
+	 * is already open in SYSFS mode, we can't use it.
+	 */
+	if (drvdata->mode != CS_MODE_DISABLED || WARN_ON(drvdata->perf_data)) {
+		rc = -EBUSY;
+		goto unlock_out;
+	}
+
+	if (WARN_ON(!etr_perf || !etr_perf->etr_buf)) {
+		rc = -EINVAL;
+		goto unlock_out;
+	}
+
+	etr_perf->head = PERF_IDX2OFF(handle->head, etr_perf);
+	drvdata->perf_data = etr_perf;
+	rc = tmc_etr_enable_hw(drvdata, etr_perf->etr_buf);
+	if (!rc)
+		drvdata->mode = CS_MODE_PERF;
+
+unlock_out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	return rc;
+}
+
+static int tmc_enable_etr_sink(struct coresight_device *csdev,
+			       u32 mode, void *data)
 {
 	switch (mode) {
 	case CS_MODE_SYSFS:
 		return tmc_enable_etr_sink_sysfs(csdev);
 	case CS_MODE_PERF:
-		return tmc_enable_etr_sink_perf(csdev);
+		return tmc_enable_etr_sink_perf(csdev, data);
 	}
 
 	/* We shouldn't be here */
@@ -1123,12 +1411,15 @@ static void tmc_disable_etr_sink(struct coresight_device *csdev)
 
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
-	dev_info(drvdata->dev, "TMC-ETR disabled\n");
+	dev_dbg(drvdata->dev, "TMC-ETR disabled\n");
 }
 
 static const struct coresight_ops_sink tmc_etr_sink_ops = {
 	.enable		= tmc_enable_etr_sink,
 	.disable	= tmc_disable_etr_sink,
+	.alloc_buffer	= tmc_alloc_etr_buffer,
+	.update_buffer	= tmc_update_etr_buffer,
+	.free_buffer	= tmc_free_etr_buffer,
 };
 
 const struct coresight_ops tmc_etr_cs_ops = {
@@ -1150,21 +1441,19 @@ int tmc_read_prepare_etr(struct tmc_drvdata *drvdata)
 		goto out;
 	}
 
-	/* Don't interfere if operated from Perf */
-	if (drvdata->mode == CS_MODE_PERF) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	/* If drvdata::etr_buf is NULL the trace data has been read already */
-	if (drvdata->etr_buf == NULL) {
+	/*
+	 * We can safely allow reads even if the ETR is operating in PERF mode,
+	 * since the sysfs session is captured in mode specific data.
+	 * If drvdata::sysfs_data is NULL the trace data has been read already.
+	 */
+	if (!drvdata->sysfs_buf) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	/* Disable the TMC if need be */
+	/* Disable the TMC if we are trying to read from a running session. */
 	if (drvdata->mode == CS_MODE_SYSFS)
-		tmc_etr_disable_hw(drvdata);
+		__tmc_etr_disable_hw(drvdata);
 
 	drvdata->reading = true;
 out:
@@ -1176,7 +1465,7 @@ out:
 int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata)
 {
 	unsigned long flags;
-	struct etr_buf *etr_buf = NULL;
+	struct etr_buf *sysfs_buf = NULL;
 
 	/* config types are set a boot time and never change */
 	if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETR))
@@ -1191,22 +1480,22 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata)
 		 * buffer. Since the tracer is still enabled drvdata::buf can't
 		 * be NULL.
 		 */
-		tmc_etr_enable_hw(drvdata);
+		__tmc_etr_enable_hw(drvdata);
 	} else {
 		/*
 		 * The ETR is not tracing and the buffer was just read.
 		 * As such prepare to free the trace buffer.
 		 */
-		etr_buf =  drvdata->etr_buf;
-		drvdata->etr_buf = NULL;
+		sysfs_buf = drvdata->sysfs_buf;
+		drvdata->sysfs_buf = NULL;
 	}
 
 	drvdata->reading = false;
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
 	/* Free allocated memory out side of the spinlock */
-	if (etr_buf)
-		tmc_free_etr_buf(etr_buf);
+	if (sysfs_buf)
+		tmc_etr_free_sysfs_buf(sysfs_buf);
 
 	return 0;
 }
diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c
index 1b817ec1192c..ea249f0bcd73 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.c
+++ b/drivers/hwtracing/coresight/coresight-tmc.c
@@ -81,7 +81,7 @@ static int tmc_read_prepare(struct tmc_drvdata *drvdata)
 	}
 
 	if (!ret)
-		dev_info(drvdata->dev, "TMC read start\n");
+		dev_dbg(drvdata->dev, "TMC read start\n");
 
 	return ret;
 }
@@ -103,7 +103,7 @@ static int tmc_read_unprepare(struct tmc_drvdata *drvdata)
 	}
 
 	if (!ret)
-		dev_info(drvdata->dev, "TMC read end\n");
+		dev_dbg(drvdata->dev, "TMC read end\n");
 
 	return ret;
 }
diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
index 7027bd60c4cc..487c53701e9c 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.h
+++ b/drivers/hwtracing/coresight/coresight-tmc.h
@@ -170,6 +170,8 @@ struct etr_buf {
  * @trigger_cntr: amount of words to store after a trigger.
  * @etr_caps:	Bitmask of capabilities of the TMC ETR, inferred from the
  *		device configuration register (DEVID)
+ * @perf_data:	PERF buffer for ETR.
+ * @sysfs_data:	SYSFS buffer for ETR.
  */
 struct tmc_drvdata {
 	void __iomem		*base;
@@ -189,6 +191,8 @@ struct tmc_drvdata {
 	enum tmc_mem_intf_width	memwidth;
 	u32			trigger_cntr;
 	u32			etr_caps;
+	struct etr_buf		*sysfs_buf;
+	void			*perf_data;
 };
 
 struct etr_buf_operations {
diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c
index 459ef930d98c..b2f72a1fa402 100644
--- a/drivers/hwtracing/coresight/coresight-tpiu.c
+++ b/drivers/hwtracing/coresight/coresight-tpiu.c
@@ -68,13 +68,13 @@ static void tpiu_enable_hw(struct tpiu_drvdata *drvdata)
 	CS_LOCK(drvdata->base);
 }
 
-static int tpiu_enable(struct coresight_device *csdev, u32 mode)
+static int tpiu_enable(struct coresight_device *csdev, u32 mode, void *__unused)
 {
 	struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
 	tpiu_enable_hw(drvdata);
 
-	dev_info(drvdata->dev, "TPIU enabled\n");
+	dev_dbg(drvdata->dev, "TPIU enabled\n");
 	return 0;
 }
 
@@ -100,7 +100,7 @@ static void tpiu_disable(struct coresight_device *csdev)
 
 	tpiu_disable_hw(drvdata);
 
-	dev_info(drvdata->dev, "TPIU disabled\n");
+	dev_dbg(drvdata->dev, "TPIU disabled\n");
 }
 
 static const struct coresight_ops_sink tpiu_sink_ops = {
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 3e07fd335f8c..2b0df1a0a8df 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -128,16 +128,105 @@ static int coresight_find_link_outport(struct coresight_device *csdev,
 	return -ENODEV;
 }
 
-static int coresight_enable_sink(struct coresight_device *csdev, u32 mode)
+static inline u32 coresight_read_claim_tags(void __iomem *base)
+{
+	return readl_relaxed(base + CORESIGHT_CLAIMCLR);
+}
+
+static inline bool coresight_is_claimed_self_hosted(void __iomem *base)
+{
+	return coresight_read_claim_tags(base) == CORESIGHT_CLAIM_SELF_HOSTED;
+}
+
+static inline bool coresight_is_claimed_any(void __iomem *base)
+{
+	return coresight_read_claim_tags(base) != 0;
+}
+
+static inline void coresight_set_claim_tags(void __iomem *base)
+{
+	writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMSET);
+	isb();
+}
+
+static inline void coresight_clear_claim_tags(void __iomem *base)
+{
+	writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMCLR);
+	isb();
+}
+
+/*
+ * coresight_claim_device_unlocked : Claim the device for self-hosted usage
+ * to prevent an external tool from touching this device. As per PSCI
+ * standards, section "Preserving the execution context" => "Debug and Trace
+ * save and Restore", DBGCLAIM[1] is reserved for Self-hosted debug/trace and
+ * DBGCLAIM[0] is reserved for external tools.
+ *
+ * Called with CS_UNLOCKed for the component.
+ * Returns : 0 on success
+ */
+int coresight_claim_device_unlocked(void __iomem *base)
+{
+	if (coresight_is_claimed_any(base))
+		return -EBUSY;
+
+	coresight_set_claim_tags(base);
+	if (coresight_is_claimed_self_hosted(base))
+		return 0;
+	/* There was a race setting the tags, clean up and fail */
+	coresight_clear_claim_tags(base);
+	return -EBUSY;
+}
+
+int coresight_claim_device(void __iomem *base)
+{
+	int rc;
+
+	CS_UNLOCK(base);
+	rc = coresight_claim_device_unlocked(base);
+	CS_LOCK(base);
+
+	return rc;
+}
+
+/*
+ * coresight_disclaim_device_unlocked : Clear the claim tags for the device.
+ * Called with CS_UNLOCKed for the component.
+ */
+void coresight_disclaim_device_unlocked(void __iomem *base)
+{
+
+	if (coresight_is_claimed_self_hosted(base))
+		coresight_clear_claim_tags(base);
+	else
+		/*
+		 * The external agent may have not honoured our claim
+		 * and has manipulated it. Or something else has seriously
+		 * gone wrong in our driver.
+		 */
+		WARN_ON_ONCE(1);
+}
+
+void coresight_disclaim_device(void __iomem *base)
+{
+	CS_UNLOCK(base);
+	coresight_disclaim_device_unlocked(base);
+	CS_LOCK(base);
+}
+
+static int coresight_enable_sink(struct coresight_device *csdev,
+				 u32 mode, void *data)
 {
 	int ret;
 
-	if (!csdev->enable) {
-		if (sink_ops(csdev)->enable) {
-			ret = sink_ops(csdev)->enable(csdev, mode);
-			if (ret)
-				return ret;
-		}
+	/*
+	 * We need to make sure the "new" session is compatible with the
+	 * existing "mode" of operation.
+	 */
+	if (sink_ops(csdev)->enable) {
+		ret = sink_ops(csdev)->enable(csdev, mode, data);
+		if (ret)
+			return ret;
 		csdev->enable = true;
 	}
 
@@ -184,8 +273,10 @@ static int coresight_enable_link(struct coresight_device *csdev,
 	if (atomic_inc_return(&csdev->refcnt[refport]) == 1) {
 		if (link_ops(csdev)->enable) {
 			ret = link_ops(csdev)->enable(csdev, inport, outport);
-			if (ret)
+			if (ret) {
+				atomic_dec(&csdev->refcnt[refport]);
 				return ret;
+			}
 		}
 	}
 
@@ -274,13 +365,21 @@ static bool coresight_disable_source(struct coresight_device *csdev)
 	return !csdev->enable;
 }
 
-void coresight_disable_path(struct list_head *path)
+/*
+ * coresight_disable_path_from : Disable components in the given path beyond
+ * @nd in the list. If @nd is NULL, all the components, except the SOURCE are
+ * disabled.
+ */
+static void coresight_disable_path_from(struct list_head *path,
+					struct coresight_node *nd)
 {
 	u32 type;
-	struct coresight_node *nd;
 	struct coresight_device *csdev, *parent, *child;
 
-	list_for_each_entry(nd, path, link) {
+	if (!nd)
+		nd = list_first_entry(path, struct coresight_node, link);
+
+	list_for_each_entry_continue(nd, path, link) {
 		csdev = nd->csdev;
 		type = csdev->type;
 
@@ -300,7 +399,12 @@ void coresight_disable_path(struct list_head *path)
 			coresight_disable_sink(csdev);
 			break;
 		case CORESIGHT_DEV_TYPE_SOURCE:
-			/* sources are disabled from either sysFS or Perf */
+			/*
+			 * We skip the first node in the path assuming that it
+			 * is the source. So we don't expect a source device in
+			 * the middle of a path.
+			 */
+			WARN_ON(1);
 			break;
 		case CORESIGHT_DEV_TYPE_LINK:
 			parent = list_prev_entry(nd, link)->csdev;
@@ -313,7 +417,12 @@ void coresight_disable_path(struct list_head *path)
 	}
 }
 
-int coresight_enable_path(struct list_head *path, u32 mode)
+void coresight_disable_path(struct list_head *path)
+{
+	coresight_disable_path_from(path, NULL);
+}
+
+int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data)
 {
 
 	int ret = 0;
@@ -338,9 +447,15 @@ int coresight_enable_path(struct list_head *path, u32 mode)
 
 		switch (type) {
 		case CORESIGHT_DEV_TYPE_SINK:
-			ret = coresight_enable_sink(csdev, mode);
+			ret = coresight_enable_sink(csdev, mode, sink_data);
+			/*
+			 * Sink is the first component turned on. If we
+			 * failed to enable the sink, there are no components
+			 * that need disabling. Disabling the path here
+			 * would mean we could disrupt an existing session.
+			 */
 			if (ret)
-				goto err;
+				goto out;
 			break;
 		case CORESIGHT_DEV_TYPE_SOURCE:
 			/* sources are enabled from either sysFS or Perf */
@@ -360,7 +475,7 @@ int coresight_enable_path(struct list_head *path, u32 mode)
 out:
 	return ret;
 err:
-	coresight_disable_path(path);
+	coresight_disable_path_from(path, nd);
 	goto out;
 }
 
@@ -635,7 +750,7 @@ int coresight_enable(struct coresight_device *csdev)
 		goto out;
 	}
 
-	ret = coresight_enable_path(path, CS_MODE_SYSFS);
+	ret = coresight_enable_path(path, CS_MODE_SYSFS, NULL);
 	if (ret)
 		goto err_path;
 
@@ -995,18 +1110,16 @@ postcore_initcall(coresight_init);
 
 struct coresight_device *coresight_register(struct coresight_desc *desc)
 {
-	int i;
 	int ret;
 	int link_subtype;
 	int nr_refcnts = 1;
 	atomic_t *refcnts = NULL;
 	struct coresight_device *csdev;
-	struct coresight_connection *conns = NULL;
 
 	csdev = kzalloc(sizeof(*csdev), GFP_KERNEL);
 	if (!csdev) {
 		ret = -ENOMEM;
-		goto err_kzalloc_csdev;
+		goto err_out;
 	}
 
 	if (desc->type == CORESIGHT_DEV_TYPE_LINK ||
@@ -1022,7 +1135,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
 	refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
 	if (!refcnts) {
 		ret = -ENOMEM;
-		goto err_kzalloc_refcnts;
+		goto err_free_csdev;
 	}
 
 	csdev->refcnt = refcnts;
@@ -1030,22 +1143,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
 	csdev->nr_inport = desc->pdata->nr_inport;
 	csdev->nr_outport = desc->pdata->nr_outport;
 
-	/* Initialise connections if there is at least one outport */
-	if (csdev->nr_outport) {
-		conns = kcalloc(csdev->nr_outport, sizeof(*conns), GFP_KERNEL);
-		if (!conns) {
-			ret = -ENOMEM;
-			goto err_kzalloc_conns;
-		}
-
-		for (i = 0; i < csdev->nr_outport; i++) {
-			conns[i].outport = desc->pdata->outports[i];
-			conns[i].child_name = desc->pdata->child_names[i];
-			conns[i].child_port = desc->pdata->child_ports[i];
-		}
-	}
-
-	csdev->conns = conns;
+	csdev->conns = desc->pdata->conns;
 
 	csdev->type = desc->type;
 	csdev->subtype = desc->subtype;
@@ -1062,7 +1160,11 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
 	ret = device_register(&csdev->dev);
 	if (ret) {
 		put_device(&csdev->dev);
-		goto err_kzalloc_csdev;
+		/*
+		 * All resources are free'd explicitly via
+		 * coresight_device_release(), triggered from put_device().
+		 */
+		goto err_out;
 	}
 
 	mutex_lock(&coresight_mutex);
@@ -1074,11 +1176,9 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
 
 	return csdev;
 
-err_kzalloc_conns:
-	kfree(refcnts);
-err_kzalloc_refcnts:
+err_free_csdev:
 	kfree(csdev);
-err_kzalloc_csdev:
+err_out:
 	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(coresight_register);
diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c
index 6880bee195c8..89092f83567e 100644
--- a/drivers/hwtracing/coresight/of_coresight.c
+++ b/drivers/hwtracing/coresight/of_coresight.c
@@ -45,8 +45,13 @@ of_coresight_get_endpoint_device(struct device_node *endpoint)
 			       endpoint, of_dev_node_match);
 }
 
-static void of_coresight_get_ports(const struct device_node *node,
-				   int *nr_inport, int *nr_outport)
+static inline bool of_coresight_legacy_ep_is_input(struct device_node *ep)
+{
+	return of_property_read_bool(ep, "slave-mode");
+}
+
+static void of_coresight_get_ports_legacy(const struct device_node *node,
+					  int *nr_inport, int *nr_outport)
 {
 	struct device_node *ep = NULL;
 	int in = 0, out = 0;
@@ -56,7 +61,7 @@ static void of_coresight_get_ports(const struct device_node *node,
 		if (!ep)
 			break;
 
-		if (of_property_read_bool(ep, "slave-mode"))
+		if (of_coresight_legacy_ep_is_input(ep))
 			in++;
 		else
 			out++;
@@ -67,32 +72,77 @@ static void of_coresight_get_ports(const struct device_node *node,
 	*nr_outport = out;
 }
 
+static struct device_node *of_coresight_get_port_parent(struct device_node *ep)
+{
+	struct device_node *parent = of_graph_get_port_parent(ep);
+
+	/*
+	 * Skip one-level up to the real device node, if we
+	 * are using the new bindings.
+	 */
+	if (!of_node_cmp(parent->name, "in-ports") ||
+	    !of_node_cmp(parent->name, "out-ports"))
+		parent = of_get_next_parent(parent);
+
+	return parent;
+}
+
+static inline struct device_node *
+of_coresight_get_input_ports_node(const struct device_node *node)
+{
+	return of_get_child_by_name(node, "in-ports");
+}
+
+static inline struct device_node *
+of_coresight_get_output_ports_node(const struct device_node *node)
+{
+	return of_get_child_by_name(node, "out-ports");
+}
+
+static inline int
+of_coresight_count_ports(struct device_node *port_parent)
+{
+	int i = 0;
+	struct device_node *ep = NULL;
+
+	while ((ep = of_graph_get_next_endpoint(port_parent, ep)))
+		i++;
+	return i;
+}
+
+static void of_coresight_get_ports(const struct device_node *node,
+				   int *nr_inport, int *nr_outport)
+{
+	struct device_node *input_ports = NULL, *output_ports = NULL;
+
+	input_ports = of_coresight_get_input_ports_node(node);
+	output_ports = of_coresight_get_output_ports_node(node);
+
+	if (input_ports || output_ports) {
+		if (input_ports) {
+			*nr_inport = of_coresight_count_ports(input_ports);
+			of_node_put(input_ports);
+		}
+		if (output_ports) {
+			*nr_outport = of_coresight_count_ports(output_ports);
+			of_node_put(output_ports);
+		}
+	} else {
+		/* Fall back to legacy DT bindings parsing */
+		of_coresight_get_ports_legacy(node, nr_inport, nr_outport);
+	}
+}
+
 static int of_coresight_alloc_memory(struct device *dev,
 			struct coresight_platform_data *pdata)
 {
-	/* List of output port on this component */
-	pdata->outports = devm_kcalloc(dev,
-				       pdata->nr_outport,
-				       sizeof(*pdata->outports),
-				       GFP_KERNEL);
-	if (!pdata->outports)
-		return -ENOMEM;
-
-	/* Children connected to this component via @outports */
-	pdata->child_names = devm_kcalloc(dev,
-					  pdata->nr_outport,
-					  sizeof(*pdata->child_names),
-					  GFP_KERNEL);
-	if (!pdata->child_names)
-		return -ENOMEM;
-
-	/* Port number on the child this component is connected to */
-	pdata->child_ports = devm_kcalloc(dev,
-					  pdata->nr_outport,
-					  sizeof(*pdata->child_ports),
-					  GFP_KERNEL);
-	if (!pdata->child_ports)
-		return -ENOMEM;
+	if (pdata->nr_outport) {
+		pdata->conns = devm_kzalloc(dev, pdata->nr_outport *
+					    sizeof(*pdata->conns),
+					    GFP_KERNEL);
+		if (!pdata->conns)
+			return -ENOMEM;
+	}
 
 	return 0;
 }
@@ -114,17 +164,78 @@ int of_coresight_get_cpu(const struct device_node *node)
 }
 EXPORT_SYMBOL_GPL(of_coresight_get_cpu);
 
+/*
+ * of_coresight_parse_endpoint : Parse the given output endpoint @ep
+ * and fill the connection information in @conn
+ *
+ * Parses the local port, remote device name and the remote port.
+ *
+ * Returns :
+ *	 1	- If the parsing is successful and a connection record
+ *		  was created for an output connection.
+ *	 0	- If the parsing completed without any fatal errors.
+ *	-Errno	- Fatal error, abort the scanning.
+ */
+static int of_coresight_parse_endpoint(struct device *dev,
+				       struct device_node *ep,
+				       struct coresight_connection *conn)
+{
+	int ret = 0;
+	struct of_endpoint endpoint, rendpoint;
+	struct device_node *rparent = NULL;
+	struct device_node *rep = NULL;
+	struct device *rdev = NULL;
+
+	do {
+		/* Parse the local port details */
+		if (of_graph_parse_endpoint(ep, &endpoint))
+			break;
+		/*
+		 * Get a handle on the remote endpoint and the device it is
+		 * attached to.
+		 */
+		rep = of_graph_get_remote_endpoint(ep);
+		if (!rep)
+			break;
+		rparent = of_coresight_get_port_parent(rep);
+		if (!rparent)
+			break;
+		if (of_graph_parse_endpoint(rep, &rendpoint))
+			break;
+
+		/* If the remote device is not available, defer probing */
+		rdev = of_coresight_get_endpoint_device(rparent);
+		if (!rdev) {
+			ret = -EPROBE_DEFER;
+			break;
+		}
+
+		conn->outport = endpoint.port;
+		conn->child_name = devm_kstrdup(dev,
+						dev_name(rdev),
+						GFP_KERNEL);
+		conn->child_port = rendpoint.port;
+		/* Connection record updated */
+		ret = 1;
+	} while (0);
+
+	of_node_put(rparent);
+	of_node_put(rep);
+	put_device(rdev);
+
+	return ret;
+}
+
 struct coresight_platform_data *
 of_get_coresight_platform_data(struct device *dev,
 			       const struct device_node *node)
 {
-	int i = 0, ret = 0;
+	int ret = 0;
 	struct coresight_platform_data *pdata;
-	struct of_endpoint endpoint, rendpoint;
-	struct device *rdev;
+	struct coresight_connection *conn;
 	struct device_node *ep = NULL;
-	struct device_node *rparent = NULL;
-	struct device_node *rport = NULL;
+	const struct device_node *parent = NULL;
+	bool legacy_binding = false;
 
 	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
@@ -132,63 +243,54 @@ of_get_coresight_platform_data(struct device *dev,
 
 	/* Use device name as sysfs handle */
 	pdata->name = dev_name(dev);
+	pdata->cpu = of_coresight_get_cpu(node);
 
 	/* Get the number of input and output port for this component */
 	of_coresight_get_ports(node, &pdata->nr_inport, &pdata->nr_outport);
 
-	if (pdata->nr_outport) {
-		ret = of_coresight_alloc_memory(dev, pdata);
-		if (ret)
-			return ERR_PTR(ret);
-
-		/* Iterate through each port to discover topology */
-		do {
-			/* Get a handle on a port */
-			ep = of_graph_get_next_endpoint(node, ep);
-			if (!ep)
-				break;
-
-			/*
-			 * No need to deal with input ports, processing for as
-			 * processing for output ports will deal with them.
-			 */
-			if (of_find_property(ep, "slave-mode", NULL))
-				continue;
-
-			/* Get a handle on the local endpoint */
-			ret = of_graph_parse_endpoint(ep, &endpoint);
-
-			if (ret)
-				continue;
-
-			/* The local out port number */
-			pdata->outports[i] = endpoint.port;
-
-			/*
-			 * Get a handle on the remote port and parent
-			 * attached to it.
-			 */
-			rparent = of_graph_get_remote_port_parent(ep);
-			rport = of_graph_get_remote_port(ep);
-
-			if (!rparent || !rport)
-				continue;
+	/* If there are no output connections, we are done */
+	if (!pdata->nr_outport)
+		return pdata;
 
-			if (of_graph_parse_endpoint(rport, &rendpoint))
-				continue;
+	ret = of_coresight_alloc_memory(dev, pdata);
+	if (ret)
+		return ERR_PTR(ret);
 
-			rdev = of_coresight_get_endpoint_device(rparent);
-			if (!rdev)
-				return ERR_PTR(-EPROBE_DEFER);
-
-			pdata->child_names[i] = dev_name(rdev);
-			pdata->child_ports[i] = rendpoint.id;
-
-			i++;
-		} while (ep);
+	parent = of_coresight_get_output_ports_node(node);
+	/*
+	 * If the DT uses obsoleted bindings, the ports are listed
+	 * under the device and we need to filter out the input
+	 * ports.
+	 */
+	if (!parent) {
+		legacy_binding = true;
+		parent = node;
+		dev_warn_once(dev, "Uses obsolete Coresight DT bindings\n");
 	}
 
-	pdata->cpu = of_coresight_get_cpu(node);
+	conn = pdata->conns;
+
+	/* Iterate through each output port to discover topology */
+	while ((ep = of_graph_get_next_endpoint(parent, ep))) {
+		/*
+		 * Legacy binding mixes input/output ports under the
+		 * same parent. So, skip the input ports if we are dealing
+		 * with legacy binding, as they processed with their
+		 * connected output ports.
+		 */
+		if (legacy_binding && of_coresight_legacy_ep_is_input(ep))
+			continue;
+
+		ret = of_coresight_parse_endpoint(dev, ep, conn);
+		switch (ret) {
+		case 1:
+			conn++;		/* Fall through */
+		case 0:
+			break;
+		default:
+			return ERR_PTR(ret);
+		}
+	}
 
 	return pdata;
 }
diff --git a/drivers/hwtracing/stm/Kconfig b/drivers/hwtracing/stm/Kconfig
index 723e2d90083d..752dd66742bf 100644
--- a/drivers/hwtracing/stm/Kconfig
+++ b/drivers/hwtracing/stm/Kconfig
@@ -11,6 +11,35 @@ config STM
 
 if STM
 
+config STM_PROTO_BASIC
+	tristate "Basic STM framing protocol driver"
+	default CONFIG_STM
+	help
+	  This is a simple framing protocol for sending data over STM
+	  devices. This was the protocol that the STM framework used
+	  exclusively until the MIPI SyS-T support was added. Use this
+	  driver for compatibility with your existing STM setup.
+
+	  The receiving side only needs to be able to decode the MIPI
+	  STP protocol in order to extract the data.
+
+	  If you want to be able to use the basic protocol or want the
+	  backwards compatibility for your existing setup, say Y.
+
+config STM_PROTO_SYS_T
+	tristate "MIPI SyS-T STM framing protocol driver"
+	default CONFIG_STM
+	help
+	  This is an implementation of MIPI SyS-T protocol to be used
+	  over the STP transport. In addition to the data payload, it
+	  also carries additional metadata for time correlation, better
+	  means of trace source identification, etc.
+
+	  The receiving side must be able to decode this protocol in
+	  addition to the MIPI STP, in order to extract the data.
+
+	  If you don't know what this is, say N.
+
 config STM_DUMMY
 	tristate "Dummy STM driver"
 	help
diff --git a/drivers/hwtracing/stm/Makefile b/drivers/hwtracing/stm/Makefile
index effc19e5190f..1692fcd29277 100644
--- a/drivers/hwtracing/stm/Makefile
+++ b/drivers/hwtracing/stm/Makefile
@@ -3,6 +3,12 @@ obj-$(CONFIG_STM)	+= stm_core.o
 
 stm_core-y		:= core.o policy.o
 
+obj-$(CONFIG_STM_PROTO_BASIC) += stm_p_basic.o
+obj-$(CONFIG_STM_PROTO_SYS_T) += stm_p_sys-t.o
+
+stm_p_basic-y		:= p_basic.o
+stm_p_sys-t-y		:= p_sys-t.o
+
 obj-$(CONFIG_STM_DUMMY)	+= dummy_stm.o
 
 obj-$(CONFIG_STM_SOURCE_CONSOLE)	+= stm_console.o
diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c
index 10bcb5d73f90..93ce3aa740a9 100644
--- a/drivers/hwtracing/stm/core.c
+++ b/drivers/hwtracing/stm/core.c
@@ -293,15 +293,15 @@ static int stm_output_assign(struct stm_device *stm, unsigned int width,
 	if (width > stm->data->sw_nchannels)
 		return -EINVAL;
 
-	if (policy_node) {
-		stp_policy_node_get_ranges(policy_node,
-					   &midx, &mend, &cidx, &cend);
-	} else {
-		midx = stm->data->sw_start;
-		cidx = 0;
-		mend = stm->data->sw_end;
-		cend = stm->data->sw_nchannels - 1;
-	}
+	/* We no longer accept policy_node==NULL here */
+	if (WARN_ON_ONCE(!policy_node))
+		return -EINVAL;
+
+	/*
+	 * Also, the caller holds reference to policy_node, so it won't
+	 * disappear on us.
+	 */
+	stp_policy_node_get_ranges(policy_node, &midx, &mend, &cidx, &cend);
 
 	spin_lock(&stm->mc_lock);
 	spin_lock(&output->lock);
@@ -316,11 +316,26 @@ static int stm_output_assign(struct stm_device *stm, unsigned int width,
 	output->master = midx;
 	output->channel = cidx;
 	output->nr_chans = width;
+	if (stm->pdrv->output_open) {
+		void *priv = stp_policy_node_priv(policy_node);
+
+		if (WARN_ON_ONCE(!priv))
+			goto unlock;
+
+		/* configfs subsys mutex is held by the caller */
+		ret = stm->pdrv->output_open(priv, output);
+		if (ret)
+			goto unlock;
+	}
+
 	stm_output_claim(stm, output);
 	dev_dbg(&stm->dev, "assigned %u:%u (+%u)\n", midx, cidx, width);
 
 	ret = 0;
 unlock:
+	if (ret)
+		output->nr_chans = 0;
+
 	spin_unlock(&output->lock);
 	spin_unlock(&stm->mc_lock);
 
@@ -333,6 +348,8 @@ static void stm_output_free(struct stm_device *stm, struct stm_output *output)
 	spin_lock(&output->lock);
 	if (output->nr_chans)
 		stm_output_disclaim(stm, output);
+	if (stm->pdrv && stm->pdrv->output_close)
+		stm->pdrv->output_close(output);
 	spin_unlock(&output->lock);
 	spin_unlock(&stm->mc_lock);
 }
@@ -349,6 +366,127 @@ static int major_match(struct device *dev, const void *data)
 	return MAJOR(dev->devt) == major;
 }
 
+/*
+ * Framing protocol management
+ * Modules can implement STM protocol drivers and (un-)register them
+ * with the STM class framework.
+ */
+static struct list_head stm_pdrv_head;
+static struct mutex stm_pdrv_mutex;
+
+struct stm_pdrv_entry {
+	struct list_head			entry;
+	const struct stm_protocol_driver	*pdrv;
+	const struct config_item_type		*node_type;
+};
+
+static const struct stm_pdrv_entry *
+__stm_lookup_protocol(const char *name)
+{
+	struct stm_pdrv_entry *pe;
+
+	/*
+	 * If no name is given (NULL or ""), fall back to "p_basic".
+	 */
+	if (!name || !*name)
+		name = "p_basic";
+
+	list_for_each_entry(pe, &stm_pdrv_head, entry) {
+		if (!strcmp(name, pe->pdrv->name))
+			return pe;
+	}
+
+	return NULL;
+}
+
+int stm_register_protocol(const struct stm_protocol_driver *pdrv)
+{
+	struct stm_pdrv_entry *pe = NULL;
+	int ret = -ENOMEM;
+
+	mutex_lock(&stm_pdrv_mutex);
+
+	if (__stm_lookup_protocol(pdrv->name)) {
+		ret = -EEXIST;
+		goto unlock;
+	}
+
+	pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+	if (!pe)
+		goto unlock;
+
+	if (pdrv->policy_attr) {
+		pe->node_type = get_policy_node_type(pdrv->policy_attr);
+		if (!pe->node_type)
+			goto unlock;
+	}
+
+	list_add_tail(&pe->entry, &stm_pdrv_head);
+	pe->pdrv = pdrv;
+
+	ret = 0;
+unlock:
+	mutex_unlock(&stm_pdrv_mutex);
+
+	if (ret)
+		kfree(pe);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(stm_register_protocol);
+
+void stm_unregister_protocol(const struct stm_protocol_driver *pdrv)
+{
+	struct stm_pdrv_entry *pe, *iter;
+
+	mutex_lock(&stm_pdrv_mutex);
+
+	list_for_each_entry_safe(pe, iter, &stm_pdrv_head, entry) {
+		if (pe->pdrv == pdrv) {
+			list_del(&pe->entry);
+
+			if (pe->node_type) {
+				kfree(pe->node_type->ct_attrs);
+				kfree(pe->node_type);
+			}
+			kfree(pe);
+			break;
+		}
+	}
+
+	mutex_unlock(&stm_pdrv_mutex);
+}
+EXPORT_SYMBOL_GPL(stm_unregister_protocol);
+
+static bool stm_get_protocol(const struct stm_protocol_driver *pdrv)
+{
+	return try_module_get(pdrv->owner);
+}
+
+void stm_put_protocol(const struct stm_protocol_driver *pdrv)
+{
+	module_put(pdrv->owner);
+}
+
+int stm_lookup_protocol(const char *name,
+			const struct stm_protocol_driver **pdrv,
+			const struct config_item_type **node_type)
+{
+	const struct stm_pdrv_entry *pe;
+
+	mutex_lock(&stm_pdrv_mutex);
+
+	pe = __stm_lookup_protocol(name);
+	if (pe && pe->pdrv && stm_get_protocol(pe->pdrv)) {
+		*pdrv = pe->pdrv;
+		*node_type = pe->node_type;
+	}
+
+	mutex_unlock(&stm_pdrv_mutex);
+
+	return pe ? 0 : -ENOENT;
+}
+
 static int stm_char_open(struct inode *inode, struct file *file)
 {
 	struct stm_file *stmf;
@@ -405,42 +543,81 @@ static int stm_char_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int stm_file_assign(struct stm_file *stmf, char *id, unsigned int width)
+static int
+stm_assign_first_policy(struct stm_device *stm, struct stm_output *output,
+			char **ids, unsigned int width)
 {
-	struct stm_device *stm = stmf->stm;
-	int ret;
+	struct stp_policy_node *pn;
+	int err, n;
 
-	stmf->policy_node = stp_policy_node_lookup(stm, id);
+	/*
+	 * On success, stp_policy_node_lookup() will return holding the
+	 * configfs subsystem mutex, which is then released in
+	 * stp_policy_node_put(). This allows the pdrv->output_open() in
+	 * stm_output_assign() to serialize against the attribute accessors.
+	 */
+	for (n = 0, pn = NULL; ids[n] && !pn; n++)
+		pn = stp_policy_node_lookup(stm, ids[n]);
 
-	ret = stm_output_assign(stm, width, stmf->policy_node, &stmf->output);
+	if (!pn)
+		return -EINVAL;
 
-	if (stmf->policy_node)
-		stp_policy_node_put(stmf->policy_node);
+	err = stm_output_assign(stm, width, pn, output);
 
-	return ret;
+	stp_policy_node_put(pn);
+
+	return err;
 }
 
-static ssize_t notrace stm_write(struct stm_data *data, unsigned int master,
-			  unsigned int channel, const char *buf, size_t count)
+/**
+ * stm_data_write() - send the given payload as data packets
+ * @data:	stm driver's data
+ * @m:		STP master
+ * @c:		STP channel
+ * @ts_first:	timestamp the first packet
+ * @buf:	data payload buffer
+ * @count:	data payload size
+ */
+ssize_t notrace stm_data_write(struct stm_data *data, unsigned int m,
+			       unsigned int c, bool ts_first, const void *buf,
+			       size_t count)
 {
-	unsigned int flags = STP_PACKET_TIMESTAMPED;
-	const unsigned char *p = buf, nil = 0;
-	size_t pos;
+	unsigned int flags = ts_first ? STP_PACKET_TIMESTAMPED : 0;
 	ssize_t sz;
+	size_t pos;
 
-	for (pos = 0, p = buf; count > pos; pos += sz, p += sz) {
+	for (pos = 0, sz = 0; pos < count; pos += sz) {
 		sz = min_t(unsigned int, count - pos, 8);
-		sz = data->packet(data, master, channel, STP_PACKET_DATA, flags,
-				  sz, p);
-		flags = 0;
-
-		if (sz < 0)
+		sz = data->packet(data, m, c, STP_PACKET_DATA, flags, sz,
+				  &((u8 *)buf)[pos]);
+		if (sz <= 0)
 			break;
+
+		if (ts_first) {
+			flags = 0;
+			ts_first = false;
+		}
 	}
 
-	data->packet(data, master, channel, STP_PACKET_FLAG, 0, 0, &nil);
+	return sz < 0 ? sz : pos;
+}
+EXPORT_SYMBOL_GPL(stm_data_write);
+
+static ssize_t notrace
+stm_write(struct stm_device *stm, struct stm_output *output,
+	  unsigned int chan, const char *buf, size_t count)
+{
+	int err;
+
+	/* stm->pdrv is serialized against policy_mutex */
+	if (!stm->pdrv)
+		return -ENODEV;
+
+	err = stm->pdrv->write(stm->data, output, chan, buf, count);
+	if (err < 0)
+		return err;
 
-	return pos;
+	return err;
 }
 
 static ssize_t stm_char_write(struct file *file, const char __user *buf,
@@ -455,16 +632,21 @@ static ssize_t stm_char_write(struct file *file, const char __user *buf,
 		count = PAGE_SIZE - 1;
 
 	/*
-	 * if no m/c have been assigned to this writer up to this
-	 * point, use "default" policy entry
+	 * If no m/c have been assigned to this writer up to this
+	 * point, try to use the task name and "default" policy entries.
 	 */
 	if (!stmf->output.nr_chans) {
-		err = stm_file_assign(stmf, "default", 1);
+		char comm[sizeof(current->comm)];
+		char *ids[] = { comm, "default", NULL };
+
+		get_task_comm(comm, current);
+
+		err = stm_assign_first_policy(stmf->stm, &stmf->output, ids, 1);
 		/*
 		 * EBUSY means that somebody else just assigned this
 		 * output, which is just fine for write()
 		 */
-		if (err && err != -EBUSY)
+		if (err)
 			return err;
 	}
 
@@ -480,8 +662,7 @@ static ssize_t stm_char_write(struct file *file, const char __user *buf,
 
 	pm_runtime_get_sync(&stm->dev);
 
-	count = stm_write(stm->data, stmf->output.master, stmf->output.channel,
-			  kbuf, count);
+	count = stm_write(stm, &stmf->output, 0, kbuf, count);
 
 	pm_runtime_mark_last_busy(&stm->dev);
 	pm_runtime_put_autosuspend(&stm->dev);
@@ -550,6 +731,7 @@ static int stm_char_policy_set_ioctl(struct stm_file *stmf, void __user *arg)
 {
 	struct stm_device *stm = stmf->stm;
 	struct stp_policy_id *id;
+	char *ids[] = { NULL, NULL };
 	int ret = -EINVAL;
 	u32 size;
 
@@ -582,7 +764,9 @@ static int stm_char_policy_set_ioctl(struct stm_file *stmf, void __user *arg)
 	    id->width > PAGE_SIZE / stm->data->sw_mmiosz)
 		goto err_free;
 
-	ret = stm_file_assign(stmf, id->id, id->width);
+	ids[0] = id->id;
+	ret = stm_assign_first_policy(stmf->stm, &stmf->output, ids,
+				      id->width);
 	if (ret)
 		goto err_free;
 
@@ -818,8 +1002,8 @@ EXPORT_SYMBOL_GPL(stm_unregister_device);
 static int stm_source_link_add(struct stm_source_device *src,
 			       struct stm_device *stm)
 {
-	char *id;
-	int err;
+	char *ids[] = { NULL, "default", NULL };
+	int err = -ENOMEM;
 
 	mutex_lock(&stm->link_mutex);
 	spin_lock(&stm->link_lock);
@@ -833,19 +1017,13 @@ static int stm_source_link_add(struct stm_source_device *src,
 	spin_unlock(&stm->link_lock);
 	mutex_unlock(&stm->link_mutex);
 
-	id = kstrdup(src->data->name, GFP_KERNEL);
-	if (id) {
-		src->policy_node =
-			stp_policy_node_lookup(stm, id);
-
-		kfree(id);
-	}
-
-	err = stm_output_assign(stm, src->data->nr_chans,
-				src->policy_node, &src->output);
+	ids[0] = kstrdup(src->data->name, GFP_KERNEL);
+	if (!ids[0])
+		goto fail_detach;
 
-	if (src->policy_node)
-		stp_policy_node_put(src->policy_node);
+	err = stm_assign_first_policy(stm, &src->output, ids,
+				      src->data->nr_chans);
+	kfree(ids[0]);
 
 	if (err)
 		goto fail_detach;
@@ -1134,9 +1312,7 @@ int notrace stm_source_write(struct stm_source_data *data,
 
 	stm = srcu_dereference(src->link, &stm_source_srcu);
 	if (stm)
-		count = stm_write(stm->data, src->output.master,
-				  src->output.channel + chan,
-				  buf, count);
+		count = stm_write(stm, &src->output, chan, buf, count);
 	else
 		count = -ENODEV;
 
@@ -1163,7 +1339,15 @@ static int __init stm_core_init(void)
 		goto err_src;
 
 	init_srcu_struct(&stm_source_srcu);
+	INIT_LIST_HEAD(&stm_pdrv_head);
+	mutex_init(&stm_pdrv_mutex);
 
+	/*
+	 * So as to not confuse existing users with a requirement
+	 * to load yet another module, do it here.
+	 */
+	if (IS_ENABLED(CONFIG_STM_PROTO_BASIC))
+		(void)request_module_nowait("stm_p_basic");
 	stm_core_up++;
 
 	return 0;
diff --git a/drivers/hwtracing/stm/heartbeat.c b/drivers/hwtracing/stm/heartbeat.c
index 7db42395e131..3e7df1c0477f 100644
--- a/drivers/hwtracing/stm/heartbeat.c
+++ b/drivers/hwtracing/stm/heartbeat.c
@@ -76,7 +76,7 @@ static int stm_heartbeat_init(void)
 			goto fail_unregister;
 
 		stm_heartbeat[i].data.nr_chans	= 1;
-		stm_heartbeat[i].data.link		= stm_heartbeat_link;
+		stm_heartbeat[i].data.link	= stm_heartbeat_link;
 		stm_heartbeat[i].data.unlink	= stm_heartbeat_unlink;
 		hrtimer_init(&stm_heartbeat[i].hrtimer, CLOCK_MONOTONIC,
 			     HRTIMER_MODE_ABS);
diff --git a/drivers/hwtracing/stm/p_basic.c b/drivers/hwtracing/stm/p_basic.c
new file mode 100644
index 000000000000..8980a6a5fd6c
--- /dev/null
+++ b/drivers/hwtracing/stm/p_basic.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Basic framing protocol for STM devices.
+ * Copyright (c) 2018, Intel Corporation.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/stm.h>
+#include "stm.h"
+
+static ssize_t basic_write(struct stm_data *data, struct stm_output *output,
+			   unsigned int chan, const char *buf, size_t count)
+{
+	unsigned int c = output->channel + chan;
+	unsigned int m = output->master;
+	const unsigned char nil = 0;
+	ssize_t sz;
+
+	sz = stm_data_write(data, m, c, true, buf, count);
+	if (sz > 0)
+		data->packet(data, m, c, STP_PACKET_FLAG, 0, 0, &nil);
+
+	return sz;
+}
+
+static const struct stm_protocol_driver basic_pdrv = {
+	.owner	= THIS_MODULE,
+	.name	= "p_basic",
+	.write	= basic_write,
+};
+
+static int basic_stm_init(void)
+{
+	return stm_register_protocol(&basic_pdrv);
+}
+
+static void basic_stm_exit(void)
+{
+	stm_unregister_protocol(&basic_pdrv);
+}
+
+module_init(basic_stm_init);
+module_exit(basic_stm_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Basic STM framing protocol driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/stm/p_sys-t.c b/drivers/hwtracing/stm/p_sys-t.c
new file mode 100644
index 000000000000..b178a5495b67
--- /dev/null
+++ b/drivers/hwtracing/stm/p_sys-t.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MIPI SyS-T framing protocol for STM devices.
+ * Copyright (c) 2018, Intel Corporation.
+ */
+
+#include <linux/configfs.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+#include <linux/stm.h>
+#include "stm.h"
+
+enum sys_t_message_type {
+	MIPI_SYST_TYPE_BUILD	= 0,
+	MIPI_SYST_TYPE_SHORT32,
+	MIPI_SYST_TYPE_STRING,
+	MIPI_SYST_TYPE_CATALOG,
+	MIPI_SYST_TYPE_RAW	= 6,
+	MIPI_SYST_TYPE_SHORT64,
+	MIPI_SYST_TYPE_CLOCK,
+};
+
+enum sys_t_message_severity {
+	MIPI_SYST_SEVERITY_MAX	= 0,
+	MIPI_SYST_SEVERITY_FATAL,
+	MIPI_SYST_SEVERITY_ERROR,
+	MIPI_SYST_SEVERITY_WARNING,
+	MIPI_SYST_SEVERITY_INFO,
+	MIPI_SYST_SEVERITY_USER1,
+	MIPI_SYST_SEVERITY_USER2,
+	MIPI_SYST_SEVERITY_DEBUG,
+};
+
+enum sys_t_message_build_subtype {
+	MIPI_SYST_BUILD_ID_COMPACT32 = 0,
+	MIPI_SYST_BUILD_ID_COMPACT64,
+	MIPI_SYST_BUILD_ID_LONG,
+};
+
+enum sys_t_message_clock_subtype {
+	MIPI_SYST_CLOCK_TRANSPORT_SYNC = 1,
+};
+
+enum sys_t_message_string_subtype {
+	MIPI_SYST_STRING_GENERIC	= 1,
+	MIPI_SYST_STRING_FUNCTIONENTER,
+	MIPI_SYST_STRING_FUNCTIONEXIT,
+	MIPI_SYST_STRING_INVALIDPARAM	= 5,
+	MIPI_SYST_STRING_ASSERT		= 7,
+	MIPI_SYST_STRING_PRINTF_32	= 11,
+	MIPI_SYST_STRING_PRINTF_64	= 12,
+};
+
+#define MIPI_SYST_TYPE(t)		((u32)(MIPI_SYST_TYPE_ ## t))
+#define MIPI_SYST_SEVERITY(s)		((u32)(MIPI_SYST_SEVERITY_ ## s) << 4)
+#define MIPI_SYST_OPT_LOC		BIT(8)
+#define MIPI_SYST_OPT_LEN		BIT(9)
+#define MIPI_SYST_OPT_CHK		BIT(10)
+#define MIPI_SYST_OPT_TS		BIT(11)
+#define MIPI_SYST_UNIT(u)		((u32)(u) << 12)
+#define MIPI_SYST_ORIGIN(o)		((u32)(o) << 16)
+#define MIPI_SYST_OPT_GUID		BIT(23)
+#define MIPI_SYST_SUBTYPE(s)		((u32)(MIPI_SYST_ ## s) << 24)
+#define MIPI_SYST_UNITLARGE(u)		(MIPI_SYST_UNIT(u & 0xf) | \
+					 MIPI_SYST_ORIGIN(u >> 4))
+#define MIPI_SYST_TYPES(t, s)		(MIPI_SYST_TYPE(t) | \
+					 MIPI_SYST_SUBTYPE(t ## _ ## s))
+
+#define DATA_HEADER	(MIPI_SYST_TYPES(STRING, GENERIC)	| \
+			 MIPI_SYST_SEVERITY(INFO)		| \
+			 MIPI_SYST_OPT_GUID)
+
+#define CLOCK_SYNC_HEADER	(MIPI_SYST_TYPES(CLOCK, TRANSPORT_SYNC)	| \
+				 MIPI_SYST_SEVERITY(MAX))
+
+struct sys_t_policy_node {
+	uuid_t		uuid;
+	bool		do_len;
+	unsigned long	ts_interval;
+	unsigned long	clocksync_interval;
+};
+
+struct sys_t_output {
+	struct sys_t_policy_node	node;
+	unsigned long	ts_jiffies;
+	unsigned long	clocksync_jiffies;
+};
+
+static void sys_t_policy_node_init(void *priv)
+{
+	struct sys_t_policy_node *pn = priv;
+
+	generate_random_uuid(pn->uuid.b);
+}
+
+static int sys_t_output_open(void *priv, struct stm_output *output)
+{
+	struct sys_t_policy_node *pn = priv;
+	struct sys_t_output *opriv;
+
+	opriv = kzalloc(sizeof(*opriv), GFP_ATOMIC);
+	if (!opriv)
+		return -ENOMEM;
+
+	memcpy(&opriv->node, pn, sizeof(opriv->node));
+	output->pdrv_private = opriv;
+
+	return 0;
+}
+
+static void sys_t_output_close(struct stm_output *output)
+{
+	kfree(output->pdrv_private);
+}
+
+static ssize_t sys_t_policy_uuid_show(struct config_item *item,
+				      char *page)
+{
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+
+	return sprintf(page, "%pU\n", &pn->uuid);
+}
+
+static ssize_t
+sys_t_policy_uuid_store(struct config_item *item, const char *page,
+			size_t count)
+{
+	struct mutex *mutexp = &item->ci_group->cg_subsys->su_mutex;
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+	int ret;
+
+	mutex_lock(mutexp);
+	ret = uuid_parse(page, &pn->uuid);
+	mutex_unlock(mutexp);
+
+	return ret < 0 ? ret : count;
+}
+
+CONFIGFS_ATTR(sys_t_policy_, uuid);
+
+static ssize_t sys_t_policy_do_len_show(struct config_item *item,
+				      char *page)
+{
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+
+	return sprintf(page, "%d\n", pn->do_len);
+}
+
+static ssize_t
+sys_t_policy_do_len_store(struct config_item *item, const char *page,
+			size_t count)
+{
+	struct mutex *mutexp = &item->ci_group->cg_subsys->su_mutex;
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+	int ret;
+
+	mutex_lock(mutexp);
+	ret = kstrtobool(page, &pn->do_len);
+	mutex_unlock(mutexp);
+
+	return ret ? ret : count;
+}
+
+CONFIGFS_ATTR(sys_t_policy_, do_len);
+
+static ssize_t sys_t_policy_ts_interval_show(struct config_item *item,
+					     char *page)
+{
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+
+	return sprintf(page, "%u\n", jiffies_to_msecs(pn->ts_interval));
+}
+
+static ssize_t
+sys_t_policy_ts_interval_store(struct config_item *item, const char *page,
+			       size_t count)
+{
+	struct mutex *mutexp = &item->ci_group->cg_subsys->su_mutex;
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+	unsigned int ms;
+	int ret;
+
+	mutex_lock(mutexp);
+	ret = kstrtouint(page, 10, &ms);
+	mutex_unlock(mutexp);
+
+	if (!ret) {
+		pn->ts_interval = msecs_to_jiffies(ms);
+		return count;
+	}
+
+	return ret;
+}
+
+CONFIGFS_ATTR(sys_t_policy_, ts_interval);
+
+static ssize_t sys_t_policy_clocksync_interval_show(struct config_item *item,
+						    char *page)
+{
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+
+	return sprintf(page, "%u\n", jiffies_to_msecs(pn->clocksync_interval));
+}
+
+static ssize_t
+sys_t_policy_clocksync_interval_store(struct config_item *item,
+				      const char *page, size_t count)
+{
+	struct mutex *mutexp = &item->ci_group->cg_subsys->su_mutex;
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+	unsigned int ms;
+	int ret;
+
+	mutex_lock(mutexp);
+	ret = kstrtouint(page, 10, &ms);
+	mutex_unlock(mutexp);
+
+	if (!ret) {
+		pn->clocksync_interval = msecs_to_jiffies(ms);
+		return count;
+	}
+
+	return ret;
+}
+
+CONFIGFS_ATTR(sys_t_policy_, clocksync_interval);
+
+static struct configfs_attribute *sys_t_policy_attrs[] = {
+	&sys_t_policy_attr_uuid,
+	&sys_t_policy_attr_do_len,
+	&sys_t_policy_attr_ts_interval,
+	&sys_t_policy_attr_clocksync_interval,
+	NULL,
+};
+
+static inline bool sys_t_need_ts(struct sys_t_output *op)
+{
+	if (op->node.ts_interval &&
+	    time_after(op->ts_jiffies + op->node.ts_interval, jiffies)) {
+		op->ts_jiffies = jiffies;
+
+		return true;
+	}
+
+	return false;
+}
+
+static bool sys_t_need_clock_sync(struct sys_t_output *op)
+{
+	if (op->node.clocksync_interval &&
+	    time_after(op->clocksync_jiffies + op->node.clocksync_interval,
+		       jiffies)) {
+		op->clocksync_jiffies = jiffies;
+
+		return true;
+	}
+
+	return false;
+}
+
+static ssize_t
+sys_t_clock_sync(struct stm_data *data, unsigned int m, unsigned int c)
+{
+	u32 header = CLOCK_SYNC_HEADER;
+	const unsigned char nil = 0;
+	u64 payload[2]; /* Clock value and frequency */
+	ssize_t sz;
+
+	sz = data->packet(data, m, c, STP_PACKET_DATA, STP_PACKET_TIMESTAMPED,
+			  4, (u8 *)&header);
+	if (sz <= 0)
+		return sz;
+
+	payload[0] = ktime_get_real_ns();
+	payload[1] = NSEC_PER_SEC;
+	sz = stm_data_write(data, m, c, false, &payload, sizeof(payload));
+	if (sz <= 0)
+		return sz;
+
+	data->packet(data, m, c, STP_PACKET_FLAG, 0, 0, &nil);
+
+	return sizeof(header) + sizeof(payload);
+}
+
+static ssize_t sys_t_write(struct stm_data *data, struct stm_output *output,
+			   unsigned int chan, const char *buf, size_t count)
+{
+	struct sys_t_output *op = output->pdrv_private;
+	unsigned int c = output->channel + chan;
+	unsigned int m = output->master;
+	const unsigned char nil = 0;
+	u32 header = DATA_HEADER;
+	ssize_t sz;
+
+	/* We require an existing policy node to proceed */
+	if (!op)
+		return -EINVAL;
+
+	if (sys_t_need_clock_sync(op)) {
+		sz = sys_t_clock_sync(data, m, c);
+		if (sz <= 0)
+			return sz;
+	}
+
+	if (op->node.do_len)
+		header |= MIPI_SYST_OPT_LEN;
+	if (sys_t_need_ts(op))
+		header |= MIPI_SYST_OPT_TS;
+
+	/*
+	 * STP framing rules for SyS-T frames:
+	 *   * the first packet of the SyS-T frame is timestamped;
+	 *   * the last packet is a FLAG.
+	 */
+	/* Message layout: HEADER / GUID / [LENGTH /][TIMESTAMP /] DATA */
+	/* HEADER */
+	sz = data->packet(data, m, c, STP_PACKET_DATA, STP_PACKET_TIMESTAMPED,
+			  4, (u8 *)&header);
+	if (sz <= 0)
+		return sz;
+
+	/* GUID */
+	sz = stm_data_write(data, m, c, false, op->node.uuid.b, UUID_SIZE);
+	if (sz <= 0)
+		return sz;
+
+	/* [LENGTH] */
+	if (op->node.do_len) {
+		u16 length = count;
+
+		sz = data->packet(data, m, c, STP_PACKET_DATA, 0, 2,
+				  (u8 *)&length);
+		if (sz <= 0)
+			return sz;
+	}
+
+	/* [TIMESTAMP] */
+	if (header & MIPI_SYST_OPT_TS) {
+		u64 ts = ktime_get_real_ns();
+
+		sz = stm_data_write(data, m, c, false, &ts, sizeof(ts));
+		if (sz <= 0)
+			return sz;
+	}
+
+	/* DATA */
+	sz = stm_data_write(data, m, c, false, buf, count);
+	if (sz > 0)
+		data->packet(data, m, c, STP_PACKET_FLAG, 0, 0, &nil);
+
+	return sz;
+}
+
+static const struct stm_protocol_driver sys_t_pdrv = {
+	.owner			= THIS_MODULE,
+	.name			= "p_sys-t",
+	.priv_sz		= sizeof(struct sys_t_policy_node),
+	.write			= sys_t_write,
+	.policy_attr		= sys_t_policy_attrs,
+	.policy_node_init	= sys_t_policy_node_init,
+	.output_open		= sys_t_output_open,
+	.output_close		= sys_t_output_close,
+};
+
+static int sys_t_stm_init(void)
+{
+	return stm_register_protocol(&sys_t_pdrv);
+}
+
+static void sys_t_stm_exit(void)
+{
+	stm_unregister_protocol(&sys_t_pdrv);
+}
+
+module_init(sys_t_stm_init);
+module_exit(sys_t_stm_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("MIPI SyS-T STM framing protocol driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/stm/policy.c b/drivers/hwtracing/stm/policy.c
index 3fd07e275b34..0910ec807187 100644
--- a/drivers/hwtracing/stm/policy.c
+++ b/drivers/hwtracing/stm/policy.c
@@ -33,8 +33,18 @@ struct stp_policy_node {
 	unsigned int		last_master;
 	unsigned int		first_channel;
 	unsigned int		last_channel;
+	/* this is the one that's exposed to the attributes */
+	unsigned char		priv[0];
 };
 
+void *stp_policy_node_priv(struct stp_policy_node *pn)
+{
+	if (!pn)
+		return NULL;
+
+	return pn->priv;
+}
+
 static struct configfs_subsystem stp_policy_subsys;
 
 void stp_policy_node_get_ranges(struct stp_policy_node *policy_node,
@@ -68,6 +78,14 @@ to_stp_policy_node(struct config_item *item)
 		NULL;
 }
 
+void *to_pdrv_policy_node(struct config_item *item)
+{
+	struct stp_policy_node *node = to_stp_policy_node(item);
+
+	return stp_policy_node_priv(node);
+}
+EXPORT_SYMBOL_GPL(to_pdrv_policy_node);
+
 static ssize_t
 stp_policy_node_masters_show(struct config_item *item, char *page)
 {
@@ -163,7 +181,9 @@ unlock:
 
 static void stp_policy_node_release(struct config_item *item)
 {
-	kfree(to_stp_policy_node(item));
+	struct stp_policy_node *node = to_stp_policy_node(item);
+
+	kfree(node);
 }
 
 static struct configfs_item_operations stp_policy_node_item_ops = {
@@ -182,10 +202,34 @@ static struct configfs_attribute *stp_policy_node_attrs[] = {
 static const struct config_item_type stp_policy_type;
 static const struct config_item_type stp_policy_node_type;
 
+const struct config_item_type *
+get_policy_node_type(struct configfs_attribute **attrs)
+{
+	struct config_item_type *type;
+	struct configfs_attribute **merged;
+
+	type = kmemdup(&stp_policy_node_type, sizeof(stp_policy_node_type),
+		       GFP_KERNEL);
+	if (!type)
+		return NULL;
+
+	merged = memcat_p(stp_policy_node_attrs, attrs);
+	if (!merged) {
+		kfree(type);
+		return NULL;
+	}
+
+	type->ct_attrs = merged;
+
+	return type;
+}
+
 static struct config_group *
 stp_policy_node_make(struct config_group *group, const char *name)
 {
+	const struct config_item_type *type = &stp_policy_node_type;
 	struct stp_policy_node *policy_node, *parent_node;
+	const struct stm_protocol_driver *pdrv;
 	struct stp_policy *policy;
 
 	if (group->cg_item.ci_type == &stp_policy_type) {
@@ -199,12 +243,20 @@ stp_policy_node_make(struct config_group *group, const char *name)
 	if (!policy->stm)
 		return ERR_PTR(-ENODEV);
 
-	policy_node = kzalloc(sizeof(struct stp_policy_node), GFP_KERNEL);
+	pdrv = policy->stm->pdrv;
+	policy_node =
+		kzalloc(offsetof(struct stp_policy_node, priv[pdrv->priv_sz]),
+			GFP_KERNEL);
 	if (!policy_node)
 		return ERR_PTR(-ENOMEM);
 
-	config_group_init_type_name(&policy_node->group, name,
-				    &stp_policy_node_type);
+	if (pdrv->policy_node_init)
+		pdrv->policy_node_init((void *)policy_node->priv);
+
+	if (policy->stm->pdrv_node_type)
+		type = policy->stm->pdrv_node_type;
+
+	config_group_init_type_name(&policy_node->group, name, type);
 
 	policy_node->policy = policy;
 
@@ -254,8 +306,25 @@ static ssize_t stp_policy_device_show(struct config_item *item,
 
 CONFIGFS_ATTR_RO(stp_policy_, device);
 
+static ssize_t stp_policy_protocol_show(struct config_item *item,
+					char *page)
+{
+	struct stp_policy *policy = to_stp_policy(item);
+	ssize_t count;
+
+	count = sprintf(page, "%s\n",
+			(policy && policy->stm) ?
+			policy->stm->pdrv->name :
+			"<none>");
+
+	return count;
+}
+
+CONFIGFS_ATTR_RO(stp_policy_, protocol);
+
 static struct configfs_attribute *stp_policy_attrs[] = {
 	&stp_policy_attr_device,
+	&stp_policy_attr_protocol,
 	NULL,
 };
 
@@ -276,6 +345,7 @@ void stp_policy_unbind(struct stp_policy *policy)
 	stm->policy = NULL;
 	policy->stm = NULL;
 
+	stm_put_protocol(stm->pdrv);
 	stm_put_device(stm);
 }
 
@@ -311,11 +381,14 @@ static const struct config_item_type stp_policy_type = {
 };
 
 static struct config_group *
-stp_policies_make(struct config_group *group, const char *name)
+stp_policy_make(struct config_group *group, const char *name)
 {
+	const struct config_item_type *pdrv_node_type;
+	const struct stm_protocol_driver *pdrv;
+	char *devname, *proto, *p;
 	struct config_group *ret;
 	struct stm_device *stm;
-	char *devname, *p;
+	int err;
 
 	devname = kasprintf(GFP_KERNEL, "%s", name);
 	if (!devname)
@@ -326,6 +399,7 @@ stp_policies_make(struct config_group *group, const char *name)
 	 * <device_name> is the name of an existing stm device; may
 	 *               contain dots;
 	 * <policy_name> is an arbitrary string; may not contain dots
+	 * <device_name>:<protocol_name>.<policy_name>
 	 */
 	p = strrchr(devname, '.');
 	if (!p) {
@@ -335,11 +409,28 @@ stp_policies_make(struct config_group *group, const char *name)
 
 	*p = '\0';
 
+	/*
+	 * look for ":<protocol_name>":
+	 *  + no protocol suffix: fall back to whatever is available;
+	 *  + unknown protocol: fail the whole thing
+	 */
+	proto = strrchr(devname, ':');
+	if (proto)
+		*proto++ = '\0';
+
 	stm = stm_find_device(devname);
+	if (!stm) {
+		kfree(devname);
+		return ERR_PTR(-ENODEV);
+	}
+
+	err = stm_lookup_protocol(proto, &pdrv, &pdrv_node_type);
 	kfree(devname);
 
-	if (!stm)
+	if (err) {
+		stm_put_device(stm);
 		return ERR_PTR(-ENODEV);
+	}
 
 	mutex_lock(&stm->policy_mutex);
 	if (stm->policy) {
@@ -349,31 +440,37 @@ stp_policies_make(struct config_group *group, const char *name)
 
 	stm->policy = kzalloc(sizeof(*stm->policy), GFP_KERNEL);
 	if (!stm->policy) {
-		ret = ERR_PTR(-ENOMEM);
-		goto unlock_policy;
+		mutex_unlock(&stm->policy_mutex);
+		stm_put_protocol(pdrv);
+		stm_put_device(stm);
+		return ERR_PTR(-ENOMEM);
 	}
 
 	config_group_init_type_name(&stm->policy->group, name,
 				    &stp_policy_type);
-	stm->policy->stm = stm;
 
+	stm->pdrv = pdrv;
+	stm->pdrv_node_type = pdrv_node_type;
+	stm->policy->stm = stm;
 	ret = &stm->policy->group;
 
 unlock_policy:
 	mutex_unlock(&stm->policy_mutex);
 
-	if (IS_ERR(ret))
+	if (IS_ERR(ret)) {
+		stm_put_protocol(stm->pdrv);
 		stm_put_device(stm);
+	}
 
 	return ret;
 }
 
-static struct configfs_group_operations stp_policies_group_ops = {
-	.make_group	= stp_policies_make,
+static struct configfs_group_operations stp_policy_root_group_ops = {
+	.make_group	= stp_policy_make,
 };
 
-static const struct config_item_type stp_policies_type = {
-	.ct_group_ops	= &stp_policies_group_ops,
+static const struct config_item_type stp_policy_root_type = {
+	.ct_group_ops	= &stp_policy_root_group_ops,
 	.ct_owner	= THIS_MODULE,
 };
 
@@ -381,7 +478,7 @@ static struct configfs_subsystem stp_policy_subsys = {
 	.su_group = {
 		.cg_item = {
 			.ci_namebuf	= "stp-policy",
-			.ci_type	= &stp_policies_type,
+			.ci_type	= &stp_policy_root_type,
 		},
 	},
 };
@@ -392,7 +489,7 @@ static struct configfs_subsystem stp_policy_subsys = {
 static struct stp_policy_node *
 __stp_policy_node_lookup(struct stp_policy *policy, char *s)
 {
-	struct stp_policy_node *policy_node, *ret;
+	struct stp_policy_node *policy_node, *ret = NULL;
 	struct list_head *head = &policy->group.cg_children;
 	struct config_item *item;
 	char *start, *end = s;
@@ -400,10 +497,6 @@ __stp_policy_node_lookup(struct stp_policy *policy, char *s)
 	if (list_empty(head))
 		return NULL;
 
-	/* return the first entry if everything else fails */
-	item = list_entry(head->next, struct config_item, ci_entry);
-	ret = to_stp_policy_node(item);
-
 next:
 	for (;;) {
 		start = strsep(&end, "/");
@@ -449,25 +542,25 @@ stp_policy_node_lookup(struct stm_device *stm, char *s)
 
 	if (policy_node)
 		config_item_get(&policy_node->group.cg_item);
-	mutex_unlock(&stp_policy_subsys.su_mutex);
+	else
+		mutex_unlock(&stp_policy_subsys.su_mutex);
 
 	return policy_node;
 }
 
 void stp_policy_node_put(struct stp_policy_node *policy_node)
 {
+	lockdep_assert_held(&stp_policy_subsys.su_mutex);
+
+	mutex_unlock(&stp_policy_subsys.su_mutex);
 	config_item_put(&policy_node->group.cg_item);
 }
 
 int __init stp_configfs_init(void)
 {
-	int err;
-
 	config_group_init(&stp_policy_subsys.su_group);
 	mutex_init(&stp_policy_subsys.su_mutex);
-	err = configfs_register_subsystem(&stp_policy_subsys);
-
-	return err;
+	return configfs_register_subsystem(&stp_policy_subsys);
 }
 
 void __exit stp_configfs_exit(void)
diff --git a/drivers/hwtracing/stm/stm.h b/drivers/hwtracing/stm/stm.h
index 923571adc6f4..3569439d53bb 100644
--- a/drivers/hwtracing/stm/stm.h
+++ b/drivers/hwtracing/stm/stm.h
@@ -10,20 +10,17 @@
 #ifndef _STM_STM_H_
 #define _STM_STM_H_
 
+#include <linux/configfs.h>
+
 struct stp_policy;
 struct stp_policy_node;
+struct stm_protocol_driver;
 
-struct stp_policy_node *
-stp_policy_node_lookup(struct stm_device *stm, char *s);
-void stp_policy_node_put(struct stp_policy_node *policy_node);
-void stp_policy_unbind(struct stp_policy *policy);
-
-void stp_policy_node_get_ranges(struct stp_policy_node *policy_node,
-				unsigned int *mstart, unsigned int *mend,
-				unsigned int *cstart, unsigned int *cend);
 int stp_configfs_init(void);
 void stp_configfs_exit(void);
 
+void *stp_policy_node_priv(struct stp_policy_node *pn);
+
 struct stp_master {
 	unsigned int	nr_free;
 	unsigned long	chan_map[0];
@@ -40,6 +37,9 @@ struct stm_device {
 	struct mutex		link_mutex;
 	spinlock_t		link_lock;
 	struct list_head	link_list;
+	/* framing protocol in use */
+	const struct stm_protocol_driver	*pdrv;
+	const struct config_item_type		*pdrv_node_type;
 	/* master allocation */
 	spinlock_t		mc_lock;
 	struct stp_master	*masters[0];
@@ -48,16 +48,28 @@ struct stm_device {
 #define to_stm_device(_d)				\
 	container_of((_d), struct stm_device, dev)
 
+struct stp_policy_node *
+stp_policy_node_lookup(struct stm_device *stm, char *s);
+void stp_policy_node_put(struct stp_policy_node *policy_node);
+void stp_policy_unbind(struct stp_policy *policy);
+
+void stp_policy_node_get_ranges(struct stp_policy_node *policy_node,
+				unsigned int *mstart, unsigned int *mend,
+				unsigned int *cstart, unsigned int *cend);
+
+const struct config_item_type *
+get_policy_node_type(struct configfs_attribute **attrs);
+
 struct stm_output {
 	spinlock_t		lock;
 	unsigned int		master;
 	unsigned int		channel;
 	unsigned int		nr_chans;
+	void			*pdrv_private;
 };
 
 struct stm_file {
 	struct stm_device	*stm;
-	struct stp_policy_node	*policy_node;
 	struct stm_output	output;
 };
 
@@ -71,11 +83,35 @@ struct stm_source_device {
 	struct stm_device __rcu	*link;
 	struct list_head	link_entry;
 	/* one output per stm_source device */
-	struct stp_policy_node	*policy_node;
 	struct stm_output	output;
 };
 
 #define to_stm_source_device(_d)				\
 	container_of((_d), struct stm_source_device, dev)
 
+void *to_pdrv_policy_node(struct config_item *item);
+
+struct stm_protocol_driver {
+	struct module	*owner;
+	const char	*name;
+	ssize_t		(*write)(struct stm_data *data,
+				 struct stm_output *output, unsigned int chan,
+				 const char *buf, size_t count);
+	void		(*policy_node_init)(void *arg);
+	int		(*output_open)(void *priv, struct stm_output *output);
+	void		(*output_close)(struct stm_output *output);
+	ssize_t		priv_sz;
+	struct configfs_attribute	**policy_attr;
+};
+
+int stm_register_protocol(const struct stm_protocol_driver *pdrv);
+void stm_unregister_protocol(const struct stm_protocol_driver *pdrv);
+int stm_lookup_protocol(const char *name,
+			const struct stm_protocol_driver **pdrv,
+			const struct config_item_type **type);
+void stm_put_protocol(const struct stm_protocol_driver *pdrv);
+ssize_t stm_data_write(struct stm_data *data, unsigned int m,
+		       unsigned int c, bool ts_first, const void *buf,
+		       size_t count);
+
 #endif /* _STM_STM_H_ */
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index abb6660c099c..0a3ec7c726ec 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -26,6 +26,7 @@ config INFINIBAND_USER_MAD
 config INFINIBAND_USER_ACCESS
 	tristate "InfiniBand userspace access (verbs and CM)"
 	select ANON_INODES
+	depends on MMU
 	---help---
 	  Userspace InfiniBand access support.  This enables the
 	  kernel side of userspace verbs and the userspace
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 46b855a42884..0dce94e3c495 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -45,6 +45,7 @@
 #include <net/addrconf.h>
 #include <net/ip6_route.h>
 #include <rdma/ib_addr.h>
+#include <rdma/ib_sa.h>
 #include <rdma/ib.h>
 #include <rdma/rdma_netlink.h>
 #include <net/netlink.h>
@@ -61,6 +62,7 @@ struct addr_req {
 			 struct rdma_dev_addr *addr, void *context);
 	unsigned long timeout;
 	struct delayed_work work;
+	bool resolve_by_gid_attr;	/* Consider gid attr in resolve phase */
 	int status;
 	u32 seq;
 };
@@ -219,60 +221,75 @@ int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr)
 }
 EXPORT_SYMBOL(rdma_addr_size_kss);
 
-void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
-		    const struct net_device *dev,
-		    const unsigned char *dst_dev_addr)
+/**
+ * rdma_copy_src_l2_addr - Copy netdevice source addresses
+ * @dev_addr:	Destination address pointer where to copy the addresses
+ * @dev:	Netdevice whose source addresses to copy
+ *
+ * rdma_copy_src_l2_addr() copies source addresses from the specified netdevice.
+ * This includes unicast address, broadcast address, device type and
+ * interface index.
+ */
+void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
+			   const struct net_device *dev)
 {
 	dev_addr->dev_type = dev->type;
 	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
 	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
-	if (dst_dev_addr)
-		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
 	dev_addr->bound_dev_if = dev->ifindex;
 }
-EXPORT_SYMBOL(rdma_copy_addr);
+EXPORT_SYMBOL(rdma_copy_src_l2_addr);
 
-int rdma_translate_ip(const struct sockaddr *addr,
-		      struct rdma_dev_addr *dev_addr)
+static struct net_device *
+rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in)
 {
-	struct net_device *dev;
+	struct net_device *dev = NULL;
+	int ret = -EADDRNOTAVAIL;
 
-	if (dev_addr->bound_dev_if) {
-		dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
-		if (!dev)
-			return -ENODEV;
-		rdma_copy_addr(dev_addr, dev, NULL);
-		dev_put(dev);
-		return 0;
-	}
-
-	switch (addr->sa_family) {
+	switch (src_in->sa_family) {
 	case AF_INET:
-		dev = ip_dev_find(dev_addr->net,
-			((const struct sockaddr_in *)addr)->sin_addr.s_addr);
-
-		if (!dev)
-			return -EADDRNOTAVAIL;
-
-		rdma_copy_addr(dev_addr, dev, NULL);
-		dev_put(dev);
+		dev = __ip_dev_find(net,
+				    ((const struct sockaddr_in *)src_in)->sin_addr.s_addr,
+				    false);
+		if (dev)
+			ret = 0;
 		break;
 #if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
-		rcu_read_lock();
-		for_each_netdev_rcu(dev_addr->net, dev) {
-			if (ipv6_chk_addr(dev_addr->net,
-					  &((const struct sockaddr_in6 *)addr)->sin6_addr,
+		for_each_netdev_rcu(net, dev) {
+			if (ipv6_chk_addr(net,
+					  &((const struct sockaddr_in6 *)src_in)->sin6_addr,
 					  dev, 1)) {
-				rdma_copy_addr(dev_addr, dev, NULL);
+				ret = 0;
 				break;
 			}
 		}
-		rcu_read_unlock();
 		break;
 #endif
 	}
-	return 0;
+	return ret ? ERR_PTR(ret) : dev;
+}
+
+int rdma_translate_ip(const struct sockaddr *addr,
+		      struct rdma_dev_addr *dev_addr)
+{
+	struct net_device *dev;
+
+	if (dev_addr->bound_dev_if) {
+		dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+		if (!dev)
+			return -ENODEV;
+		rdma_copy_src_l2_addr(dev_addr, dev);
+		dev_put(dev);
+		return 0;
+	}
+
+	rcu_read_lock();
+	dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr);
+	if (!IS_ERR(dev))
+		rdma_copy_src_l2_addr(dev_addr, dev);
+	rcu_read_unlock();
+	return PTR_ERR_OR_ZERO(dev);
 }
 EXPORT_SYMBOL(rdma_translate_ip);
 
@@ -295,15 +312,12 @@ static void queue_req(struct addr_req *req)
 	spin_unlock_bh(&lock);
 }
 
-static int ib_nl_fetch_ha(const struct dst_entry *dst,
-			  struct rdma_dev_addr *dev_addr,
+static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr,
 			  const void *daddr, u32 seq, u16 family)
 {
-	if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
+	if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
 		return -EADDRNOTAVAIL;
 
-	/* We fill in what we can, the response will fill the rest */
-	rdma_copy_addr(dev_addr, dst->dev, NULL);
 	return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
 }
 
@@ -322,7 +336,7 @@ static int dst_fetch_ha(const struct dst_entry *dst,
 		neigh_event_send(n, NULL);
 		ret = -ENODATA;
 	} else {
-		rdma_copy_addr(dev_addr, dst->dev, n->ha);
+		memcpy(dev_addr->dst_dev_addr, n->ha, MAX_ADDR_LEN);
 	}
 
 	neigh_release(n);
@@ -356,18 +370,22 @@ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
 		(const void *)&dst_in6->sin6_addr;
 	sa_family_t family = dst_in->sa_family;
 
-	/* Gateway + ARPHRD_INFINIBAND -> IB router */
-	if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
-		return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);
+	/* If we have a gateway in IB mode then it must be an IB network */
+	if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB)
+		return ib_nl_fetch_ha(dev_addr, daddr, seq, family);
 	else
 		return dst_fetch_ha(dst, dev_addr, daddr);
 }
 
-static int addr4_resolve(struct sockaddr_in *src_in,
-			 const struct sockaddr_in *dst_in,
+static int addr4_resolve(struct sockaddr *src_sock,
+			 const struct sockaddr *dst_sock,
 			 struct rdma_dev_addr *addr,
 			 struct rtable **prt)
 {
+	struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock;
+	const struct sockaddr_in *dst_in =
+			(const struct sockaddr_in *)dst_sock;
+
 	__be32 src_ip = src_in->sin_addr.s_addr;
 	__be32 dst_ip = dst_in->sin_addr.s_addr;
 	struct rtable *rt;
@@ -383,16 +401,8 @@ static int addr4_resolve(struct sockaddr_in *src_in,
 	if (ret)
 		return ret;
 
-	src_in->sin_family = AF_INET;
 	src_in->sin_addr.s_addr = fl4.saddr;
 
-	/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
-	 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
-	 * type accordingly.
-	 */
-	if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
-		addr->network = RDMA_NETWORK_IPV4;
-
 	addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
 
 	*prt = rt;
@@ -400,14 +410,16 @@ static int addr4_resolve(struct sockaddr_in *src_in,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static int addr6_resolve(struct sockaddr_in6 *src_in,
-			 const struct sockaddr_in6 *dst_in,
+static int addr6_resolve(struct sockaddr *src_sock,
+			 const struct sockaddr *dst_sock,
 			 struct rdma_dev_addr *addr,
 			 struct dst_entry **pdst)
 {
+	struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock;
+	const struct sockaddr_in6 *dst_in =
+				(const struct sockaddr_in6 *)dst_sock;
 	struct flowi6 fl6;
 	struct dst_entry *dst;
-	struct rt6_info *rt;
 	int ret;
 
 	memset(&fl6, 0, sizeof fl6);
@@ -419,19 +431,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 	if (ret < 0)
 		return ret;
 
-	rt = (struct rt6_info *)dst;
-	if (ipv6_addr_any(&src_in->sin6_addr)) {
-		src_in->sin6_family = AF_INET6;
+	if (ipv6_addr_any(&src_in->sin6_addr))
 		src_in->sin6_addr = fl6.saddr;
-	}
-
-	/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
-	 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
-	 * type accordingly.
-	 */
-	if (rt->rt6i_flags & RTF_GATEWAY &&
-	    ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
-		addr->network = RDMA_NETWORK_IPV6;
 
 	addr->hoplimit = ip6_dst_hoplimit(dst);
 
@@ -439,8 +440,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 	return 0;
 }
 #else
-static int addr6_resolve(struct sockaddr_in6 *src_in,
-			 const struct sockaddr_in6 *dst_in,
+static int addr6_resolve(struct sockaddr *src_sock,
+			 const struct sockaddr *dst_sock,
 			 struct rdma_dev_addr *addr,
 			 struct dst_entry **pdst)
 {
@@ -451,36 +452,110 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 static int addr_resolve_neigh(const struct dst_entry *dst,
 			      const struct sockaddr *dst_in,
 			      struct rdma_dev_addr *addr,
+			      unsigned int ndev_flags,
 			      u32 seq)
 {
-	if (dst->dev->flags & IFF_LOOPBACK) {
-		int ret;
+	int ret = 0;
 
-		ret = rdma_translate_ip(dst_in, addr);
-		if (!ret)
-			memcpy(addr->dst_dev_addr, addr->src_dev_addr,
-			       MAX_ADDR_LEN);
+	if (ndev_flags & IFF_LOOPBACK) {
+		memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+	} else {
+		if (!(ndev_flags & IFF_NOARP)) {
+			/* If the device doesn't do ARP internally */
+			ret = fetch_ha(dst, addr, dst_in, seq);
+		}
+	}
+	return ret;
+}
 
-		return ret;
+static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
+			    const struct sockaddr *dst_in,
+			    const struct dst_entry *dst,
+			    const struct net_device *ndev)
+{
+	int ret = 0;
+
+	if (dst->dev->flags & IFF_LOOPBACK)
+		ret = rdma_translate_ip(dst_in, dev_addr);
+	else
+		rdma_copy_src_l2_addr(dev_addr, dst->dev);
+
+	/*
+	 * If there's a gateway and type of device not ARPHRD_INFINIBAND,
+	 * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the
+	 * network type accordingly.
+	 */
+	if (has_gateway(dst, dst_in->sa_family) &&
+	    ndev->type != ARPHRD_INFINIBAND)
+		dev_addr->network = dst_in->sa_family == AF_INET ?
+						RDMA_NETWORK_IPV4 :
+						RDMA_NETWORK_IPV6;
+	else
+		dev_addr->network = RDMA_NETWORK_IB;
+
+	return ret;
+}
+
+static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr,
+				 unsigned int *ndev_flags,
+				 const struct sockaddr *dst_in,
+				 const struct dst_entry *dst)
+{
+	struct net_device *ndev = READ_ONCE(dst->dev);
+
+	*ndev_flags = ndev->flags;
+	/* A physical device must be the RDMA device to use */
+	if (ndev->flags & IFF_LOOPBACK) {
+		/*
+		 * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or
+		 * loopback IP address. So if route is resolved to loopback
+		 * interface, translate that to a real ndev based on non
+		 * loopback IP address.
+		 */
+		ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in);
+		if (IS_ERR(ndev))
+			return -ENODEV;
 	}
 
-	/* If the device doesn't do ARP internally */
-	if (!(dst->dev->flags & IFF_NOARP))
-		return fetch_ha(dst, addr, dst_in, seq);
+	return copy_src_l2_addr(dev_addr, dst_in, dst, ndev);
+}
+
+static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr)
+{
+	struct net_device *ndev;
 
-	rdma_copy_addr(addr, dst->dev, NULL);
+	ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr);
+	if (IS_ERR(ndev))
+		return PTR_ERR(ndev);
 
+	/*
+	 * Since we are holding the rcu, reading net and ifindex
+	 * are safe without any additional reference; because
+	 * change_net_namespace() in net/core/dev.c does rcu sync
+	 * after it changes the state to IFF_DOWN and before
+	 * updating netdev fields {net, ifindex}.
+	 */
+	addr->net = dev_net(ndev);
+	addr->bound_dev_if = ndev->ifindex;
 	return 0;
 }
 
+static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr)
+{
+	addr->net = &init_net;
+	addr->bound_dev_if = 0;
+}
+
 static int addr_resolve(struct sockaddr *src_in,
 			const struct sockaddr *dst_in,
 			struct rdma_dev_addr *addr,
 			bool resolve_neigh,
+			bool resolve_by_gid_attr,
 			u32 seq)
 {
-	struct net_device *ndev;
-	struct dst_entry *dst;
+	struct dst_entry *dst = NULL;
+	unsigned int ndev_flags = 0;
+	struct rtable *rt = NULL;
 	int ret;
 
 	if (!addr->net) {
@@ -488,58 +563,55 @@ static int addr_resolve(struct sockaddr *src_in,
 		return -EINVAL;
 	}
 
-	if (src_in->sa_family == AF_INET) {
-		struct rtable *rt = NULL;
-		const struct sockaddr_in *dst_in4 =
-			(const struct sockaddr_in *)dst_in;
-
-		ret = addr4_resolve((struct sockaddr_in *)src_in,
-				    dst_in4, addr, &rt);
-		if (ret)
-			return ret;
-
-		if (resolve_neigh)
-			ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
-
-		if (addr->bound_dev_if) {
-			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
-		} else {
-			ndev = rt->dst.dev;
-			dev_hold(ndev);
+	rcu_read_lock();
+	if (resolve_by_gid_attr) {
+		if (!addr->sgid_attr) {
+			rcu_read_unlock();
+			pr_warn_ratelimited("%s: missing gid_attr\n", __func__);
+			return -EINVAL;
 		}
-
-		ip_rt_put(rt);
-	} else {
-		const struct sockaddr_in6 *dst_in6 =
-			(const struct sockaddr_in6 *)dst_in;
-
-		ret = addr6_resolve((struct sockaddr_in6 *)src_in,
-				    dst_in6, addr,
-				    &dst);
-		if (ret)
+		/*
+		 * If the request is for a specific gid attribute of the
+		 * rdma_dev_addr, derive net from the netdevice of the
+		 * GID attribute.
+		 */
+		ret = set_addr_netns_by_gid_rcu(addr);
+		if (ret) {
+			rcu_read_unlock();
 			return ret;
-
-		if (resolve_neigh)
-			ret = addr_resolve_neigh(dst, dst_in, addr, seq);
-
-		if (addr->bound_dev_if) {
-			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
-		} else {
-			ndev = dst->dev;
-			dev_hold(ndev);
 		}
-
-		dst_release(dst);
 	}
-
-	if (ndev) {
-		if (ndev->flags & IFF_LOOPBACK)
-			ret = rdma_translate_ip(dst_in, addr);
-		else
-			addr->bound_dev_if = ndev->ifindex;
-		dev_put(ndev);
+	if (src_in->sa_family == AF_INET) {
+		ret = addr4_resolve(src_in, dst_in, addr, &rt);
+		dst = &rt->dst;
+	} else {
+		ret = addr6_resolve(src_in, dst_in, addr, &dst);
 	}
+	if (ret) {
+		rcu_read_unlock();
+		goto done;
+	}
+	ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst);
+	rcu_read_unlock();
+
+	/*
+	 * Resolve neighbor destination address if requested and
+	 * only if src addr translation didn't fail.
+	 */
+	if (!ret && resolve_neigh)
+		ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq);
 
+	if (src_in->sa_family == AF_INET)
+		ip_rt_put(rt);
+	else
+		dst_release(dst);
+done:
+	/*
+	 * Clear the addr net to go back to its original state, only if it was
+	 * derived from GID attribute in this context.
+	 */
+	if (resolve_by_gid_attr)
+		rdma_addr_set_net_defaults(addr);
 	return ret;
 }
 
@@ -554,7 +626,8 @@ static void process_one_req(struct work_struct *_work)
 		src_in = (struct sockaddr *)&req->src_addr;
 		dst_in = (struct sockaddr *)&req->dst_addr;
 		req->status = addr_resolve(src_in, dst_in, req->addr,
-					   true, req->seq);
+					   true, req->resolve_by_gid_attr,
+					   req->seq);
 		if (req->status && time_after_eq(jiffies, req->timeout)) {
 			req->status = -ETIMEDOUT;
 		} else if (req->status == -ENODATA) {
@@ -586,10 +659,10 @@ static void process_one_req(struct work_struct *_work)
 }
 
 int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
-		    struct rdma_dev_addr *addr, int timeout_ms,
+		    struct rdma_dev_addr *addr, unsigned long timeout_ms,
 		    void (*callback)(int status, struct sockaddr *src_addr,
 				     struct rdma_dev_addr *addr, void *context),
-		    void *context)
+		    bool resolve_by_gid_attr, void *context)
 {
 	struct sockaddr *src_in, *dst_in;
 	struct addr_req *req;
@@ -617,10 +690,12 @@ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
 	req->addr = addr;
 	req->callback = callback;
 	req->context = context;
+	req->resolve_by_gid_attr = resolve_by_gid_attr;
 	INIT_DELAYED_WORK(&req->work, process_one_req);
 	req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
 
-	req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
+	req->status = addr_resolve(src_in, dst_in, addr, true,
+				   req->resolve_by_gid_attr, req->seq);
 	switch (req->status) {
 	case 0:
 		req->timeout = jiffies;
@@ -641,25 +716,53 @@ err:
 }
 EXPORT_SYMBOL(rdma_resolve_ip);
 
-int rdma_resolve_ip_route(struct sockaddr *src_addr,
-			  const struct sockaddr *dst_addr,
-			  struct rdma_dev_addr *addr)
+int roce_resolve_route_from_path(struct sa_path_rec *rec,
+				 const struct ib_gid_attr *attr)
 {
-	struct sockaddr_storage ssrc_addr = {};
-	struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
+	union {
+		struct sockaddr     _sockaddr;
+		struct sockaddr_in  _sockaddr_in;
+		struct sockaddr_in6 _sockaddr_in6;
+	} sgid, dgid;
+	struct rdma_dev_addr dev_addr = {};
+	int ret;
 
-	if (src_addr) {
-		if (src_addr->sa_family != dst_addr->sa_family)
-			return -EINVAL;
+	if (rec->roce.route_resolved)
+		return 0;
 
-		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
-	} else {
-		src_in->sa_family = dst_addr->sa_family;
-	}
+	rdma_gid2ip(&sgid._sockaddr, &rec->sgid);
+	rdma_gid2ip(&dgid._sockaddr, &rec->dgid);
+
+	if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family)
+		return -EINVAL;
+
+	if (!attr || !attr->ndev)
+		return -EINVAL;
+
+	dev_addr.net = &init_net;
+	dev_addr.sgid_attr = attr;
+
+	ret = addr_resolve(&sgid._sockaddr, &dgid._sockaddr,
+			   &dev_addr, false, true, 0);
+	if (ret)
+		return ret;
+
+	if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
+	     dev_addr.network == RDMA_NETWORK_IPV6) &&
+	    rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
+		return -EINVAL;
 
-	return addr_resolve(src_in, dst_addr, addr, false, 0);
+	rec->roce.route_resolved = true;
+	return 0;
 }
 
+/**
+ * rdma_addr_cancel - Cancel resolve ip request
+ * @addr:	Pointer to address structure given previously
+ *		during rdma_resolve_ip().
+ * rdma_addr_cancel() is synchronous function which cancels any pending
+ * request if there is any.
+ */
 void rdma_addr_cancel(struct rdma_dev_addr *addr)
 {
 	struct addr_req *req, *temp_req;
@@ -687,11 +790,6 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
 	 * guarentees no work is running and none will be started.
 	 */
 	cancel_delayed_work_sync(&found->work);
-
-	if (found->callback)
-		found->callback(-ECANCELED, (struct sockaddr *)&found->src_addr,
-			      found->addr, found->context);
-
 	kfree(found);
 }
 EXPORT_SYMBOL(rdma_addr_cancel);
@@ -710,7 +808,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
 
 int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 				 const union ib_gid *dgid,
-				 u8 *dmac, const struct net_device *ndev,
+				 u8 *dmac, const struct ib_gid_attr *sgid_attr,
 				 int *hoplimit)
 {
 	struct rdma_dev_addr dev_addr;
@@ -726,12 +824,12 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 	rdma_gid2ip(&dgid_addr._sockaddr, dgid);
 
 	memset(&dev_addr, 0, sizeof(dev_addr));
-	dev_addr.bound_dev_if = ndev->ifindex;
 	dev_addr.net = &init_net;
+	dev_addr.sgid_attr = sgid_attr;
 
 	init_completion(&ctx.comp);
 	ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr,
-			      &dev_addr, 1000, resolve_cb, &ctx);
+			      &dev_addr, 1000, resolve_cb, true, &ctx);
 	if (ret)
 		return ret;
 
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 3208ad6ad540..5b2fce4a7091 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -212,9 +212,8 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
 	u8 port_num = entry->attr.port_num;
 	struct ib_gid_table *table = rdma_gid_table(device, port_num);
 
-	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
-		 device->name, port_num, entry->attr.index,
-		 entry->attr.gid.raw);
+	dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__,
+		port_num, entry->attr.index, entry->attr.gid.raw);
 
 	if (rdma_cap_roce_gid_table(device, port_num) &&
 	    entry->state != GID_TABLE_ENTRY_INVALID)
@@ -289,9 +288,9 @@ static void store_gid_entry(struct ib_gid_table *table,
 {
 	entry->state = GID_TABLE_ENTRY_VALID;
 
-	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
-		 entry->attr.device->name, entry->attr.port_num,
-		 entry->attr.index, entry->attr.gid.raw);
+	dev_dbg(&entry->attr.device->dev, "%s port=%d index=%d gid %pI6\n",
+		__func__, entry->attr.port_num, entry->attr.index,
+		entry->attr.gid.raw);
 
 	lockdep_assert_held(&table->lock);
 	write_lock_irq(&table->rwlock);
@@ -320,17 +319,16 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
 	int ret;
 
 	if (!attr->ndev) {
-		pr_err("%s NULL netdev device=%s port=%d index=%d\n",
-		       __func__, attr->device->name, attr->port_num,
-		       attr->index);
+		dev_err(&attr->device->dev, "%s NULL netdev port=%d index=%d\n",
+			__func__, attr->port_num, attr->index);
 		return -EINVAL;
 	}
 	if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
 		ret = attr->device->add_gid(attr, &entry->context);
 		if (ret) {
-			pr_err("%s GID add failed device=%s port=%d index=%d\n",
-			       __func__, attr->device->name, attr->port_num,
-			       attr->index);
+			dev_err(&attr->device->dev,
+				"%s GID add failed port=%d index=%d\n",
+				__func__, attr->port_num, attr->index);
 			return ret;
 		}
 	}
@@ -353,9 +351,8 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
 
 	lockdep_assert_held(&table->lock);
 
-	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
-		 ib_dev->name, port, ix,
-		 table->data_vec[ix]->attr.gid.raw);
+	dev_dbg(&ib_dev->dev, "%s port=%d index=%d gid %pI6\n", __func__, port,
+		ix, table->data_vec[ix]->attr.gid.raw);
 
 	write_lock_irq(&table->rwlock);
 	entry = table->data_vec[ix];
@@ -782,9 +779,9 @@ static void release_gid_table(struct ib_device *device, u8 port,
 		if (is_gid_entry_free(table->data_vec[i]))
 			continue;
 		if (kref_read(&table->data_vec[i]->kref) > 1) {
-			pr_err("GID entry ref leak for %s (index %d) ref=%d\n",
-			       device->name, i,
-			       kref_read(&table->data_vec[i]->kref));
+			dev_err(&device->dev,
+				"GID entry ref leak for index %d ref=%d\n", i,
+				kref_read(&table->data_vec[i]->kref));
 			leak = true;
 		}
 	}
@@ -1252,6 +1249,39 @@ void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
 }
 EXPORT_SYMBOL(rdma_hold_gid_attr);
 
+/**
+ * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
+ * which must be in UP state.
+ *
+ * @attr:Pointer to the GID attribute
+ *
+ * Returns pointer to netdevice if the netdevice was attached to GID and
+ * netdevice is in UP state. Caller must hold RCU lock as this API
+ * reads the netdev flags which can change while netdevice migrates to
+ * different net namespace. Returns ERR_PTR with error code otherwise.
+ *
+ */
+struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
+{
+	struct ib_gid_table_entry *entry =
+			container_of(attr, struct ib_gid_table_entry, attr);
+	struct ib_device *device = entry->attr.device;
+	struct net_device *ndev = ERR_PTR(-ENODEV);
+	u8 port_num = entry->attr.port_num;
+	struct ib_gid_table *table;
+	unsigned long flags;
+	bool valid;
+
+	table = rdma_gid_table(device, port_num);
+
+	read_lock_irqsave(&table->rwlock, flags);
+	valid = is_gid_entry_valid(table->data_vec[attr->index]);
+	if (valid && attr->ndev && (READ_ONCE(attr->ndev->flags) & IFF_UP))
+		ndev = attr->ndev;
+	read_unlock_irqrestore(&table->rwlock, flags);
+	return ndev;
+}
+
 static int config_non_roce_gid_cache(struct ib_device *device,
 				     u8 port, int gid_tbl_len)
 {
@@ -1270,8 +1300,9 @@ static int config_non_roce_gid_cache(struct ib_device *device,
 			continue;
 		ret = device->query_gid(device, port, i, &gid_attr.gid);
 		if (ret) {
-			pr_warn("query_gid failed (%d) for %s (index %d)\n",
-				ret, device->name, i);
+			dev_warn(&device->dev,
+				 "query_gid failed (%d) for index %d\n", ret,
+				 i);
 			goto err;
 		}
 		gid_attr.index = i;
@@ -1300,8 +1331,7 @@ static void ib_cache_update(struct ib_device *device,
 
 	ret = ib_query_port(device, port, tprops);
 	if (ret) {
-		pr_warn("ib_query_port failed (%d) for %s\n",
-			ret, device->name);
+		dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret);
 		goto err;
 	}
 
@@ -1323,8 +1353,9 @@ static void ib_cache_update(struct ib_device *device,
 	for (i = 0; i < pkey_cache->table_len; ++i) {
 		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
 		if (ret) {
-			pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
-				ret, device->name, i);
+			dev_warn(&device->dev,
+				 "ib_query_pkey failed (%d) for index %d\n",
+				 ret, i);
 			goto err;
 		}
 	}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 6e39c27dca8e..edb2cb758be7 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3292,8 +3292,11 @@ static int cm_lap_handler(struct cm_work *work)
 	if (ret)
 		goto unlock;
 
-	cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av,
-			   cm_id_priv);
+	ret = cm_init_av_by_path(param->alternate_path, NULL,
+				 &cm_id_priv->alt_av, cm_id_priv);
+	if (ret)
+		goto unlock;
+
 	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
 	cm_id_priv->tid = lap_msg->hdr.tid;
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
@@ -4367,7 +4370,7 @@ static void cm_add_one(struct ib_device *ib_device)
 	cm_dev->going_down = 0;
 	cm_dev->device = device_create(&cm_class, &ib_device->dev,
 				       MKDEV(0, 0), NULL,
-				       "%s", ib_device->name);
+				       "%s", dev_name(&ib_device->dev));
 	if (IS_ERR(cm_dev->device)) {
 		kfree(cm_dev);
 		return;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index a36c94930c31..15d5bb7bf6bb 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -639,13 +639,21 @@ static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
 	id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
 }
 
-static int cma_acquire_dev(struct rdma_id_private *id_priv,
-			   const struct rdma_id_private *listen_id_priv)
+/**
+ * cma_acquire_dev_by_src_ip - Acquire cma device, port, gid attribute
+ * based on source ip address.
+ * @id_priv:	cm_id which should be bound to cma device
+ *
+ * cma_acquire_dev_by_src_ip() binds cm id to cma device, port and GID attribute
+ * based on source IP address. It returns 0 on success or error code otherwise.
+ * It is applicable to active and passive side cm_id.
+ */
+static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	const struct ib_gid_attr *sgid_attr;
-	struct cma_device *cma_dev;
 	union ib_gid gid, iboe_gid, *gidp;
+	struct cma_device *cma_dev;
 	enum ib_gid_type gid_type;
 	int ret = -ENODEV;
 	u8 port;
@@ -654,41 +662,125 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
 	    id_priv->id.ps == RDMA_PS_IPOIB)
 		return -EINVAL;
 
-	mutex_lock(&lock);
 	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
 		    &iboe_gid);
 
 	memcpy(&gid, dev_addr->src_dev_addr +
-	       rdma_addr_gid_offset(dev_addr), sizeof gid);
-
-	if (listen_id_priv) {
-		cma_dev = listen_id_priv->cma_dev;
-		port = listen_id_priv->id.port_num;
-		gidp = rdma_protocol_roce(cma_dev->device, port) ?
-		       &iboe_gid : &gid;
-		gid_type = listen_id_priv->gid_type;
-		sgid_attr = cma_validate_port(cma_dev->device, port,
-					      gid_type, gidp, id_priv);
-		if (!IS_ERR(sgid_attr)) {
-			id_priv->id.port_num = port;
-			cma_bind_sgid_attr(id_priv, sgid_attr);
-			ret = 0;
-			goto out;
+	       rdma_addr_gid_offset(dev_addr), sizeof(gid));
+
+	mutex_lock(&lock);
+	list_for_each_entry(cma_dev, &dev_list, list) {
+		for (port = rdma_start_port(cma_dev->device);
+		     port <= rdma_end_port(cma_dev->device); port++) {
+			gidp = rdma_protocol_roce(cma_dev->device, port) ?
+			       &iboe_gid : &gid;
+			gid_type = cma_dev->default_gid_type[port - 1];
+			sgid_attr = cma_validate_port(cma_dev->device, port,
+						      gid_type, gidp, id_priv);
+			if (!IS_ERR(sgid_attr)) {
+				id_priv->id.port_num = port;
+				cma_bind_sgid_attr(id_priv, sgid_attr);
+				cma_attach_to_dev(id_priv, cma_dev);
+				ret = 0;
+				goto out;
+			}
 		}
 	}
+out:
+	mutex_unlock(&lock);
+	return ret;
+}
+
+/**
+ * cma_ib_acquire_dev - Acquire cma device, port and SGID attribute
+ * @id_priv:		cm id to bind to cma device
+ * @listen_id_priv:	listener cm id to match against
+ * @req:		Pointer to req structure containaining incoming
+ *			request information
+ * cma_ib_acquire_dev() acquires cma device, port and SGID attribute when
+ * rdma device matches for listen_id and incoming request. It also verifies
+ * that a GID table entry is present for the source address.
+ * Returns 0 on success, or returns error code otherwise.
+ */
+static int cma_ib_acquire_dev(struct rdma_id_private *id_priv,
+			      const struct rdma_id_private *listen_id_priv,
+			      struct cma_req_info *req)
+{
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	const struct ib_gid_attr *sgid_attr;
+	enum ib_gid_type gid_type;
+	union ib_gid gid;
+
+	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
+	    id_priv->id.ps == RDMA_PS_IPOIB)
+		return -EINVAL;
+
+	if (rdma_protocol_roce(req->device, req->port))
+		rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+			    &gid);
+	else
+		memcpy(&gid, dev_addr->src_dev_addr +
+		       rdma_addr_gid_offset(dev_addr), sizeof(gid));
+
+	gid_type = listen_id_priv->cma_dev->default_gid_type[req->port - 1];
+	sgid_attr = cma_validate_port(req->device, req->port,
+				      gid_type, &gid, id_priv);
+	if (IS_ERR(sgid_attr))
+		return PTR_ERR(sgid_attr);
+
+	id_priv->id.port_num = req->port;
+	cma_bind_sgid_attr(id_priv, sgid_attr);
+	/* Need to acquire lock to protect against reader
+	 * of cma_dev->id_list such as cma_netdev_callback() and
+	 * cma_process_remove().
+	 */
+	mutex_lock(&lock);
+	cma_attach_to_dev(id_priv, listen_id_priv->cma_dev);
+	mutex_unlock(&lock);
+	return 0;
+}
+
+static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
+			      const struct rdma_id_private *listen_id_priv)
+{
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	const struct ib_gid_attr *sgid_attr;
+	struct cma_device *cma_dev;
+	enum ib_gid_type gid_type;
+	int ret = -ENODEV;
+	union ib_gid gid;
+	u8 port;
+
+	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
+	    id_priv->id.ps == RDMA_PS_IPOIB)
+		return -EINVAL;
+
+	memcpy(&gid, dev_addr->src_dev_addr +
+	       rdma_addr_gid_offset(dev_addr), sizeof(gid));
+
+	mutex_lock(&lock);
+
+	cma_dev = listen_id_priv->cma_dev;
+	port = listen_id_priv->id.port_num;
+	gid_type = listen_id_priv->gid_type;
+	sgid_attr = cma_validate_port(cma_dev->device, port,
+				      gid_type, &gid, id_priv);
+	if (!IS_ERR(sgid_attr)) {
+		id_priv->id.port_num = port;
+		cma_bind_sgid_attr(id_priv, sgid_attr);
+		ret = 0;
+		goto out;
+	}
 
 	list_for_each_entry(cma_dev, &dev_list, list) {
 		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
-			if (listen_id_priv &&
-			    listen_id_priv->cma_dev == cma_dev &&
+			if (listen_id_priv->cma_dev == cma_dev &&
 			    listen_id_priv->id.port_num == port)
 				continue;
 
-			gidp = rdma_protocol_roce(cma_dev->device, port) ?
-			       &iboe_gid : &gid;
 			gid_type = cma_dev->default_gid_type[port - 1];
 			sgid_attr = cma_validate_port(cma_dev->device, port,
-						      gid_type, gidp, id_priv);
+						      gid_type, &gid, id_priv);
 			if (!IS_ERR(sgid_attr)) {
 				id_priv->id.port_num = port;
 				cma_bind_sgid_attr(id_priv, sgid_attr);
@@ -785,10 +877,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
 	if (!id_priv)
 		return ERR_PTR(-ENOMEM);
 
-	if (caller)
-		id_priv->res.kern_name = caller;
-	else
-		rdma_restrack_set_task(&id_priv->res, current);
+	rdma_restrack_set_task(&id_priv->res, caller);
 	id_priv->res.type = RDMA_RESTRACK_CM_ID;
 	id_priv->state = RDMA_CM_IDLE;
 	id_priv->id.context = context;
@@ -1462,18 +1551,35 @@ static bool cma_protocol_roce(const struct rdma_cm_id *id)
 	return rdma_protocol_roce(device, port_num);
 }
 
+static bool cma_is_req_ipv6_ll(const struct cma_req_info *req)
+{
+	const struct sockaddr *daddr =
+			(const struct sockaddr *)&req->listen_addr_storage;
+	const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
+
+	/* Returns true if the req is for IPv6 link local */
+	return (daddr->sa_family == AF_INET6 &&
+		(ipv6_addr_type(&daddr6->sin6_addr) & IPV6_ADDR_LINKLOCAL));
+}
+
 static bool cma_match_net_dev(const struct rdma_cm_id *id,
 			      const struct net_device *net_dev,
-			      u8 port_num)
+			      const struct cma_req_info *req)
 {
 	const struct rdma_addr *addr = &id->route.addr;
 
 	if (!net_dev)
 		/* This request is an AF_IB request */
-		return (!id->port_num || id->port_num == port_num) &&
+		return (!id->port_num || id->port_num == req->port) &&
 		       (addr->src_addr.ss_family == AF_IB);
 
 	/*
+	 * If the request is not for IPv6 link local, allow matching
+	 * request to any netdevice of the one or multiport rdma device.
+	 */
+	if (!cma_is_req_ipv6_ll(req))
+		return true;
+	/*
 	 * Net namespaces must match, and if the listner is listening
 	 * on a specific netdevice than netdevice must match as well.
 	 */
@@ -1500,13 +1606,14 @@ static struct rdma_id_private *cma_find_listener(
 	hlist_for_each_entry(id_priv, &bind_list->owners, node) {
 		if (cma_match_private_data(id_priv, ib_event->private_data)) {
 			if (id_priv->id.device == cm_id->device &&
-			    cma_match_net_dev(&id_priv->id, net_dev, req->port))
+			    cma_match_net_dev(&id_priv->id, net_dev, req))
 				return id_priv;
 			list_for_each_entry(id_priv_dev,
 					    &id_priv->listen_list,
 					    listen_list) {
 				if (id_priv_dev->id.device == cm_id->device &&
-				    cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
+				    cma_match_net_dev(&id_priv_dev->id,
+						      net_dev, req))
 					return id_priv_dev;
 			}
 		}
@@ -1518,18 +1625,18 @@ static struct rdma_id_private *cma_find_listener(
 static struct rdma_id_private *
 cma_ib_id_from_event(struct ib_cm_id *cm_id,
 		     const struct ib_cm_event *ib_event,
+		     struct cma_req_info *req,
 		     struct net_device **net_dev)
 {
-	struct cma_req_info req;
 	struct rdma_bind_list *bind_list;
 	struct rdma_id_private *id_priv;
 	int err;
 
-	err = cma_save_req_info(ib_event, &req);
+	err = cma_save_req_info(ib_event, req);
 	if (err)
 		return ERR_PTR(err);
 
-	*net_dev = cma_get_net_dev(ib_event, &req);
+	*net_dev = cma_get_net_dev(ib_event, req);
 	if (IS_ERR(*net_dev)) {
 		if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
 			/* Assuming the protocol is AF_IB */
@@ -1567,17 +1674,17 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
 		}
 
 		if (!validate_net_dev(*net_dev,
-				 (struct sockaddr *)&req.listen_addr_storage,
-				 (struct sockaddr *)&req.src_addr_storage)) {
+				 (struct sockaddr *)&req->listen_addr_storage,
+				 (struct sockaddr *)&req->src_addr_storage)) {
 			id_priv = ERR_PTR(-EHOSTUNREACH);
 			goto err;
 		}
 	}
 
 	bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
-				rdma_ps_from_service_id(req.service_id),
-				cma_port_from_service_id(req.service_id));
-	id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
+				rdma_ps_from_service_id(req->service_id),
+				cma_port_from_service_id(req->service_id));
+	id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev);
 err:
 	rcu_read_unlock();
 	if (IS_ERR(id_priv) && *net_dev) {
@@ -1710,8 +1817,8 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	mutex_lock(&id_priv->handler_mutex);
 	mutex_unlock(&id_priv->handler_mutex);
 
+	rdma_restrack_del(&id_priv->res);
 	if (id_priv->cma_dev) {
-		rdma_restrack_del(&id_priv->res);
 		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
 			if (id_priv->cm_id.ib)
 				ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -1902,7 +2009,7 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
 		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
 
 	if (net_dev) {
-		rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
+		rdma_copy_src_l2_addr(&rt->addr.dev_addr, net_dev);
 	} else {
 		if (!cma_protocol_roce(listen_id) &&
 		    cma_any_addr(cma_src_addr(id_priv))) {
@@ -1952,7 +2059,7 @@ cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
 		goto err;
 
 	if (net_dev) {
-		rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
+		rdma_copy_src_l2_addr(&id->route.addr.dev_addr, net_dev);
 	} else {
 		if (!cma_any_addr(cma_src_addr(id_priv))) {
 			ret = cma_translate_addr(cma_src_addr(id_priv),
@@ -1999,11 +2106,12 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
 {
 	struct rdma_id_private *listen_id, *conn_id = NULL;
 	struct rdma_cm_event event = {};
+	struct cma_req_info req = {};
 	struct net_device *net_dev;
 	u8 offset;
 	int ret;
 
-	listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev);
+	listen_id = cma_ib_id_from_event(cm_id, ib_event, &req, &net_dev);
 	if (IS_ERR(listen_id))
 		return PTR_ERR(listen_id);
 
@@ -2036,7 +2144,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
 	}
 
 	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
-	ret = cma_acquire_dev(conn_id, listen_id);
+	ret = cma_ib_acquire_dev(conn_id, listen_id, &req);
 	if (ret)
 		goto err2;
 
@@ -2232,7 +2340,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 		goto out;
 	}
 
-	ret = cma_acquire_dev(conn_id, listen_id);
+	ret = cma_iw_acquire_dev(conn_id, listen_id);
 	if (ret) {
 		mutex_unlock(&conn_id->handler_mutex);
 		rdma_destroy_id(new_cm_id);
@@ -2354,8 +2462,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 
 	ret = rdma_listen(id, id_priv->backlog);
 	if (ret)
-		pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n",
-			ret, cma_dev->device->name);
+		dev_warn(&cma_dev->device->dev,
+			 "RDMA CMA: cma_listen_on_dev, error %d\n", ret);
 }
 
 static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -2402,8 +2510,8 @@ static void cma_query_handler(int status, struct sa_path_rec *path_rec,
 	queue_work(cma_wq, &work->work);
 }
 
-static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
-			      struct cma_work *work)
+static int cma_query_ib_route(struct rdma_id_private *id_priv,
+			      unsigned long timeout_ms, struct cma_work *work)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	struct sa_path_rec path_rec;
@@ -2521,7 +2629,8 @@ static void cma_init_resolve_addr_work(struct cma_work *work,
 	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
 }
 
-static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
+static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
+				unsigned long timeout_ms)
 {
 	struct rdma_route *route = &id_priv->id.route;
 	struct cma_work *work;
@@ -2643,7 +2752,7 @@ err:
 }
 EXPORT_SYMBOL(rdma_set_ib_path);
 
-static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
+static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
 {
 	struct cma_work *work;
 
@@ -2744,7 +2853,7 @@ err1:
 	return ret;
 }
 
-int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
+int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
 {
 	struct rdma_id_private *id_priv;
 	int ret;
@@ -2759,7 +2868,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 	else if (rdma_protocol_roce(id->device, id->port_num))
 		ret = cma_resolve_iboe_route(id_priv);
 	else if (rdma_protocol_iwarp(id->device, id->port_num))
-		ret = cma_resolve_iw_route(id_priv, timeout_ms);
+		ret = cma_resolve_iw_route(id_priv);
 	else
 		ret = -ENOSYS;
 
@@ -2862,7 +2971,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 
 	memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
 	if (!status && !id_priv->cma_dev) {
-		status = cma_acquire_dev(id_priv, NULL);
+		status = cma_acquire_dev_by_src_ip(id_priv);
 		if (status)
 			pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
 					     status);
@@ -2882,13 +2991,11 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 	if (id_priv->id.event_handler(&id_priv->id, &event)) {
 		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		mutex_unlock(&id_priv->handler_mutex);
-		cma_deref_id(id_priv);
 		rdma_destroy_id(&id_priv->id);
 		return;
 	}
 out:
 	mutex_unlock(&id_priv->handler_mutex);
-	cma_deref_id(id_priv);
 }
 
 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
@@ -2966,7 +3073,7 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 }
 
 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
-		      const struct sockaddr *dst_addr, int timeout_ms)
+		      const struct sockaddr *dst_addr, unsigned long timeout_ms)
 {
 	struct rdma_id_private *id_priv;
 	int ret;
@@ -2985,16 +3092,16 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 		return -EINVAL;
 
 	memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
-	atomic_inc(&id_priv->refcount);
 	if (cma_any_addr(dst_addr)) {
 		ret = cma_resolve_loopback(id_priv);
 	} else {
 		if (dst_addr->sa_family == AF_IB) {
 			ret = cma_resolve_ib_addr(id_priv);
 		} else {
-			ret = rdma_resolve_ip(cma_src_addr(id_priv),
-					      dst_addr, &id->route.addr.dev_addr,
-					      timeout_ms, addr_handler, id_priv);
+			ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
+					      &id->route.addr.dev_addr,
+					      timeout_ms, addr_handler,
+					      false, id_priv);
 		}
 	}
 	if (ret)
@@ -3003,7 +3110,6 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 	return 0;
 err:
 	cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
-	cma_deref_id(id_priv);
 	return ret;
 }
 EXPORT_SYMBOL(rdma_resolve_addr);
@@ -3414,7 +3520,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 		if (ret)
 			goto err1;
 
-		ret = cma_acquire_dev(id_priv, NULL);
+		ret = cma_acquire_dev_by_src_ip(id_priv);
 		if (ret)
 			goto err1;
 	}
@@ -3439,10 +3545,9 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 
 	return 0;
 err2:
-	if (id_priv->cma_dev) {
-		rdma_restrack_del(&id_priv->res);
+	rdma_restrack_del(&id_priv->res);
+	if (id_priv->cma_dev)
 		cma_release_dev(id_priv);
-	}
 err1:
 	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
 	return ret;
@@ -3839,10 +3944,7 @@ int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 
-	if (caller)
-		id_priv->res.kern_name = caller;
-	else
-		rdma_restrack_set_task(&id_priv->res, current);
+	rdma_restrack_set_task(&id_priv->res, caller);
 
 	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
 		return -EINVAL;
@@ -4087,9 +4189,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
 	    (!ib_sa_sendonly_fullmem_support(&sa_client,
 					     id_priv->id.device,
 					     id_priv->id.port_num))) {
-		pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
-			"RDMA CM: SM doesn't support Send Only Full Member option\n",
-			id_priv->id.device->name, id_priv->id.port_num);
+		dev_warn(
+			&id_priv->id.device->dev,
+			"RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
+			id_priv->id.port_num);
 		return -EOPNOTSUPP;
 	}
 
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index eee38b40be99..8c2dfb3e294e 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -65,7 +65,7 @@ static struct cma_dev_port_group *to_dev_port_group(struct config_item *item)
 
 static bool filter_by_name(struct ib_device *ib_dev, void *cookie)
 {
-	return !strcmp(ib_dev->name, cookie);
+	return !strcmp(dev_name(&ib_dev->dev), cookie);
 }
 
 static int cma_configfs_params_get(struct config_item *item,
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 77c7005c396c..bb9007a0cca7 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -44,7 +44,7 @@
 #include "mad_priv.h"
 
 /* Total number of ports combined across all struct ib_devices's */
-#define RDMA_MAX_PORTS 1024
+#define RDMA_MAX_PORTS 8192
 
 struct pkey_index_qp_list {
 	struct list_head    pkey_index_list;
@@ -87,6 +87,7 @@ int  ib_device_register_sysfs(struct ib_device *device,
 			      int (*port_callback)(struct ib_device *,
 						   u8, struct kobject *));
 void ib_device_unregister_sysfs(struct ib_device *device);
+int ib_device_rename(struct ib_device *ibdev, const char *name);
 
 typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
 	      struct net_device *idev, void *cookie);
@@ -338,7 +339,14 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
 
 int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 				 const union ib_gid *dgid,
-				 u8 *dmac, const struct net_device *ndev,
+				 u8 *dmac, const struct ib_gid_attr *sgid_attr,
 				 int *hoplimit);
+void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
+			   const struct net_device *dev);
 
+struct sa_path_rec;
+int roce_resolve_route_from_path(struct sa_path_rec *rec,
+				 const struct ib_gid_attr *attr);
+
+struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
 #endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index af5ad6a56ae4..b1e5365ddafa 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -112,12 +112,12 @@ static void ib_cq_poll_work(struct work_struct *work)
 				    IB_POLL_BATCH);
 	if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
 	    ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
-		queue_work(ib_comp_wq, &cq->work);
+		queue_work(cq->comp_wq, &cq->work);
 }
 
 static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
 {
-	queue_work(ib_comp_wq, &cq->work);
+	queue_work(cq->comp_wq, &cq->work);
 }
 
 /**
@@ -161,7 +161,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
 		goto out_destroy_cq;
 
 	cq->res.type = RDMA_RESTRACK_CQ;
-	cq->res.kern_name = caller;
+	rdma_restrack_set_task(&cq->res, caller);
 	rdma_restrack_add(&cq->res);
 
 	switch (cq->poll_ctx) {
@@ -175,9 +175,12 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
 		ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 		break;
 	case IB_POLL_WORKQUEUE:
+	case IB_POLL_UNBOUND_WORKQUEUE:
 		cq->comp_handler = ib_cq_completion_workqueue;
 		INIT_WORK(&cq->work, ib_cq_poll_work);
 		ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+		cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ?
+				ib_comp_wq : ib_comp_unbound_wq;
 		break;
 	default:
 		ret = -EINVAL;
@@ -213,6 +216,7 @@ void ib_free_cq(struct ib_cq *cq)
 		irq_poll_disable(&cq->iop);
 		break;
 	case IB_POLL_WORKQUEUE:
+	case IB_POLL_UNBOUND_WORKQUEUE:
 		cancel_work_sync(&cq->work);
 		break;
 	default:
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index db3b6271f09d..87eb4f2cdd7d 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -61,6 +61,7 @@ struct ib_client_data {
 };
 
 struct workqueue_struct *ib_comp_wq;
+struct workqueue_struct *ib_comp_unbound_wq;
 struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
 
@@ -122,8 +123,9 @@ static int ib_device_check_mandatory(struct ib_device *device)
 
 	for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
 		if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
-			pr_warn("Device %s is missing mandatory function %s\n",
-				device->name, mandatory_table[i].name);
+			dev_warn(&device->dev,
+				 "Device is missing mandatory function %s\n",
+				 mandatory_table[i].name);
 			return -EINVAL;
 		}
 	}
@@ -163,16 +165,40 @@ static struct ib_device *__ib_device_get_by_name(const char *name)
 	struct ib_device *device;
 
 	list_for_each_entry(device, &device_list, core_list)
-		if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX))
+		if (!strcmp(name, dev_name(&device->dev)))
 			return device;
 
 	return NULL;
 }
 
-static int alloc_name(char *name)
+int ib_device_rename(struct ib_device *ibdev, const char *name)
+{
+	struct ib_device *device;
+	int ret = 0;
+
+	if (!strcmp(name, dev_name(&ibdev->dev)))
+		return ret;
+
+	mutex_lock(&device_mutex);
+	list_for_each_entry(device, &device_list, core_list) {
+		if (!strcmp(name, dev_name(&device->dev))) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+
+	ret = device_rename(&ibdev->dev, name);
+	if (ret)
+		goto out;
+	strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
+out:
+	mutex_unlock(&device_mutex);
+	return ret;
+}
+
+static int alloc_name(struct ib_device *ibdev, const char *name)
 {
 	unsigned long *inuse;
-	char buf[IB_DEVICE_NAME_MAX];
 	struct ib_device *device;
 	int i;
 
@@ -181,24 +207,21 @@ static int alloc_name(char *name)
 		return -ENOMEM;
 
 	list_for_each_entry(device, &device_list, core_list) {
-		if (!sscanf(device->name, name, &i))
+		char buf[IB_DEVICE_NAME_MAX];
+
+		if (sscanf(dev_name(&device->dev), name, &i) != 1)
 			continue;
 		if (i < 0 || i >= PAGE_SIZE * 8)
 			continue;
 		snprintf(buf, sizeof buf, name, i);
-		if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX))
+		if (!strcmp(buf, dev_name(&device->dev)))
 			set_bit(i, inuse);
 	}
 
 	i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
 	free_page((unsigned long) inuse);
-	snprintf(buf, sizeof buf, name, i);
 
-	if (__ib_device_get_by_name(buf))
-		return -ENFILE;
-
-	strlcpy(name, buf, IB_DEVICE_NAME_MAX);
-	return 0;
+	return dev_set_name(&ibdev->dev, name, i);
 }
 
 static void ib_device_release(struct device *device)
@@ -221,9 +244,7 @@ static void ib_device_release(struct device *device)
 static int ib_device_uevent(struct device *device,
 			    struct kobj_uevent_env *env)
 {
-	struct ib_device *dev = container_of(device, struct ib_device, dev);
-
-	if (add_uevent_var(env, "NAME=%s", dev->name))
+	if (add_uevent_var(env, "NAME=%s", dev_name(device)))
 		return -ENOMEM;
 
 	/*
@@ -269,7 +290,7 @@ struct ib_device *ib_alloc_device(size_t size)
 
 	INIT_LIST_HEAD(&device->event_handler_list);
 	spin_lock_init(&device->event_handler_lock);
-	spin_lock_init(&device->client_data_lock);
+	rwlock_init(&device->client_data_lock);
 	INIT_LIST_HEAD(&device->client_data_list);
 	INIT_LIST_HEAD(&device->port_list);
 
@@ -285,6 +306,7 @@ EXPORT_SYMBOL(ib_alloc_device);
  */
 void ib_dealloc_device(struct ib_device *device)
 {
+	WARN_ON(!list_empty(&device->client_data_list));
 	WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
 		device->reg_state != IB_DEV_UNINITIALIZED);
 	rdma_restrack_clean(&device->res);
@@ -295,9 +317,8 @@ EXPORT_SYMBOL(ib_dealloc_device);
 static int add_client_context(struct ib_device *device, struct ib_client *client)
 {
 	struct ib_client_data *context;
-	unsigned long flags;
 
-	context = kmalloc(sizeof *context, GFP_KERNEL);
+	context = kmalloc(sizeof(*context), GFP_KERNEL);
 	if (!context)
 		return -ENOMEM;
 
@@ -306,9 +327,9 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
 	context->going_down = false;
 
 	down_write(&lists_rwsem);
-	spin_lock_irqsave(&device->client_data_lock, flags);
+	write_lock_irq(&device->client_data_lock);
 	list_add(&context->list, &device->client_data_list);
-	spin_unlock_irqrestore(&device->client_data_lock, flags);
+	write_unlock_irq(&device->client_data_lock);
 	up_write(&lists_rwsem);
 
 	return 0;
@@ -444,22 +465,8 @@ static u32 __dev_new_index(void)
 	}
 }
 
-/**
- * ib_register_device - Register an IB device with IB core
- * @device:Device to register
- *
- * Low-level drivers use ib_register_device() to register their
- * devices with the IB core.  All registered clients will receive a
- * callback for each device that is added. @device must be allocated
- * with ib_alloc_device().
- */
-int ib_register_device(struct ib_device *device,
-		       int (*port_callback)(struct ib_device *,
-					    u8, struct kobject *))
+static void setup_dma_device(struct ib_device *device)
 {
-	int ret;
-	struct ib_client *client;
-	struct ib_udata uhw = {.outlen = 0, .inlen = 0};
 	struct device *parent = device->dev.parent;
 
 	WARN_ON_ONCE(device->dma_device);
@@ -491,56 +498,113 @@ int ib_register_device(struct ib_device *device,
 		WARN_ON_ONCE(!parent);
 		device->dma_device = parent;
 	}
+}
 
-	mutex_lock(&device_mutex);
+static void cleanup_device(struct ib_device *device)
+{
+	ib_cache_cleanup_one(device);
+	ib_cache_release_one(device);
+	kfree(device->port_pkey_list);
+	kfree(device->port_immutable);
+}
 
-	if (strchr(device->name, '%')) {
-		ret = alloc_name(device->name);
-		if (ret)
-			goto out;
-	}
+static int setup_device(struct ib_device *device)
+{
+	struct ib_udata uhw = {.outlen = 0, .inlen = 0};
+	int ret;
 
-	if (ib_device_check_mandatory(device)) {
-		ret = -EINVAL;
-		goto out;
-	}
+	ret = ib_device_check_mandatory(device);
+	if (ret)
+		return ret;
 
 	ret = read_port_immutable(device);
 	if (ret) {
-		pr_warn("Couldn't create per port immutable data %s\n",
-			device->name);
-		goto out;
+		dev_warn(&device->dev,
+			 "Couldn't create per port immutable data\n");
+		return ret;
 	}
 
-	ret = setup_port_pkey_list(device);
+	memset(&device->attrs, 0, sizeof(device->attrs));
+	ret = device->query_device(device, &device->attrs, &uhw);
 	if (ret) {
-		pr_warn("Couldn't create per port_pkey_list\n");
-		goto out;
+		dev_warn(&device->dev,
+			 "Couldn't query the device attributes\n");
+		goto port_cleanup;
 	}
 
-	ret = ib_cache_setup_one(device);
+	ret = setup_port_pkey_list(device);
 	if (ret) {
-		pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
+		dev_warn(&device->dev, "Couldn't create per port_pkey_list\n");
 		goto port_cleanup;
 	}
 
-	ret = ib_device_register_rdmacg(device);
+	ret = ib_cache_setup_one(device);
 	if (ret) {
-		pr_warn("Couldn't register device with rdma cgroup\n");
-		goto cache_cleanup;
+		dev_warn(&device->dev,
+			 "Couldn't set up InfiniBand P_Key/GID cache\n");
+		goto pkey_cleanup;
+	}
+	return 0;
+
+pkey_cleanup:
+	kfree(device->port_pkey_list);
+port_cleanup:
+	kfree(device->port_immutable);
+	return ret;
+}
+
+/**
+ * ib_register_device - Register an IB device with IB core
+ * @device:Device to register
+ *
+ * Low-level drivers use ib_register_device() to register their
+ * devices with the IB core.  All registered clients will receive a
+ * callback for each device that is added. @device must be allocated
+ * with ib_alloc_device().
+ */
+int ib_register_device(struct ib_device *device, const char *name,
+		       int (*port_callback)(struct ib_device *, u8,
+					    struct kobject *))
+{
+	int ret;
+	struct ib_client *client;
+
+	setup_dma_device(device);
+
+	mutex_lock(&device_mutex);
+
+	if (strchr(name, '%')) {
+		ret = alloc_name(device, name);
+		if (ret)
+			goto out;
+	} else {
+		ret = dev_set_name(&device->dev, name);
+		if (ret)
+			goto out;
+	}
+	if (__ib_device_get_by_name(dev_name(&device->dev))) {
+		ret = -ENFILE;
+		goto out;
 	}
+	strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
 
-	memset(&device->attrs, 0, sizeof(device->attrs));
-	ret = device->query_device(device, &device->attrs, &uhw);
+	ret = setup_device(device);
+	if (ret)
+		goto out;
+
+	device->index = __dev_new_index();
+
+	ret = ib_device_register_rdmacg(device);
 	if (ret) {
-		pr_warn("Couldn't query the device attributes\n");
-		goto cg_cleanup;
+		dev_warn(&device->dev,
+			 "Couldn't register device with rdma cgroup\n");
+		goto dev_cleanup;
 	}
 
 	ret = ib_device_register_sysfs(device, port_callback);
 	if (ret) {
-		pr_warn("Couldn't register device %s with driver model\n",
-			device->name);
+		dev_warn(&device->dev,
+			 "Couldn't register device with driver model\n");
 		goto cg_cleanup;
 	}
 
@@ -550,7 +614,6 @@ int ib_register_device(struct ib_device *device,
 		if (!add_client_context(device, client) && client->add)
 			client->add(device);
 
-	device->index = __dev_new_index();
 	down_write(&lists_rwsem);
 	list_add_tail(&device->core_list, &device_list);
 	up_write(&lists_rwsem);
@@ -559,11 +622,8 @@ int ib_register_device(struct ib_device *device,
 
 cg_cleanup:
 	ib_device_unregister_rdmacg(device);
-cache_cleanup:
-	ib_cache_cleanup_one(device);
-	ib_cache_release_one(device);
-port_cleanup:
-	kfree(device->port_immutable);
+dev_cleanup:
+	cleanup_device(device);
 out:
 	mutex_unlock(&device_mutex);
 	return ret;
@@ -585,21 +645,20 @@ void ib_unregister_device(struct ib_device *device)
 
 	down_write(&lists_rwsem);
 	list_del(&device->core_list);
-	spin_lock_irqsave(&device->client_data_lock, flags);
-	list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
+	write_lock_irq(&device->client_data_lock);
+	list_for_each_entry(context, &device->client_data_list, list)
 		context->going_down = true;
-	spin_unlock_irqrestore(&device->client_data_lock, flags);
+	write_unlock_irq(&device->client_data_lock);
 	downgrade_write(&lists_rwsem);
 
-	list_for_each_entry_safe(context, tmp, &device->client_data_list,
-				 list) {
+	list_for_each_entry(context, &device->client_data_list, list) {
 		if (context->client->remove)
 			context->client->remove(device, context->data);
 	}
 	up_read(&lists_rwsem);
 
-	ib_device_unregister_rdmacg(device);
 	ib_device_unregister_sysfs(device);
+	ib_device_unregister_rdmacg(device);
 
 	mutex_unlock(&device_mutex);
 
@@ -609,10 +668,13 @@ void ib_unregister_device(struct ib_device *device)
 	kfree(device->port_pkey_list);
 
 	down_write(&lists_rwsem);
-	spin_lock_irqsave(&device->client_data_lock, flags);
-	list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
+	write_lock_irqsave(&device->client_data_lock, flags);
+	list_for_each_entry_safe(context, tmp, &device->client_data_list,
+				 list) {
+		list_del(&context->list);
 		kfree(context);
-	spin_unlock_irqrestore(&device->client_data_lock, flags);
+	}
+	write_unlock_irqrestore(&device->client_data_lock, flags);
 	up_write(&lists_rwsem);
 
 	device->reg_state = IB_DEV_UNREGISTERED;
@@ -662,9 +724,8 @@ EXPORT_SYMBOL(ib_register_client);
  */
 void ib_unregister_client(struct ib_client *client)
 {
-	struct ib_client_data *context, *tmp;
+	struct ib_client_data *context;
 	struct ib_device *device;
-	unsigned long flags;
 
 	mutex_lock(&device_mutex);
 
@@ -676,14 +737,14 @@ void ib_unregister_client(struct ib_client *client)
 		struct ib_client_data *found_context = NULL;
 
 		down_write(&lists_rwsem);
-		spin_lock_irqsave(&device->client_data_lock, flags);
-		list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
+		write_lock_irq(&device->client_data_lock);
+		list_for_each_entry(context, &device->client_data_list, list)
 			if (context->client == client) {
 				context->going_down = true;
 				found_context = context;
 				break;
 			}
-		spin_unlock_irqrestore(&device->client_data_lock, flags);
+		write_unlock_irq(&device->client_data_lock);
 		up_write(&lists_rwsem);
 
 		if (client->remove)
@@ -691,17 +752,18 @@ void ib_unregister_client(struct ib_client *client)
 					       found_context->data : NULL);
 
 		if (!found_context) {
-			pr_warn("No client context found for %s/%s\n",
-				device->name, client->name);
+			dev_warn(&device->dev,
+				 "No client context found for %s\n",
+				 client->name);
 			continue;
 		}
 
 		down_write(&lists_rwsem);
-		spin_lock_irqsave(&device->client_data_lock, flags);
+		write_lock_irq(&device->client_data_lock);
 		list_del(&found_context->list);
-		kfree(found_context);
-		spin_unlock_irqrestore(&device->client_data_lock, flags);
+		write_unlock_irq(&device->client_data_lock);
 		up_write(&lists_rwsem);
+		kfree(found_context);
 	}
 
 	mutex_unlock(&device_mutex);
@@ -722,13 +784,13 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
 	void *ret = NULL;
 	unsigned long flags;
 
-	spin_lock_irqsave(&device->client_data_lock, flags);
+	read_lock_irqsave(&device->client_data_lock, flags);
 	list_for_each_entry(context, &device->client_data_list, list)
 		if (context->client == client) {
 			ret = context->data;
 			break;
 		}
-	spin_unlock_irqrestore(&device->client_data_lock, flags);
+	read_unlock_irqrestore(&device->client_data_lock, flags);
 
 	return ret;
 }
@@ -749,18 +811,18 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client,
 	struct ib_client_data *context;
 	unsigned long flags;
 
-	spin_lock_irqsave(&device->client_data_lock, flags);
+	write_lock_irqsave(&device->client_data_lock, flags);
 	list_for_each_entry(context, &device->client_data_list, list)
 		if (context->client == client) {
 			context->data = data;
 			goto out;
 		}
 
-	pr_warn("No client context found for %s/%s\n",
-		device->name, client->name);
+	dev_warn(&device->dev, "No client context found for %s\n",
+		 client->name);
 
 out:
-	spin_unlock_irqrestore(&device->client_data_lock, flags);
+	write_unlock_irqrestore(&device->client_data_lock, flags);
 }
 EXPORT_SYMBOL(ib_set_client_data);
 
@@ -1166,10 +1228,19 @@ static int __init ib_core_init(void)
 		goto err;
 	}
 
+	ib_comp_unbound_wq =
+		alloc_workqueue("ib-comp-unb-wq",
+				WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
+				WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
+	if (!ib_comp_unbound_wq) {
+		ret = -ENOMEM;
+		goto err_comp;
+	}
+
 	ret = class_register(&ib_class);
 	if (ret) {
 		pr_warn("Couldn't create InfiniBand device class\n");
-		goto err_comp;
+		goto err_comp_unbound;
 	}
 
 	ret = rdma_nl_init();
@@ -1218,6 +1289,8 @@ err_ibnl:
 	rdma_nl_exit();
 err_sysfs:
 	class_unregister(&ib_class);
+err_comp_unbound:
+	destroy_workqueue(ib_comp_unbound_wq);
 err_comp:
 	destroy_workqueue(ib_comp_wq);
 err:
@@ -1236,6 +1309,7 @@ static void __exit ib_core_cleanup(void)
 	addr_cleanup();
 	rdma_nl_exit();
 	class_unregister(&ib_class);
+	destroy_workqueue(ib_comp_unbound_wq);
 	destroy_workqueue(ib_comp_wq);
 	/* Make sure that any pending umem accounting work is done. */
 	destroy_workqueue(ib_wq);
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index a077500f7f32..83ba0068e8bb 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -213,7 +213,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 	device = pd->device;
 	if (!device->alloc_fmr    || !device->dealloc_fmr  ||
 	    !device->map_phys_fmr || !device->unmap_fmr) {
-		pr_info(PFX "Device %s does not support FMRs\n", device->name);
+		dev_info(&device->dev, "Device does not support FMRs\n");
 		return ERR_PTR(-ENOSYS);
 	}
 
@@ -257,7 +257,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 	atomic_set(&pool->flush_ser, 0);
 	init_waitqueue_head(&pool->force_wait);
 
-	pool->worker = kthread_create_worker(0, "ib_fmr(%s)", device->name);
+	pool->worker =
+		kthread_create_worker(0, "ib_fmr(%s)", dev_name(&device->dev));
 	if (IS_ERR(pool->worker)) {
 		pr_warn(PFX "couldn't start cleanup kthread worker\n");
 		ret = PTR_ERR(pool->worker);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 5d676cff41f4..ba668d49c751 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -509,7 +509,7 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
 	cm_id->m_local_addr = cm_id->local_addr;
 	cm_id->m_remote_addr = cm_id->remote_addr;
 
-	memcpy(pm_reg_msg.dev_name, cm_id->device->name,
+	memcpy(pm_reg_msg.dev_name, dev_name(&cm_id->device->dev),
 	       sizeof(pm_reg_msg.dev_name));
 	memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname,
 	       sizeof(pm_reg_msg.if_name));
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ef459f2f2eeb..d7025cd5be28 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -220,33 +220,37 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 	int ret2, qpn;
 	u8 mgmt_class, vclass;
 
+	if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) ||
+	    (qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num)))
+		return ERR_PTR(-EPROTONOSUPPORT);
+
 	/* Validate parameters */
 	qpn = get_spl_qp_index(qp_type);
 	if (qpn == -1) {
-		dev_notice(&device->dev,
-			   "ib_register_mad_agent: invalid QP Type %d\n",
-			   qp_type);
+		dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n",
+				    __func__, qp_type);
 		goto error1;
 	}
 
 	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
-		dev_notice(&device->dev,
-			   "ib_register_mad_agent: invalid RMPP Version %u\n",
-			   rmpp_version);
+		dev_dbg_ratelimited(&device->dev,
+				    "%s: invalid RMPP Version %u\n",
+				    __func__, rmpp_version);
 		goto error1;
 	}
 
 	/* Validate MAD registration request if supplied */
 	if (mad_reg_req) {
 		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
-			dev_notice(&device->dev,
-				   "ib_register_mad_agent: invalid Class Version %u\n",
-				   mad_reg_req->mgmt_class_version);
+			dev_dbg_ratelimited(&device->dev,
+					    "%s: invalid Class Version %u\n",
+					    __func__,
+					    mad_reg_req->mgmt_class_version);
 			goto error1;
 		}
 		if (!recv_handler) {
-			dev_notice(&device->dev,
-				   "ib_register_mad_agent: no recv_handler\n");
+			dev_dbg_ratelimited(&device->dev,
+					    "%s: no recv_handler\n", __func__);
 			goto error1;
 		}
 		if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
@@ -256,9 +260,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 			 */
 			if (mad_reg_req->mgmt_class !=
 			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
-				dev_notice(&device->dev,
-					   "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n",
-					   mad_reg_req->mgmt_class);
+				dev_dbg_ratelimited(&device->dev,
+					"%s: Invalid Mgmt Class 0x%x\n",
+					__func__, mad_reg_req->mgmt_class);
 				goto error1;
 			}
 		} else if (mad_reg_req->mgmt_class == 0) {
@@ -266,8 +270,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 			 * Class 0 is reserved in IBA and is used for
 			 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
 			 */
-			dev_notice(&device->dev,
-				   "ib_register_mad_agent: Invalid Mgmt Class 0\n");
+			dev_dbg_ratelimited(&device->dev,
+					    "%s: Invalid Mgmt Class 0\n",
+					    __func__);
 			goto error1;
 		} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
 			/*
@@ -275,18 +280,19 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 			 * ensure supplied OUI is not zero
 			 */
 			if (!is_vendor_oui(mad_reg_req->oui)) {
-				dev_notice(&device->dev,
-					   "ib_register_mad_agent: No OUI specified for class 0x%x\n",
-					   mad_reg_req->mgmt_class);
+				dev_dbg_ratelimited(&device->dev,
+					"%s: No OUI specified for class 0x%x\n",
+					__func__,
+					mad_reg_req->mgmt_class);
 				goto error1;
 			}
 		}
 		/* Make sure class supplied is consistent with RMPP */
 		if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
 			if (rmpp_version) {
-				dev_notice(&device->dev,
-					   "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n",
-					   mad_reg_req->mgmt_class);
+				dev_dbg_ratelimited(&device->dev,
+					"%s: RMPP version for non-RMPP class 0x%x\n",
+					__func__, mad_reg_req->mgmt_class);
 				goto error1;
 			}
 		}
@@ -297,9 +303,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 					IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
 			    (mad_reg_req->mgmt_class !=
 					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
-				dev_notice(&device->dev,
-					   "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n",
-					   mad_reg_req->mgmt_class);
+				dev_dbg_ratelimited(&device->dev,
+					"%s: Invalid SM QP type: class 0x%x\n",
+					__func__, mad_reg_req->mgmt_class);
 				goto error1;
 			}
 		} else {
@@ -307,9 +313,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 					IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
 			    (mad_reg_req->mgmt_class ==
 					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
-				dev_notice(&device->dev,
-					   "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n",
-					   mad_reg_req->mgmt_class);
+				dev_dbg_ratelimited(&device->dev,
+					"%s: Invalid GS QP type: class 0x%x\n",
+					__func__, mad_reg_req->mgmt_class);
 				goto error1;
 			}
 		}
@@ -324,18 +330,18 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 	/* Validate device and port */
 	port_priv = ib_get_mad_port(device, port_num);
 	if (!port_priv) {
-		dev_notice(&device->dev,
-			   "ib_register_mad_agent: Invalid port %d\n",
-			   port_num);
+		dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n",
+				    __func__, port_num);
 		ret = ERR_PTR(-ENODEV);
 		goto error1;
 	}
 
-	/* Verify the QP requested is supported.  For example, Ethernet devices
-	 * will not have QP0 */
+	/* Verify the QP requested is supported. For example, Ethernet devices
+	 * will not have QP0.
+	 */
 	if (!port_priv->qp_info[qpn].qp) {
-		dev_notice(&device->dev,
-			   "ib_register_mad_agent: QP %d not supported\n", qpn);
+		dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n",
+				    __func__, qpn);
 		ret = ERR_PTR(-EPROTONOSUPPORT);
 		goto error1;
 	}
@@ -2408,7 +2414,7 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
 }
 
 void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
-			  int timeout_ms)
+			  unsigned long timeout_ms)
 {
 	mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
 	wait_for_response(mad_send_wr);
@@ -3183,7 +3189,7 @@ static int ib_mad_port_open(struct ib_device *device,
 		cq_size *= 2;
 
 	port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
-			IB_POLL_WORKQUEUE);
+			IB_POLL_UNBOUND_WORKQUEUE);
 	if (IS_ERR(port_priv->cq)) {
 		dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
 		ret = PTR_ERR(port_priv->cq);
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index d84ae1671898..216509036aa8 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -221,6 +221,6 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
 void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
 
 void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
-			  int timeout_ms);
+			  unsigned long timeout_ms);
 
 #endif	/* __IB_MAD_PRIV_H__ */
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 3ccaae18ad75..724f5a62e82f 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -47,9 +47,9 @@ static struct {
 	const struct rdma_nl_cbs   *cb_table;
 } rdma_nl_types[RDMA_NL_NUM_CLIENTS];
 
-int rdma_nl_chk_listeners(unsigned int group)
+bool rdma_nl_chk_listeners(unsigned int group)
 {
-	return (netlink_has_listeners(nls, group)) ? 0 : -1;
+	return netlink_has_listeners(nls, group);
 }
 EXPORT_SYMBOL(rdma_nl_chk_listeners);
 
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 0385ab438320..573399e3ccc1 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -179,7 +179,8 @@ static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
 {
 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
 		return -EMSGSIZE;
-	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
+	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
+			   dev_name(&device->dev)))
 		return -EMSGSIZE;
 
 	return 0;
@@ -645,6 +646,36 @@ err:
 	return err;
 }
 
+static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+			  struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	struct ib_device *device;
+	u32 index;
+	int err;
+
+	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
+			  extack);
+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+		return -EINVAL;
+
+	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(index);
+	if (!device)
+		return -EINVAL;
+
+	if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
+		char name[IB_DEVICE_NAME_MAX] = {};
+
+		nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
+			    IB_DEVICE_NAME_MAX);
+		err = ib_device_rename(device, name);
+	}
+
+	put_device(&device->dev);
+	return err;
+}
+
 static int _nldev_get_dumpit(struct ib_device *device,
 			     struct sk_buff *skb,
 			     struct netlink_callback *cb,
@@ -1077,6 +1108,10 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 		.doit = nldev_get_doit,
 		.dump = nldev_get_dumpit,
 	},
+	[RDMA_NLDEV_CMD_SET] = {
+		.doit = nldev_set_doit,
+		.flags = RDMA_NL_ADMIN_PERM,
+	},
 	[RDMA_NLDEV_CMD_PORT_GET] = {
 		.doit = nldev_port_get_doit,
 		.dump = nldev_port_get_dumpit,
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index c4118bcd5103..752a55c6bdce 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -794,44 +794,6 @@ void uverbs_close_fd(struct file *f)
 	uverbs_uobject_put(uobj);
 }
 
-static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext)
-{
-	struct ib_device *ib_dev = ibcontext->device;
-	struct task_struct *owning_process  = NULL;
-	struct mm_struct   *owning_mm       = NULL;
-
-	owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
-	if (!owning_process)
-		return;
-
-	owning_mm = get_task_mm(owning_process);
-	if (!owning_mm) {
-		pr_info("no mm, disassociate ucontext is pending task termination\n");
-		while (1) {
-			put_task_struct(owning_process);
-			usleep_range(1000, 2000);
-			owning_process = get_pid_task(ibcontext->tgid,
-						      PIDTYPE_PID);
-			if (!owning_process ||
-			    owning_process->state == TASK_DEAD) {
-				pr_info("disassociate ucontext done, task was terminated\n");
-				/* in case task was dead need to release the
-				 * task struct.
-				 */
-				if (owning_process)
-					put_task_struct(owning_process);
-				return;
-			}
-		}
-	}
-
-	down_write(&owning_mm->mmap_sem);
-	ib_dev->disassociate_ucontext(ibcontext);
-	up_write(&owning_mm->mmap_sem);
-	mmput(owning_mm);
-	put_task_struct(owning_process);
-}
-
 /*
  * Drop the ucontext off the ufile and completely disconnect it from the
  * ib_device
@@ -840,20 +802,28 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
 				   enum rdma_remove_reason reason)
 {
 	struct ib_ucontext *ucontext = ufile->ucontext;
+	struct ib_device *ib_dev = ucontext->device;
 	int ret;
 
-	if (reason == RDMA_REMOVE_DRIVER_REMOVE)
-		ufile_disassociate_ucontext(ucontext);
+	/*
+	 * If we are closing the FD then the user mmap VMAs must have
+	 * already been destroyed as they hold on to the filep, otherwise
+	 * they need to be zap'd.
+	 */
+	if (reason == RDMA_REMOVE_DRIVER_REMOVE) {
+		uverbs_user_mmap_disassociate(ufile);
+		if (ib_dev->disassociate_ucontext)
+			ib_dev->disassociate_ucontext(ucontext);
+	}
 
-	put_pid(ucontext->tgid);
-	ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
+	ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev,
 			   RDMACG_RESOURCE_HCA_HANDLE);
 
 	/*
 	 * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
 	 * the error return.
 	 */
-	ret = ucontext->device->dealloc_ucontext(ucontext);
+	ret = ib_dev->dealloc_ucontext(ucontext);
 	WARN_ON(ret);
 
 	ufile->ucontext = NULL;
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index f962f2a593ba..4886d2bba7c7 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -160,5 +160,6 @@ void uverbs_disassociate_api(struct uverbs_api *uapi);
 void uverbs_destroy_api(struct uverbs_api *uapi);
 void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
 			      unsigned int num_attrs);
+void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
 
 #endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 3b7fa0ccaa08..06d8657ce583 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -50,8 +50,7 @@ void rdma_restrack_clean(struct rdma_restrack_root *res)
 
 	dev = container_of(res, struct ib_device, res);
 	pr_err("restrack: %s", CUT_HERE);
-	pr_err("restrack: BUG: RESTRACK detected leak of resources on %s\n",
-	       dev->name);
+	dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
 	hash_for_each(res->hash, bkt, e, node) {
 		if (rdma_is_kernel_res(e)) {
 			owner = e->kern_name;
@@ -156,6 +155,21 @@ static bool res_is_user(struct rdma_restrack_entry *res)
 	}
 }
 
+void rdma_restrack_set_task(struct rdma_restrack_entry *res,
+			    const char *caller)
+{
+	if (caller) {
+		res->kern_name = caller;
+		return;
+	}
+
+	if (res->task)
+		put_task_struct(res->task);
+	get_task_struct(current);
+	res->task = current;
+}
+EXPORT_SYMBOL(rdma_restrack_set_task);
+
 void rdma_restrack_add(struct rdma_restrack_entry *res)
 {
 	struct ib_device *dev = res_to_dev(res);
@@ -168,7 +182,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
 
 	if (res_is_user(res)) {
 		if (!res->task)
-			rdma_restrack_set_task(res, current);
+			rdma_restrack_set_task(res, NULL);
 		res->kern_name = NULL;
 	} else {
 		set_kern_name(res);
@@ -209,7 +223,7 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
 	struct ib_device *dev;
 
 	if (!res->valid)
-		return;
+		goto out;
 
 	dev = res_to_dev(res);
 	if (!dev)
@@ -222,8 +236,12 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
 	down_write(&dev->res.rwsem);
 	hash_del(&res->node);
 	res->valid = false;
-	if (res->task)
-		put_task_struct(res->task);
 	up_write(&dev->res.rwsem);
+
+out:
+	if (res->task) {
+		put_task_struct(res->task);
+		res->task = NULL;
+	}
 }
 EXPORT_SYMBOL(rdma_restrack_del);
diff --git a/drivers/infiniband/core/sa.h b/drivers/infiniband/core/sa.h
index b1d4bbf4ce5c..cbaaaa92fff3 100644
--- a/drivers/infiniband/core/sa.h
+++ b/drivers/infiniband/core/sa.h
@@ -49,16 +49,14 @@ static inline void ib_sa_client_put(struct ib_sa_client *client)
 }
 
 int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
-			     struct ib_device *device, u8 port_num,
-			     u8 method,
+			     struct ib_device *device, u8 port_num, u8 method,
 			     struct ib_sa_mcmember_rec *rec,
 			     ib_sa_comp_mask comp_mask,
-			     int timeout_ms, gfp_t gfp_mask,
+			     unsigned long timeout_ms, gfp_t gfp_mask,
 			     void (*callback)(int status,
 					      struct ib_sa_mcmember_rec *resp,
 					      void *context),
-			     void *context,
-			     struct ib_sa_query **sa_query);
+			     void *context, struct ib_sa_query **sa_query);
 
 int mcast_init(void);
 void mcast_cleanup(void);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 7b794a14d6e8..be5ba5e15496 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -761,7 +761,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
 
 	/* Construct the family header first */
 	header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
-	memcpy(header->device_name, query->port->agent->device->name,
+	memcpy(header->device_name, dev_name(&query->port->agent->device->dev),
 	       LS_DEVICE_NAME_MAX);
 	header->port_num = query->port->port_num;
 
@@ -835,7 +835,6 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
 	struct sk_buff *skb = NULL;
 	struct nlmsghdr *nlh;
 	void *data;
-	int ret = 0;
 	struct ib_sa_mad *mad;
 	int len;
 
@@ -862,13 +861,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
 	/* Repair the nlmsg header length */
 	nlmsg_end(skb, nlh);
 
-	ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
-	if (!ret)
-		ret = len;
-	else
-		ret = 0;
-
-	return ret;
+	return rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
 }
 
 static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
@@ -891,14 +884,12 @@ static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
 	spin_unlock_irqrestore(&ib_nl_request_lock, flags);
 
 	ret = ib_nl_send_msg(query, gfp_mask);
-	if (ret <= 0) {
+	if (ret) {
 		ret = -EIO;
 		/* Remove the request */
 		spin_lock_irqsave(&ib_nl_request_lock, flags);
 		list_del(&query->list);
 		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
-	} else {
-		ret = 0;
 	}
 
 	return ret;
@@ -1227,46 +1218,6 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
 	return src_path_mask;
 }
 
-static int roce_resolve_route_from_path(struct sa_path_rec *rec,
-					const struct ib_gid_attr *attr)
-{
-	struct rdma_dev_addr dev_addr = {};
-	union {
-		struct sockaddr     _sockaddr;
-		struct sockaddr_in  _sockaddr_in;
-		struct sockaddr_in6 _sockaddr_in6;
-	} sgid_addr, dgid_addr;
-	int ret;
-
-	if (rec->roce.route_resolved)
-		return 0;
-	if (!attr || !attr->ndev)
-		return -EINVAL;
-
-	dev_addr.bound_dev_if = attr->ndev->ifindex;
-	/* TODO: Use net from the ib_gid_attr once it is added to it,
-	 * until than, limit itself to init_net.
-	 */
-	dev_addr.net = &init_net;
-
-	rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
-	rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
-
-	/* validate the route */
-	ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
-				    &dgid_addr._sockaddr, &dev_addr);
-	if (ret)
-		return ret;
-
-	if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
-	     dev_addr.network == RDMA_NETWORK_IPV6) &&
-	    rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
-		return -EINVAL;
-
-	rec->roce.route_resolved = true;
-	return 0;
-}
-
 static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
 				   struct sa_path_rec *rec,
 				   struct rdma_ah_attr *ah_attr,
@@ -1409,7 +1360,8 @@ static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
 	spin_unlock_irqrestore(&tid_lock, flags);
 }
 
-static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
+static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
+		    gfp_t gfp_mask)
 {
 	bool preload = gfpflags_allow_blocking(gfp_mask);
 	unsigned long flags;
@@ -1433,7 +1385,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
 
 	if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
 	    (!(query->flags & IB_SA_QUERY_OPA))) {
-		if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
+		if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
 			if (!ib_nl_make_request(query, gfp_mask))
 				return id;
 		}
@@ -1599,7 +1551,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
 		       struct ib_device *device, u8 port_num,
 		       struct sa_path_rec *rec,
 		       ib_sa_comp_mask comp_mask,
-		       int timeout_ms, gfp_t gfp_mask,
+		       unsigned long timeout_ms, gfp_t gfp_mask,
 		       void (*callback)(int status,
 					struct sa_path_rec *resp,
 					void *context),
@@ -1753,7 +1705,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
 			    struct ib_device *device, u8 port_num, u8 method,
 			    struct ib_sa_service_rec *rec,
 			    ib_sa_comp_mask comp_mask,
-			    int timeout_ms, gfp_t gfp_mask,
+			    unsigned long timeout_ms, gfp_t gfp_mask,
 			    void (*callback)(int status,
 					     struct ib_sa_service_rec *resp,
 					     void *context),
@@ -1850,7 +1802,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
 			     u8 method,
 			     struct ib_sa_mcmember_rec *rec,
 			     ib_sa_comp_mask comp_mask,
-			     int timeout_ms, gfp_t gfp_mask,
+			     unsigned long timeout_ms, gfp_t gfp_mask,
 			     void (*callback)(int status,
 					      struct ib_sa_mcmember_rec *resp,
 					      void *context),
@@ -1941,7 +1893,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
 			      struct ib_device *device, u8 port_num,
 			      struct ib_sa_guidinfo_rec *rec,
 			      ib_sa_comp_mask comp_mask, u8 method,
-			      int timeout_ms, gfp_t gfp_mask,
+			      unsigned long timeout_ms, gfp_t gfp_mask,
 			      void (*callback)(int status,
 					       struct ib_sa_guidinfo_rec *resp,
 					       void *context),
@@ -2108,7 +2060,7 @@ static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
 }
 
 static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
-					  int timeout_ms,
+					  unsigned long timeout_ms,
 					  void (*callback)(void *context),
 					  void *context,
 					  struct ib_sa_query **sa_query)
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 9b0bea8303e0..1143c0448666 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -685,9 +685,8 @@ static int ib_mad_agent_security_change(struct notifier_block *nb,
 	if (event != LSM_POLICY_CHANGE)
 		return NOTIFY_DONE;
 
-	ag->smp_allowed = !security_ib_endport_manage_subnet(ag->security,
-							     ag->device->name,
-							     ag->port_num);
+	ag->smp_allowed = !security_ib_endport_manage_subnet(
+		ag->security, dev_name(&ag->device->dev), ag->port_num);
 
 	return NOTIFY_OK;
 }
@@ -708,7 +707,7 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
 		return 0;
 
 	ret = security_ib_endport_manage_subnet(agent->security,
-						agent->device->name,
+						dev_name(&agent->device->dev),
 						agent->port_num);
 	if (ret)
 		return ret;
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 7fd14ead7b37..6fcce2c206c6 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -512,7 +512,7 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
 	ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
 			40 + offset / 8, sizeof(data));
 	if (ret < 0)
-		return sprintf(buf, "N/A (no PMA)\n");
+		return ret;
 
 	switch (width) {
 	case 4:
@@ -1036,7 +1036,7 @@ static int add_port(struct ib_device *device, int port_num,
 	p->port_num   = port_num;
 
 	ret = kobject_init_and_add(&p->kobj, &port_type,
-				   device->ports_parent,
+				   device->ports_kobj,
 				   "%d", port_num);
 	if (ret) {
 		kfree(p);
@@ -1057,10 +1057,12 @@ static int add_port(struct ib_device *device, int port_num,
 		goto err_put;
 	}
 
-	p->pma_table = get_counter_table(device, port_num);
-	ret = sysfs_create_group(&p->kobj, p->pma_table);
-	if (ret)
-		goto err_put_gid_attrs;
+	if (device->process_mad) {
+		p->pma_table = get_counter_table(device, port_num);
+		ret = sysfs_create_group(&p->kobj, p->pma_table);
+		if (ret)
+			goto err_put_gid_attrs;
+	}
 
 	p->gid_group.name  = "gids";
 	p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
@@ -1118,9 +1120,9 @@ static int add_port(struct ib_device *device, int port_num,
 	}
 
 	/*
-	 * If port == 0, it means we have only one port and the parent
-	 * device, not this port device, should be the holder of the
-	 * hw_counters
+	 * If port == 0, it means hw_counters are per device and not per
+	 * port, so holder should be device. Therefore skip per port conunter
+	 * initialization.
 	 */
 	if (device->alloc_hw_stats && port_num)
 		setup_hw_stats(device, p, port_num);
@@ -1173,7 +1175,8 @@ err_free_gid:
 	p->gid_group.attrs = NULL;
 
 err_remove_pma:
-	sysfs_remove_group(&p->kobj, p->pma_table);
+	if (p->pma_table)
+		sysfs_remove_group(&p->kobj, p->pma_table);
 
 err_put_gid_attrs:
 	kobject_put(&p->gid_attr_group->kobj);
@@ -1183,7 +1186,7 @@ err_put:
 	return ret;
 }
 
-static ssize_t show_node_type(struct device *device,
+static ssize_t node_type_show(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
 	struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1198,8 +1201,9 @@ static ssize_t show_node_type(struct device *device,
 	default:		  return sprintf(buf, "%d: <unknown>\n", dev->node_type);
 	}
 }
+static DEVICE_ATTR_RO(node_type);
 
-static ssize_t show_sys_image_guid(struct device *device,
+static ssize_t sys_image_guid_show(struct device *device,
 				   struct device_attribute *dev_attr, char *buf)
 {
 	struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1210,8 +1214,9 @@ static ssize_t show_sys_image_guid(struct device *device,
 		       be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
 		       be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
 }
+static DEVICE_ATTR_RO(sys_image_guid);
 
-static ssize_t show_node_guid(struct device *device,
+static ssize_t node_guid_show(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
 	struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1222,8 +1227,9 @@ static ssize_t show_node_guid(struct device *device,
 		       be16_to_cpu(((__be16 *) &dev->node_guid)[2]),
 		       be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
 }
+static DEVICE_ATTR_RO(node_guid);
 
-static ssize_t show_node_desc(struct device *device,
+static ssize_t node_desc_show(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
 	struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1231,9 +1237,9 @@ static ssize_t show_node_desc(struct device *device,
 	return sprintf(buf, "%.64s\n", dev->node_desc);
 }
 
-static ssize_t set_node_desc(struct device *device,
-			     struct device_attribute *attr,
-			     const char *buf, size_t count)
+static ssize_t node_desc_store(struct device *device,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
 {
 	struct ib_device *dev = container_of(device, struct ib_device, dev);
 	struct ib_device_modify desc = {};
@@ -1249,8 +1255,9 @@ static ssize_t set_node_desc(struct device *device,
 
 	return count;
 }
+static DEVICE_ATTR_RW(node_desc);
 
-static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr,
 			   char *buf)
 {
 	struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1259,19 +1266,19 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
 	strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX);
 	return strlen(buf);
 }
+static DEVICE_ATTR_RO(fw_ver);
+
+static struct attribute *ib_dev_attrs[] = {
+	&dev_attr_node_type.attr,
+	&dev_attr_node_guid.attr,
+	&dev_attr_sys_image_guid.attr,
+	&dev_attr_fw_ver.attr,
+	&dev_attr_node_desc.attr,
+	NULL,
+};
 
-static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
-static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
-static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
-static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
-
-static struct device_attribute *ib_class_attributes[] = {
-	&dev_attr_node_type,
-	&dev_attr_sys_image_guid,
-	&dev_attr_node_guid,
-	&dev_attr_node_desc,
-	&dev_attr_fw_ver,
+static const struct attribute_group dev_attr_group = {
+	.attrs = ib_dev_attrs,
 };
 
 static void free_port_list_attributes(struct ib_device *device)
@@ -1285,7 +1292,9 @@ static void free_port_list_attributes(struct ib_device *device)
 			kfree(port->hw_stats);
 			free_hsag(&port->kobj, port->hw_stats_ag);
 		}
-		sysfs_remove_group(p, port->pma_table);
+
+		if (port->pma_table)
+			sysfs_remove_group(p, port->pma_table);
 		sysfs_remove_group(p, &port->pkey_group);
 		sysfs_remove_group(p, &port->gid_group);
 		sysfs_remove_group(&port->gid_attr_group->kobj,
@@ -1296,7 +1305,7 @@ static void free_port_list_attributes(struct ib_device *device)
 		kobject_put(p);
 	}
 
-	kobject_put(device->ports_parent);
+	kobject_put(device->ports_kobj);
 }
 
 int ib_device_register_sysfs(struct ib_device *device,
@@ -1307,23 +1316,15 @@ int ib_device_register_sysfs(struct ib_device *device,
 	int ret;
 	int i;
 
-	ret = dev_set_name(class_dev, "%s", device->name);
-	if (ret)
-		return ret;
+	device->groups[0] = &dev_attr_group;
+	class_dev->groups = device->groups;
 
 	ret = device_add(class_dev);
 	if (ret)
 		goto err;
 
-	for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
-		ret = device_create_file(class_dev, ib_class_attributes[i]);
-		if (ret)
-			goto err_unregister;
-	}
-
-	device->ports_parent = kobject_create_and_add("ports",
-						      &class_dev->kobj);
-	if (!device->ports_parent) {
+	device->ports_kobj = kobject_create_and_add("ports", &class_dev->kobj);
+	if (!device->ports_kobj) {
 		ret = -ENOMEM;
 		goto err_put;
 	}
@@ -1347,20 +1348,15 @@ int ib_device_register_sysfs(struct ib_device *device,
 
 err_put:
 	free_port_list_attributes(device);
-
-err_unregister:
 	device_del(class_dev);
-
 err:
 	return ret;
 }
 
 void ib_device_unregister_sysfs(struct ib_device *device)
 {
-	int i;
-
-	/* Hold kobject until ib_dealloc_device() */
-	kobject_get(&device->dev.kobj);
+	/* Hold device until ib_dealloc_device() */
+	get_device(&device->dev);
 
 	free_port_list_attributes(device);
 
@@ -1369,8 +1365,5 @@ void ib_device_unregister_sysfs(struct ib_device *device)
 		free_hsag(&device->dev.kobj, device->hw_stats_ag);
 	}
 
-	for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
-		device_remove_file(&device->dev, ib_class_attributes[i]);
-
 	device_unregister(&device->dev);
 }
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index a41792dbae1f..c6144df47ea4 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -85,7 +85,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	struct page **page_list;
 	struct vm_area_struct **vma_list;
 	unsigned long lock_limit;
+	unsigned long new_pinned;
 	unsigned long cur_base;
+	struct mm_struct *mm;
 	unsigned long npages;
 	int ret;
 	int i;
@@ -107,25 +109,32 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	if (!can_do_mlock())
 		return ERR_PTR(-EPERM);
 
-	umem = kzalloc(sizeof *umem, GFP_KERNEL);
-	if (!umem)
-		return ERR_PTR(-ENOMEM);
+	if (access & IB_ACCESS_ON_DEMAND) {
+		umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
+		if (!umem)
+			return ERR_PTR(-ENOMEM);
+		umem->is_odp = 1;
+	} else {
+		umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+		if (!umem)
+			return ERR_PTR(-ENOMEM);
+	}
 
 	umem->context    = context;
 	umem->length     = size;
 	umem->address    = addr;
 	umem->page_shift = PAGE_SHIFT;
 	umem->writable   = ib_access_writable(access);
+	umem->owning_mm = mm = current->mm;
+	mmgrab(mm);
 
 	if (access & IB_ACCESS_ON_DEMAND) {
-		ret = ib_umem_odp_get(context, umem, access);
+		ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
 		if (ret)
 			goto umem_kfree;
 		return umem;
 	}
 
-	umem->odp_data = NULL;
-
 	/* We assume the memory is from hugetlb until proved otherwise */
 	umem->hugetlb   = 1;
 
@@ -144,25 +153,25 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 		umem->hugetlb = 0;
 
 	npages = ib_umem_num_pages(umem);
+	if (npages == 0 || npages > UINT_MAX) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
-	down_write(&current->mm->mmap_sem);
-	current->mm->pinned_vm += npages;
-	if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) {
-		up_write(&current->mm->mmap_sem);
+	down_write(&mm->mmap_sem);
+	if (check_add_overflow(mm->pinned_vm, npages, &new_pinned) ||
+	    (new_pinned > lock_limit && !capable(CAP_IPC_LOCK))) {
+		up_write(&mm->mmap_sem);
 		ret = -ENOMEM;
-		goto vma;
+		goto out;
 	}
-	up_write(&current->mm->mmap_sem);
+	mm->pinned_vm = new_pinned;
+	up_write(&mm->mmap_sem);
 
 	cur_base = addr & PAGE_MASK;
 
-	if (npages == 0 || npages > UINT_MAX) {
-		ret = -EINVAL;
-		goto vma;
-	}
-
 	ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
 	if (ret)
 		goto vma;
@@ -172,14 +181,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
 	sg_list_start = umem->sg_head.sgl;
 
-	down_read(&current->mm->mmap_sem);
 	while (npages) {
+		down_read(&mm->mmap_sem);
 		ret = get_user_pages_longterm(cur_base,
 				     min_t(unsigned long, npages,
 					   PAGE_SIZE / sizeof (struct page *)),
 				     gup_flags, page_list, vma_list);
 		if (ret < 0) {
-			up_read(&current->mm->mmap_sem);
+			up_read(&mm->mmap_sem);
 			goto umem_release;
 		}
 
@@ -187,17 +196,20 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 		cur_base += ret * PAGE_SIZE;
 		npages   -= ret;
 
+		/* Continue to hold the mmap_sem as vma_list access
+		 * needs to be protected.
+		 */
 		for_each_sg(sg_list_start, sg, ret, i) {
 			if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
 				umem->hugetlb = 0;
 
 			sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
 		}
+		up_read(&mm->mmap_sem);
 
 		/* preparing for next loop */
 		sg_list_start = sg;
 	}
-	up_read(&current->mm->mmap_sem);
 
 	umem->nmap = ib_dma_map_sg_attrs(context->device,
 				  umem->sg_head.sgl,
@@ -216,29 +228,40 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 umem_release:
 	__ib_umem_release(context->device, umem, 0);
 vma:
-	down_write(&current->mm->mmap_sem);
-	current->mm->pinned_vm -= ib_umem_num_pages(umem);
-	up_write(&current->mm->mmap_sem);
+	down_write(&mm->mmap_sem);
+	mm->pinned_vm -= ib_umem_num_pages(umem);
+	up_write(&mm->mmap_sem);
 out:
 	if (vma_list)
 		free_page((unsigned long) vma_list);
 	free_page((unsigned long) page_list);
 umem_kfree:
-	if (ret)
+	if (ret) {
+		mmdrop(umem->owning_mm);
 		kfree(umem);
+	}
 	return ret ? ERR_PTR(ret) : umem;
 }
 EXPORT_SYMBOL(ib_umem_get);
 
-static void ib_umem_account(struct work_struct *work)
+static void __ib_umem_release_tail(struct ib_umem *umem)
+{
+	mmdrop(umem->owning_mm);
+	if (umem->is_odp)
+		kfree(to_ib_umem_odp(umem));
+	else
+		kfree(umem);
+}
+
+static void ib_umem_release_defer(struct work_struct *work)
 {
 	struct ib_umem *umem = container_of(work, struct ib_umem, work);
 
-	down_write(&umem->mm->mmap_sem);
-	umem->mm->pinned_vm -= umem->diff;
-	up_write(&umem->mm->mmap_sem);
-	mmput(umem->mm);
-	kfree(umem);
+	down_write(&umem->owning_mm->mmap_sem);
+	umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
+	up_write(&umem->owning_mm->mmap_sem);
+
+	__ib_umem_release_tail(umem);
 }
 
 /**
@@ -248,52 +271,36 @@ static void ib_umem_account(struct work_struct *work)
 void ib_umem_release(struct ib_umem *umem)
 {
 	struct ib_ucontext *context = umem->context;
-	struct mm_struct *mm;
-	struct task_struct *task;
-	unsigned long diff;
 
-	if (umem->odp_data) {
-		ib_umem_odp_release(umem);
+	if (umem->is_odp) {
+		ib_umem_odp_release(to_ib_umem_odp(umem));
+		__ib_umem_release_tail(umem);
 		return;
 	}
 
 	__ib_umem_release(umem->context->device, umem, 1);
 
-	task = get_pid_task(umem->context->tgid, PIDTYPE_PID);
-	if (!task)
-		goto out;
-	mm = get_task_mm(task);
-	put_task_struct(task);
-	if (!mm)
-		goto out;
-
-	diff = ib_umem_num_pages(umem);
-
 	/*
 	 * We may be called with the mm's mmap_sem already held.  This
 	 * can happen when a userspace munmap() is the call that drops
 	 * the last reference to our file and calls our release
 	 * method.  If there are memory regions to destroy, we'll end
 	 * up here and not be able to take the mmap_sem.  In that case
-	 * we defer the vm_locked accounting to the system workqueue.
+	 * we defer the vm_locked accounting a workqueue.
 	 */
 	if (context->closing) {
-		if (!down_write_trylock(&mm->mmap_sem)) {
-			INIT_WORK(&umem->work, ib_umem_account);
-			umem->mm   = mm;
-			umem->diff = diff;
-
+		if (!down_write_trylock(&umem->owning_mm->mmap_sem)) {
+			INIT_WORK(&umem->work, ib_umem_release_defer);
 			queue_work(ib_wq, &umem->work);
 			return;
 		}
-	} else
-		down_write(&mm->mmap_sem);
+	} else {
+		down_write(&umem->owning_mm->mmap_sem);
+	}
+	umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
+	up_write(&umem->owning_mm->mmap_sem);
 
-	mm->pinned_vm -= diff;
-	up_write(&mm->mmap_sem);
-	mmput(mm);
-out:
-	kfree(umem);
+	__ib_umem_release_tail(umem);
 }
 EXPORT_SYMBOL(ib_umem_release);
 
@@ -303,7 +310,7 @@ int ib_umem_page_count(struct ib_umem *umem)
 	int n;
 	struct scatterlist *sg;
 
-	if (umem->odp_data)
+	if (umem->is_odp)
 		return ib_umem_num_pages(umem);
 
 	n = 0;
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 6ec748eccff7..2b4c5e7dd5a1 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -58,7 +58,7 @@ static u64 node_start(struct umem_odp_node *n)
 	struct ib_umem_odp *umem_odp =
 			container_of(n, struct ib_umem_odp, interval_tree);
 
-	return ib_umem_start(umem_odp->umem);
+	return ib_umem_start(&umem_odp->umem);
 }
 
 /* Note that the representation of the intervals in the interval tree
@@ -71,140 +71,86 @@ static u64 node_last(struct umem_odp_node *n)
 	struct ib_umem_odp *umem_odp =
 			container_of(n, struct ib_umem_odp, interval_tree);
 
-	return ib_umem_end(umem_odp->umem) - 1;
+	return ib_umem_end(&umem_odp->umem) - 1;
 }
 
 INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
 		     node_start, node_last, static, rbt_ib_umem)
 
-static void ib_umem_notifier_start_account(struct ib_umem *item)
+static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
 {
-	mutex_lock(&item->odp_data->umem_mutex);
-
-	/* Only update private counters for this umem if it has them.
-	 * Otherwise skip it. All page faults will be delayed for this umem. */
-	if (item->odp_data->mn_counters_active) {
-		int notifiers_count = item->odp_data->notifiers_count++;
-
-		if (notifiers_count == 0)
-			/* Initialize the completion object for waiting on
-			 * notifiers. Since notifier_count is zero, no one
-			 * should be waiting right now. */
-			reinit_completion(&item->odp_data->notifier_completion);
-	}
-	mutex_unlock(&item->odp_data->umem_mutex);
-}
-
-static void ib_umem_notifier_end_account(struct ib_umem *item)
-{
-	mutex_lock(&item->odp_data->umem_mutex);
-
-	/* Only update private counters for this umem if it has them.
-	 * Otherwise skip it. All page faults will be delayed for this umem. */
-	if (item->odp_data->mn_counters_active) {
+	mutex_lock(&umem_odp->umem_mutex);
+	if (umem_odp->notifiers_count++ == 0)
 		/*
-		 * This sequence increase will notify the QP page fault that
-		 * the page that is going to be mapped in the spte could have
-		 * been freed.
+		 * Initialize the completion object for waiting on
+		 * notifiers. Since notifier_count is zero, no one should be
+		 * waiting right now.
 		 */
-		++item->odp_data->notifiers_seq;
-		if (--item->odp_data->notifiers_count == 0)
-			complete_all(&item->odp_data->notifier_completion);
-	}
-	mutex_unlock(&item->odp_data->umem_mutex);
+		reinit_completion(&umem_odp->notifier_completion);
+	mutex_unlock(&umem_odp->umem_mutex);
 }
 
-/* Account for a new mmu notifier in an ib_ucontext. */
-static void ib_ucontext_notifier_start_account(struct ib_ucontext *context)
+static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
 {
-	atomic_inc(&context->notifier_count);
+	mutex_lock(&umem_odp->umem_mutex);
+	/*
+	 * This sequence increase will notify the QP page fault that the page
+	 * that is going to be mapped in the spte could have been freed.
+	 */
+	++umem_odp->notifiers_seq;
+	if (--umem_odp->notifiers_count == 0)
+		complete_all(&umem_odp->notifier_completion);
+	mutex_unlock(&umem_odp->umem_mutex);
 }
 
-/* Account for a terminating mmu notifier in an ib_ucontext.
- *
- * Must be called with the ib_ucontext->umem_rwsem semaphore unlocked, since
- * the function takes the semaphore itself. */
-static void ib_ucontext_notifier_end_account(struct ib_ucontext *context)
+static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
+					       u64 start, u64 end, void *cookie)
 {
-	int zero_notifiers = atomic_dec_and_test(&context->notifier_count);
-
-	if (zero_notifiers &&
-	    !list_empty(&context->no_private_counters)) {
-		/* No currently running mmu notifiers. Now is the chance to
-		 * add private accounting to all previously added umems. */
-		struct ib_umem_odp *odp_data, *next;
-
-		/* Prevent concurrent mmu notifiers from working on the
-		 * no_private_counters list. */
-		down_write(&context->umem_rwsem);
-
-		/* Read the notifier_count again, with the umem_rwsem
-		 * semaphore taken for write. */
-		if (!atomic_read(&context->notifier_count)) {
-			list_for_each_entry_safe(odp_data, next,
-						 &context->no_private_counters,
-						 no_private_counters) {
-				mutex_lock(&odp_data->umem_mutex);
-				odp_data->mn_counters_active = true;
-				list_del(&odp_data->no_private_counters);
-				complete_all(&odp_data->notifier_completion);
-				mutex_unlock(&odp_data->umem_mutex);
-			}
-		}
-
-		up_write(&context->umem_rwsem);
-	}
-}
+	struct ib_umem *umem = &umem_odp->umem;
 
-static int ib_umem_notifier_release_trampoline(struct ib_umem *item, u64 start,
-					       u64 end, void *cookie) {
 	/*
 	 * Increase the number of notifiers running, to
 	 * prevent any further fault handling on this MR.
 	 */
-	ib_umem_notifier_start_account(item);
-	item->odp_data->dying = 1;
+	ib_umem_notifier_start_account(umem_odp);
+	umem_odp->dying = 1;
 	/* Make sure that the fact the umem is dying is out before we release
 	 * all pending page faults. */
 	smp_wmb();
-	complete_all(&item->odp_data->notifier_completion);
-	item->context->invalidate_range(item, ib_umem_start(item),
-					ib_umem_end(item));
+	complete_all(&umem_odp->notifier_completion);
+	umem->context->invalidate_range(umem_odp, ib_umem_start(umem),
+					ib_umem_end(umem));
 	return 0;
 }
 
 static void ib_umem_notifier_release(struct mmu_notifier *mn,
 				     struct mm_struct *mm)
 {
-	struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
-
-	if (!context->invalidate_range)
-		return;
-
-	ib_ucontext_notifier_start_account(context);
-	down_read(&context->umem_rwsem);
-	rbt_ib_umem_for_each_in_range(&context->umem_tree, 0,
-				      ULLONG_MAX,
-				      ib_umem_notifier_release_trampoline,
-				      true,
-				      NULL);
-	up_read(&context->umem_rwsem);
+	struct ib_ucontext_per_mm *per_mm =
+		container_of(mn, struct ib_ucontext_per_mm, mn);
+
+	down_read(&per_mm->umem_rwsem);
+	if (per_mm->active)
+		rbt_ib_umem_for_each_in_range(
+			&per_mm->umem_tree, 0, ULLONG_MAX,
+			ib_umem_notifier_release_trampoline, true, NULL);
+	up_read(&per_mm->umem_rwsem);
 }
 
-static int invalidate_page_trampoline(struct ib_umem *item, u64 start,
+static int invalidate_page_trampoline(struct ib_umem_odp *item, u64 start,
 				      u64 end, void *cookie)
 {
 	ib_umem_notifier_start_account(item);
-	item->context->invalidate_range(item, start, start + PAGE_SIZE);
+	item->umem.context->invalidate_range(item, start, start + PAGE_SIZE);
 	ib_umem_notifier_end_account(item);
 	return 0;
 }
 
-static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
-					     u64 end, void *cookie)
+static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
+					     u64 start, u64 end, void *cookie)
 {
 	ib_umem_notifier_start_account(item);
-	item->context->invalidate_range(item, start, end);
+	item->umem.context->invalidate_range(item, start, end);
 	return 0;
 }
 
@@ -214,28 +160,30 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
 						    unsigned long end,
 						    bool blockable)
 {
-	struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
-	int ret;
-
-	if (!context->invalidate_range)
-		return 0;
+	struct ib_ucontext_per_mm *per_mm =
+		container_of(mn, struct ib_ucontext_per_mm, mn);
 
 	if (blockable)
-		down_read(&context->umem_rwsem);
-	else if (!down_read_trylock(&context->umem_rwsem))
+		down_read(&per_mm->umem_rwsem);
+	else if (!down_read_trylock(&per_mm->umem_rwsem))
 		return -EAGAIN;
 
-	ib_ucontext_notifier_start_account(context);
-	ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
-				      end,
-				      invalidate_range_start_trampoline,
-				      blockable, NULL);
-	up_read(&context->umem_rwsem);
+	if (!per_mm->active) {
+		up_read(&per_mm->umem_rwsem);
+		/*
+		 * At this point active is permanently set and visible to this
+		 * CPU without a lock, that fact is relied on to skip the unlock
+		 * in range_end.
+		 */
+		return 0;
+	}
 
-	return ret;
+	return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start, end,
+					     invalidate_range_start_trampoline,
+					     blockable, NULL);
 }
 
-static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start,
+static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start,
 					   u64 end, void *cookie)
 {
 	ib_umem_notifier_end_account(item);
@@ -247,22 +195,16 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
 						  unsigned long start,
 						  unsigned long end)
 {
-	struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+	struct ib_ucontext_per_mm *per_mm =
+		container_of(mn, struct ib_ucontext_per_mm, mn);
 
-	if (!context->invalidate_range)
+	if (unlikely(!per_mm->active))
 		return;
 
-	/*
-	 * TODO: we currently bail out if there is any sleepable work to be done
-	 * in ib_umem_notifier_invalidate_range_start so we shouldn't really block
-	 * here. But this is ugly and fragile.
-	 */
-	down_read(&context->umem_rwsem);
-	rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
+	rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start,
 				      end,
 				      invalidate_range_end_trampoline, true, NULL);
-	up_read(&context->umem_rwsem);
-	ib_ucontext_notifier_end_account(context);
+	up_read(&per_mm->umem_rwsem);
 }
 
 static const struct mmu_notifier_ops ib_umem_notifiers = {
@@ -271,31 +213,158 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
 	.invalidate_range_end       = ib_umem_notifier_invalidate_range_end,
 };
 
-struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
-				  unsigned long addr,
-				  size_t size)
+static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
 {
-	struct ib_umem *umem;
+	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
+	struct ib_umem *umem = &umem_odp->umem;
+
+	down_write(&per_mm->umem_rwsem);
+	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
+		rbt_ib_umem_insert(&umem_odp->interval_tree,
+				   &per_mm->umem_tree);
+	up_write(&per_mm->umem_rwsem);
+}
+
+static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
+{
+	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
+	struct ib_umem *umem = &umem_odp->umem;
+
+	down_write(&per_mm->umem_rwsem);
+	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
+		rbt_ib_umem_remove(&umem_odp->interval_tree,
+				   &per_mm->umem_tree);
+	complete_all(&umem_odp->notifier_completion);
+
+	up_write(&per_mm->umem_rwsem);
+}
+
+static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx,
+					       struct mm_struct *mm)
+{
+	struct ib_ucontext_per_mm *per_mm;
+	int ret;
+
+	per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL);
+	if (!per_mm)
+		return ERR_PTR(-ENOMEM);
+
+	per_mm->context = ctx;
+	per_mm->mm = mm;
+	per_mm->umem_tree = RB_ROOT_CACHED;
+	init_rwsem(&per_mm->umem_rwsem);
+	per_mm->active = ctx->invalidate_range;
+
+	rcu_read_lock();
+	per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
+	rcu_read_unlock();
+
+	WARN_ON(mm != current->mm);
+
+	per_mm->mn.ops = &ib_umem_notifiers;
+	ret = mmu_notifier_register(&per_mm->mn, per_mm->mm);
+	if (ret) {
+		dev_err(&ctx->device->dev,
+			"Failed to register mmu_notifier %d\n", ret);
+		goto out_pid;
+	}
+
+	list_add(&per_mm->ucontext_list, &ctx->per_mm_list);
+	return per_mm;
+
+out_pid:
+	put_pid(per_mm->tgid);
+	kfree(per_mm);
+	return ERR_PTR(ret);
+}
+
+static int get_per_mm(struct ib_umem_odp *umem_odp)
+{
+	struct ib_ucontext *ctx = umem_odp->umem.context;
+	struct ib_ucontext_per_mm *per_mm;
+
+	/*
+	 * Generally speaking we expect only one or two per_mm in this list,
+	 * so no reason to optimize this search today.
+	 */
+	mutex_lock(&ctx->per_mm_list_lock);
+	list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) {
+		if (per_mm->mm == umem_odp->umem.owning_mm)
+			goto found;
+	}
+
+	per_mm = alloc_per_mm(ctx, umem_odp->umem.owning_mm);
+	if (IS_ERR(per_mm)) {
+		mutex_unlock(&ctx->per_mm_list_lock);
+		return PTR_ERR(per_mm);
+	}
+
+found:
+	umem_odp->per_mm = per_mm;
+	per_mm->odp_mrs_count++;
+	mutex_unlock(&ctx->per_mm_list_lock);
+
+	return 0;
+}
+
+static void free_per_mm(struct rcu_head *rcu)
+{
+	kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu));
+}
+
+void put_per_mm(struct ib_umem_odp *umem_odp)
+{
+	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
+	struct ib_ucontext *ctx = umem_odp->umem.context;
+	bool need_free;
+
+	mutex_lock(&ctx->per_mm_list_lock);
+	umem_odp->per_mm = NULL;
+	per_mm->odp_mrs_count--;
+	need_free = per_mm->odp_mrs_count == 0;
+	if (need_free)
+		list_del(&per_mm->ucontext_list);
+	mutex_unlock(&ctx->per_mm_list_lock);
+
+	if (!need_free)
+		return;
+
+	/*
+	 * NOTE! mmu_notifier_unregister() can happen between a start/end
+	 * callback, resulting in an start/end, and thus an unbalanced
+	 * lock. This doesn't really matter to us since we are about to kfree
+	 * the memory that holds the lock, however LOCKDEP doesn't like this.
+	 */
+	down_write(&per_mm->umem_rwsem);
+	per_mm->active = false;
+	up_write(&per_mm->umem_rwsem);
+
+	WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root));
+	mmu_notifier_unregister_no_release(&per_mm->mn, per_mm->mm);
+	put_pid(per_mm->tgid);
+	mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm);
+}
+
+struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
+				      unsigned long addr, size_t size)
+{
+	struct ib_ucontext *ctx = per_mm->context;
 	struct ib_umem_odp *odp_data;
+	struct ib_umem *umem;
 	int pages = size >> PAGE_SHIFT;
 	int ret;
 
-	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
-	if (!umem)
+	odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
+	if (!odp_data)
 		return ERR_PTR(-ENOMEM);
-
-	umem->context    = context;
+	umem = &odp_data->umem;
+	umem->context    = ctx;
 	umem->length     = size;
 	umem->address    = addr;
 	umem->page_shift = PAGE_SHIFT;
 	umem->writable   = 1;
-
-	odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
-	if (!odp_data) {
-		ret = -ENOMEM;
-		goto out_umem;
-	}
-	odp_data->umem = umem;
+	umem->is_odp = 1;
+	odp_data->per_mm = per_mm;
 
 	mutex_init(&odp_data->umem_mutex);
 	init_completion(&odp_data->notifier_completion);
@@ -314,39 +383,34 @@ struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
 		goto out_page_list;
 	}
 
-	down_write(&context->umem_rwsem);
-	context->odp_mrs_count++;
-	rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
-	if (likely(!atomic_read(&context->notifier_count)))
-		odp_data->mn_counters_active = true;
-	else
-		list_add(&odp_data->no_private_counters,
-			 &context->no_private_counters);
-	up_write(&context->umem_rwsem);
-
-	umem->odp_data = odp_data;
+	/*
+	 * Caller must ensure that the umem_odp that the per_mm came from
+	 * cannot be freed during the call to ib_alloc_odp_umem.
+	 */
+	mutex_lock(&ctx->per_mm_list_lock);
+	per_mm->odp_mrs_count++;
+	mutex_unlock(&ctx->per_mm_list_lock);
+	add_umem_to_per_mm(odp_data);
 
-	return umem;
+	return odp_data;
 
 out_page_list:
 	vfree(odp_data->page_list);
 out_odp_data:
 	kfree(odp_data);
-out_umem:
-	kfree(umem);
 	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL(ib_alloc_odp_umem);
 
-int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
-		    int access)
+int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 {
+	struct ib_umem *umem = &umem_odp->umem;
+	/*
+	 * NOTE: This must called in a process context where umem->owning_mm
+	 * == current->mm
+	 */
+	struct mm_struct *mm = umem->owning_mm;
 	int ret_val;
-	struct pid *our_pid;
-	struct mm_struct *mm = get_task_mm(current);
-
-	if (!mm)
-		return -EINVAL;
 
 	if (access & IB_ACCESS_HUGETLB) {
 		struct vm_area_struct *vma;
@@ -366,111 +430,43 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
 		umem->hugetlb = 0;
 	}
 
-	/* Prevent creating ODP MRs in child processes */
-	rcu_read_lock();
-	our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
-	rcu_read_unlock();
-	put_pid(our_pid);
-	if (context->tgid != our_pid) {
-		ret_val = -EINVAL;
-		goto out_mm;
-	}
-
-	umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL);
-	if (!umem->odp_data) {
-		ret_val = -ENOMEM;
-		goto out_mm;
-	}
-	umem->odp_data->umem = umem;
-
-	mutex_init(&umem->odp_data->umem_mutex);
+	mutex_init(&umem_odp->umem_mutex);
 
-	init_completion(&umem->odp_data->notifier_completion);
+	init_completion(&umem_odp->notifier_completion);
 
 	if (ib_umem_num_pages(umem)) {
-		umem->odp_data->page_list =
-			vzalloc(array_size(sizeof(*umem->odp_data->page_list),
+		umem_odp->page_list =
+			vzalloc(array_size(sizeof(*umem_odp->page_list),
 					   ib_umem_num_pages(umem)));
-		if (!umem->odp_data->page_list) {
-			ret_val = -ENOMEM;
-			goto out_odp_data;
-		}
+		if (!umem_odp->page_list)
+			return -ENOMEM;
 
-		umem->odp_data->dma_list =
-			vzalloc(array_size(sizeof(*umem->odp_data->dma_list),
+		umem_odp->dma_list =
+			vzalloc(array_size(sizeof(*umem_odp->dma_list),
 					   ib_umem_num_pages(umem)));
-		if (!umem->odp_data->dma_list) {
+		if (!umem_odp->dma_list) {
 			ret_val = -ENOMEM;
 			goto out_page_list;
 		}
 	}
 
-	/*
-	 * When using MMU notifiers, we will get a
-	 * notification before the "current" task (and MM) is
-	 * destroyed. We use the umem_rwsem semaphore to synchronize.
-	 */
-	down_write(&context->umem_rwsem);
-	context->odp_mrs_count++;
-	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
-		rbt_ib_umem_insert(&umem->odp_data->interval_tree,
-				   &context->umem_tree);
-	if (likely(!atomic_read(&context->notifier_count)) ||
-	    context->odp_mrs_count == 1)
-		umem->odp_data->mn_counters_active = true;
-	else
-		list_add(&umem->odp_data->no_private_counters,
-			 &context->no_private_counters);
-	downgrade_write(&context->umem_rwsem);
-
-	if (context->odp_mrs_count == 1) {
-		/*
-		 * Note that at this point, no MMU notifier is running
-		 * for this context!
-		 */
-		atomic_set(&context->notifier_count, 0);
-		INIT_HLIST_NODE(&context->mn.hlist);
-		context->mn.ops = &ib_umem_notifiers;
-		/*
-		 * Lock-dep detects a false positive for mmap_sem vs.
-		 * umem_rwsem, due to not grasping downgrade_write correctly.
-		 */
-		lockdep_off();
-		ret_val = mmu_notifier_register(&context->mn, mm);
-		lockdep_on();
-		if (ret_val) {
-			pr_err("Failed to register mmu_notifier %d\n", ret_val);
-			ret_val = -EBUSY;
-			goto out_mutex;
-		}
-	}
-
-	up_read(&context->umem_rwsem);
+	ret_val = get_per_mm(umem_odp);
+	if (ret_val)
+		goto out_dma_list;
+	add_umem_to_per_mm(umem_odp);
 
-	/*
-	 * Note that doing an mmput can cause a notifier for the relevant mm.
-	 * If the notifier is called while we hold the umem_rwsem, this will
-	 * cause a deadlock. Therefore, we release the reference only after we
-	 * released the semaphore.
-	 */
-	mmput(mm);
 	return 0;
 
-out_mutex:
-	up_read(&context->umem_rwsem);
-	vfree(umem->odp_data->dma_list);
+out_dma_list:
+	vfree(umem_odp->dma_list);
 out_page_list:
-	vfree(umem->odp_data->page_list);
-out_odp_data:
-	kfree(umem->odp_data);
-out_mm:
-	mmput(mm);
+	vfree(umem_odp->page_list);
 	return ret_val;
 }
 
-void ib_umem_odp_release(struct ib_umem *umem)
+void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 {
-	struct ib_ucontext *context = umem->context;
+	struct ib_umem *umem = &umem_odp->umem;
 
 	/*
 	 * Ensure that no more pages are mapped in the umem.
@@ -478,61 +474,13 @@ void ib_umem_odp_release(struct ib_umem *umem)
 	 * It is the driver's responsibility to ensure, before calling us,
 	 * that the hardware will not attempt to access the MR any more.
 	 */
-	ib_umem_odp_unmap_dma_pages(umem, ib_umem_start(umem),
+	ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
 				    ib_umem_end(umem));
 
-	down_write(&context->umem_rwsem);
-	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
-		rbt_ib_umem_remove(&umem->odp_data->interval_tree,
-				   &context->umem_tree);
-	context->odp_mrs_count--;
-	if (!umem->odp_data->mn_counters_active) {
-		list_del(&umem->odp_data->no_private_counters);
-		complete_all(&umem->odp_data->notifier_completion);
-	}
-
-	/*
-	 * Downgrade the lock to a read lock. This ensures that the notifiers
-	 * (who lock the mutex for reading) will be able to finish, and we
-	 * will be able to enventually obtain the mmu notifiers SRCU. Note
-	 * that since we are doing it atomically, no other user could register
-	 * and unregister while we do the check.
-	 */
-	downgrade_write(&context->umem_rwsem);
-	if (!context->odp_mrs_count) {
-		struct task_struct *owning_process = NULL;
-		struct mm_struct *owning_mm        = NULL;
-
-		owning_process = get_pid_task(context->tgid,
-					      PIDTYPE_PID);
-		if (owning_process == NULL)
-			/*
-			 * The process is already dead, notifier were removed
-			 * already.
-			 */
-			goto out;
-
-		owning_mm = get_task_mm(owning_process);
-		if (owning_mm == NULL)
-			/*
-			 * The process' mm is already dead, notifier were
-			 * removed already.
-			 */
-			goto out_put_task;
-		mmu_notifier_unregister(&context->mn, owning_mm);
-
-		mmput(owning_mm);
-
-out_put_task:
-		put_task_struct(owning_process);
-	}
-out:
-	up_read(&context->umem_rwsem);
-
-	vfree(umem->odp_data->dma_list);
-	vfree(umem->odp_data->page_list);
-	kfree(umem->odp_data);
-	kfree(umem);
+	remove_umem_from_per_mm(umem_odp);
+	put_per_mm(umem_odp);
+	vfree(umem_odp->dma_list);
+	vfree(umem_odp->page_list);
 }
 
 /*
@@ -544,7 +492,7 @@ out:
  * @access_mask: access permissions needed for this page.
  * @current_seq: sequence number for synchronization with invalidations.
  *               the sequence number is taken from
- *               umem->odp_data->notifiers_seq.
+ *               umem_odp->notifiers_seq.
  *
  * The function returns -EFAULT if the DMA mapping operation fails. It returns
  * -EAGAIN if a concurrent invalidation prevents us from updating the page.
@@ -554,12 +502,13 @@ out:
  * umem.
  */
 static int ib_umem_odp_map_dma_single_page(
-		struct ib_umem *umem,
+		struct ib_umem_odp *umem_odp,
 		int page_index,
 		struct page *page,
 		u64 access_mask,
 		unsigned long current_seq)
 {
+	struct ib_umem *umem = &umem_odp->umem;
 	struct ib_device *dev = umem->context->device;
 	dma_addr_t dma_addr;
 	int stored_page = 0;
@@ -571,11 +520,11 @@ static int ib_umem_odp_map_dma_single_page(
 	 * handle case of a racing notifier. This check also allows us to bail
 	 * early if we have a notifier running in parallel with us.
 	 */
-	if (ib_umem_mmu_notifier_retry(umem, current_seq)) {
+	if (ib_umem_mmu_notifier_retry(umem_odp, current_seq)) {
 		ret = -EAGAIN;
 		goto out;
 	}
-	if (!(umem->odp_data->dma_list[page_index])) {
+	if (!(umem_odp->dma_list[page_index])) {
 		dma_addr = ib_dma_map_page(dev,
 					   page,
 					   0, BIT(umem->page_shift),
@@ -584,15 +533,15 @@ static int ib_umem_odp_map_dma_single_page(
 			ret = -EFAULT;
 			goto out;
 		}
-		umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
-		umem->odp_data->page_list[page_index] = page;
+		umem_odp->dma_list[page_index] = dma_addr | access_mask;
+		umem_odp->page_list[page_index] = page;
 		umem->npages++;
 		stored_page = 1;
-	} else if (umem->odp_data->page_list[page_index] == page) {
-		umem->odp_data->dma_list[page_index] |= access_mask;
+	} else if (umem_odp->page_list[page_index] == page) {
+		umem_odp->dma_list[page_index] |= access_mask;
 	} else {
 		pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
-		       umem->odp_data->page_list[page_index], page);
+		       umem_odp->page_list[page_index], page);
 		/* Better remove the mapping now, to prevent any further
 		 * damage. */
 		remove_existing_mapping = 1;
@@ -605,7 +554,7 @@ out:
 
 	if (remove_existing_mapping && umem->context->invalidate_range) {
 		invalidate_page_trampoline(
-			umem,
+			umem_odp,
 			ib_umem_start(umem) + (page_index >> umem->page_shift),
 			ib_umem_start(umem) + ((page_index + 1) >>
 					       umem->page_shift),
@@ -621,7 +570,7 @@ out:
  *
  * Pins the range of pages passed in the argument, and maps them to
  * DMA addresses. The DMA addresses of the mapped pages is updated in
- * umem->odp_data->dma_list.
+ * umem_odp->dma_list.
  *
  * Returns the number of pages mapped in success, negative error code
  * for failure.
@@ -629,7 +578,7 @@ out:
  * the function from completing its task.
  * An -ENOENT error code indicates that userspace process is being terminated
  * and mm was already destroyed.
- * @umem: the umem to map and pin
+ * @umem_odp: the umem to map and pin
  * @user_virt: the address from which we need to map.
  * @bcnt: the minimal number of bytes to pin and map. The mapping might be
  *        bigger due to alignment, and may also be smaller in case of an error
@@ -639,13 +588,15 @@ out:
  *               range.
  * @current_seq: the MMU notifiers sequance value for synchronization with
  *               invalidations. the sequance number is read from
- *               umem->odp_data->notifiers_seq before calling this function
+ *               umem_odp->notifiers_seq before calling this function
  */
-int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
-			      u64 access_mask, unsigned long current_seq)
+int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
+			      u64 bcnt, u64 access_mask,
+			      unsigned long current_seq)
 {
+	struct ib_umem *umem = &umem_odp->umem;
 	struct task_struct *owning_process  = NULL;
-	struct mm_struct   *owning_mm       = NULL;
+	struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
 	struct page       **local_page_list = NULL;
 	u64 page_mask, off;
 	int j, k, ret = 0, start_idx, npages = 0, page_shift;
@@ -669,15 +620,14 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 	user_virt = user_virt & page_mask;
 	bcnt += off; /* Charge for the first page offset as well. */
 
-	owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
-	if (owning_process == NULL) {
+	/*
+	 * owning_process is allowed to be NULL, this means somehow the mm is
+	 * existing beyond the lifetime of the originating process.. Presumably
+	 * mmget_not_zero will fail in this case.
+	 */
+	owning_process = get_pid_task(umem_odp->per_mm->tgid, PIDTYPE_PID);
+	if (WARN_ON(!mmget_not_zero(umem_odp->umem.owning_mm))) {
 		ret = -EINVAL;
-		goto out_no_task;
-	}
-
-	owning_mm = get_task_mm(owning_process);
-	if (owning_mm == NULL) {
-		ret = -ENOENT;
 		goto out_put_task;
 	}
 
@@ -709,7 +659,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 			break;
 
 		bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
-		mutex_lock(&umem->odp_data->umem_mutex);
+		mutex_lock(&umem_odp->umem_mutex);
 		for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
 			if (user_virt & ~page_mask) {
 				p += PAGE_SIZE;
@@ -722,7 +672,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 			}
 
 			ret = ib_umem_odp_map_dma_single_page(
-					umem, k, local_page_list[j],
+					umem_odp, k, local_page_list[j],
 					access_mask, current_seq);
 			if (ret < 0)
 				break;
@@ -730,7 +680,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 			p = page_to_phys(local_page_list[j]);
 			k++;
 		}
-		mutex_unlock(&umem->odp_data->umem_mutex);
+		mutex_unlock(&umem_odp->umem_mutex);
 
 		if (ret < 0) {
 			/* Release left over pages when handling errors. */
@@ -749,16 +699,17 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 
 	mmput(owning_mm);
 out_put_task:
-	put_task_struct(owning_process);
-out_no_task:
+	if (owning_process)
+		put_task_struct(owning_process);
 	free_page((unsigned long)local_page_list);
 	return ret;
 }
 EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
 
-void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
+void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
 				 u64 bound)
 {
+	struct ib_umem *umem = &umem_odp->umem;
 	int idx;
 	u64 addr;
 	struct ib_device *dev = umem->context->device;
@@ -770,12 +721,12 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
 	 * faults from completion. We might be racing with other
 	 * invalidations, so we must make sure we free each page only
 	 * once. */
-	mutex_lock(&umem->odp_data->umem_mutex);
+	mutex_lock(&umem_odp->umem_mutex);
 	for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
 		idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
-		if (umem->odp_data->page_list[idx]) {
-			struct page *page = umem->odp_data->page_list[idx];
-			dma_addr_t dma = umem->odp_data->dma_list[idx];
+		if (umem_odp->page_list[idx]) {
+			struct page *page = umem_odp->page_list[idx];
+			dma_addr_t dma = umem_odp->dma_list[idx];
 			dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
 
 			WARN_ON(!dma_addr);
@@ -798,12 +749,12 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
 			/* on demand pinning support */
 			if (!umem->context->invalidate_range)
 				put_page(page);
-			umem->odp_data->page_list[idx] = NULL;
-			umem->odp_data->dma_list[idx] = 0;
+			umem_odp->page_list[idx] = NULL;
+			umem_odp->dma_list[idx] = 0;
 			umem->npages--;
 		}
 	}
-	mutex_unlock(&umem->odp_data->umem_mutex);
+	mutex_unlock(&umem_odp->umem_mutex);
 }
 EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
 
@@ -830,7 +781,7 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
 			return -EAGAIN;
 		next = rbt_ib_umem_iter_next(node, start, last - 1);
 		umem = container_of(node, struct ib_umem_odp, interval_tree);
-		ret_val = cb(umem->umem, start, last, cookie) || ret_val;
+		ret_val = cb(umem, start, last, cookie) || ret_val;
 	}
 
 	return ret_val;
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index c34a6852d691..f55f48f6b272 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -138,7 +138,7 @@ static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) +
 static dev_t dynamic_umad_dev;
 static dev_t dynamic_issm_dev;
 
-static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
+static DEFINE_IDA(umad_ida);
 
 static void ib_umad_add_one(struct ib_device *device);
 static void ib_umad_remove_one(struct ib_device *device, void *client_data);
@@ -1132,7 +1132,7 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
 	if (!port)
 		return -ENODEV;
 
-	return sprintf(buf, "%s\n", port->ib_dev->name);
+	return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev));
 }
 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
 
@@ -1159,11 +1159,10 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
 	dev_t base_umad;
 	dev_t base_issm;
 
-	devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
-	if (devnum >= IB_UMAD_MAX_PORTS)
+	devnum = ida_alloc_max(&umad_ida, IB_UMAD_MAX_PORTS - 1, GFP_KERNEL);
+	if (devnum < 0)
 		return -1;
 	port->dev_num = devnum;
-	set_bit(devnum, dev_map);
 	if (devnum >= IB_UMAD_NUM_FIXED_MINOR) {
 		base_umad = dynamic_umad_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
 		base_issm = dynamic_issm_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
@@ -1227,7 +1226,7 @@ err_dev:
 
 err_cdev:
 	cdev_del(&port->cdev);
-	clear_bit(devnum, dev_map);
+	ida_free(&umad_ida, devnum);
 
 	return -1;
 }
@@ -1261,7 +1260,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
 	}
 
 	mutex_unlock(&port->file_mutex);
-	clear_bit(port->dev_num, dev_map);
+	ida_free(&umad_ida, port->dev_num);
 }
 
 static void ib_umad_add_one(struct ib_device *device)
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 5df8e548cc14..c97935a0c7c6 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -100,13 +100,14 @@ struct ib_uverbs_device {
 	atomic_t				refcount;
 	int					num_comp_vectors;
 	struct completion			comp;
-	struct device			       *dev;
+	struct device				dev;
+	/* First group for device attributes, NULL terminated array */
+	const struct attribute_group		*groups[2];
 	struct ib_device	__rcu	       *ib_dev;
 	int					devnum;
 	struct cdev			        cdev;
 	struct rb_root				xrcd_tree;
 	struct mutex				xrcd_tree_mutex;
-	struct kobject				kobj;
 	struct srcu_struct			disassociate_srcu;
 	struct mutex				lists_mutex; /* protect lists */
 	struct list_head			uverbs_file_list;
@@ -146,7 +147,6 @@ struct ib_uverbs_file {
 	struct ib_event_handler			event_handler;
 	struct ib_uverbs_async_event_file       *async_file;
 	struct list_head			list;
-	int					is_closed;
 
 	/*
 	 * To access the uobjects list hw_destroy_rwsem must be held for write
@@ -158,6 +158,9 @@ struct ib_uverbs_file {
 	spinlock_t		uobjects_lock;
 	struct list_head	uobjects;
 
+	struct mutex umap_lock;
+	struct list_head umaps;
+
 	u64 uverbs_cmd_mask;
 	u64 uverbs_ex_cmd_mask;
 
@@ -218,12 +221,6 @@ struct ib_ucq_object {
 	u32			async_events_reported;
 };
 
-struct ib_uflow_resources;
-struct ib_uflow_object {
-	struct ib_uobject		uobject;
-	struct ib_uflow_resources	*resources;
-};
-
 extern const struct file_operations uverbs_event_fops;
 void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
 struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index e012ca80f9d1..a93853770e3c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -117,18 +117,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 	/* ufile is required when some objects are released */
 	ucontext->ufile = file;
 
-	rcu_read_lock();
-	ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
-	rcu_read_unlock();
-	ucontext->closing = 0;
+	ucontext->closing = false;
 	ucontext->cleanup_retryable = false;
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	ucontext->umem_tree = RB_ROOT_CACHED;
-	init_rwsem(&ucontext->umem_rwsem);
-	ucontext->odp_mrs_count = 0;
-	INIT_LIST_HEAD(&ucontext->no_private_counters);
-
+	mutex_init(&ucontext->per_mm_list_lock);
+	INIT_LIST_HEAD(&ucontext->per_mm_list);
 	if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
 		ucontext->invalidate_range = NULL;
 
@@ -172,7 +166,6 @@ err_fd:
 	put_unused_fd(resp.async_fd);
 
 err_free:
-	put_pid(ucontext->tgid);
 	ib_dev->dealloc_ucontext(ucontext);
 
 err_alloc:
@@ -2769,16 +2762,7 @@ out_put:
 	return ret ? ret : in_len;
 }
 
-struct ib_uflow_resources {
-	size_t			max;
-	size_t			num;
-	size_t			collection_num;
-	size_t			counters_num;
-	struct ib_counters	**counters;
-	struct ib_flow_action	**collection;
-};
-
-static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
+struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
 {
 	struct ib_uflow_resources *resources;
 
@@ -2808,6 +2792,7 @@ err:
 
 	return NULL;
 }
+EXPORT_SYMBOL(flow_resources_alloc);
 
 void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
 {
@@ -2826,10 +2811,11 @@ void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
 	kfree(uflow_res->counters);
 	kfree(uflow_res);
 }
+EXPORT_SYMBOL(ib_uverbs_flow_resources_free);
 
-static void flow_resources_add(struct ib_uflow_resources *uflow_res,
-			       enum ib_flow_spec_type type,
-			       void *ibobj)
+void flow_resources_add(struct ib_uflow_resources *uflow_res,
+			enum ib_flow_spec_type type,
+			void *ibobj)
 {
 	WARN_ON(uflow_res->num >= uflow_res->max);
 
@@ -2850,6 +2836,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res,
 
 	uflow_res->num++;
 }
+EXPORT_SYMBOL(flow_resources_add);
 
 static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
 				       struct ib_uverbs_flow_spec *kern_spec,
@@ -3484,7 +3471,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 	struct ib_uverbs_create_flow	  cmd;
 	struct ib_uverbs_create_flow_resp resp;
 	struct ib_uobject		  *uobj;
-	struct ib_uflow_object		  *uflow;
 	struct ib_flow			  *flow_id;
 	struct ib_uverbs_flow_attr	  *kern_flow_attr;
 	struct ib_flow_attr		  *flow_attr;
@@ -3623,13 +3609,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 		err = PTR_ERR(flow_id);
 		goto err_free;
 	}
-	atomic_inc(&qp->usecnt);
-	flow_id->qp = qp;
-	flow_id->device = qp->device;
-	flow_id->uobject = uobj;
-	uobj->object = flow_id;
-	uflow = container_of(uobj, typeof(*uflow), uobject);
-	uflow->resources = uflow_res;
+
+	ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res);
 
 	memset(&resp, 0, sizeof(resp));
 	resp.flow_handle = uobj->id;
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 1a6b229e3db3..b0e493e8d860 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -57,6 +57,7 @@ struct bundle_priv {
 	struct ib_uverbs_attr *uattrs;
 
 	DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
+	DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN);
 
 	/*
 	 * Must be last. bundle ends in a flex array which overlaps
@@ -143,6 +144,86 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
 			   0, uattr->len - len);
 }
 
+static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
+				     const struct uverbs_api_attr *attr_uapi,
+				     struct uverbs_objs_arr_attr *attr,
+				     struct ib_uverbs_attr *uattr,
+				     u32 attr_bkey)
+{
+	const struct uverbs_attr_spec *spec = &attr_uapi->spec;
+	size_t array_len;
+	u32 *idr_vals;
+	int ret = 0;
+	size_t i;
+
+	if (uattr->attr_data.reserved)
+		return -EINVAL;
+
+	if (uattr->len % sizeof(u32))
+		return -EINVAL;
+
+	array_len = uattr->len / sizeof(u32);
+	if (array_len < spec->u2.objs_arr.min_len ||
+	    array_len > spec->u2.objs_arr.max_len)
+		return -EINVAL;
+
+	attr->uobjects =
+		uverbs_alloc(&pbundle->bundle,
+			     array_size(array_len, sizeof(*attr->uobjects)));
+	if (IS_ERR(attr->uobjects))
+		return PTR_ERR(attr->uobjects);
+
+	/*
+	 * Since idr is 4B and *uobjects is >= 4B, we can use attr->uobjects
+	 * to store idrs array and avoid additional memory allocation. The
+	 * idrs array is offset to the end of the uobjects array so we will be
+	 * able to read idr and replace with a pointer.
+	 */
+	idr_vals = (u32 *)(attr->uobjects + array_len) - array_len;
+
+	if (uattr->len > sizeof(uattr->data)) {
+		ret = copy_from_user(idr_vals, u64_to_user_ptr(uattr->data),
+				     uattr->len);
+		if (ret)
+			return -EFAULT;
+	} else {
+		memcpy(idr_vals, &uattr->data, uattr->len);
+	}
+
+	for (i = 0; i != array_len; i++) {
+		attr->uobjects[i] = uverbs_get_uobject_from_file(
+			spec->u2.objs_arr.obj_type, pbundle->bundle.ufile,
+			spec->u2.objs_arr.access, idr_vals[i]);
+		if (IS_ERR(attr->uobjects[i])) {
+			ret = PTR_ERR(attr->uobjects[i]);
+			break;
+		}
+	}
+
+	attr->len = i;
+	__set_bit(attr_bkey, pbundle->spec_finalize);
+	return ret;
+}
+
+static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
+				  struct uverbs_objs_arr_attr *attr,
+				  bool commit)
+{
+	const struct uverbs_attr_spec *spec = &attr_uapi->spec;
+	int current_ret;
+	int ret = 0;
+	size_t i;
+
+	for (i = 0; i != attr->len; i++) {
+		current_ret = uverbs_finalize_object(
+			attr->uobjects[i], spec->u2.objs_arr.access, commit);
+		if (!ret)
+			ret = current_ret;
+	}
+
+	return ret;
+}
+
 static int uverbs_process_attr(struct bundle_priv *pbundle,
 			       const struct uverbs_api_attr *attr_uapi,
 			       struct ib_uverbs_attr *uattr, u32 attr_bkey)
@@ -246,6 +327,11 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
 		}
 
 		break;
+
+	case UVERBS_ATTR_TYPE_IDRS_ARRAY:
+		return uverbs_process_idrs_array(pbundle, attr_uapi,
+						 &e->objs_arr_attr, uattr,
+						 attr_bkey);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -300,8 +386,7 @@ static int uverbs_set_attr(struct bundle_priv *pbundle,
 			return -EPROTONOSUPPORT;
 		return 0;
 	}
-	attr = srcu_dereference(
-		*slot, &pbundle->bundle.ufile->device->disassociate_srcu);
+	attr = rcu_dereference_protected(*slot, true);
 
 	/* Reject duplicate attributes from user-space */
 	if (test_bit(attr_bkey, pbundle->bundle.attr_present))
@@ -384,6 +469,7 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
 	unsigned int i;
 	int ret = 0;
 
+	/* fast path for simple uobjects */
 	i = -1;
 	while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
 				  i + 1)) < key_bitmap_len) {
@@ -397,6 +483,30 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
 			ret = current_ret;
 	}
 
+	i = -1;
+	while ((i = find_next_bit(pbundle->spec_finalize, key_bitmap_len,
+				  i + 1)) < key_bitmap_len) {
+		struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
+		const struct uverbs_api_attr *attr_uapi;
+		void __rcu **slot;
+		int current_ret;
+
+		slot = uapi_get_attr_for_method(
+			pbundle,
+			pbundle->method_key | uapi_bkey_to_key_attr(i));
+		if (WARN_ON(!slot))
+			continue;
+
+		attr_uapi = rcu_dereference_protected(*slot, true);
+
+		if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
+			current_ret = uverbs_free_idrs_array(
+				attr_uapi, &attr->objs_arr_attr, commit);
+			if (!ret)
+				ret = current_ret;
+		}
+	}
+
 	for (memblock = pbundle->allocated_mem; memblock;) {
 		struct bundle_alloc_head *tmp = memblock;
 
@@ -429,7 +539,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
 			uapi_key_ioctl_method(hdr->method_id));
 	if (unlikely(!slot))
 		return -EPROTONOSUPPORT;
-	method_elm = srcu_dereference(*slot, &ufile->device->disassociate_srcu);
+	method_elm = rcu_dereference_protected(*slot, true);
 
 	if (!method_elm->use_stack) {
 		pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL);
@@ -461,6 +571,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
 	memset(pbundle->bundle.attr_present, 0,
 	       sizeof(pbundle->bundle.attr_present));
 	memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
+	memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize));
 
 	ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
 	destroy_ret = bundle_destroy(pbundle, ret == 0);
@@ -611,3 +722,26 @@ int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx,
 	return 0;
 }
 EXPORT_SYMBOL(uverbs_copy_to);
+
+int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		      size_t idx, s64 lower_bound, u64 upper_bound,
+		      s64  *def_val)
+{
+	const struct uverbs_attr *attr;
+
+	attr = uverbs_attr_get(attrs_bundle, idx);
+	if (IS_ERR(attr)) {
+		if ((PTR_ERR(attr) != -ENOENT) || !def_val)
+			return PTR_ERR(attr);
+
+		*to = *def_val;
+	} else {
+		*to = attr->ptr_attr.data;
+	}
+
+	if (*to < lower_bound || (*to > 0 && (u64)*to > upper_bound))
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL(_uverbs_get_const);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 50152c1b1004..6d373f5515b7 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -45,6 +45,7 @@
 #include <linux/cdev.h>
 #include <linux/anon_inodes.h>
 #include <linux/slab.h>
+#include <linux/sched/mm.h>
 
 #include <linux/uaccess.h>
 
@@ -72,7 +73,7 @@ enum {
 static dev_t dynamic_uverbs_dev;
 static struct class *uverbs_class;
 
-static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
+static DEFINE_IDA(uverbs_ida);
 
 static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
 				     const char __user *buf, int in_len,
@@ -169,20 +170,16 @@ int uverbs_dealloc_mw(struct ib_mw *mw)
 	return ret;
 }
 
-static void ib_uverbs_release_dev(struct kobject *kobj)
+static void ib_uverbs_release_dev(struct device *device)
 {
 	struct ib_uverbs_device *dev =
-		container_of(kobj, struct ib_uverbs_device, kobj);
+			container_of(device, struct ib_uverbs_device, dev);
 
 	uverbs_destroy_api(dev->uapi);
 	cleanup_srcu_struct(&dev->disassociate_srcu);
 	kfree(dev);
 }
 
-static struct kobj_type ib_uverbs_dev_ktype = {
-	.release = ib_uverbs_release_dev,
-};
-
 static void ib_uverbs_release_async_event_file(struct kref *ref)
 {
 	struct ib_uverbs_async_event_file *file =
@@ -265,7 +262,7 @@ void ib_uverbs_release_file(struct kref *ref)
 	if (atomic_dec_and_test(&file->device->refcount))
 		ib_uverbs_comp_dev(file->device);
 
-	kobject_put(&file->device->kobj);
+	put_device(&file->device->dev);
 	kfree(file);
 }
 
@@ -817,6 +814,226 @@ out:
 }
 
 /*
+ * Each time we map IO memory into user space this keeps track of the mapping.
+ * When the device is hot-unplugged we 'zap' the mmaps in user space to point
+ * to the zero page and allow the hot unplug to proceed.
+ *
+ * This is necessary for cases like PCI physical hot unplug as the actual BAR
+ * memory may vanish after this and access to it from userspace could MCE.
+ *
+ * RDMA drivers supporting disassociation must have their user space designed
+ * to cope in some way with their IO pages going to the zero page.
+ */
+struct rdma_umap_priv {
+	struct vm_area_struct *vma;
+	struct list_head list;
+};
+
+static const struct vm_operations_struct rdma_umap_ops;
+
+static void rdma_umap_priv_init(struct rdma_umap_priv *priv,
+				struct vm_area_struct *vma)
+{
+	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+
+	priv->vma = vma;
+	vma->vm_private_data = priv;
+	vma->vm_ops = &rdma_umap_ops;
+
+	mutex_lock(&ufile->umap_lock);
+	list_add(&priv->list, &ufile->umaps);
+	mutex_unlock(&ufile->umap_lock);
+}
+
+/*
+ * The VMA has been dup'd, initialize the vm_private_data with a new tracking
+ * struct
+ */
+static void rdma_umap_open(struct vm_area_struct *vma)
+{
+	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+	struct rdma_umap_priv *opriv = vma->vm_private_data;
+	struct rdma_umap_priv *priv;
+
+	if (!opriv)
+		return;
+
+	/* We are racing with disassociation */
+	if (!down_read_trylock(&ufile->hw_destroy_rwsem))
+		goto out_zap;
+	/*
+	 * Disassociation already completed, the VMA should already be zapped.
+	 */
+	if (!ufile->ucontext)
+		goto out_unlock;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		goto out_unlock;
+	rdma_umap_priv_init(priv, vma);
+
+	up_read(&ufile->hw_destroy_rwsem);
+	return;
+
+out_unlock:
+	up_read(&ufile->hw_destroy_rwsem);
+out_zap:
+	/*
+	 * We can't allow the VMA to be created with the actual IO pages, that
+	 * would break our API contract, and it can't be stopped at this
+	 * point, so zap it.
+	 */
+	vma->vm_private_data = NULL;
+	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+}
+
+static void rdma_umap_close(struct vm_area_struct *vma)
+{
+	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+	struct rdma_umap_priv *priv = vma->vm_private_data;
+
+	if (!priv)
+		return;
+
+	/*
+	 * The vma holds a reference on the struct file that created it, which
+	 * in turn means that the ib_uverbs_file is guaranteed to exist at
+	 * this point.
+	 */
+	mutex_lock(&ufile->umap_lock);
+	list_del(&priv->list);
+	mutex_unlock(&ufile->umap_lock);
+	kfree(priv);
+}
+
+static const struct vm_operations_struct rdma_umap_ops = {
+	.open = rdma_umap_open,
+	.close = rdma_umap_close,
+};
+
+static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
+						 struct vm_area_struct *vma,
+						 unsigned long size)
+{
+	struct ib_uverbs_file *ufile = ucontext->ufile;
+	struct rdma_umap_priv *priv;
+
+	if (vma->vm_end - vma->vm_start != size)
+		return ERR_PTR(-EINVAL);
+
+	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
+	if (WARN_ON(!vma->vm_file ||
+		    vma->vm_file->private_data != ufile))
+		return ERR_PTR(-EINVAL);
+	lockdep_assert_held(&ufile->device->disassociate_srcu);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return ERR_PTR(-ENOMEM);
+	return priv;
+}
+
+/*
+ * Map IO memory into a process. This is to be called by drivers as part of
+ * their mmap() functions if they wish to send something like PCI-E BAR memory
+ * to userspace.
+ */
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+		      unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+	struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);
+
+	if (IS_ERR(priv))
+		return PTR_ERR(priv);
+
+	vma->vm_page_prot = prot;
+	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
+		kfree(priv);
+		return -EAGAIN;
+	}
+
+	rdma_umap_priv_init(priv, vma);
+	return 0;
+}
+EXPORT_SYMBOL(rdma_user_mmap_io);
+
+/*
+ * The page case is here for a slightly different reason, the driver expects
+ * to be able to free the page it is sharing to user space when it destroys
+ * its ucontext, which means we need to zap the user space references.
+ *
+ * We could handle this differently by providing an API to allocate a shared
+ * page and then only freeing the shared page when the last ufile is
+ * destroyed.
+ */
+int rdma_user_mmap_page(struct ib_ucontext *ucontext,
+			struct vm_area_struct *vma, struct page *page,
+			unsigned long size)
+{
+	struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);
+
+	if (IS_ERR(priv))
+		return PTR_ERR(priv);
+
+	if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size,
+			    vma->vm_page_prot)) {
+		kfree(priv);
+		return -EAGAIN;
+	}
+
+	rdma_umap_priv_init(priv, vma);
+	return 0;
+}
+EXPORT_SYMBOL(rdma_user_mmap_page);
+
+void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
+{
+	struct rdma_umap_priv *priv, *next_priv;
+
+	lockdep_assert_held(&ufile->hw_destroy_rwsem);
+
+	while (1) {
+		struct mm_struct *mm = NULL;
+
+		/* Get an arbitrary mm pointer that hasn't been cleaned yet */
+		mutex_lock(&ufile->umap_lock);
+		if (!list_empty(&ufile->umaps)) {
+			mm = list_first_entry(&ufile->umaps,
+					      struct rdma_umap_priv, list)
+				     ->vma->vm_mm;
+			mmget(mm);
+		}
+		mutex_unlock(&ufile->umap_lock);
+		if (!mm)
+			return;
+
+		/*
+		 * The umap_lock is nested under mmap_sem since it used within
+		 * the vma_ops callbacks, so we have to clean the list one mm
+		 * at a time to get the lock ordering right. Typically there
+		 * will only be one mm, so no big deal.
+		 */
+		down_write(&mm->mmap_sem);
+		mutex_lock(&ufile->umap_lock);
+		list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
+					  list) {
+			struct vm_area_struct *vma = priv->vma;
+
+			if (vma->vm_mm != mm)
+				continue;
+			list_del_init(&priv->list);
+
+			zap_vma_ptes(vma, vma->vm_start,
+				     vma->vm_end - vma->vm_start);
+			vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
+		}
+		mutex_unlock(&ufile->umap_lock);
+		up_write(&mm->mmap_sem);
+		mmput(mm);
+	}
+}
+
+/*
  * ib_uverbs_open() does not need the BKL:
  *
  *  - the ib_uverbs_device structures are properly reference counted and
@@ -839,6 +1056,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
 	if (!atomic_inc_not_zero(&dev->refcount))
 		return -ENXIO;
 
+	get_device(&dev->dev);
 	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
 	mutex_lock(&dev->lists_mutex);
 	ib_dev = srcu_dereference(dev->ib_dev,
@@ -876,9 +1094,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
 	spin_lock_init(&file->uobjects_lock);
 	INIT_LIST_HEAD(&file->uobjects);
 	init_rwsem(&file->hw_destroy_rwsem);
+	mutex_init(&file->umap_lock);
+	INIT_LIST_HEAD(&file->umaps);
 
 	filp->private_data = file;
-	kobject_get(&dev->kobj);
 	list_add_tail(&file->list, &dev->uverbs_file_list);
 	mutex_unlock(&dev->lists_mutex);
 	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
@@ -899,6 +1118,7 @@ err:
 	if (atomic_dec_and_test(&dev->refcount))
 		ib_uverbs_comp_dev(dev);
 
+	put_device(&dev->dev);
 	return ret;
 }
 
@@ -909,10 +1129,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
 	uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
 
 	mutex_lock(&file->device->lists_mutex);
-	if (!file->is_closed) {
-		list_del(&file->list);
-		file->is_closed = 1;
-	}
+	list_del_init(&file->list);
 	mutex_unlock(&file->device->lists_mutex);
 
 	if (file->async_file)
@@ -951,37 +1168,34 @@ static struct ib_client uverbs_client = {
 	.remove = ib_uverbs_remove_one
 };
 
-static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
+static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
 			  char *buf)
 {
+	struct ib_uverbs_device *dev =
+			container_of(device, struct ib_uverbs_device, dev);
 	int ret = -ENODEV;
 	int srcu_key;
-	struct ib_uverbs_device *dev = dev_get_drvdata(device);
 	struct ib_device *ib_dev;
 
-	if (!dev)
-		return -ENODEV;
-
 	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
 	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
 	if (ib_dev)
-		ret = sprintf(buf, "%s\n", ib_dev->name);
+		ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev));
 	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
 
 	return ret;
 }
-static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+static DEVICE_ATTR_RO(ibdev);
 
-static ssize_t show_dev_abi_version(struct device *device,
-				    struct device_attribute *attr, char *buf)
+static ssize_t abi_version_show(struct device *device,
+				struct device_attribute *attr, char *buf)
 {
-	struct ib_uverbs_device *dev = dev_get_drvdata(device);
+	struct ib_uverbs_device *dev =
+			container_of(device, struct ib_uverbs_device, dev);
 	int ret = -ENODEV;
 	int srcu_key;
 	struct ib_device *ib_dev;
 
-	if (!dev)
-		return -ENODEV;
 	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
 	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
 	if (ib_dev)
@@ -990,7 +1204,17 @@ static ssize_t show_dev_abi_version(struct device *device,
 
 	return ret;
 }
-static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
+static DEVICE_ATTR_RO(abi_version);
+
+static struct attribute *ib_dev_attrs[] = {
+	&dev_attr_abi_version.attr,
+	&dev_attr_ibdev.attr,
+	NULL,
+};
+
+static const struct attribute_group dev_attr_group = {
+	.attrs = ib_dev_attrs,
+};
 
 static CLASS_ATTR_STRING(abi_version, S_IRUGO,
 			 __stringify(IB_USER_VERBS_ABI_VERSION));
@@ -1028,65 +1252,56 @@ static void ib_uverbs_add_one(struct ib_device *device)
 		return;
 	}
 
+	device_initialize(&uverbs_dev->dev);
+	uverbs_dev->dev.class = uverbs_class;
+	uverbs_dev->dev.parent = device->dev.parent;
+	uverbs_dev->dev.release = ib_uverbs_release_dev;
+	uverbs_dev->groups[0] = &dev_attr_group;
+	uverbs_dev->dev.groups = uverbs_dev->groups;
 	atomic_set(&uverbs_dev->refcount, 1);
 	init_completion(&uverbs_dev->comp);
 	uverbs_dev->xrcd_tree = RB_ROOT;
 	mutex_init(&uverbs_dev->xrcd_tree_mutex);
-	kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
 	mutex_init(&uverbs_dev->lists_mutex);
 	INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
 	INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
+	rcu_assign_pointer(uverbs_dev->ib_dev, device);
+	uverbs_dev->num_comp_vectors = device->num_comp_vectors;
 
-	devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
-	if (devnum >= IB_UVERBS_MAX_DEVICES)
+	devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
+			       GFP_KERNEL);
+	if (devnum < 0)
 		goto err;
 	uverbs_dev->devnum = devnum;
-	set_bit(devnum, dev_map);
 	if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
 		base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
 	else
 		base = IB_UVERBS_BASE_DEV + devnum;
 
-	rcu_assign_pointer(uverbs_dev->ib_dev, device);
-	uverbs_dev->num_comp_vectors = device->num_comp_vectors;
-
 	if (ib_uverbs_create_uapi(device, uverbs_dev))
 		goto err_uapi;
 
-	cdev_init(&uverbs_dev->cdev, NULL);
+	uverbs_dev->dev.devt = base;
+	dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);
+
+	cdev_init(&uverbs_dev->cdev,
+		  device->mmap ? &uverbs_mmap_fops : &uverbs_fops);
 	uverbs_dev->cdev.owner = THIS_MODULE;
-	uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
-	cdev_set_parent(&uverbs_dev->cdev, &uverbs_dev->kobj);
-	kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
-	if (cdev_add(&uverbs_dev->cdev, base, 1))
-		goto err_cdev;
-
-	uverbs_dev->dev = device_create(uverbs_class, device->dev.parent,
-					uverbs_dev->cdev.dev, uverbs_dev,
-					"uverbs%d", uverbs_dev->devnum);
-	if (IS_ERR(uverbs_dev->dev))
-		goto err_cdev;
-
-	if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
-		goto err_class;
-	if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
-		goto err_class;
 
-	ib_set_client_data(device, &uverbs_client, uverbs_dev);
+	ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
+	if (ret)
+		goto err_uapi;
 
+	ib_set_client_data(device, &uverbs_client, uverbs_dev);
 	return;
 
-err_class:
-	device_destroy(uverbs_class, uverbs_dev->cdev.dev);
-err_cdev:
-	cdev_del(&uverbs_dev->cdev);
 err_uapi:
-	clear_bit(devnum, dev_map);
+	ida_free(&uverbs_ida, devnum);
 err:
 	if (atomic_dec_and_test(&uverbs_dev->refcount))
 		ib_uverbs_comp_dev(uverbs_dev);
 	wait_for_completion(&uverbs_dev->comp);
-	kobject_put(&uverbs_dev->kobj);
+	put_device(&uverbs_dev->dev);
 	return;
 }
 
@@ -1107,8 +1322,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 	while (!list_empty(&uverbs_dev->uverbs_file_list)) {
 		file = list_first_entry(&uverbs_dev->uverbs_file_list,
 					struct ib_uverbs_file, list);
-		file->is_closed = 1;
-		list_del(&file->list);
+		list_del_init(&file->list);
 		kref_get(&file->ref);
 
 		/* We must release the mutex before going ahead and calling
@@ -1156,10 +1370,8 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
 	if (!uverbs_dev)
 		return;
 
-	dev_set_drvdata(uverbs_dev->dev, NULL);
-	device_destroy(uverbs_class, uverbs_dev->cdev.dev);
-	cdev_del(&uverbs_dev->cdev);
-	clear_bit(uverbs_dev->devnum, dev_map);
+	cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
+	ida_free(&uverbs_ida, uverbs_dev->devnum);
 
 	if (device->disassociate_ucontext) {
 		/* We disassociate HW resources and immediately return.
@@ -1182,7 +1394,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
 	if (wait_clients)
 		wait_for_completion(&uverbs_dev->comp);
 
-	kobject_put(&uverbs_dev->kobj);
+	put_device(&uverbs_dev->dev);
 }
 
 static char *uverbs_devnode(struct device *dev, umode_t *mode)
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index d8cfafe23bd9..cb9486ad5c67 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -326,11 +326,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
 	if (IS_ERR(action))
 		return PTR_ERR(action);
 
-	atomic_set(&action->usecnt, 0);
-	action->device = ib_dev;
-	action->type = IB_FLOW_ACTION_ESP;
-	action->uobject = uobj;
-	uobj->object = action;
+	uverbs_flow_action_fill_action(action, uobj, ib_dev,
+				       IB_FLOW_ACTION_ESP);
 
 	return 0;
 }
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index be854628a7c6..86f3fc5e04b4 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -73,6 +73,18 @@ static int uapi_merge_method(struct uverbs_api *uapi,
 		if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN)
 			method_elm->driver_method |= is_driver;
 
+		/*
+		 * Like other uobject based things we only support a single
+		 * uobject being NEW'd or DESTROY'd
+		 */
+		if (attr->attr.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
+			u8 access = attr->attr.u2.objs_arr.access;
+
+			if (WARN_ON(access == UVERBS_ACCESS_NEW ||
+				    access == UVERBS_ACCESS_DESTROY))
+				return -EINVAL;
+		}
+
 		attr_slot =
 			uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id),
 				     sizeof(*attr_slot));
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 8ec7418e99f0..178899e3ce73 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -264,7 +264,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
 	}
 
 	pd->res.type = RDMA_RESTRACK_PD;
-	pd->res.kern_name = caller;
+	rdma_restrack_set_task(&pd->res, caller);
 	rdma_restrack_add(&pd->res);
 
 	if (mr_access_flags) {
@@ -710,7 +710,7 @@ static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
 
 	ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
 					   ah_attr->roce.dmac,
-					   sgid_attr->ndev, &hop_limit);
+					   sgid_attr, &hop_limit);
 
 	grh->hop_limit = hop_limit;
 	return ret;
@@ -1509,8 +1509,7 @@ static const struct {
 };
 
 bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
-			enum ib_qp_type type, enum ib_qp_attr_mask mask,
-			enum rdma_link_layer ll)
+			enum ib_qp_type type, enum ib_qp_attr_mask mask)
 {
 	enum ib_qp_attr_mask req_param, opt_param;
 
@@ -1629,14 +1628,16 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
 
 	if (rdma_ib_or_roce(qp->device, port)) {
 		if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
-			pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n",
-				__func__, qp->device->name);
+			dev_warn(&qp->device->dev,
+				 "%s rq_psn overflow, masking to 24 bits\n",
+				 __func__);
 			attr->rq_psn &= 0xffffff;
 		}
 
 		if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
-			pr_warn("%s: %s sq_psn overflow, masking to 24 bits\n",
-				__func__, qp->device->name);
+			dev_warn(&qp->device->dev,
+				 " %s sq_psn overflow, masking to 24 bits\n",
+				 __func__);
 			attr->sq_psn &= 0xffffff;
 		}
 	}
@@ -1888,7 +1889,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
 		cq->cq_context    = cq_context;
 		atomic_set(&cq->usecnt, 0);
 		cq->res.type = RDMA_RESTRACK_CQ;
-		cq->res.kern_name = caller;
+		rdma_restrack_set_task(&cq->res, caller);
 		rdma_restrack_add(&cq->res);
 	}
 
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 96f76896488d..31baa8939a4f 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -40,7 +40,6 @@
 #ifndef __BNXT_RE_H__
 #define __BNXT_RE_H__
 #define ROCE_DRV_MODULE_NAME		"bnxt_re"
-#define ROCE_DRV_MODULE_VERSION		"1.0.0"
 
 #define BNXT_RE_DESC	"Broadcom NetXtreme-C/E RoCE Driver"
 #define BNXT_RE_PAGE_SHIFT_4K		(12)
@@ -120,6 +119,8 @@ struct bnxt_re_dev {
 #define BNXT_RE_FLAG_HAVE_L2_REF		3
 #define BNXT_RE_FLAG_RCFW_CHANNEL_EN		4
 #define BNXT_RE_FLAG_QOS_WORK_REG		5
+#define BNXT_RE_FLAG_RESOURCES_ALLOCATED	7
+#define BNXT_RE_FLAG_RESOURCES_INITIALIZED	8
 #define BNXT_RE_FLAG_ISSUE_ROCE_STATS          29
 	struct net_device		*netdev;
 	unsigned int			version, major, minor;
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index 77416bc61e6e..604b71875f5f 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -68,6 +68,8 @@ static const char * const bnxt_re_stat_name[] = {
 	[BNXT_RE_TX_PKTS]		=  "tx_pkts",
 	[BNXT_RE_TX_BYTES]		=  "tx_bytes",
 	[BNXT_RE_RECOVERABLE_ERRORS]	=  "recoverable_errors",
+	[BNXT_RE_RX_DROPS]		=  "rx_roce_drops",
+	[BNXT_RE_RX_DISCARDS]		=  "rx_roce_discards",
 	[BNXT_RE_TO_RETRANSMITS]        = "to_retransmits",
 	[BNXT_RE_SEQ_ERR_NAKS_RCVD]     = "seq_err_naks_rcvd",
 	[BNXT_RE_MAX_RETRY_EXCEEDED]    = "max_retry_exceeded",
@@ -106,7 +108,8 @@ static const char * const bnxt_re_stat_name[] = {
 	[BNXT_RE_RES_CQ_LOAD_ERR]       = "res_cq_load_err",
 	[BNXT_RE_RES_SRQ_LOAD_ERR]      = "res_srq_load_err",
 	[BNXT_RE_RES_TX_PCI_ERR]        = "res_tx_pci_err",
-	[BNXT_RE_RES_RX_PCI_ERR]        = "res_rx_pci_err"
+	[BNXT_RE_RES_RX_PCI_ERR]        = "res_rx_pci_err",
+	[BNXT_RE_OUT_OF_SEQ_ERR]        = "oos_drop_count"
 };
 
 int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
@@ -128,6 +131,10 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
 	if (bnxt_re_stats) {
 		stats->value[BNXT_RE_RECOVERABLE_ERRORS] =
 			le64_to_cpu(bnxt_re_stats->tx_bcast_pkts);
+		stats->value[BNXT_RE_RX_DROPS] =
+			le64_to_cpu(bnxt_re_stats->rx_drop_pkts);
+		stats->value[BNXT_RE_RX_DISCARDS] =
+			le64_to_cpu(bnxt_re_stats->rx_discard_pkts);
 		stats->value[BNXT_RE_RX_PKTS] =
 			le64_to_cpu(bnxt_re_stats->rx_ucast_pkts);
 		stats->value[BNXT_RE_RX_BYTES] =
@@ -220,6 +227,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
 				rdev->stats.res_tx_pci_err;
 		stats->value[BNXT_RE_RES_RX_PCI_ERR]    =
 				rdev->stats.res_rx_pci_err;
+		stats->value[BNXT_RE_OUT_OF_SEQ_ERR]    =
+				rdev->stats.res_oos_drop_count;
 	}
 
 	return ARRAY_SIZE(bnxt_re_stat_name);
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h
index a01a922717d5..76399f477e5c 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.h
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h
@@ -51,6 +51,8 @@ enum bnxt_re_hw_stats {
 	BNXT_RE_TX_PKTS,
 	BNXT_RE_TX_BYTES,
 	BNXT_RE_RECOVERABLE_ERRORS,
+	BNXT_RE_RX_DROPS,
+	BNXT_RE_RX_DISCARDS,
 	BNXT_RE_TO_RETRANSMITS,
 	BNXT_RE_SEQ_ERR_NAKS_RCVD,
 	BNXT_RE_MAX_RETRY_EXCEEDED,
@@ -90,6 +92,7 @@ enum bnxt_re_hw_stats {
 	BNXT_RE_RES_SRQ_LOAD_ERR,
 	BNXT_RE_RES_TX_PCI_ERR,
 	BNXT_RE_RES_RX_PCI_ERR,
+	BNXT_RE_OUT_OF_SEQ_ERR,
 	BNXT_RE_NUM_COUNTERS
 };
 
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index bc2b9e038439..54fdd4cf5288 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1598,8 +1598,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 		curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state);
 		new_qp_state = qp_attr->qp_state;
 		if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state,
-					ib_qp->qp_type, qp_attr_mask,
-					IB_LINK_LAYER_ETHERNET)) {
+					ib_qp->qp_type, qp_attr_mask)) {
 			dev_err(rdev_to_dev(rdev),
 				"Invalid attribute mask: %#x specified ",
 				qp_attr_mask);
@@ -2664,6 +2663,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
 	nq->budget++;
 
 	atomic_inc(&rdev->cq_count);
+	spin_lock_init(&cq->cq_lock);
 
 	if (context) {
 		struct bnxt_re_cq_resp resp;
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 85cd1a3593d6..cf2282654210 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -67,7 +67,7 @@
 #include "hw_counters.h"
 
 static char version[] =
-		BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n";
+		BNXT_RE_DESC "\n";
 
 MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
 MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
@@ -535,6 +535,34 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
 	return en_dev;
 }
 
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+			   char *buf)
+{
+	struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+	return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
+}
+static DEVICE_ATTR_RO(hw_rev);
+
+static ssize_t hca_type_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
+{
+	struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
+}
+static DEVICE_ATTR_RO(hca_type);
+
+static struct attribute *bnxt_re_attributes[] = {
+	&dev_attr_hw_rev.attr,
+	&dev_attr_hca_type.attr,
+	NULL
+};
+
+static const struct attribute_group bnxt_re_dev_attr_group = {
+	.attrs = bnxt_re_attributes,
+};
+
 static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
 {
 	ib_unregister_device(&rdev->ibdev);
@@ -547,7 +575,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 	/* ib device init */
 	ibdev->owner = THIS_MODULE;
 	ibdev->node_type = RDMA_NODE_IB_CA;
-	strlcpy(ibdev->name, "bnxt_re%d", IB_DEVICE_NAME_MAX);
 	strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
 		strlen(BNXT_RE_DESC) + 5);
 	ibdev->phys_port_cnt = 1;
@@ -639,34 +666,11 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 	ibdev->get_hw_stats             = bnxt_re_ib_get_hw_stats;
 	ibdev->alloc_hw_stats           = bnxt_re_ib_alloc_hw_stats;
 
+	rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group);
 	ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
-	return ib_register_device(ibdev, NULL);
-}
-
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
-			char *buf)
-{
-	struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
-
-	return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
-}
-
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
-			char *buf)
-{
-	struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
+	return ib_register_device(ibdev, "bnxt_re%d", NULL);
 }
 
-static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
-static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
-
-static struct device_attribute *bnxt_re_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type
-};
-
 static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
 {
 	dev_put(rdev->netdev);
@@ -864,10 +868,8 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
 {
 	int i;
 
-	if (rdev->nq[0].hwq.max_elements) {
-		for (i = 1; i < rdev->num_msix; i++)
-			bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
-	}
+	for (i = 1; i < rdev->num_msix; i++)
+		bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
 
 	if (rdev->qplib_res.rcfw)
 		bnxt_qplib_cleanup_res(&rdev->qplib_res);
@@ -876,6 +878,7 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
 static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
 {
 	int rc = 0, i;
+	int num_vec_enabled = 0;
 
 	bnxt_qplib_init_res(&rdev->qplib_res);
 
@@ -891,9 +894,13 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
 				"Failed to enable NQ with rc = 0x%x", rc);
 			goto fail;
 		}
+		num_vec_enabled++;
 	}
 	return 0;
 fail:
+	for (i = num_vec_enabled; i >= 0; i--)
+		bnxt_qplib_disable_nq(&rdev->nq[i]);
+
 	return rc;
 }
 
@@ -925,6 +932,7 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
 static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
 {
 	int rc = 0, i;
+	int num_vec_created = 0;
 
 	/* Configure and allocate resources for qplib */
 	rdev->qplib_res.rcfw = &rdev->rcfw;
@@ -951,7 +959,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
 		if (rc) {
 			dev_err(rdev_to_dev(rdev), "Alloc Failed NQ%d rc:%#x",
 				i, rc);
-			goto dealloc_dpi;
+			goto free_nq;
 		}
 		rc = bnxt_re_net_ring_alloc
 			(rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr,
@@ -964,14 +972,17 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
 			dev_err(rdev_to_dev(rdev),
 				"Failed to allocate NQ fw id with rc = 0x%x",
 				rc);
+			bnxt_qplib_free_nq(&rdev->nq[i]);
 			goto free_nq;
 		}
+		num_vec_created++;
 	}
 	return 0;
 free_nq:
-	for (i = 0; i < rdev->num_msix - 1; i++)
+	for (i = num_vec_created; i >= 0; i--) {
+		bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id);
 		bnxt_qplib_free_nq(&rdev->nq[i]);
-dealloc_dpi:
+	}
 	bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
 			       &rdev->qplib_res.dpi_tbl,
 			       &rdev->dpi_privileged);
@@ -989,12 +1000,17 @@ static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
 	struct ib_event ib_event;
 
 	ib_event.device = ibdev;
-	if (qp)
+	if (qp) {
 		ib_event.element.qp = qp;
-	else
+		ib_event.event = event;
+		if (qp->event_handler)
+			qp->event_handler(&ib_event, qp->qp_context);
+
+	} else {
 		ib_event.element.port_num = port_num;
-	ib_event.event = event;
-	ib_dispatch_event(&ib_event);
+		ib_event.event = event;
+		ib_dispatch_event(&ib_event);
+	}
 }
 
 #define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN      0x02
@@ -1189,20 +1205,20 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
 
 static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
 {
-	int i, rc;
+	int rc;
 
 	if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
-		for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++)
-			device_remove_file(&rdev->ibdev.dev,
-					   bnxt_re_attributes[i]);
 		/* Cleanup ib dev */
 		bnxt_re_unregister_ib(rdev);
 	}
 	if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
-		cancel_delayed_work(&rdev->worker);
+		cancel_delayed_work_sync(&rdev->worker);
 
-	bnxt_re_cleanup_res(rdev);
-	bnxt_re_free_res(rdev);
+	if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED,
+			       &rdev->flags))
+		bnxt_re_cleanup_res(rdev);
+	if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags))
+		bnxt_re_free_res(rdev);
 
 	if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
 		rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
@@ -1241,7 +1257,7 @@ static void bnxt_re_worker(struct work_struct *work)
 
 static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 {
-	int i, j, rc;
+	int rc;
 
 	bool locked;
 
@@ -1331,12 +1347,15 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 		pr_err("Failed to allocate resources: %#x\n", rc);
 		goto fail;
 	}
+	set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags);
 	rc = bnxt_re_init_res(rdev);
 	if (rc) {
 		pr_err("Failed to initialize resources: %#x\n", rc);
 		goto fail;
 	}
 
+	set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags);
+
 	if (!rdev->is_virtfn) {
 		rc = bnxt_re_setup_qos(rdev);
 		if (rc)
@@ -1358,20 +1377,6 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 	}
 	set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
 	dev_info(rdev_to_dev(rdev), "Device registered successfully");
-	for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) {
-		rc = device_create_file(&rdev->ibdev.dev,
-					bnxt_re_attributes[i]);
-		if (rc) {
-			dev_err(rdev_to_dev(rdev),
-				"Failed to create IB sysfs: %#x", rc);
-			/* Must clean up all created device files */
-			for (j = 0; j < i; j++)
-				device_remove_file(&rdev->ibdev.dev,
-						   bnxt_re_attributes[j]);
-			bnxt_re_unregister_ib(rdev);
-			goto fail;
-		}
-	}
 	ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
 			 &rdev->active_width);
 	set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 6ad0d46ab879..b98b054148cd 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -36,6 +36,8 @@
  * Description: Fast Path Operators
  */
 
+#define dev_fmt(fmt) "QPLIB: " fmt
+
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
 #include <linux/sched.h>
@@ -71,8 +73,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
 
 	if (!qp->sq.flushed) {
 		dev_dbg(&scq->hwq.pdev->dev,
-			"QPLIB: FP: Adding to SQ Flush list = %p",
-			qp);
+			"FP: Adding to SQ Flush list = %p\n", qp);
 		bnxt_qplib_cancel_phantom_processing(qp);
 		list_add_tail(&qp->sq_flush, &scq->sqf_head);
 		qp->sq.flushed = true;
@@ -80,8 +81,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
 	if (!qp->srq) {
 		if (!qp->rq.flushed) {
 			dev_dbg(&rcq->hwq.pdev->dev,
-				"QPLIB: FP: Adding to RQ Flush list = %p",
-				qp);
+				"FP: Adding to RQ Flush list = %p\n", qp);
 			list_add_tail(&qp->rq_flush, &rcq->rqf_head);
 			qp->rq.flushed = true;
 		}
@@ -207,7 +207,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
 		if (!qp->sq_hdr_buf) {
 			rc = -ENOMEM;
 			dev_err(&res->pdev->dev,
-				"QPLIB: Failed to create sq_hdr_buf");
+				"Failed to create sq_hdr_buf\n");
 			goto fail;
 		}
 	}
@@ -221,7 +221,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
 		if (!qp->rq_hdr_buf) {
 			rc = -ENOMEM;
 			dev_err(&res->pdev->dev,
-				"QPLIB: Failed to create rq_hdr_buf");
+				"Failed to create rq_hdr_buf\n");
 			goto fail;
 		}
 	}
@@ -277,8 +277,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
 				num_cqne_processed++;
 			else
 				dev_warn(&nq->pdev->dev,
-					 "QPLIB: cqn - type 0x%x not handled",
-					 type);
+					 "cqn - type 0x%x not handled\n", type);
 			spin_unlock_bh(&cq->compl_lock);
 			break;
 		}
@@ -298,7 +297,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
 				num_srqne_processed++;
 			else
 				dev_warn(&nq->pdev->dev,
-					 "QPLIB: SRQ event 0x%x not handled",
+					 "SRQ event 0x%x not handled\n",
 					 nqsrqe->event);
 			break;
 		}
@@ -306,8 +305,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
 			break;
 		default:
 			dev_warn(&nq->pdev->dev,
-				 "QPLIB: nqe with type = 0x%x not handled",
-				 type);
+				 "nqe with type = 0x%x not handled\n", type);
 			break;
 		}
 		raw_cons++;
@@ -360,7 +358,8 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
 	}
 
 	/* Make sure the HW is stopped! */
-	bnxt_qplib_nq_stop_irq(nq, true);
+	if (nq->requested)
+		bnxt_qplib_nq_stop_irq(nq, true);
 
 	if (nq->bar_reg_iomem)
 		iounmap(nq->bar_reg_iomem);
@@ -396,7 +395,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
 	rc = irq_set_affinity_hint(nq->vector, &nq->mask);
 	if (rc) {
 		dev_warn(&nq->pdev->dev,
-			 "QPLIB: set affinity failed; vector: %d nq_idx: %d\n",
+			 "set affinity failed; vector: %d nq_idx: %d\n",
 			 nq->vector, nq_indx);
 	}
 	nq->requested = true;
@@ -443,7 +442,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
 	rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true);
 	if (rc) {
 		dev_err(&nq->pdev->dev,
-			"QPLIB: Failed to request irq for nq-idx %d", nq_idx);
+			"Failed to request irq for nq-idx %d\n", nq_idx);
 		goto fail;
 	}
 
@@ -662,8 +661,8 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
 
 	spin_lock(&srq_hwq->lock);
 	if (srq->start_idx == srq->last_idx) {
-		dev_err(&srq_hwq->pdev->dev, "QPLIB: FP: SRQ (0x%x) is full!",
-			srq->id);
+		dev_err(&srq_hwq->pdev->dev,
+			"FP: SRQ (0x%x) is full!\n", srq->id);
 		rc = -EINVAL;
 		spin_unlock(&srq_hwq->lock);
 		goto done;
@@ -1324,7 +1323,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 		}
 	}
 	if (i == res->sgid_tbl.max)
-		dev_warn(&res->pdev->dev, "QPLIB: SGID not found??");
+		dev_warn(&res->pdev->dev, "SGID not found??\n");
 
 	qp->ah.hop_limit = sb->hop_limit;
 	qp->ah.traffic_class = sb->traffic_class;
@@ -1536,7 +1535,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
 
 	if (bnxt_qplib_queue_full(sq)) {
 		dev_err(&sq->hwq.pdev->dev,
-			"QPLIB: prod = %#x cons = %#x qdepth = %#x delta = %#x",
+			"prod = %#x cons = %#x qdepth = %#x delta = %#x\n",
 			sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
 			sq->q_full_delta);
 		rc = -ENOMEM;
@@ -1561,7 +1560,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
 		/* Copy the inline data */
 		if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
 			dev_warn(&sq->hwq.pdev->dev,
-				 "QPLIB: Inline data length > 96 detected");
+				 "Inline data length > 96 detected\n");
 			data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH;
 		} else {
 			data_len = wqe->inline_len;
@@ -1776,7 +1775,7 @@ done:
 			queue_work(qp->scq->nq->cqn_wq, &nq_work->work);
 		} else {
 			dev_err(&sq->hwq.pdev->dev,
-				"QPLIB: FP: Failed to allocate SQ nq_work!");
+				"FP: Failed to allocate SQ nq_work!\n");
 			rc = -ENOMEM;
 		}
 	}
@@ -1815,13 +1814,12 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
 	if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
 		sch_handler = true;
 		dev_dbg(&rq->hwq.pdev->dev,
-			"%s Error QP. Scheduling for poll_cq\n",
-			__func__);
+			"%s: Error QP. Scheduling for poll_cq\n", __func__);
 		goto queue_err;
 	}
 	if (bnxt_qplib_queue_full(rq)) {
 		dev_err(&rq->hwq.pdev->dev,
-			"QPLIB: FP: QP (0x%x) RQ is full!", qp->id);
+			"FP: QP (0x%x) RQ is full!\n", qp->id);
 		rc = -EINVAL;
 		goto done;
 	}
@@ -1870,7 +1868,7 @@ queue_err:
 			queue_work(qp->rcq->nq->cqn_wq, &nq_work->work);
 		} else {
 			dev_err(&rq->hwq.pdev->dev,
-				"QPLIB: FP: Failed to allocate RQ nq_work!");
+				"FP: Failed to allocate RQ nq_work!\n");
 			rc = -ENOMEM;
 		}
 	}
@@ -1932,7 +1930,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 
 	if (!cq->dpi) {
 		dev_err(&rcfw->pdev->dev,
-			"QPLIB: FP: CREATE_CQ failed due to NULL DPI");
+			"FP: CREATE_CQ failed due to NULL DPI\n");
 		return -EINVAL;
 	}
 	req.dpi = cpu_to_le32(cq->dpi->dpi);
@@ -1969,6 +1967,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 	INIT_LIST_HEAD(&cq->sqf_head);
 	INIT_LIST_HEAD(&cq->rqf_head);
 	spin_lock_init(&cq->compl_lock);
+	spin_lock_init(&cq->flush_lock);
 
 	bnxt_qplib_arm_cq_enable(cq);
 	return 0;
@@ -2172,7 +2171,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
 						 *  comes back
 						 */
 						dev_dbg(&cq->hwq.pdev->dev,
-							"FP:Got Phantom CQE");
+							"FP: Got Phantom CQE\n");
 						sq->condition = false;
 						sq->single = true;
 						rc = 0;
@@ -2189,7 +2188,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
 			peek_raw_cq_cons++;
 		}
 		dev_err(&cq->hwq.pdev->dev,
-			"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x",
+			"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n",
 			cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
 		rc = -EINVAL;
 	}
@@ -2213,7 +2212,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 				      le64_to_cpu(hwcqe->qp_handle));
 	if (!qp) {
 		dev_err(&cq->hwq.pdev->dev,
-			"QPLIB: FP: Process Req qp is NULL");
+			"FP: Process Req qp is NULL\n");
 		return -EINVAL;
 	}
 	sq = &qp->sq;
@@ -2221,16 +2220,14 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 	cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
 	if (cqe_sq_cons > sq->hwq.max_elements) {
 		dev_err(&cq->hwq.pdev->dev,
-			"QPLIB: FP: CQ Process req reported ");
-		dev_err(&cq->hwq.pdev->dev,
-			"QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
+			"FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
 			cqe_sq_cons, sq->hwq.max_elements);
 		return -EINVAL;
 	}
 
 	if (qp->sq.flushed) {
 		dev_dbg(&cq->hwq.pdev->dev,
-			"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+			"%s: QP in Flush QP = %p\n", __func__, qp);
 		goto done;
 	}
 	/* Require to walk the sq's swq to fabricate CQEs for all previously
@@ -2262,9 +2259,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 		    hwcqe->status != CQ_REQ_STATUS_OK) {
 			cqe->status = hwcqe->status;
 			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: FP: CQ Processed Req ");
-			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: wr_id[%d] = 0x%llx with status 0x%x",
+				"FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n",
 				sw_sq_cons, cqe->wr_id, cqe->status);
 			cqe++;
 			(*budget)--;
@@ -2330,12 +2325,12 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
 	qp = (struct bnxt_qplib_qp *)((unsigned long)
 				      le64_to_cpu(hwcqe->qp_handle));
 	if (!qp) {
-		dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq RC qp is NULL");
+		dev_err(&cq->hwq.pdev->dev, "process_cq RC qp is NULL\n");
 		return -EINVAL;
 	}
 	if (qp->rq.flushed) {
 		dev_dbg(&cq->hwq.pdev->dev,
-			"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+			"%s: QP in Flush QP = %p\n", __func__, qp);
 		goto done;
 	}
 
@@ -2356,9 +2351,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
 			return -EINVAL;
 		if (wr_id_idx >= srq->hwq.max_elements) {
 			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: FP: CQ Process RC ");
-			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
+				"FP: CQ Process RC wr_id idx 0x%x exceeded SRQ max 0x%x\n",
 				wr_id_idx, srq->hwq.max_elements);
 			return -EINVAL;
 		}
@@ -2371,9 +2364,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
 		rq = &qp->rq;
 		if (wr_id_idx >= rq->hwq.max_elements) {
 			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: FP: CQ Process RC ");
-			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
+				"FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n",
 				wr_id_idx, rq->hwq.max_elements);
 			return -EINVAL;
 		}
@@ -2409,12 +2400,12 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
 	qp = (struct bnxt_qplib_qp *)((unsigned long)
 				      le64_to_cpu(hwcqe->qp_handle));
 	if (!qp) {
-		dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq UD qp is NULL");
+		dev_err(&cq->hwq.pdev->dev, "process_cq UD qp is NULL\n");
 		return -EINVAL;
 	}
 	if (qp->rq.flushed) {
 		dev_dbg(&cq->hwq.pdev->dev,
-			"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+			"%s: QP in Flush QP = %p\n", __func__, qp);
 		goto done;
 	}
 	cqe = *pcqe;
@@ -2439,9 +2430,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
 
 		if (wr_id_idx >= srq->hwq.max_elements) {
 			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: FP: CQ Process UD ");
-			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
+				"FP: CQ Process UD wr_id idx 0x%x exceeded SRQ max 0x%x\n",
 				wr_id_idx, srq->hwq.max_elements);
 			return -EINVAL;
 		}
@@ -2454,9 +2443,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
 		rq = &qp->rq;
 		if (wr_id_idx >= rq->hwq.max_elements) {
 			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: FP: CQ Process UD ");
-			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
+				"FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n",
 				wr_id_idx, rq->hwq.max_elements);
 			return -EINVAL;
 		}
@@ -2508,13 +2495,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
 	qp = (struct bnxt_qplib_qp *)((unsigned long)
 				      le64_to_cpu(hwcqe->qp_handle));
 	if (!qp) {
-		dev_err(&cq->hwq.pdev->dev,
-			"QPLIB: process_cq Raw/QP1 qp is NULL");
+		dev_err(&cq->hwq.pdev->dev, "process_cq Raw/QP1 qp is NULL\n");
 		return -EINVAL;
 	}
 	if (qp->rq.flushed) {
 		dev_dbg(&cq->hwq.pdev->dev,
-			"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+			"%s: QP in Flush QP = %p\n", __func__, qp);
 		goto done;
 	}
 	cqe = *pcqe;
@@ -2543,14 +2529,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
 		srq = qp->srq;
 		if (!srq) {
 			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: FP: SRQ used but not defined??");
+				"FP: SRQ used but not defined??\n");
 			return -EINVAL;
 		}
 		if (wr_id_idx >= srq->hwq.max_elements) {
 			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: FP: CQ Process Raw/QP1 ");
-			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
+				"FP: CQ Process Raw/QP1 wr_id idx 0x%x exceeded SRQ max 0x%x\n",
 				wr_id_idx, srq->hwq.max_elements);
 			return -EINVAL;
 		}
@@ -2563,9 +2547,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
 		rq = &qp->rq;
 		if (wr_id_idx >= rq->hwq.max_elements) {
 			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: FP: CQ Process Raw/QP1 RQ wr_id ");
-			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: ix 0x%x exceeded RQ max 0x%x",
+				"FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n",
 				wr_id_idx, rq->hwq.max_elements);
 			return -EINVAL;
 		}
@@ -2600,14 +2582,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
 	/* Check the Status */
 	if (hwcqe->status != CQ_TERMINAL_STATUS_OK)
 		dev_warn(&cq->hwq.pdev->dev,
-			 "QPLIB: FP: CQ Process Terminal Error status = 0x%x",
+			 "FP: CQ Process Terminal Error status = 0x%x\n",
 			 hwcqe->status);
 
 	qp = (struct bnxt_qplib_qp *)((unsigned long)
 				      le64_to_cpu(hwcqe->qp_handle));
 	if (!qp) {
 		dev_err(&cq->hwq.pdev->dev,
-			"QPLIB: FP: CQ Process terminal qp is NULL");
+			"FP: CQ Process terminal qp is NULL\n");
 		return -EINVAL;
 	}
 
@@ -2623,16 +2605,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
 
 	if (cqe_cons > sq->hwq.max_elements) {
 		dev_err(&cq->hwq.pdev->dev,
-			"QPLIB: FP: CQ Process terminal reported ");
-		dev_err(&cq->hwq.pdev->dev,
-			"QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
+			"FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
 			cqe_cons, sq->hwq.max_elements);
 		goto do_rq;
 	}
 
 	if (qp->sq.flushed) {
 		dev_dbg(&cq->hwq.pdev->dev,
-			"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+			"%s: QP in Flush QP = %p\n", __func__, qp);
 		goto sq_done;
 	}
 
@@ -2673,16 +2653,14 @@ do_rq:
 		goto done;
 	} else if (cqe_cons > rq->hwq.max_elements) {
 		dev_err(&cq->hwq.pdev->dev,
-			"QPLIB: FP: CQ Processed terminal ");
-		dev_err(&cq->hwq.pdev->dev,
-			"QPLIB: reported rq_cons_idx 0x%x exceeds max 0x%x",
+			"FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n",
 			cqe_cons, rq->hwq.max_elements);
 		goto done;
 	}
 
 	if (qp->rq.flushed) {
 		dev_dbg(&cq->hwq.pdev->dev,
-			"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+			"%s: QP in Flush QP = %p\n", __func__, qp);
 		rc = 0;
 		goto done;
 	}
@@ -2704,7 +2682,7 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq,
 	/* Check the Status */
 	if (hwcqe->status != CQ_CUTOFF_STATUS_OK) {
 		dev_err(&cq->hwq.pdev->dev,
-			"QPLIB: FP: CQ Process Cutoff Error status = 0x%x",
+			"FP: CQ Process Cutoff Error status = 0x%x\n",
 			hwcqe->status);
 		return -EINVAL;
 	}
@@ -2724,16 +2702,12 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
 
 	spin_lock_irqsave(&cq->flush_lock, flags);
 	list_for_each_entry(qp, &cq->sqf_head, sq_flush) {
-		dev_dbg(&cq->hwq.pdev->dev,
-			"QPLIB: FP: Flushing SQ QP= %p",
-			qp);
+		dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing SQ QP= %p\n", qp);
 		__flush_sq(&qp->sq, qp, &cqe, &budget);
 	}
 
 	list_for_each_entry(qp, &cq->rqf_head, rq_flush) {
-		dev_dbg(&cq->hwq.pdev->dev,
-			"QPLIB: FP: Flushing RQ QP= %p",
-			qp);
+		dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing RQ QP= %p\n", qp);
 		__flush_rq(&qp->rq, qp, &cqe, &budget);
 	}
 	spin_unlock_irqrestore(&cq->flush_lock, flags);
@@ -2801,7 +2775,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 			goto exit;
 		default:
 			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: process_cq unknown type 0x%lx",
+				"process_cq unknown type 0x%lx\n",
 				hw_cqe->cqe_type_toggle &
 				CQ_BASE_CQE_TYPE_MASK);
 			rc = -EINVAL;
@@ -2814,7 +2788,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 			 * next one
 			 */
 			dev_err(&cq->hwq.pdev->dev,
-				"QPLIB: process_cqe error rc = 0x%x", rc);
+				"process_cqe error rc = 0x%x\n", rc);
 		}
 		raw_cons++;
 	}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 2852d350ada1..be4e33e9f962 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -35,6 +35,9 @@
  *
  * Description: RDMA Controller HW interface
  */
+
+#define dev_fmt(fmt) "QPLIB: " fmt
+
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
 #include <linux/pci.h>
@@ -96,14 +99,13 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
 	     opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
 	     opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
 		dev_err(&rcfw->pdev->dev,
-			"QPLIB: RCFW not initialized, reject opcode 0x%x",
-			opcode);
+			"RCFW not initialized, reject opcode 0x%x\n", opcode);
 		return -EINVAL;
 	}
 
 	if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
 	    opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
+		dev_err(&rcfw->pdev->dev, "RCFW already initialized!\n");
 		return -EINVAL;
 	}
 
@@ -115,7 +117,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
 	 */
 	spin_lock_irqsave(&cmdq->lock, flags);
 	if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!");
+		dev_err(&rcfw->pdev->dev, "RCFW: CMDQ is full!\n");
 		spin_unlock_irqrestore(&cmdq->lock, flags);
 		return -EAGAIN;
 	}
@@ -154,7 +156,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
 		cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)];
 		if (!cmdqe) {
 			dev_err(&rcfw->pdev->dev,
-				"QPLIB: RCFW request failed with no cmdqe!");
+				"RCFW request failed with no cmdqe!\n");
 			goto done;
 		}
 		/* Copy a segment of the req cmd to the cmdq */
@@ -210,7 +212,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
 
 		if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) {
 			/* send failed */
-			dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x send failed",
+			dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x send failed\n",
 				cookie, opcode);
 			return rc;
 		}
@@ -224,7 +226,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
 		rc = __wait_for_resp(rcfw, cookie);
 	if (rc) {
 		/* timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec",
+		dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x timedout (%d)msec\n",
 			cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
 		set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags);
 		return rc;
@@ -232,7 +234,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
 
 	if (evnt->status) {
 		/* failed with status */
-		dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x status %#x",
+		dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
 			cookie, opcode, evnt->status);
 		rc = -EFAULT;
 	}
@@ -298,9 +300,9 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
 		qp_id = le32_to_cpu(err_event->xid);
 		qp = rcfw->qp_tbl[qp_id].qp_handle;
 		dev_dbg(&rcfw->pdev->dev,
-			"QPLIB: Received QP error notification");
+			"Received QP error notification\n");
 		dev_dbg(&rcfw->pdev->dev,
-			"QPLIB: qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
+			"qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
 			qp_id, err_event->req_err_state_reason,
 			err_event->res_err_state_reason);
 		if (!qp)
@@ -309,8 +311,17 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
 		rcfw->aeq_handler(rcfw, qp_event, qp);
 		break;
 	default:
-		/* Command Response */
-		spin_lock_irqsave(&cmdq->lock, flags);
+		/*
+		 * Command Response
+		 * cmdq->lock needs to be acquired to synchronie
+		 * the command send and completion reaping. This function
+		 * is always called with creq->lock held. Using
+		 * the nested variant of spin_lock.
+		 *
+		 */
+
+		spin_lock_irqsave_nested(&cmdq->lock, flags,
+					 SINGLE_DEPTH_NESTING);
 		cookie = le16_to_cpu(qp_event->cookie);
 		mcookie = qp_event->cookie;
 		blocked = cookie & RCFW_CMD_IS_BLOCKING;
@@ -322,14 +333,16 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
 			memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
 			crsqe->resp = NULL;
 		} else {
-			dev_err(&rcfw->pdev->dev,
-				"QPLIB: CMD %s resp->cookie = %#x, evnt->cookie = %#x",
-				crsqe->resp ? "mismatch" : "collision",
-				crsqe->resp ? crsqe->resp->cookie : 0, mcookie);
+			if (crsqe->resp && crsqe->resp->cookie)
+				dev_err(&rcfw->pdev->dev,
+					"CMD %s cookie sent=%#x, recd=%#x\n",
+					crsqe->resp ? "mismatch" : "collision",
+					crsqe->resp ? crsqe->resp->cookie : 0,
+					mcookie);
 		}
 		if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
 			dev_warn(&rcfw->pdev->dev,
-				 "QPLIB: CMD bit %d was not requested", cbit);
+				 "CMD bit %d was not requested\n", cbit);
 		cmdq->cons += crsqe->req_size;
 		crsqe->req_size = 0;
 
@@ -376,14 +389,14 @@ static void bnxt_qplib_service_creq(unsigned long data)
 			    (rcfw, (struct creq_func_event *)creqe))
 				rcfw->creq_func_event_processed++;
 			else
-				dev_warn
-				(&rcfw->pdev->dev, "QPLIB:aeqe:%#x Not handled",
-				 type);
+				dev_warn(&rcfw->pdev->dev,
+					 "aeqe:%#x Not handled\n", type);
 			break;
 		default:
-			dev_warn(&rcfw->pdev->dev, "QPLIB: creqe with ");
-			dev_warn(&rcfw->pdev->dev,
-				 "QPLIB: op_event = 0x%x not handled", type);
+			if (type != ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT)
+				dev_warn(&rcfw->pdev->dev,
+					 "creqe with event 0x%x not handled\n",
+					 type);
 			break;
 		}
 		raw_cons++;
@@ -551,7 +564,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
 				      BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE,
 				      HWQ_TYPE_L2_CMPL)) {
 		dev_err(&rcfw->pdev->dev,
-			"QPLIB: HW channel CREQ allocation failed");
+			"HW channel CREQ allocation failed\n");
 		goto fail;
 	}
 	rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT;
@@ -560,7 +573,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
 				      BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE,
 				      HWQ_TYPE_CTX)) {
 		dev_err(&rcfw->pdev->dev,
-			"QPLIB: HW channel CMDQ allocation failed");
+			"HW channel CMDQ allocation failed\n");
 		goto fail;
 	}
 
@@ -605,21 +618,18 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
 
 	bnxt_qplib_rcfw_stop_irq(rcfw, true);
 
-	if (rcfw->cmdq_bar_reg_iomem)
-		iounmap(rcfw->cmdq_bar_reg_iomem);
-	rcfw->cmdq_bar_reg_iomem = NULL;
-
-	if (rcfw->creq_bar_reg_iomem)
-		iounmap(rcfw->creq_bar_reg_iomem);
-	rcfw->creq_bar_reg_iomem = NULL;
+	iounmap(rcfw->cmdq_bar_reg_iomem);
+	iounmap(rcfw->creq_bar_reg_iomem);
 
 	indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size);
 	if (indx != rcfw->bmap_size)
 		dev_err(&rcfw->pdev->dev,
-			"QPLIB: disabling RCFW with pending cmd-bit %lx", indx);
+			"disabling RCFW with pending cmd-bit %lx\n", indx);
 	kfree(rcfw->cmdq_bitmap);
 	rcfw->bmap_size = 0;
 
+	rcfw->cmdq_bar_reg_iomem = NULL;
+	rcfw->creq_bar_reg_iomem = NULL;
 	rcfw->aeq_handler = NULL;
 	rcfw->vector = 0;
 }
@@ -681,8 +691,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 					      RCFW_COMM_BASE_OFFSET,
 					      RCFW_COMM_SIZE);
 	if (!rcfw->cmdq_bar_reg_iomem) {
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: CMDQ BAR region %d mapping failed",
+		dev_err(&rcfw->pdev->dev, "CMDQ BAR region %d mapping failed\n",
 			rcfw->cmdq_bar_reg);
 		return -ENOMEM;
 	}
@@ -697,14 +706,15 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 	res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
 	if (!res_base)
 		dev_err(&rcfw->pdev->dev,
-			"QPLIB: CREQ BAR region %d resc start is 0!",
+			"CREQ BAR region %d resc start is 0!\n",
 			rcfw->creq_bar_reg);
 	rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off,
 						   4);
 	if (!rcfw->creq_bar_reg_iomem) {
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: CREQ BAR region %d mapping failed",
+		dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n",
 			rcfw->creq_bar_reg);
+		iounmap(rcfw->cmdq_bar_reg_iomem);
+		rcfw->cmdq_bar_reg_iomem = NULL;
 		return -ENOMEM;
 	}
 	rcfw->creq_qp_event_processed = 0;
@@ -717,7 +727,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 	rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true);
 	if (rc) {
 		dev_err(&rcfw->pdev->dev,
-			"QPLIB: Failed to request IRQ for CREQ rc = 0x%x", rc);
+			"Failed to request IRQ for CREQ rc = 0x%x\n", rc);
 		bnxt_qplib_disable_rcfw_channel(rcfw);
 		return rc;
 	}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 46416dfe8830..9a8687dc0a79 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -154,6 +154,8 @@ struct bnxt_qplib_qp_node {
 	void *qp_handle;        /* ptr to qplib_qp */
 };
 
+#define BNXT_QPLIB_OOS_COUNT_MASK 0xFFFFFFFF
+
 /* RCFW Communication Channels */
 struct bnxt_qplib_rcfw {
 	struct pci_dev		*pdev;
@@ -190,6 +192,8 @@ struct bnxt_qplib_rcfw {
 	struct bnxt_qplib_crsq	*crsqe_tbl;
 	int qp_tbl_size;
 	struct bnxt_qplib_qp_node *qp_tbl;
+	u64 oos_prev;
+	u32 init_oos_stats;
 };
 
 void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 539a5d44e6db..59eeac55626f 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -36,6 +36,8 @@
  * Description: QPLib resource manager
  */
 
+#define dev_fmt(fmt) "QPLIB: " fmt
+
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <linux/interrupt.h>
@@ -68,8 +70,7 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
 						  pbl->pg_map_arr[i]);
 			else
 				dev_warn(&pdev->dev,
-					 "QPLIB: PBL free pg_arr[%d] empty?!",
-					 i);
+					 "PBL free pg_arr[%d] empty?!\n", i);
 			pbl->pg_arr[i] = NULL;
 		}
 	}
@@ -537,7 +538,7 @@ static void bnxt_qplib_free_pkey_tbl(struct bnxt_qplib_res *res,
 				     struct bnxt_qplib_pkey_tbl *pkey_tbl)
 {
 	if (!pkey_tbl->tbl)
-		dev_dbg(&res->pdev->dev, "QPLIB: PKEY tbl not present");
+		dev_dbg(&res->pdev->dev, "PKEY tbl not present\n");
 	else
 		kfree(pkey_tbl->tbl);
 
@@ -578,7 +579,7 @@ int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
 			  struct bnxt_qplib_pd *pd)
 {
 	if (test_and_set_bit(pd->id, pdt->tbl)) {
-		dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d",
+		dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d\n",
 			 pd->id);
 		return -EINVAL;
 	}
@@ -639,11 +640,11 @@ int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
 			   struct bnxt_qplib_dpi     *dpi)
 {
 	if (dpi->dpi >= dpit->max) {
-		dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d", dpi->dpi);
+		dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d\n", dpi->dpi);
 		return -EINVAL;
 	}
 	if (test_and_set_bit(dpi->dpi, dpit->tbl)) {
-		dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d",
+		dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d\n",
 			 dpi->dpi);
 		return -EINVAL;
 	}
@@ -673,22 +674,21 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res     *res,
 	u32 dbr_len, bytes;
 
 	if (dpit->dbr_bar_reg_iomem) {
-		dev_err(&res->pdev->dev,
-			"QPLIB: DBR BAR region %d already mapped", dbr_bar_reg);
+		dev_err(&res->pdev->dev, "DBR BAR region %d already mapped\n",
+			dbr_bar_reg);
 		return -EALREADY;
 	}
 
 	bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg);
 	if (!bar_reg_base) {
-		dev_err(&res->pdev->dev,
-			"QPLIB: BAR region %d resc start failed", dbr_bar_reg);
+		dev_err(&res->pdev->dev, "BAR region %d resc start failed\n",
+			dbr_bar_reg);
 		return -ENOMEM;
 	}
 
 	dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset;
 	if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) {
-		dev_err(&res->pdev->dev, "QPLIB: Invalid DBR length %d",
-			dbr_len);
+		dev_err(&res->pdev->dev, "Invalid DBR length %d\n", dbr_len);
 		return -ENOMEM;
 	}
 
@@ -696,8 +696,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res     *res,
 						  dbr_len);
 	if (!dpit->dbr_bar_reg_iomem) {
 		dev_err(&res->pdev->dev,
-			"QPLIB: FP: DBR BAR region %d mapping failed",
-			dbr_bar_reg);
+			"FP: DBR BAR region %d mapping failed\n", dbr_bar_reg);
 		return -ENOMEM;
 	}
 
@@ -767,7 +766,7 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
 	stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
 					&stats->dma_map, GFP_KERNEL);
 	if (!stats->dma) {
-		dev_err(&pdev->dev, "QPLIB: Stats DMA allocation failed");
+		dev_err(&pdev->dev, "Stats DMA allocation failed\n");
 		return -ENOMEM;
 	}
 	return 0;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 4097f3fa25c5..5216b5f844cc 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -36,6 +36,8 @@
  * Description: Slow Path Operators
  */
 
+#define dev_fmt(fmt) "QPLIB: " fmt
+
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
 #include <linux/sched.h>
@@ -89,7 +91,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 	sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
 	if (!sbuf) {
 		dev_err(&rcfw->pdev->dev,
-			"QPLIB: SP: QUERY_FUNC alloc side buffer failed");
+			"SP: QUERY_FUNC alloc side buffer failed\n");
 		return -ENOMEM;
 	}
 
@@ -135,8 +137,16 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 	attr->max_srq = le16_to_cpu(sb->max_srq);
 	attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
 	attr->max_srq_sges = sb->max_srq_sge;
-	/* Bono only reports 1 PKEY for now, but it can support > 1 */
 	attr->max_pkey = le32_to_cpu(sb->max_pkeys);
+	/*
+	 * Some versions of FW reports more than 0xFFFF.
+	 * Restrict it for now to 0xFFFF to avoid
+	 * reporting trucated value
+	 */
+	if (attr->max_pkey > 0xFFFF) {
+		/* ib_port_attr::pkey_tbl_len is u16 */
+		attr->max_pkey = 0xFFFF;
+	}
 
 	attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
 	attr->l2_db_size = (sb->l2_db_space_size + 1) *
@@ -186,8 +196,7 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
 					  (void *)&resp,
 					  NULL, 0);
 	if (rc) {
-		dev_err(&res->pdev->dev,
-			"QPLIB: Failed to set function resources");
+		dev_err(&res->pdev->dev, "Failed to set function resources\n");
 	}
 	return rc;
 }
@@ -199,7 +208,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
 {
 	if (index >= sgid_tbl->max) {
 		dev_err(&res->pdev->dev,
-			"QPLIB: Index %d exceeded SGID table max (%d)",
+			"Index %d exceeded SGID table max (%d)\n",
 			index, sgid_tbl->max);
 		return -EINVAL;
 	}
@@ -217,13 +226,12 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 	int index;
 
 	if (!sgid_tbl) {
-		dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
+		dev_err(&res->pdev->dev, "SGID table not allocated\n");
 		return -EINVAL;
 	}
 	/* Do we need a sgid_lock here? */
 	if (!sgid_tbl->active) {
-		dev_err(&res->pdev->dev,
-			"QPLIB: SGID table has no active entries");
+		dev_err(&res->pdev->dev, "SGID table has no active entries\n");
 		return -ENOMEM;
 	}
 	for (index = 0; index < sgid_tbl->max; index++) {
@@ -231,7 +239,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 			break;
 	}
 	if (index == sgid_tbl->max) {
-		dev_warn(&res->pdev->dev, "GID not found in the SGID table");
+		dev_warn(&res->pdev->dev, "GID not found in the SGID table\n");
 		return 0;
 	}
 	/* Remove GID from the SGID table */
@@ -244,7 +252,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 		RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
 		if (sgid_tbl->hw_id[index] == 0xFFFF) {
 			dev_err(&res->pdev->dev,
-				"QPLIB: GID entry contains an invalid HW id");
+				"GID entry contains an invalid HW id\n");
 			return -EINVAL;
 		}
 		req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
@@ -258,7 +266,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 	sgid_tbl->vlan[index] = 0;
 	sgid_tbl->active--;
 	dev_dbg(&res->pdev->dev,
-		"QPLIB: SGID deleted hw_id[0x%x] = 0x%x active = 0x%x",
+		"SGID deleted hw_id[0x%x] = 0x%x active = 0x%x\n",
 		 index, sgid_tbl->hw_id[index], sgid_tbl->active);
 	sgid_tbl->hw_id[index] = (u16)-1;
 
@@ -277,20 +285,19 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 	int i, free_idx;
 
 	if (!sgid_tbl) {
-		dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
+		dev_err(&res->pdev->dev, "SGID table not allocated\n");
 		return -EINVAL;
 	}
 	/* Do we need a sgid_lock here? */
 	if (sgid_tbl->active == sgid_tbl->max) {
-		dev_err(&res->pdev->dev, "QPLIB: SGID table is full");
+		dev_err(&res->pdev->dev, "SGID table is full\n");
 		return -ENOMEM;
 	}
 	free_idx = sgid_tbl->max;
 	for (i = 0; i < sgid_tbl->max; i++) {
 		if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) {
 			dev_dbg(&res->pdev->dev,
-				"QPLIB: SGID entry already exist in entry %d!",
-				i);
+				"SGID entry already exist in entry %d!\n", i);
 			*index = i;
 			return -EALREADY;
 		} else if (!memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
@@ -301,7 +308,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 	}
 	if (free_idx == sgid_tbl->max) {
 		dev_err(&res->pdev->dev,
-			"QPLIB: SGID table is FULL but count is not MAX??");
+			"SGID table is FULL but count is not MAX??\n");
 		return -ENOMEM;
 	}
 	if (update) {
@@ -348,7 +355,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 		sgid_tbl->vlan[free_idx] = 1;
 
 	dev_dbg(&res->pdev->dev,
-		"QPLIB: SGID added hw_id[0x%x] = 0x%x active = 0x%x",
+		"SGID added hw_id[0x%x] = 0x%x active = 0x%x\n",
 		 free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active);
 
 	*index = free_idx;
@@ -404,7 +411,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
 	}
 	if (index >= pkey_tbl->max) {
 		dev_err(&res->pdev->dev,
-			"QPLIB: Index %d exceeded PKEY table max (%d)",
+			"Index %d exceeded PKEY table max (%d)\n",
 			index, pkey_tbl->max);
 		return -EINVAL;
 	}
@@ -419,14 +426,13 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
 	int i, rc = 0;
 
 	if (!pkey_tbl) {
-		dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
+		dev_err(&res->pdev->dev, "PKEY table not allocated\n");
 		return -EINVAL;
 	}
 
 	/* Do we need a pkey_lock here? */
 	if (!pkey_tbl->active) {
-		dev_err(&res->pdev->dev,
-			"QPLIB: PKEY table has no active entries");
+		dev_err(&res->pdev->dev, "PKEY table has no active entries\n");
 		return -ENOMEM;
 	}
 	for (i = 0; i < pkey_tbl->max; i++) {
@@ -435,8 +441,7 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
 	}
 	if (i == pkey_tbl->max) {
 		dev_err(&res->pdev->dev,
-			"QPLIB: PKEY 0x%04x not found in the pkey table",
-			*pkey);
+			"PKEY 0x%04x not found in the pkey table\n", *pkey);
 		return -ENOMEM;
 	}
 	memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey));
@@ -453,13 +458,13 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
 	int i, free_idx, rc = 0;
 
 	if (!pkey_tbl) {
-		dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
+		dev_err(&res->pdev->dev, "PKEY table not allocated\n");
 		return -EINVAL;
 	}
 
 	/* Do we need a pkey_lock here? */
 	if (pkey_tbl->active == pkey_tbl->max) {
-		dev_err(&res->pdev->dev, "QPLIB: PKEY table is full");
+		dev_err(&res->pdev->dev, "PKEY table is full\n");
 		return -ENOMEM;
 	}
 	free_idx = pkey_tbl->max;
@@ -471,7 +476,7 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
 	}
 	if (free_idx == pkey_tbl->max) {
 		dev_err(&res->pdev->dev,
-			"QPLIB: PKEY table is FULL but count is not MAX??");
+			"PKEY table is FULL but count is not MAX??\n");
 		return -ENOMEM;
 	}
 	/* Add PKEY to the pkey_tbl */
@@ -555,8 +560,7 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 	int rc;
 
 	if (mrw->lkey == 0xFFFFFFFF) {
-		dev_info(&res->pdev->dev,
-			 "QPLIB: SP: Free a reserved lkey MRW");
+		dev_info(&res->pdev->dev, "SP: Free a reserved lkey MRW\n");
 		return 0;
 	}
 
@@ -666,9 +670,8 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
 			pages++;
 
 		if (pages > MAX_PBL_LVL_1_PGS) {
-			dev_err(&res->pdev->dev, "QPLIB: SP: Reg MR pages ");
 			dev_err(&res->pdev->dev,
-				"requested (0x%x) exceeded max (0x%x)",
+				"SP: Reg MR pages requested (0x%x) exceeded max (0x%x)\n",
 				pages, MAX_PBL_LVL_1_PGS);
 			return -ENOMEM;
 		}
@@ -684,7 +687,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
 					       HWQ_TYPE_CTX);
 		if (rc) {
 			dev_err(&res->pdev->dev,
-				"SP: Reg MR memory allocation failed");
+				"SP: Reg MR memory allocation failed\n");
 			return -ENOMEM;
 		}
 		/* Write to the hwq */
@@ -795,7 +798,7 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
 	sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
 	if (!sbuf) {
 		dev_err(&rcfw->pdev->dev,
-			"QPLIB: SP: QUERY_ROCE_STATS alloc side buffer failed");
+			"SP: QUERY_ROCE_STATS alloc side buffer failed\n");
 		return -ENOMEM;
 	}
 
@@ -845,6 +848,16 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
 	stats->res_srq_load_err = le64_to_cpu(sb->res_srq_load_err);
 	stats->res_tx_pci_err = le64_to_cpu(sb->res_tx_pci_err);
 	stats->res_rx_pci_err = le64_to_cpu(sb->res_rx_pci_err);
+	if (!rcfw->init_oos_stats) {
+		rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
+		rcfw->init_oos_stats = 1;
+	} else {
+		stats->res_oos_drop_count +=
+				(le64_to_cpu(sb->res_oos_drop_count) -
+				 rcfw->oos_prev) & BNXT_QPLIB_OOS_COUNT_MASK;
+		rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
+	}
+
 bail:
 	bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
 	return rc;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 9d3e8b994945..8079d7f5a008 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -205,6 +205,16 @@ struct bnxt_qplib_roce_stats {
 	/* res_tx_pci_err is 64 b */
 	u64 res_rx_pci_err;
 	/* res_rx_pci_err is 64 b */
+	u64 res_oos_drop_count;
+	/* res_oos_drop_count */
+	u64     active_qp_count_p0;
+	/* port 0 active qps */
+	u64     active_qp_count_p1;
+	/* port 1 active qps */
+	u64     active_qp_count_p2;
+	/* port 2 active qps */
+	u64     active_qp_count_p3;
+	/* port 3 active qps */
 };
 
 int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 3e5a4f760d0e..8a9ead419ac2 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -2929,6 +2929,11 @@ struct creq_query_roce_stats_resp_sb {
 	__le64	res_srq_load_err;
 	__le64	res_tx_pci_err;
 	__le64	res_rx_pci_err;
+	__le64  res_oos_drop_count;
+	__le64  active_qp_count_p0;
+	__le64  active_qp_count_p1;
+	__le64  active_qp_count_p2;
+	__le64  active_qp_count_p3;
 };
 
 /* QP error notification event (16 bytes) */
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 1b9ff21aa1d5..ebbec02cebe0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1127,17 +1127,18 @@ static int iwch_query_port(struct ib_device *ibdev,
 	return 0;
 }
 
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
-			char *buf)
+static ssize_t hw_rev_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
 {
 	struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
 						 ibdev.dev);
 	pr_debug("%s dev 0x%p\n", __func__, dev);
 	return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
 }
+static DEVICE_ATTR_RO(hw_rev);
 
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
-			char *buf)
+static ssize_t hca_type_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
 {
 	struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
 						 ibdev.dev);
@@ -1148,9 +1149,10 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
 	lldev->ethtool_ops->get_drvinfo(lldev, &info);
 	return sprintf(buf, "%s\n", info.driver);
 }
+static DEVICE_ATTR_RO(hca_type);
 
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
-			  char *buf)
+static ssize_t board_id_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
 {
 	struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
 						 ibdev.dev);
@@ -1158,6 +1160,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
 	return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
 		       iwch_dev->rdev.rnic_info.pdev->device);
 }
+static DEVICE_ATTR_RO(board_id);
 
 enum counters {
 	IPINRECEIVES,
@@ -1274,14 +1277,15 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
 	return stats->num_counters;
 }
 
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *iwch_class_attributes[] = {
+	&dev_attr_hw_rev.attr,
+	&dev_attr_hca_type.attr,
+	&dev_attr_board_id.attr,
+	NULL
+};
 
-static struct device_attribute *iwch_class_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type,
-	&dev_attr_board_id,
+static const struct attribute_group iwch_attr_group = {
+	.attrs = iwch_class_attributes,
 };
 
 static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
@@ -1316,10 +1320,8 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str)
 int iwch_register_device(struct iwch_dev *dev)
 {
 	int ret;
-	int i;
 
 	pr_debug("%s iwch_dev %p\n", __func__, dev);
-	strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
 	memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
 	memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
 	dev->ibdev.owner = THIS_MODULE;
@@ -1402,33 +1404,16 @@ int iwch_register_device(struct iwch_dev *dev)
 	       sizeof(dev->ibdev.iwcm->ifname));
 
 	dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
-	ret = ib_register_device(&dev->ibdev, NULL);
+	rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
+	ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL);
 	if (ret)
-		goto bail1;
-
-	for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
-		ret = device_create_file(&dev->ibdev.dev,
-					 iwch_class_attributes[i]);
-		if (ret) {
-			goto bail2;
-		}
-	}
-	return 0;
-bail2:
-	ib_unregister_device(&dev->ibdev);
-bail1:
-	kfree(dev->ibdev.iwcm);
+		kfree(dev->ibdev.iwcm);
 	return ret;
 }
 
 void iwch_unregister_device(struct iwch_dev *dev)
 {
-	int i;
-
 	pr_debug("%s iwch_dev %p\n", __func__, dev);
-	for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
-		device_remove_file(&dev->ibdev.dev,
-				   iwch_class_attributes[i]);
 	ib_unregister_device(&dev->ibdev);
 	kfree(dev->ibdev.iwcm);
 	return;
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 0f83cbec33f3..615413bd3e8d 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -403,8 +403,7 @@ void _c4iw_free_ep(struct kref *kref)
 				 ep->com.local_addr.ss_family);
 		dst_release(ep->dst);
 		cxgb4_l2t_release(ep->l2t);
-		if (ep->mpa_skb)
-			kfree_skb(ep->mpa_skb);
+		kfree_skb(ep->mpa_skb);
 	}
 	if (!skb_queue_empty(&ep->com.ep_skb_list))
 		skb_queue_purge(&ep->com.ep_skb_list);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 6d3042794094..1fd8798d91a7 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -161,7 +161,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	cq->gts = rdev->lldi.gts_reg;
 	cq->rdev = rdev;
 
-	cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
+	cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, CXGB4_BAR2_QTYPE_INGRESS,
 				      &cq->bar2_qid,
 				      user ? &cq->bar2_pa : NULL);
 	if (user && !cq->bar2_pa) {
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 4eda6872e617..cbb3c0ddd990 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -373,8 +373,8 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
 	return 0;
 }
 
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
-			char *buf)
+static ssize_t hw_rev_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
 {
 	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
 						 ibdev.dev);
@@ -382,9 +382,10 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
 	return sprintf(buf, "%d\n",
 		       CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
 }
+static DEVICE_ATTR_RO(hw_rev);
 
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
-			char *buf)
+static ssize_t hca_type_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
 {
 	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
 						 ibdev.dev);
@@ -395,9 +396,10 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
 	lldev->ethtool_ops->get_drvinfo(lldev, &info);
 	return sprintf(buf, "%s\n", info.driver);
 }
+static DEVICE_ATTR_RO(hca_type);
 
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
-			  char *buf)
+static ssize_t board_id_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
 {
 	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
 						 ibdev.dev);
@@ -405,6 +407,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
 	return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
 		       c4iw_dev->rdev.lldi.pdev->device);
 }
+static DEVICE_ATTR_RO(board_id);
 
 enum counters {
 	IP4INSEGS,
@@ -461,14 +464,15 @@ static int c4iw_get_mib(struct ib_device *ibdev,
 	return stats->num_counters;
 }
 
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *c4iw_class_attributes[] = {
+	&dev_attr_hw_rev.attr,
+	&dev_attr_hca_type.attr,
+	&dev_attr_board_id.attr,
+	NULL
+};
 
-static struct device_attribute *c4iw_class_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type,
-	&dev_attr_board_id,
+static const struct attribute_group c4iw_attr_group = {
+	.attrs = c4iw_class_attributes,
 };
 
 static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
@@ -530,12 +534,10 @@ static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res)
 void c4iw_register_device(struct work_struct *work)
 {
 	int ret;
-	int i;
 	struct uld_ctx *ctx = container_of(work, struct uld_ctx, reg_work);
 	struct c4iw_dev *dev = ctx->dev;
 
 	pr_debug("c4iw_dev %p\n", dev);
-	strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX);
 	memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
 	memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
 	dev->ibdev.owner = THIS_MODULE;
@@ -626,20 +628,13 @@ void c4iw_register_device(struct work_struct *work)
 	memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
 	       sizeof(dev->ibdev.iwcm->ifname));
 
+	rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
 	dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
-	ret = ib_register_device(&dev->ibdev, NULL);
+	ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL);
 	if (ret)
 		goto err_kfree_iwcm;
-
-	for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) {
-		ret = device_create_file(&dev->ibdev.dev,
-					 c4iw_class_attributes[i]);
-		if (ret)
-			goto err_unregister_device;
-	}
 	return;
-err_unregister_device:
-	ib_unregister_device(&dev->ibdev);
+
 err_kfree_iwcm:
 	kfree(dev->ibdev.iwcm);
 err_dealloc_ctx:
@@ -651,12 +646,7 @@ err_dealloc_ctx:
 
 void c4iw_unregister_device(struct c4iw_dev *dev)
 {
-	int i;
-
 	pr_debug("c4iw_dev %p\n", dev);
-	for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i)
-		device_remove_file(&dev->ibdev.dev,
-				   c4iw_class_attributes[i]);
 	ib_unregister_device(&dev->ibdev);
 	kfree(dev->ibdev.iwcm);
 	return;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 62d6f197ec0b..13478f3b7057 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -279,12 +279,13 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 
 	wq->db = rdev->lldi.db_reg;
 
-	wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS,
+	wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid,
+					 CXGB4_BAR2_QTYPE_EGRESS,
 					 &wq->sq.bar2_qid,
 					 user ? &wq->sq.bar2_pa : NULL);
 	if (need_rq)
 		wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
-						 T4_BAR2_QTYPE_EGRESS,
+						 CXGB4_BAR2_QTYPE_EGRESS,
 						 &wq->rq.bar2_qid,
 						 user ? &wq->rq.bar2_pa : NULL);
 
@@ -2572,7 +2573,7 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
 	memset(wq->queue, 0, wq->memsize);
 	dma_unmap_addr_set(wq, mapping, wq->dma_addr);
 
-	wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS,
+	wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS,
 				      &wq->bar2_qid,
 			user ? &wq->bar2_pa : NULL);
 
@@ -2813,8 +2814,7 @@ err_free_queue:
 	free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
 		       srq->wr_waitp);
 err_free_skb:
-	if (srq->destroy_skb)
-		kfree_skb(srq->destroy_skb);
+	kfree_skb(srq->destroy_skb);
 err_free_srq_idx:
 	c4iw_free_srq_idx(&rhp->rdev, srq->idx);
 err_free_wr_wait:
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index f451ba912f47..ff790390c91a 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -8,12 +8,42 @@
 #
 obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
 
-hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
-	eprom.o exp_rcv.o file_ops.o firmware.o \
-	init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
-	qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
-	uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
-	verbs_txreq.o vnic_main.o vnic_sdma.o
+hfi1-y := \
+	affinity.o \
+	chip.o \
+	device.o \
+	driver.o \
+	efivar.o \
+	eprom.o \
+	exp_rcv.o \
+	file_ops.o \
+	firmware.o \
+	init.o \
+	intr.o \
+	iowait.o \
+	mad.o \
+	mmu_rb.o \
+	msix.o \
+	pcie.o \
+	pio.o \
+	pio_copy.o \
+	platform.o \
+	qp.o \
+	qsfp.o \
+	rc.o \
+	ruc.o \
+	sdma.o \
+	sysfs.o \
+	trace.o \
+	uc.o \
+	ud.o \
+	user_exp_rcv.o \
+	user_pages.o \
+	user_sdma.o \
+	verbs.o \
+	verbs_txreq.o \
+	vnic_main.o \
+	vnic_sdma.o
 
 ifdef CONFIG_DEBUG_FS
 hfi1-y += debugfs.o
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index bedd5fba33b0..2baf38cc1e23 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -817,10 +817,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
 	set = &entry->def_intr;
 	cpumask_set_cpu(cpu, &set->mask);
 	cpumask_set_cpu(cpu, &set->used);
-	for (i = 0; i < dd->num_msix_entries; i++) {
+	for (i = 0; i < dd->msix_info.max_requested; i++) {
 		struct hfi1_msix_entry *other_msix;
 
-		other_msix = &dd->msix_entries[i];
+		other_msix = &dd->msix_info.msix_entries[i];
 		if (other_msix->type != IRQ_SDMA || other_msix == msix)
 			continue;
 
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index e1668bcc2d13..9b20479dc710 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -67,8 +67,6 @@
 #include "debugfs.h"
 #include "fault.h"
 
-#define NUM_IB_PORTS 1
-
 uint kdeth_qp;
 module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
 MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
@@ -1100,9 +1098,9 @@ struct err_reg_info {
 	const char *desc;
 };
 
-#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
-#define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
-#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)
+#define NUM_MISC_ERRS (IS_GENERAL_ERR_END + 1 - IS_GENERAL_ERR_START)
+#define NUM_DC_ERRS (IS_DC_END + 1 - IS_DC_START)
+#define NUM_VARIOUS (IS_VARIOUS_END + 1 - IS_VARIOUS_START)
 
 /*
  * Helpers for building HFI and DC error interrupt table entries.  Different
@@ -8181,7 +8179,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
 /**
  * is_rcv_urgent_int() - User receive context urgent IRQ handler
  * @dd: valid dd
- * @source: logical IRQ source (ofse from IS_RCVURGENT_START)
+ * @source: logical IRQ source (offset from IS_RCVURGENT_START)
  *
  * RX block receive urgent interrupt.  Source is < 160.
  *
@@ -8231,7 +8229,7 @@ static const struct is_table is_table[] = {
 				is_sdma_eng_err_name,	is_sdma_eng_err_int },
 { IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
 				is_sendctxt_err_name,	is_sendctxt_err_int },
-{ IS_SDMA_START,	     IS_SDMA_END,
+{ IS_SDMA_START,	     IS_SDMA_IDLE_END,
 				is_sdma_eng_name,	is_sdma_eng_int },
 { IS_VARIOUS_START,	     IS_VARIOUS_END,
 				is_various_name,	is_various_int },
@@ -8257,7 +8255,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
 
 	/* avoids a double compare by walking the table in-order */
 	for (entry = &is_table[0]; entry->is_name; entry++) {
-		if (source < entry->end) {
+		if (source <= entry->end) {
 			trace_hfi1_interrupt(dd, entry, source);
 			entry->is_int(dd, source - entry->start);
 			return;
@@ -8276,7 +8274,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
  * context DATA IRQs are threaded and are not supported by this handler.
  *
  */
-static irqreturn_t general_interrupt(int irq, void *data)
+irqreturn_t general_interrupt(int irq, void *data)
 {
 	struct hfi1_devdata *dd = data;
 	u64 regs[CCE_NUM_INT_CSRS];
@@ -8309,7 +8307,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
 	return handled;
 }
 
-static irqreturn_t sdma_interrupt(int irq, void *data)
+irqreturn_t sdma_interrupt(int irq, void *data)
 {
 	struct sdma_engine *sde = data;
 	struct hfi1_devdata *dd = sde->dd;
@@ -8401,7 +8399,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
  * invoked) is finished.  The intent is to avoid extra interrupts while we
  * are processing packets anyway.
  */
-static irqreturn_t receive_context_interrupt(int irq, void *data)
+irqreturn_t receive_context_interrupt(int irq, void *data)
 {
 	struct hfi1_ctxtdata *rcd = data;
 	struct hfi1_devdata *dd = rcd->dd;
@@ -8441,7 +8439,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data)
  * Receive packet thread handler.  This expects to be invoked with the
  * receive interrupt still blocked.
  */
-static irqreturn_t receive_context_thread(int irq, void *data)
+irqreturn_t receive_context_thread(int irq, void *data)
 {
 	struct hfi1_ctxtdata *rcd = data;
 	int present;
@@ -9651,30 +9649,10 @@ void qsfp_event(struct work_struct *work)
 	}
 }
 
-static void init_qsfp_int(struct hfi1_devdata *dd)
+void init_qsfp_int(struct hfi1_devdata *dd)
 {
 	struct hfi1_pportdata *ppd = dd->pport;
-	u64 qsfp_mask, cce_int_mask;
-	const int qsfp1_int_smask = QSFP1_INT % 64;
-	const int qsfp2_int_smask = QSFP2_INT % 64;
-
-	/*
-	 * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
-	 * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
-	 * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
-	 * the index of the appropriate CSR in the CCEIntMask CSR array
-	 */
-	cce_int_mask = read_csr(dd, CCE_INT_MASK +
-				(8 * (QSFP1_INT / 64)));
-	if (dd->hfi1_id) {
-		cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
-		write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
-			  cce_int_mask);
-	} else {
-		cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
-		write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
-			  cce_int_mask);
-	}
+	u64 qsfp_mask;
 
 	qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
 	/* Clear current status to avoid spurious interrupts */
@@ -9691,6 +9669,12 @@ static void init_qsfp_int(struct hfi1_devdata *dd)
 	write_csr(dd,
 		  dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
 		  qsfp_mask);
+
+	/* Enable the appropriate QSFP IRQ source */
+	if (!dd->hfi1_id)
+		set_intr_bits(dd, QSFP1_INT, QSFP1_INT, true);
+	else
+		set_intr_bits(dd, QSFP2_INT, QSFP2_INT, true);
 }
 
 /*
@@ -10577,12 +10561,29 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
 	}
 }
 
-/*
- * Verify if BCT for data VLs is non-zero.
+/**
+ * data_vls_operational() - Verify if data VL BCT credits and MTU
+ *			    are both set.
+ * @ppd: pointer to hfi1_pportdata structure
+ *
+ * Return: true - Ok, false -otherwise.
  */
 static inline bool data_vls_operational(struct hfi1_pportdata *ppd)
 {
-	return !!ppd->actual_vls_operational;
+	int i;
+	u64 reg;
+
+	if (!ppd->actual_vls_operational)
+		return false;
+
+	for (i = 0; i < ppd->vls_supported; i++) {
+		reg = read_csr(ppd->dd, SEND_CM_CREDIT_VL + (8 * i));
+		if ((reg && !ppd->dd->vld[i].mtu) ||
+		    (!reg && ppd->dd->vld[i].mtu))
+			return false;
+	}
+
+	return true;
 }
 
 /*
@@ -10695,7 +10696,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 
 		if (!data_vls_operational(ppd)) {
 			dd_dev_err(dd,
-				   "%s: data VLs not operational\n", __func__);
+				   "%s: Invalid data VL credits or mtu\n",
+				   __func__);
 			ret = -EINVAL;
 			break;
 		}
@@ -11932,10 +11934,16 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
 
 		rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
 	}
-	if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
+	if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) {
+		set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
+			      IS_RCVAVAIL_START + rcd->ctxt, true);
 		rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
-	if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
+	}
+	if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) {
+		set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
+			      IS_RCVAVAIL_START + rcd->ctxt, false);
 		rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
+	}
 	if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
 		rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
 	if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
@@ -11965,6 +11973,13 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
 		rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
 	if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
 		rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
+	if (op & HFI1_RCVCTRL_URGENT_ENB)
+		set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
+			      IS_RCVURGENT_START + rcd->ctxt, true);
+	if (op & HFI1_RCVCTRL_URGENT_DIS)
+		set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
+			      IS_RCVURGENT_START + rcd->ctxt, false);
+
 	hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
 	write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl);
 
@@ -12963,63 +12978,71 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
 	return ret;
 }
 
+/* ========================================================================= */
+
 /**
- * get_int_mask - get 64 bit int mask
- * @dd - the devdata
- * @i - the csr (relative to CCE_INT_MASK)
+ * read_mod_write() - Calculate the IRQ register index and set/clear the bits
+ * @dd: valid devdata
+ * @src: IRQ source to determine register index from
+ * @bits: the bits to set or clear
+ * @set: true == set the bits, false == clear the bits
  *
- * Returns the mask with the urgent interrupt mask
- * bit clear for kernel receive contexts.
  */
-static u64 get_int_mask(struct hfi1_devdata *dd, u32 i)
+static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits,
+			   bool set)
 {
-	u64 mask = U64_MAX; /* default to no change */
-
-	if (i >= (IS_RCVURGENT_START / 64) && i < (IS_RCVURGENT_END / 64)) {
-		int j = (i - (IS_RCVURGENT_START / 64)) * 64;
-		int k = !j ? IS_RCVURGENT_START % 64 : 0;
+	u64 reg;
+	u16 idx = src / BITS_PER_REGISTER;
 
-		if (j)
-			j -= IS_RCVURGENT_START % 64;
-		/* j = 0..dd->first_dyn_alloc_ctxt - 1,k = 0..63 */
-		for (; j < dd->first_dyn_alloc_ctxt && k < 64; j++, k++)
-			/* convert to bit in mask and clear */
-			mask &= ~BIT_ULL(k);
-	}
-	return mask;
+	spin_lock(&dd->irq_src_lock);
+	reg = read_csr(dd, CCE_INT_MASK + (8 * idx));
+	if (set)
+		reg |= bits;
+	else
+		reg &= ~bits;
+	write_csr(dd, CCE_INT_MASK + (8 * idx), reg);
+	spin_unlock(&dd->irq_src_lock);
 }
 
-/* ========================================================================= */
-
-/*
- * Enable/disable chip from delivering interrupts.
+/**
+ * set_intr_bits() - Enable/disable a range (one or more) IRQ sources
+ * @dd: valid devdata
+ * @first: first IRQ source to set/clear
+ * @last: last IRQ source (inclusive) to set/clear
+ * @set: true == set the bits, false == clear the bits
+ *
+ * If first == last, set the exact source.
  */
-void set_intr_state(struct hfi1_devdata *dd, u32 enable)
+int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set)
 {
-	int i;
+	u64 bits = 0;
+	u64 bit;
+	u16 src;
 
-	/*
-	 * In HFI, the mask needs to be 1 to allow interrupts.
-	 */
-	if (enable) {
-		/* enable all interrupts but urgent on kernel contexts */
-		for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
-			u64 mask = get_int_mask(dd, i);
+	if (first > NUM_INTERRUPT_SOURCES || last > NUM_INTERRUPT_SOURCES)
+		return -EINVAL;
 
-			write_csr(dd, CCE_INT_MASK + (8 * i), mask);
-		}
+	if (last < first)
+		return -ERANGE;
 
-		init_qsfp_int(dd);
-	} else {
-		for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-			write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
+	for (src = first; src <= last; src++) {
+		bit = src % BITS_PER_REGISTER;
+		/* wrapped to next register? */
+		if (!bit && bits) {
+			read_mod_write(dd, src - 1, bits, set);
+			bits = 0;
+		}
+		bits |= BIT_ULL(bit);
 	}
+	read_mod_write(dd, last, bits, set);
+
+	return 0;
 }
 
 /*
  * Clear all interrupt sources on the chip.
  */
-static void clear_all_interrupts(struct hfi1_devdata *dd)
+void clear_all_interrupts(struct hfi1_devdata *dd)
 {
 	int i;
 
@@ -13043,38 +13066,11 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
 	write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
 }
 
-/**
- * hfi1_clean_up_interrupts() - Free all IRQ resources
- * @dd: valid device data data structure
- *
- * Free the MSIx and assoicated PCI resources, if they have been allocated.
- */
-void hfi1_clean_up_interrupts(struct hfi1_devdata *dd)
-{
-	int i;
-	struct hfi1_msix_entry *me = dd->msix_entries;
-
-	/* remove irqs - must happen before disabling/turning off */
-	for (i = 0; i < dd->num_msix_entries; i++, me++) {
-		if (!me->arg) /* => no irq, no affinity */
-			continue;
-		hfi1_put_irq_affinity(dd, me);
-		pci_free_irq(dd->pcidev, i, me->arg);
-	}
-
-	/* clean structures */
-	kfree(dd->msix_entries);
-	dd->msix_entries = NULL;
-	dd->num_msix_entries = 0;
-
-	pci_free_irq_vectors(dd->pcidev);
-}
-
 /*
  * Remap the interrupt source from the general handler to the given MSI-X
  * interrupt.
  */
-static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
+void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
 {
 	u64 reg;
 	int m, n;
@@ -13098,8 +13094,7 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
 	write_csr(dd, CCE_INT_MAP + (8 * m), reg);
 }
 
-static void remap_sdma_interrupts(struct hfi1_devdata *dd,
-				  int engine, int msix_intr)
+void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr)
 {
 	/*
 	 * SDMA engine interrupt sources grouped by type, rather than
@@ -13108,204 +13103,16 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd,
 	 *	SDMAProgress
 	 *	SDMAIdle
 	 */
-	remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine,
-		   msix_intr);
-	remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine,
-		   msix_intr);
-	remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
-		   msix_intr);
-}
-
-static int request_msix_irqs(struct hfi1_devdata *dd)
-{
-	int first_general, last_general;
-	int first_sdma, last_sdma;
-	int first_rx, last_rx;
-	int i, ret = 0;
-
-	/* calculate the ranges we are going to use */
-	first_general = 0;
-	last_general = first_general + 1;
-	first_sdma = last_general;
-	last_sdma = first_sdma + dd->num_sdma;
-	first_rx = last_sdma;
-	last_rx = first_rx + dd->n_krcv_queues + dd->num_vnic_contexts;
-
-	/* VNIC MSIx interrupts get mapped when VNIC contexts are created */
-	dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
-
-	/*
-	 * Sanity check - the code expects all SDMA chip source
-	 * interrupts to be in the same CSR, starting at bit 0.  Verify
-	 * that this is true by checking the bit location of the start.
-	 */
-	BUILD_BUG_ON(IS_SDMA_START % 64);
-
-	for (i = 0; i < dd->num_msix_entries; i++) {
-		struct hfi1_msix_entry *me = &dd->msix_entries[i];
-		const char *err_info;
-		irq_handler_t handler;
-		irq_handler_t thread = NULL;
-		void *arg = NULL;
-		int idx;
-		struct hfi1_ctxtdata *rcd = NULL;
-		struct sdma_engine *sde = NULL;
-		char name[MAX_NAME_SIZE];
-
-		/* obtain the arguments to pci_request_irq */
-		if (first_general <= i && i < last_general) {
-			idx = i - first_general;
-			handler = general_interrupt;
-			arg = dd;
-			snprintf(name, sizeof(name),
-				 DRIVER_NAME "_%d", dd->unit);
-			err_info = "general";
-			me->type = IRQ_GENERAL;
-		} else if (first_sdma <= i && i < last_sdma) {
-			idx = i - first_sdma;
-			sde = &dd->per_sdma[idx];
-			handler = sdma_interrupt;
-			arg = sde;
-			snprintf(name, sizeof(name),
-				 DRIVER_NAME "_%d sdma%d", dd->unit, idx);
-			err_info = "sdma";
-			remap_sdma_interrupts(dd, idx, i);
-			me->type = IRQ_SDMA;
-		} else if (first_rx <= i && i < last_rx) {
-			idx = i - first_rx;
-			rcd = hfi1_rcd_get_by_index_safe(dd, idx);
-			if (rcd) {
-				/*
-				 * Set the interrupt register and mask for this
-				 * context's interrupt.
-				 */
-				rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
-				rcd->imask = ((u64)1) <<
-					  ((IS_RCVAVAIL_START + idx) % 64);
-				handler = receive_context_interrupt;
-				thread = receive_context_thread;
-				arg = rcd;
-				snprintf(name, sizeof(name),
-					 DRIVER_NAME "_%d kctxt%d",
-					 dd->unit, idx);
-				err_info = "receive context";
-				remap_intr(dd, IS_RCVAVAIL_START + idx, i);
-				me->type = IRQ_RCVCTXT;
-				rcd->msix_intr = i;
-				hfi1_rcd_put(rcd);
-			}
-		} else {
-			/* not in our expected range - complain, then
-			 * ignore it
-			 */
-			dd_dev_err(dd,
-				   "Unexpected extra MSI-X interrupt %d\n", i);
-			continue;
-		}
-		/* no argument, no interrupt */
-		if (!arg)
-			continue;
-		/* make sure the name is terminated */
-		name[sizeof(name) - 1] = 0;
-		me->irq = pci_irq_vector(dd->pcidev, i);
-		ret = pci_request_irq(dd->pcidev, i, handler, thread, arg,
-				      name);
-		if (ret) {
-			dd_dev_err(dd,
-				   "unable to allocate %s interrupt, irq %d, index %d, err %d\n",
-				   err_info, me->irq, idx, ret);
-			return ret;
-		}
-		/*
-		 * assign arg after pci_request_irq call, so it will be
-		 * cleaned up
-		 */
-		me->arg = arg;
-
-		ret = hfi1_get_irq_affinity(dd, me);
-		if (ret)
-			dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
-	}
-
-	return ret;
-}
-
-void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
-{
-	int i;
-
-	for (i = 0; i < dd->vnic.num_ctxt; i++) {
-		struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
-		struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
-
-		synchronize_irq(me->irq);
-	}
-}
-
-void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
-{
-	struct hfi1_devdata *dd = rcd->dd;
-	struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
-
-	if (!me->arg) /* => no irq, no affinity */
-		return;
-
-	hfi1_put_irq_affinity(dd, me);
-	pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
-
-	me->arg = NULL;
-}
-
-void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
-{
-	struct hfi1_devdata *dd = rcd->dd;
-	struct hfi1_msix_entry *me;
-	int idx = rcd->ctxt;
-	void *arg = rcd;
-	int ret;
-
-	rcd->msix_intr = dd->vnic.msix_idx++;
-	me = &dd->msix_entries[rcd->msix_intr];
-
-	/*
-	 * Set the interrupt register and mask for this
-	 * context's interrupt.
-	 */
-	rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
-	rcd->imask = ((u64)1) <<
-		  ((IS_RCVAVAIL_START + idx) % 64);
-	me->type = IRQ_RCVCTXT;
-	me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr);
-	remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
-
-	ret = pci_request_irq(dd->pcidev, rcd->msix_intr,
-			      receive_context_interrupt,
-			      receive_context_thread, arg,
-			      DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
-	if (ret) {
-		dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n",
-			   me->irq, idx, ret);
-		return;
-	}
-	/*
-	 * assign arg after pci_request_irq call, so it will be
-	 * cleaned up
-	 */
-	me->arg = arg;
-
-	ret = hfi1_get_irq_affinity(dd, me);
-	if (ret) {
-		dd_dev_err(dd,
-			   "unable to pin IRQ %d\n", ret);
-		pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
-	}
+	remap_intr(dd, IS_SDMA_START + engine, msix_intr);
+	remap_intr(dd, IS_SDMA_PROGRESS_START + engine, msix_intr);
+	remap_intr(dd, IS_SDMA_IDLE_START + engine, msix_intr);
 }
 
 /*
  * Set the general handler to accept all interrupts, remap all
  * chip interrupts back to MSI-X 0.
  */
-static void reset_interrupts(struct hfi1_devdata *dd)
+void reset_interrupts(struct hfi1_devdata *dd)
 {
 	int i;
 
@@ -13318,54 +13125,33 @@ static void reset_interrupts(struct hfi1_devdata *dd)
 		write_csr(dd, CCE_INT_MAP + (8 * i), 0);
 }
 
+/**
+ * set_up_interrupts() - Initialize the IRQ resources and state
+ * @dd: valid devdata
+ *
+ */
 static int set_up_interrupts(struct hfi1_devdata *dd)
 {
-	u32 total;
-	int ret, request;
-
-	/*
-	 * Interrupt count:
-	 *	1 general, "slow path" interrupt (includes the SDMA engines
-	 *		slow source, SDMACleanupDone)
-	 *	N interrupts - one per used SDMA engine
-	 *	M interrupt - one per kernel receive context
-	 *	V interrupt - one for each VNIC context
-	 */
-	total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
-
-	/* ask for MSI-X interrupts */
-	request = request_msix(dd, total);
-	if (request < 0) {
-		ret = request;
-		goto fail;
-	} else {
-		dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
-					   GFP_KERNEL);
-		if (!dd->msix_entries) {
-			ret = -ENOMEM;
-			goto fail;
-		}
-		/* using MSI-X */
-		dd->num_msix_entries = total;
-		dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
-	}
+	int ret;
 
 	/* mask all interrupts */
-	set_intr_state(dd, 0);
+	set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
+
 	/* clear all pending interrupts */
 	clear_all_interrupts(dd);
 
 	/* reset general handler mask, chip MSI-X mappings */
 	reset_interrupts(dd);
 
-	ret = request_msix_irqs(dd);
+	/* ask for MSI-X interrupts */
+	ret = msix_initialize(dd);
 	if (ret)
-		goto fail;
+		return ret;
 
-	return 0;
+	ret = msix_request_irqs(dd);
+	if (ret)
+		msix_clean_up_interrupts(dd);
 
-fail:
-	hfi1_clean_up_interrupts(dd);
 	return ret;
 }
 
@@ -14918,20 +14704,16 @@ err_exit:
 }
 
 /**
- * Allocate and initialize the device structure for the hfi.
+ * hfi1_init_dd() - Initialize most of the dd structure.
  * @dev: the pci_dev for hfi1_ib device
  * @ent: pci_device_id struct for this dev
  *
- * Also allocates, initializes, and returns the devdata struct for this
- * device instance
- *
  * This is global, and is called directly at init to set up the
  * chip-specific function pointers for later use.
  */
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
-				  const struct pci_device_id *ent)
+int hfi1_init_dd(struct hfi1_devdata *dd)
 {
-	struct hfi1_devdata *dd;
+	struct pci_dev *pdev = dd->pcidev;
 	struct hfi1_pportdata *ppd;
 	u64 reg;
 	int i, ret;
@@ -14942,13 +14724,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 		"Functional simulator"
 	};
 	struct pci_dev *parent = pdev->bus->self;
-	u32 sdma_engines;
+	u32 sdma_engines = chip_sdma_engines(dd);
 
-	dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
-				sizeof(struct hfi1_pportdata));
-	if (IS_ERR(dd))
-		goto bail;
-	sdma_engines = chip_sdma_engines(dd);
 	ppd = dd->pport;
 	for (i = 0; i < dd->num_pports; i++, ppd++) {
 		int vl;
@@ -15127,6 +14904,12 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	if (ret)
 		goto bail_cleanup;
 
+	/*
+	 * This should probably occur in hfi1_pcie_init(), but historically
+	 * occurs after the do_pcie_gen3_transition() code.
+	 */
+	tune_pcie_caps(dd);
+
 	/* start setting dd values and adjusting CSRs */
 	init_early_variables(dd);
 
@@ -15239,14 +15022,13 @@ bail_free_cntrs:
 	free_cntrs(dd);
 bail_clear_intr:
 	hfi1_comp_vectors_clean_up(dd);
-	hfi1_clean_up_interrupts(dd);
+	msix_clean_up_interrupts(dd);
 bail_cleanup:
 	hfi1_pcie_ddcleanup(dd);
 bail_free:
 	hfi1_free_devdata(dd);
-	dd = ERR_PTR(ret);
 bail:
-	return dd;
+	return ret;
 }
 
 static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 36b04d6300e5..6b9c8f12dff8 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -52,9 +52,7 @@
  */
 
 /* sizes */
-#define CCE_NUM_MSIX_VECTORS 256
-#define CCE_NUM_INT_CSRS 12
-#define CCE_NUM_INT_MAP_CSRS 96
+#define BITS_PER_REGISTER (BITS_PER_BYTE * sizeof(u64))
 #define NUM_INTERRUPT_SOURCES 768
 #define RXE_NUM_CONTEXTS 160
 #define RXE_PER_CONTEXT_SIZE 0x1000	/* 4k */
@@ -161,34 +159,49 @@
 	(CR_CREDIT_RETURN_DUE_TO_FORCE_MASK << \
 	CR_CREDIT_RETURN_DUE_TO_FORCE_SHIFT)
 
-/* interrupt source numbers */
-#define IS_GENERAL_ERR_START	  0
-#define IS_SDMAENG_ERR_START	 16
-#define IS_SENDCTXT_ERR_START	 32
-#define IS_SDMA_START		192 /* includes SDmaProgress,SDmaIdle */
+/* Specific IRQ sources */
+#define CCE_ERR_INT		  0
+#define RXE_ERR_INT		  1
+#define MISC_ERR_INT		  2
+#define PIO_ERR_INT		  4
+#define SDMA_ERR_INT		  5
+#define EGRESS_ERR_INT		  6
+#define TXE_ERR_INT		  7
+#define PBC_INT			240
+#define GPIO_ASSERT_INT		241
+#define QSFP1_INT		242
+#define QSFP2_INT		243
+#define TCRIT_INT		244
+
+/* interrupt source ranges */
+#define IS_FIRST_SOURCE		CCE_ERR_INT
+#define IS_GENERAL_ERR_START		  0
+#define IS_SDMAENG_ERR_START		 16
+#define IS_SENDCTXT_ERR_START		 32
+#define IS_SDMA_START			192
+#define IS_SDMA_PROGRESS_START		208
+#define IS_SDMA_IDLE_START		224
 #define IS_VARIOUS_START		240
 #define IS_DC_START			248
 #define IS_RCVAVAIL_START		256
 #define IS_RCVURGENT_START		416
 #define IS_SENDCREDIT_START		576
 #define IS_RESERVED_START		736
-#define IS_MAX_SOURCES		768
+#define IS_LAST_SOURCE			767
 
 /* derived interrupt source values */
-#define IS_GENERAL_ERR_END		IS_SDMAENG_ERR_START
-#define IS_SDMAENG_ERR_END		IS_SENDCTXT_ERR_START
-#define IS_SENDCTXT_ERR_END		IS_SDMA_START
-#define IS_SDMA_END			IS_VARIOUS_START
-#define IS_VARIOUS_END		IS_DC_START
-#define IS_DC_END			IS_RCVAVAIL_START
-#define IS_RCVAVAIL_END		IS_RCVURGENT_START
-#define IS_RCVURGENT_END		IS_SENDCREDIT_START
-#define IS_SENDCREDIT_END		IS_RESERVED_START
-#define IS_RESERVED_END		IS_MAX_SOURCES
-
-/* absolute interrupt numbers for QSFP1Int and QSFP2Int */
-#define QSFP1_INT		242
-#define QSFP2_INT		243
+#define IS_GENERAL_ERR_END		7
+#define IS_SDMAENG_ERR_END		31
+#define IS_SENDCTXT_ERR_END		191
+#define IS_SDMA_END                     207
+#define IS_SDMA_PROGRESS_END            223
+#define IS_SDMA_IDLE_END		239
+#define IS_VARIOUS_END			244
+#define IS_DC_END			255
+#define IS_RCVAVAIL_END			415
+#define IS_RCVURGENT_END		575
+#define IS_SENDCREDIT_END		735
+#define IS_RESERVED_END			IS_LAST_SOURCE
 
 /* DCC_CFG_PORT_CONFIG logical link states */
 #define LSTATE_DOWN    0x1
@@ -1416,6 +1429,18 @@ void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
 void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
 void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
 
+irqreturn_t general_interrupt(int irq, void *data);
+irqreturn_t sdma_interrupt(int irq, void *data);
+irqreturn_t receive_context_interrupt(int irq, void *data);
+irqreturn_t receive_context_thread(int irq, void *data);
+
+int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set);
+void init_qsfp_int(struct hfi1_devdata *dd);
+void clear_all_interrupts(struct hfi1_devdata *dd);
+void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
+void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
+void reset_interrupts(struct hfi1_devdata *dd);
+
 /*
  * Interrupt source table.
  *
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index ee6dca5e2a2f..c6163a347e93 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -878,6 +878,10 @@
 #define SEND_CTRL (TXE + 0x000000000000)
 #define SEND_CTRL_CM_RESET_SMASK 0x4ull
 #define SEND_CTRL_SEND_ENABLE_SMASK 0x1ull
+#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
+#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xFFull
+#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
+		<< SEND_CTRL_UNSUPPORTED_VL_SHIFT)
 #define SEND_CTRL_VL_ARBITER_ENABLE_SMASK 0x2ull
 #define SEND_CTXT_CHECK_ENABLE (TXE + 0x000000100080)
 #define SEND_CTXT_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK 0x80ull
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 1fc75647e47b..c22ebc774a6a 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -681,7 +681,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 		     HFI1_RCVCTRL_TAILUPD_DIS |
 		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
 		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
-		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
+		     HFI1_RCVCTRL_NO_EGR_DROP_DIS |
+		     HFI1_RCVCTRL_URGENT_DIS, uctxt);
 	/* Clear the context's J_KEY */
 	hfi1_clear_ctxt_jkey(dd, uctxt);
 	/*
@@ -1096,6 +1097,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt)
 	hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);
 
 	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
+	rcvctrl_ops |= HFI1_RCVCTRL_URGENT_ENB;
 	if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
 		rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
 	/*
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index d9470317983f..1401b6ea4a28 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -80,6 +80,7 @@
 #include "qsfp.h"
 #include "platform.h"
 #include "affinity.h"
+#include "msix.h"
 
 /* bumped 1 from s/w major version of TrueScale */
 #define HFI1_CHIP_VERS_MAJ 3U
@@ -620,6 +621,8 @@ struct rvt_sge_state;
 #define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000
 #define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000
 #define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000
+#define HFI1_RCVCTRL_URGENT_ENB 0x40000
+#define HFI1_RCVCTRL_URGENT_DIS 0x80000
 
 /* partition enforcement flags */
 #define HFI1_PART_ENFORCE_IN	0x1
@@ -667,6 +670,14 @@ struct hfi1_msix_entry {
 	struct irq_affinity_notify notify;
 };
 
+struct hfi1_msix_info {
+	/* lock to synchronize in_use_msix access */
+	spinlock_t msix_lock;
+	DECLARE_BITMAP(in_use_msix, CCE_NUM_MSIX_VECTORS);
+	struct hfi1_msix_entry *msix_entries;
+	u16 max_requested;
+};
+
 /* per-SL CCA information */
 struct cca_timer {
 	struct hrtimer hrtimer;
@@ -992,7 +1003,6 @@ struct hfi1_vnic_data {
 	struct idr vesw_idr;
 	u8 rmt_start;
 	u8 num_ctxt;
-	u32 msix_idx;
 };
 
 struct hfi1_vnic_vport_info;
@@ -1205,11 +1215,6 @@ struct hfi1_devdata {
 
 	struct diag_client *diag_client;
 
-	/* MSI-X information */
-	struct hfi1_msix_entry *msix_entries;
-	u32 num_msix_entries;
-	u32 first_dyn_msix_idx;
-
 	/* general interrupt: mask of handled interrupts */
 	u64 gi_mask[CCE_NUM_INT_CSRS];
 
@@ -1223,6 +1228,9 @@ struct hfi1_devdata {
 	 */
 	struct timer_list synth_stats_timer;
 
+	/* MSI-X information */
+	struct hfi1_msix_info msix_info;
+
 	/*
 	 * device counters
 	 */
@@ -1349,6 +1357,8 @@ struct hfi1_devdata {
 
 	/* vnic data */
 	struct hfi1_vnic_data vnic;
+	/* Lock to protect IRQ SRC register access */
+	spinlock_t irq_src_lock;
 };
 
 static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
@@ -1431,9 +1441,6 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
 int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
 int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
 void set_all_slowpath(struct hfi1_devdata *dd);
-void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd);
-void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd);
-void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd);
 
 extern const struct pci_device_id hfi1_pci_tbl[];
 void hfi1_make_ud_req_9B(struct rvt_qp *qp,
@@ -1887,10 +1894,8 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
 #define HFI1_CTXT_WAITING_URG 4
 
 /* free up any allocated data at closes */
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
-				  const struct pci_device_id *ent);
+int hfi1_init_dd(struct hfi1_devdata *dd);
 void hfi1_free_devdata(struct hfi1_devdata *dd);
-struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
 
 /* LED beaconing functions */
 void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
@@ -1963,6 +1968,7 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
  */
 
 extern const char ib_hfi1_version[];
+extern const struct attribute_group ib_hfi1_attr_group;
 
 int hfi1_device_create(struct hfi1_devdata *dd);
 void hfi1_device_remove(struct hfi1_devdata *dd);
@@ -1974,16 +1980,15 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
 /* Hook for sysfs read of QSFP */
 int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
 
-int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent);
-void hfi1_clean_up_interrupts(struct hfi1_devdata *dd);
+int hfi1_pcie_init(struct hfi1_devdata *dd);
 void hfi1_pcie_cleanup(struct pci_dev *pdev);
 int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
 void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
 int pcie_speeds(struct hfi1_devdata *dd);
-int request_msix(struct hfi1_devdata *dd, u32 msireq);
 int restore_pci_variables(struct hfi1_devdata *dd);
 int save_pci_variables(struct hfi1_devdata *dd);
 int do_pcie_gen3_transition(struct hfi1_devdata *dd);
+void tune_pcie_caps(struct hfi1_devdata *dd);
 int parse_platform_config(struct hfi1_devdata *dd);
 int get_platform_config_field(struct hfi1_devdata *dd,
 			      enum platform_config_table_type_encoding
@@ -2124,19 +2129,6 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
 	return base_sdma_integrity;
 }
 
-/*
- * hfi1_early_err is used (only!) to print early errors before devdata is
- * allocated, or when dd->pcidev may not be valid, and at the tail end of
- * cleanup when devdata may have been freed, etc.  hfi1_dev_porterr is
- * the same as dd_dev_err, but is used when the message really needs
- * the IB port# to be definitive as to what's happening..
- */
-#define hfi1_early_err(dev, fmt, ...) \
-	dev_err(dev, fmt, ##__VA_ARGS__)
-
-#define hfi1_early_info(dev, fmt, ...) \
-	dev_info(dev, fmt, ##__VA_ARGS__)
-
 #define dd_dev_emerg(dd, fmt, ...) \
 	dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \
 		  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 758d273c32cf..09044905284f 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -83,6 +83,8 @@
 #define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
 #define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
 
+#define NUM_IB_PORTS 1
+
 /*
  * Number of user receive contexts we are configured to use (to allow for more
  * pio buffers per ctxt, etc.)  Zero means use one user context per CPU.
@@ -654,9 +656,8 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
 	ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
 
 	if (loopback) {
-		hfi1_early_err(&pdev->dev,
-			       "Faking data partition 0x8001 in idx %u\n",
-			       !default_pkey_idx);
+		dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n",
+			   !default_pkey_idx);
 		ppd->pkeys[!default_pkey_idx] = 0x8001;
 	}
 
@@ -702,9 +703,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
 	return;
 
 bail:
-
-	hfi1_early_err(&pdev->dev,
-		       "Congestion Control Agent disabled for port %d\n", port);
+	dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port);
 }
 
 /*
@@ -833,6 +832,23 @@ wq_error:
 }
 
 /**
+ * enable_general_intr() - Enable the IRQs that will be handled by the
+ * general interrupt handler.
+ * @dd: valid devdata
+ *
+ */
+static void enable_general_intr(struct hfi1_devdata *dd)
+{
+	set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true);
+	set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true);
+	set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true);
+	set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true);
+	set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true);
+	set_intr_bits(dd, IS_DC_START, IS_DC_END, true);
+	set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true);
+}
+
+/**
  * hfi1_init - do the actual initialization sequence on the chip
  * @dd: the hfi1_ib device
  * @reinit: re-initializing, so don't allocate new memory
@@ -916,6 +932,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
 				   "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
 			ret = lastfail;
 		}
+		/* enable IRQ */
 		hfi1_rcd_put(rcd);
 	}
 
@@ -954,7 +971,8 @@ done:
 			HFI1_STATUS_INITTED;
 	if (!ret) {
 		/* enable all interrupts from the chip */
-		set_intr_state(dd, 1);
+		enable_general_intr(dd);
+		init_qsfp_int(dd);
 
 		/* chip is OK for user apps; mark it as initialized */
 		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
@@ -1051,9 +1069,9 @@ static void shutdown_device(struct hfi1_devdata *dd)
 	}
 	dd->flags &= ~HFI1_INITTED;
 
-	/* mask and clean up interrupts, but not errors */
-	set_intr_state(dd, 0);
-	hfi1_clean_up_interrupts(dd);
+	/* mask and clean up interrupts */
+	set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
+	msix_clean_up_interrupts(dd);
 
 	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
 		ppd = dd->pport + pidx;
@@ -1246,15 +1264,19 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
 	kobject_put(&dd->kobj);
 }
 
-/*
- * Allocate our primary per-unit data structure.  Must be done via verbs
- * allocator, because the verbs cleanup process both does cleanup and
- * free of the data structure.
+/**
+ * hfi1_alloc_devdata - Allocate our primary per-unit data structure.
+ * @pdev: Valid PCI device
+ * @extra: How many bytes to alloc past the default
+ *
+ * Must be done via verbs allocator, because the verbs cleanup process
+ * both does cleanup and free of the data structure.
  * "extra" is for chip-specific data.
  *
  * Use the idr mechanism to get a unit number for this unit.
  */
-struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
+static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
+					       size_t extra)
 {
 	unsigned long flags;
 	struct hfi1_devdata *dd;
@@ -1287,8 +1309,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
 	idr_preload_end();
 
 	if (ret < 0) {
-		hfi1_early_err(&pdev->dev,
-			       "Could not allocate unit ID: error %d\n", -ret);
+		dev_err(&pdev->dev,
+			"Could not allocate unit ID: error %d\n", -ret);
 		goto bail;
 	}
 	rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
@@ -1309,6 +1331,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
 	spin_lock_init(&dd->pio_map_lock);
 	mutex_init(&dd->dc8051_lock);
 	init_waitqueue_head(&dd->event_queue);
+	spin_lock_init(&dd->irq_src_lock);
 
 	dd->int_counter = alloc_percpu(u64);
 	if (!dd->int_counter) {
@@ -1481,9 +1504,6 @@ static int __init hfi1_mod_init(void)
 	idr_init(&hfi1_unit_table);
 
 	hfi1_dbg_init();
-	ret = hfi1_wss_init();
-	if (ret < 0)
-		goto bail_wss;
 	ret = pci_register_driver(&hfi1_pci_driver);
 	if (ret < 0) {
 		pr_err("Unable to register driver: error %d\n", -ret);
@@ -1492,8 +1512,6 @@ static int __init hfi1_mod_init(void)
 	goto bail; /* all OK */
 
 bail_dev:
-	hfi1_wss_exit();
-bail_wss:
 	hfi1_dbg_exit();
 	idr_destroy(&hfi1_unit_table);
 	dev_cleanup();
@@ -1510,7 +1528,6 @@ static void __exit hfi1_mod_cleanup(void)
 {
 	pci_unregister_driver(&hfi1_pci_driver);
 	node_affinity_destroy_all();
-	hfi1_wss_exit();
 	hfi1_dbg_exit();
 
 	idr_destroy(&hfi1_unit_table);
@@ -1604,23 +1621,23 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
 	hfi1_free_devdata(dd);
 }
 
-static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt)
+static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
 {
 	if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
-		hfi1_early_err(dev, "Receive header queue count too small\n");
+		dd_dev_err(dd, "Receive header queue count too small\n");
 		return -EINVAL;
 	}
 
 	if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
-		hfi1_early_err(dev,
-			       "Receive header queue count cannot be greater than %u\n",
-			       HFI1_MAX_HDRQ_EGRBUF_CNT);
+		dd_dev_err(dd,
+			   "Receive header queue count cannot be greater than %u\n",
+			   HFI1_MAX_HDRQ_EGRBUF_CNT);
 		return -EINVAL;
 	}
 
 	if (thecnt % HDRQ_INCREMENT) {
-		hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n",
-			       thecnt, HDRQ_INCREMENT);
+		dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
+			   thecnt, HDRQ_INCREMENT);
 		return -EINVAL;
 	}
 
@@ -1639,22 +1656,29 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	/* Validate dev ids */
 	if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
 	      ent->device == PCI_DEVICE_ID_INTEL1)) {
-		hfi1_early_err(&pdev->dev,
-			       "Failing on unknown Intel deviceid 0x%x\n",
-			       ent->device);
+		dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n",
+			ent->device);
 		ret = -ENODEV;
 		goto bail;
 	}
 
+	/* Allocate the dd so we can get to work */
+	dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
+				sizeof(struct hfi1_pportdata));
+	if (IS_ERR(dd)) {
+		ret = PTR_ERR(dd);
+		goto bail;
+	}
+
 	/* Validate some global module parameters */
-	ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
+	ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt);
 	if (ret)
 		goto bail;
 
 	/* use the encoding function as a sanitization check */
 	if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
-		hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
-			       hfi1_hdrq_entsize);
+		dd_dev_err(dd, "Invalid HdrQ Entry size %u\n",
+			   hfi1_hdrq_entsize);
 		ret = -EINVAL;
 		goto bail;
 	}
@@ -1676,10 +1700,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			clamp_val(eager_buffer_size,
 				  MIN_EAGER_BUFFER * 8,
 				  MAX_EAGER_BUFFER_TOTAL);
-		hfi1_early_info(&pdev->dev, "Eager buffer size %u\n",
-				eager_buffer_size);
+		dd_dev_info(dd, "Eager buffer size %u\n",
+			    eager_buffer_size);
 	} else {
-		hfi1_early_err(&pdev->dev, "Invalid Eager buffer size of 0\n");
+		dd_dev_err(dd, "Invalid Eager buffer size of 0\n");
 		ret = -EINVAL;
 		goto bail;
 	}
@@ -1687,7 +1711,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	/* restrict value of hfi1_rcvarr_split */
 	hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100);
 
-	ret = hfi1_pcie_init(pdev, ent);
+	ret = hfi1_pcie_init(dd);
 	if (ret)
 		goto bail;
 
@@ -1695,12 +1719,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 * Do device-specific initialization, function table setup, dd
 	 * allocation, etc.
 	 */
-	dd = hfi1_init_dd(pdev, ent);
-
-	if (IS_ERR(dd)) {
-		ret = PTR_ERR(dd);
+	ret = hfi1_init_dd(dd);
+	if (ret)
 		goto clean_bail; /* error already printed */
-	}
 
 	ret = create_workqueues(dd);
 	if (ret)
@@ -1731,7 +1752,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
 
 	if (initfail || ret) {
-		hfi1_clean_up_interrupts(dd);
+		msix_clean_up_interrupts(dd);
 		stop_timers(dd);
 		flush_workqueue(ib_wq);
 		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c
new file mode 100644
index 000000000000..582f1ba136ff
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/iowait.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#include "iowait.h"
+#include "trace_iowait.h"
+
+void iowait_set_flag(struct iowait *wait, u32 flag)
+{
+	trace_hfi1_iowait_set(wait, flag);
+	set_bit(flag, &wait->flags);
+}
+
+bool iowait_flag_set(struct iowait *wait, u32 flag)
+{
+	return test_bit(flag, &wait->flags);
+}
+
+inline void iowait_clear_flag(struct iowait *wait, u32 flag)
+{
+	trace_hfi1_iowait_clear(wait, flag);
+	clear_bit(flag, &wait->flags);
+}
+
+/**
+ * iowait_init() - initialize wait structure
+ * @wait: wait struct to initialize
+ * @tx_limit: limit for overflow queuing
+ * @func: restart function for workqueue
+ * @sleep: sleep function for no space
+ * @resume: wakeup function for no space
+ *
+ * This function initializes the iowait
+ * structure embedded in the QP or PQ.
+ *
+ */
+void iowait_init(struct iowait *wait, u32 tx_limit,
+		 void (*func)(struct work_struct *work),
+		 void (*tidfunc)(struct work_struct *work),
+		 int (*sleep)(struct sdma_engine *sde,
+			      struct iowait_work *wait,
+			      struct sdma_txreq *tx,
+			      uint seq,
+			      bool pkts_sent),
+		 void (*wakeup)(struct iowait *wait, int reason),
+		 void (*sdma_drained)(struct iowait *wait))
+{
+	int i;
+
+	wait->count = 0;
+	INIT_LIST_HEAD(&wait->list);
+	init_waitqueue_head(&wait->wait_dma);
+	init_waitqueue_head(&wait->wait_pio);
+	atomic_set(&wait->sdma_busy, 0);
+	atomic_set(&wait->pio_busy, 0);
+	wait->tx_limit = tx_limit;
+	wait->sleep = sleep;
+	wait->wakeup = wakeup;
+	wait->sdma_drained = sdma_drained;
+	wait->flags = 0;
+	for (i = 0; i < IOWAIT_SES; i++) {
+		wait->wait[i].iow = wait;
+		INIT_LIST_HEAD(&wait->wait[i].tx_head);
+		if (i == IOWAIT_IB_SE)
+			INIT_WORK(&wait->wait[i].iowork, func);
+		else
+			INIT_WORK(&wait->wait[i].iowork, tidfunc);
+	}
+}
+
+/**
+ * iowait_cancel_work - cancel all work in iowait
+ * @w: the iowait struct
+ */
+void iowait_cancel_work(struct iowait *w)
+{
+	cancel_work_sync(&iowait_get_ib_work(w)->iowork);
+	cancel_work_sync(&iowait_get_tid_work(w)->iowork);
+}
+
+/**
+ * iowait_set_work_flag - set work flag based on leg
+ * @w - the iowait work struct
+ */
+int iowait_set_work_flag(struct iowait_work *w)
+{
+	if (w == &w->iow->wait[IOWAIT_IB_SE]) {
+		iowait_set_flag(w->iow, IOWAIT_PENDING_IB);
+		return IOWAIT_IB_SE;
+	}
+	iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
+	return IOWAIT_TID_SE;
+}
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 3d9c32c7c340..23a58ac0d47c 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -1,7 +1,7 @@
 #ifndef _HFI1_IOWAIT_H
 #define _HFI1_IOWAIT_H
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -49,6 +49,7 @@
 
 #include <linux/list.h>
 #include <linux/workqueue.h>
+#include <linux/wait.h>
 #include <linux/sched.h>
 
 #include "sdma_txreq.h"
@@ -59,16 +60,47 @@
  */
 typedef void (*restart_t)(struct work_struct *work);
 
+#define IOWAIT_PENDING_IB  0x0
+#define IOWAIT_PENDING_TID 0x1
+
+/*
+ * A QP can have multiple Send Engines (SEs).
+ *
+ * The current use case is for supporting a TID RDMA
+ * packet build/xmit mechanism independent from verbs.
+ */
+#define IOWAIT_SES 2
+#define IOWAIT_IB_SE 0
+#define IOWAIT_TID_SE 1
+
 struct sdma_txreq;
 struct sdma_engine;
 /**
- * struct iowait - linkage for delayed progress/waiting
+ * @iowork: the work struct
+ * @tx_head: list of prebuilt packets
+ * @iow: the parent iowait structure
+ *
+ * This structure is the work item (process) specific
+ * details associated with the each of the two SEs of the
+ * QP.
+ *
+ * The workstruct and the queued TXs are unique to each
+ * SE.
+ */
+struct iowait;
+struct iowait_work {
+	struct work_struct iowork;
+	struct list_head tx_head;
+	struct iowait *iow;
+};
+
+/**
  * @list: used to add/insert into QP/PQ wait lists
- * @lock: uses to record the list head lock
  * @tx_head: overflow list of sdma_txreq's
  * @sleep: no space callback
  * @wakeup: space callback wakeup
  * @sdma_drained: sdma count drained
+ * @lock: lock protected head of wait queue
  * @iowork: workqueue overhead
  * @wait_dma: wait for sdma_busy == 0
  * @wait_pio: wait for pio_busy == 0
@@ -76,6 +108,8 @@ struct sdma_engine;
  * @count: total number of descriptors in tx_head'ed list
  * @tx_limit: limit for overflow queuing
  * @tx_count: number of tx entry's in tx_head'ed list
+ * @flags: wait flags (one per QP)
+ * @wait: SE array
  *
  * This is to be embedded in user's state structure
  * (QP or PQ).
@@ -98,13 +132,11 @@ struct sdma_engine;
  * Waiters explicity know that, but the destroy
  * code that unwaits QPs does not.
  */
-
 struct iowait {
 	struct list_head list;
-	struct list_head tx_head;
 	int (*sleep)(
 		struct sdma_engine *sde,
-		struct iowait *wait,
+		struct iowait_work *wait,
 		struct sdma_txreq *tx,
 		uint seq,
 		bool pkts_sent
@@ -112,7 +144,6 @@ struct iowait {
 	void (*wakeup)(struct iowait *wait, int reason);
 	void (*sdma_drained)(struct iowait *wait);
 	seqlock_t *lock;
-	struct work_struct iowork;
 	wait_queue_head_t wait_dma;
 	wait_queue_head_t wait_pio;
 	atomic_t sdma_busy;
@@ -121,63 +152,37 @@ struct iowait {
 	u32 tx_limit;
 	u32 tx_count;
 	u8 starved_cnt;
+	unsigned long flags;
+	struct iowait_work wait[IOWAIT_SES];
 };
 
 #define SDMA_AVAIL_REASON 0
 
-/**
- * iowait_init() - initialize wait structure
- * @wait: wait struct to initialize
- * @tx_limit: limit for overflow queuing
- * @func: restart function for workqueue
- * @sleep: sleep function for no space
- * @resume: wakeup function for no space
- *
- * This function initializes the iowait
- * structure embedded in the QP or PQ.
- *
- */
+void iowait_set_flag(struct iowait *wait, u32 flag);
+bool iowait_flag_set(struct iowait *wait, u32 flag);
+void iowait_clear_flag(struct iowait *wait, u32 flag);
 
-static inline void iowait_init(
-	struct iowait *wait,
-	u32 tx_limit,
-	void (*func)(struct work_struct *work),
-	int (*sleep)(
-		struct sdma_engine *sde,
-		struct iowait *wait,
-		struct sdma_txreq *tx,
-		uint seq,
-		bool pkts_sent),
-	void (*wakeup)(struct iowait *wait, int reason),
-	void (*sdma_drained)(struct iowait *wait))
-{
-	wait->count = 0;
-	wait->lock = NULL;
-	INIT_LIST_HEAD(&wait->list);
-	INIT_LIST_HEAD(&wait->tx_head);
-	INIT_WORK(&wait->iowork, func);
-	init_waitqueue_head(&wait->wait_dma);
-	init_waitqueue_head(&wait->wait_pio);
-	atomic_set(&wait->sdma_busy, 0);
-	atomic_set(&wait->pio_busy, 0);
-	wait->tx_limit = tx_limit;
-	wait->sleep = sleep;
-	wait->wakeup = wakeup;
-	wait->sdma_drained = sdma_drained;
-}
+void iowait_init(struct iowait *wait, u32 tx_limit,
+		 void (*func)(struct work_struct *work),
+		 void (*tidfunc)(struct work_struct *work),
+		 int (*sleep)(struct sdma_engine *sde,
+			      struct iowait_work *wait,
+			      struct sdma_txreq *tx,
+			      uint seq,
+			      bool pkts_sent),
+		 void (*wakeup)(struct iowait *wait, int reason),
+		 void (*sdma_drained)(struct iowait *wait));
 
 /**
- * iowait_schedule() - initialize wait structure
+ * iowait_schedule() - schedule the default send engine work
  * @wait: wait struct to schedule
  * @wq: workqueue for schedule
  * @cpu: cpu
  */
-static inline void iowait_schedule(
-	struct iowait *wait,
-	struct workqueue_struct *wq,
-	int cpu)
+static inline bool iowait_schedule(struct iowait *wait,
+				   struct workqueue_struct *wq, int cpu)
 {
-	queue_work_on(cpu, wq, &wait->iowork);
+	return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
 }
 
 /**
@@ -228,6 +233,8 @@ static inline void iowait_sdma_add(struct iowait *wait, int count)
  */
 static inline int iowait_sdma_dec(struct iowait *wait)
 {
+	if (!wait)
+		return 0;
 	return atomic_dec_and_test(&wait->sdma_busy);
 }
 
@@ -267,11 +274,13 @@ static inline void iowait_pio_inc(struct iowait *wait)
 }
 
 /**
- * iowait_sdma_dec - note pio complete
+ * iowait_pio_dec - note pio complete
  * @wait: iowait structure
  */
 static inline int iowait_pio_dec(struct iowait *wait)
 {
+	if (!wait)
+		return 0;
 	return atomic_dec_and_test(&wait->pio_busy);
 }
 
@@ -293,9 +302,9 @@ static inline void iowait_drain_wakeup(struct iowait *wait)
 /**
  * iowait_get_txhead() - get packet off of iowait list
  *
- * @wait wait struture
+ * @wait iowait_work struture
  */
-static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
+static inline struct sdma_txreq *iowait_get_txhead(struct iowait_work *wait)
 {
 	struct sdma_txreq *tx = NULL;
 
@@ -309,6 +318,28 @@ static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
 	return tx;
 }
 
+static inline u16 iowait_get_desc(struct iowait_work *w)
+{
+	u16 num_desc = 0;
+	struct sdma_txreq *tx = NULL;
+
+	if (!list_empty(&w->tx_head)) {
+		tx = list_first_entry(&w->tx_head, struct sdma_txreq,
+				      list);
+		num_desc = tx->num_desc;
+	}
+	return num_desc;
+}
+
+static inline u32 iowait_get_all_desc(struct iowait *w)
+{
+	u32 num_desc = 0;
+
+	num_desc = iowait_get_desc(&w->wait[IOWAIT_IB_SE]);
+	num_desc += iowait_get_desc(&w->wait[IOWAIT_TID_SE]);
+	return num_desc;
+}
+
 /**
  * iowait_queue - Put the iowait on a wait queue
  * @pkts_sent: have some packets been sent before queuing?
@@ -372,12 +403,57 @@ static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
 }
 
 /**
- * iowait_packet_queued() - determine if a packet is already built
- * @wait: the wait structure
+ * iowait_packet_queued() - determine if a packet is queued
+ * @wait: the iowait_work structure
  */
-static inline bool iowait_packet_queued(struct iowait *wait)
+static inline bool iowait_packet_queued(struct iowait_work *wait)
 {
 	return !list_empty(&wait->tx_head);
 }
 
+/**
+ * inc_wait_count - increment wait counts
+ * @w: the log work struct
+ * @n: the count
+ */
+static inline void iowait_inc_wait_count(struct iowait_work *w, u16 n)
+{
+	if (!w)
+		return;
+	w->iow->tx_count++;
+	w->iow->count += n;
+}
+
+/**
+ * iowait_get_tid_work - return iowait_work for tid SE
+ * @w: the iowait struct
+ */
+static inline struct iowait_work *iowait_get_tid_work(struct iowait *w)
+{
+	return &w->wait[IOWAIT_TID_SE];
+}
+
+/**
+ * iowait_get_ib_work - return iowait_work for ib SE
+ * @w: the iowait struct
+ */
+static inline struct iowait_work *iowait_get_ib_work(struct iowait *w)
+{
+	return &w->wait[IOWAIT_IB_SE];
+}
+
+/**
+ * iowait_ioww_to_iow - return iowait given iowait_work
+ * @w: the iowait_work struct
+ */
+static inline struct iowait *iowait_ioww_to_iow(struct iowait_work *w)
+{
+	if (likely(w))
+		return w->iow;
+	return NULL;
+}
+
+void iowait_cancel_work(struct iowait *w);
+int iowait_set_work_flag(struct iowait_work *w);
+
 #endif
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 0307405491e0..88a0cf930136 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015-2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -4836,7 +4836,7 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
 	int ret;
 	int pkey_idx;
 	int local_mad = 0;
-	u32 resp_len = 0;
+	u32 resp_len = in_wc->byte_len - sizeof(*in_grh);
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 
 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c
new file mode 100644
index 000000000000..d920b165d696
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/msix.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hfi.h"
+#include "affinity.h"
+#include "sdma.h"
+
+/**
+ * msix_initialize() - Calculate, request and configure MSIx IRQs
+ * @dd: valid hfi1 devdata
+ *
+ */
+int msix_initialize(struct hfi1_devdata *dd)
+{
+	u32 total;
+	int ret;
+	struct hfi1_msix_entry *entries;
+
+	/*
+	 * MSIx interrupt count:
+	 *	one for the general, "slow path" interrupt
+	 *	one per used SDMA engine
+	 *	one per kernel receive context
+	 *	one for each VNIC context
+	 *      ...any new IRQs should be added here.
+	 */
+	total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
+
+	if (total >= CCE_NUM_MSIX_VECTORS)
+		return -EINVAL;
+
+	ret = pci_alloc_irq_vectors(dd->pcidev, total, total, PCI_IRQ_MSIX);
+	if (ret < 0) {
+		dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", ret);
+		return ret;
+	}
+
+	entries = kcalloc(total, sizeof(*dd->msix_info.msix_entries),
+			  GFP_KERNEL);
+	if (!entries) {
+		pci_free_irq_vectors(dd->pcidev);
+		return -ENOMEM;
+	}
+
+	dd->msix_info.msix_entries = entries;
+	spin_lock_init(&dd->msix_info.msix_lock);
+	bitmap_zero(dd->msix_info.in_use_msix, total);
+	dd->msix_info.max_requested = total;
+	dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
+
+	return 0;
+}
+
+/**
+ * msix_request_irq() - Allocate a free MSIx IRQ
+ * @dd: valid devdata
+ * @arg: context information for the IRQ
+ * @handler: IRQ handler
+ * @thread: IRQ thread handler (could be NULL)
+ * @idx: zero base idx if multiple devices are needed
+ * @type: affinty IRQ type
+ *
+ * Allocated an MSIx vector if available, and then create the appropriate
+ * meta data needed to keep track of the pci IRQ request.
+ *
+ * Return:
+ *   < 0   Error
+ *   >= 0  MSIx vector
+ *
+ */
+static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
+			    irq_handler_t handler, irq_handler_t thread,
+			    u32 idx, enum irq_type type)
+{
+	unsigned long nr;
+	int irq;
+	int ret;
+	const char *err_info;
+	char name[MAX_NAME_SIZE];
+	struct hfi1_msix_entry *me;
+
+	/* Allocate an MSIx vector */
+	spin_lock(&dd->msix_info.msix_lock);
+	nr = find_first_zero_bit(dd->msix_info.in_use_msix,
+				 dd->msix_info.max_requested);
+	if (nr < dd->msix_info.max_requested)
+		__set_bit(nr, dd->msix_info.in_use_msix);
+	spin_unlock(&dd->msix_info.msix_lock);
+
+	if (nr == dd->msix_info.max_requested)
+		return -ENOSPC;
+
+	/* Specific verification and determine the name */
+	switch (type) {
+	case IRQ_GENERAL:
+		/* general interrupt must be MSIx vector 0 */
+		if (nr) {
+			spin_lock(&dd->msix_info.msix_lock);
+			__clear_bit(nr, dd->msix_info.in_use_msix);
+			spin_unlock(&dd->msix_info.msix_lock);
+			dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n",
+				   nr);
+			return -EINVAL;
+		}
+		snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
+		err_info = "general";
+		break;
+	case IRQ_SDMA:
+		snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
+			 dd->unit, idx);
+		err_info = "sdma";
+		break;
+	case IRQ_RCVCTXT:
+		snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
+			 dd->unit, idx);
+		err_info = "receive context";
+		break;
+	case IRQ_OTHER:
+	default:
+		return -EINVAL;
+	}
+	name[sizeof(name) - 1] = 0;
+
+	irq = pci_irq_vector(dd->pcidev, nr);
+	ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name);
+	if (ret) {
+		dd_dev_err(dd,
+			   "%s: request for IRQ %d failed, MSIx %d, err %d\n",
+			   err_info, irq, idx, ret);
+		spin_lock(&dd->msix_info.msix_lock);
+		__clear_bit(nr, dd->msix_info.in_use_msix);
+		spin_unlock(&dd->msix_info.msix_lock);
+		return ret;
+	}
+
+	/*
+	 * assign arg after pci_request_irq call, so it will be
+	 * cleaned up
+	 */
+	me = &dd->msix_info.msix_entries[nr];
+	me->irq = irq;
+	me->arg = arg;
+	me->type = type;
+
+	/* This is a request, so a failure is not fatal */
+	ret = hfi1_get_irq_affinity(dd, me);
+	if (ret)
+		dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
+
+	return nr;
+}
+
+/**
+ * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
+ * @rcd: valid rcd context
+ *
+ */
+int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+{
+	int nr;
+
+	nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt,
+			      receive_context_thread, rcd->ctxt, IRQ_RCVCTXT);
+	if (nr < 0)
+		return nr;
+
+	/*
+	 * Set the interrupt register and mask for this
+	 * context's interrupt.
+	 */
+	rcd->ireg = (IS_RCVAVAIL_START + rcd->ctxt) / 64;
+	rcd->imask = ((u64)1) << ((IS_RCVAVAIL_START + rcd->ctxt) % 64);
+	rcd->msix_intr = nr;
+	remap_intr(rcd->dd, IS_RCVAVAIL_START + rcd->ctxt, nr);
+
+	return 0;
+}
+
+/**
+ * msix_request_smda_ira() - Helper for getting SDMA IRQ resources
+ * @sde: valid sdma engine
+ *
+ */
+int msix_request_sdma_irq(struct sdma_engine *sde)
+{
+	int nr;
+
+	nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL,
+			      sde->this_idx, IRQ_SDMA);
+	if (nr < 0)
+		return nr;
+	sde->msix_intr = nr;
+	remap_sdma_interrupts(sde->dd, sde->this_idx, nr);
+
+	return 0;
+}
+
+/**
+ * enable_sdma_src() - Helper to enable SDMA IRQ srcs
+ * @dd: valid devdata structure
+ * @i: index of SDMA engine
+ */
+static void enable_sdma_srcs(struct hfi1_devdata *dd, int i)
+{
+	set_intr_bits(dd, IS_SDMA_START + i, IS_SDMA_START + i, true);
+	set_intr_bits(dd, IS_SDMA_PROGRESS_START + i,
+		      IS_SDMA_PROGRESS_START + i, true);
+	set_intr_bits(dd, IS_SDMA_IDLE_START + i, IS_SDMA_IDLE_START + i, true);
+	set_intr_bits(dd, IS_SDMAENG_ERR_START + i, IS_SDMAENG_ERR_START + i,
+		      true);
+}
+
+/**
+ * msix_request_irqs() - Allocate all MSIx IRQs
+ * @dd: valid devdata structure
+ *
+ * Helper function to request the used MSIx IRQs.
+ *
+ */
+int msix_request_irqs(struct hfi1_devdata *dd)
+{
+	int i;
+	int ret;
+
+	ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < dd->num_sdma; i++) {
+		struct sdma_engine *sde = &dd->per_sdma[i];
+
+		ret = msix_request_sdma_irq(sde);
+		if (ret)
+			return ret;
+		enable_sdma_srcs(sde->dd, i);
+	}
+
+	for (i = 0; i < dd->n_krcv_queues; i++) {
+		struct hfi1_ctxtdata *rcd = hfi1_rcd_get_by_index_safe(dd, i);
+
+		if (rcd)
+			ret = msix_request_rcd_irq(rcd);
+		hfi1_rcd_put(rcd);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * msix_free_irq() - Free the specified MSIx resources and IRQ
+ * @dd: valid devdata
+ * @msix_intr: MSIx vector to free.
+ *
+ */
+void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr)
+{
+	struct hfi1_msix_entry *me;
+
+	if (msix_intr >= dd->msix_info.max_requested)
+		return;
+
+	me = &dd->msix_info.msix_entries[msix_intr];
+
+	if (!me->arg) /* => no irq, no affinity */
+		return;
+
+	hfi1_put_irq_affinity(dd, me);
+	pci_free_irq(dd->pcidev, msix_intr, me->arg);
+
+	me->arg = NULL;
+
+	spin_lock(&dd->msix_info.msix_lock);
+	__clear_bit(msix_intr, dd->msix_info.in_use_msix);
+	spin_unlock(&dd->msix_info.msix_lock);
+}
+
+/**
+ * hfi1_clean_up_msix_interrupts() - Free all MSIx IRQ resources
+ * @dd: valid device data data structure
+ *
+ * Free the MSIx and associated PCI resources, if they have been allocated.
+ */
+void msix_clean_up_interrupts(struct hfi1_devdata *dd)
+{
+	int i;
+	struct hfi1_msix_entry *me = dd->msix_info.msix_entries;
+
+	/* remove irqs - must happen before disabling/turning off */
+	for (i = 0; i < dd->msix_info.max_requested; i++, me++)
+		msix_free_irq(dd, i);
+
+	/* clean structures */
+	kfree(dd->msix_info.msix_entries);
+	dd->msix_info.msix_entries = NULL;
+	dd->msix_info.max_requested = 0;
+
+	pci_free_irq_vectors(dd->pcidev);
+}
+
+/**
+ * msix_vnic_syncrhonize_irq() - Vnic IRQ synchronize
+ * @dd: valid devdata
+ */
+void msix_vnic_synchronize_irq(struct hfi1_devdata *dd)
+{
+	int i;
+
+	for (i = 0; i < dd->vnic.num_ctxt; i++) {
+		struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
+		struct hfi1_msix_entry *me;
+
+		me = &dd->msix_info.msix_entries[rcd->msix_intr];
+
+		synchronize_irq(me->irq);
+	}
+}
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/drivers/infiniband/hw/hfi1/msix.h
index d08805032f01..a514881632a4 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
+++ b/drivers/infiniband/hw/hfi1/msix.h
@@ -1,13 +1,12 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
- * Initialization code for multi buffer SHA256 algorithm for AVX2
+ * Copyright(c) 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2016 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -17,26 +16,21 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
- * Contact Information:
- *     Megha Dey <megha.dey@linux.intel.com>
- *
  * BSD LICENSE
  *
- * Copyright(c) 2016 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
@@ -49,21 +43,22 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
  */
+#ifndef _HFI1_MSIX_H
+#define _HFI1_MSIX_H
 
-#include "sha512_mb_mgr.h"
+#include "hfi.h"
 
-void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
-{
-	unsigned int j;
+/* MSIx interface */
+int msix_initialize(struct hfi1_devdata *dd);
+int msix_request_irqs(struct hfi1_devdata *dd);
+void msix_clean_up_interrupts(struct hfi1_devdata *dd);
+int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd);
+int msix_request_sdma_irq(struct sdma_engine *sde);
+void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
 
-	/* initially all lanes are unused */
-	state->lens[0] = 0xFFFFFFFF00000000;
-	state->lens[1] = 0xFFFFFFFF00000001;
-	state->lens[2] = 0xFFFFFFFF00000002;
-	state->lens[3] = 0xFFFFFFFF00000003;
+/* VNIC interface */
+void msix_vnic_synchronize_irq(struct hfi1_devdata *dd);
 
-	state->unused_lanes = 0xFF03020100;
-	for (j = 0; j < 4; j++)
-		state->ldata[j].job_in_lane = NULL;
-}
+#endif
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index cca413eaa74e..c96d193bb236 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -61,19 +61,12 @@
  */
 
 /*
- * Code to adjust PCIe capabilities.
- */
-static void tune_pcie_caps(struct hfi1_devdata *);
-
-/*
  * Do all the common PCIe setup and initialization.
- * devdata is not yet allocated, and is not allocated until after this
- * routine returns success.  Therefore dd_dev_err() can't be used for error
- * printing.
  */
-int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
+int hfi1_pcie_init(struct hfi1_devdata *dd)
 {
 	int ret;
+	struct pci_dev *pdev = dd->pcidev;
 
 	ret = pci_enable_device(pdev);
 	if (ret) {
@@ -89,15 +82,13 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
 		 * about that, it appears.  If the original BAR was retained
 		 * in the kernel data structures, this may be OK.
 		 */
-		hfi1_early_err(&pdev->dev, "pci enable failed: error %d\n",
-			       -ret);
-		goto done;
+		dd_dev_err(dd, "pci enable failed: error %d\n", -ret);
+		return ret;
 	}
 
 	ret = pci_request_regions(pdev, DRIVER_NAME);
 	if (ret) {
-		hfi1_early_err(&pdev->dev,
-			       "pci_request_regions fails: err %d\n", -ret);
+		dd_dev_err(dd, "pci_request_regions fails: err %d\n", -ret);
 		goto bail;
 	}
 
@@ -110,8 +101,7 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
 		 */
 		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (ret) {
-			hfi1_early_err(&pdev->dev,
-				       "Unable to set DMA mask: %d\n", ret);
+			dd_dev_err(dd, "Unable to set DMA mask: %d\n", ret);
 			goto bail;
 		}
 		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
@@ -119,18 +109,16 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
 		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
 	}
 	if (ret) {
-		hfi1_early_err(&pdev->dev,
-			       "Unable to set DMA consistent mask: %d\n", ret);
+		dd_dev_err(dd, "Unable to set DMA consistent mask: %d\n", ret);
 		goto bail;
 	}
 
 	pci_set_master(pdev);
 	(void)pci_enable_pcie_error_reporting(pdev);
-	goto done;
+	return 0;
 
 bail:
 	hfi1_pcie_cleanup(pdev);
-done:
 	return ret;
 }
 
@@ -206,7 +194,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
 		dd_dev_err(dd, "WC mapping of send buffers failed\n");
 		goto nomem;
 	}
-	dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE);
+	dd_dev_info(dd, "WC piobase: %p for %x\n", dd->piobase, TXE_PIO_SIZE);
 
 	dd->physaddr = addr;        /* used for io_remap, etc. */
 
@@ -344,26 +332,6 @@ int pcie_speeds(struct hfi1_devdata *dd)
 	return 0;
 }
 
-/*
- * Returns:
- *	- actual number of interrupts allocated or
- *      - error
- */
-int request_msix(struct hfi1_devdata *dd, u32 msireq)
-{
-	int nvec;
-
-	nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX);
-	if (nvec < 0) {
-		dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec);
-		return nvec;
-	}
-
-	tune_pcie_caps(dd);
-
-	return nvec;
-}
-
 /* restore command and BARs after a reset has wiped them out */
 int restore_pci_variables(struct hfi1_devdata *dd)
 {
@@ -479,14 +447,19 @@ error:
  * Check and optionally adjust them to maximize our throughput.
  */
 static int hfi1_pcie_caps;
-module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO);
+module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444);
 MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
 
 uint aspm_mode = ASPM_MODE_DISABLED;
-module_param_named(aspm, aspm_mode, uint, S_IRUGO);
+module_param_named(aspm, aspm_mode, uint, 0444);
 MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
 
-static void tune_pcie_caps(struct hfi1_devdata *dd)
+/**
+ * tune_pcie_caps() - Code to adjust PCIe capabilities.
+ * @dd: Valid device data structure
+ *
+ */
+void tune_pcie_caps(struct hfi1_devdata *dd)
 {
 	struct pci_dev *parent;
 	u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
@@ -1028,6 +1001,7 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
 	const u8 (*ctle_tunings)[4];
 	uint static_ctle_mode;
 	int return_error = 0;
+	u32 target_width;
 
 	/* PCIe Gen3 is for the ASIC only */
 	if (dd->icode != ICODE_RTL_SILICON)
@@ -1067,6 +1041,9 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
 		return 0;
 	}
 
+	/* Previous Gen1/Gen2 bus width */
+	target_width = dd->lbus_width;
+
 	/*
 	 * Do the Gen3 transition.  Steps are those of the PCIe Gen3
 	 * recipe.
@@ -1435,11 +1412,12 @@ retry:
 	dd_dev_info(dd, "%s: new speed and width: %s\n", __func__,
 		    dd->lbus_info);
 
-	if (dd->lbus_speed != target_speed) { /* not target */
+	if (dd->lbus_speed != target_speed ||
+	    dd->lbus_width < target_width) { /* not target */
 		/* maybe retry */
 		do_retry = retry_count < pcie_retry;
-		dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n",
-			   pcie_target, do_retry ? ", retrying" : "");
+		dd_dev_err(dd, "PCIe link speed or width did not match target%s\n",
+			   do_retry ? ", retrying" : "");
 		retry_count++;
 		if (do_retry) {
 			msleep(100); /* allow time to settle */
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 752057647f09..9ab50d2308dc 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -71,14 +71,6 @@ void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl)
 	}
 }
 
-/* defined in header release 48 and higher */
-#ifndef SEND_CTRL_UNSUPPORTED_VL_SHIFT
-#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
-#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xffull
-#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
-		<< SEND_CTRL_UNSUPPORTED_VL_SHIFT)
-#endif
-
 /* global control of PIO send */
 void pio_send_control(struct hfi1_devdata *dd, int op)
 {
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 9b1e84a6b1cc..6f3bc4dab858 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -66,7 +66,7 @@ MODULE_PARM_DESC(qp_table_size, "QP table size");
 static void flush_tx_list(struct rvt_qp *qp);
 static int iowait_sleep(
 	struct sdma_engine *sde,
-	struct iowait *wait,
+	struct iowait_work *wait,
 	struct sdma_txreq *stx,
 	unsigned int seq,
 	bool pkts_sent);
@@ -134,15 +134,13 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
 
 };
 
-static void flush_tx_list(struct rvt_qp *qp)
+static void flush_list_head(struct list_head *l)
 {
-	struct hfi1_qp_priv *priv = qp->priv;
-
-	while (!list_empty(&priv->s_iowait.tx_head)) {
+	while (!list_empty(l)) {
 		struct sdma_txreq *tx;
 
 		tx = list_first_entry(
-			&priv->s_iowait.tx_head,
+			l,
 			struct sdma_txreq,
 			list);
 		list_del_init(&tx->list);
@@ -151,6 +149,14 @@ static void flush_tx_list(struct rvt_qp *qp)
 	}
 }
 
+static void flush_tx_list(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
+	flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
+}
+
 static void flush_iowait(struct rvt_qp *qp)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
@@ -282,33 +288,46 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
 }
 
 /**
- * hfi1_check_send_wqe - validate wqe
+ * hfi1_setup_wqe - set up the wqe
  * @qp - The qp
  * @wqe - The built wqe
+ * @call_send - Determine if the send should be posted or scheduled.
  *
- * validate wqe.  This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
+ * Perform setup of the wqe.  This is called
+ * prior to inserting the wqe into the ring but after
+ * the wqe has been setup by RDMAVT. This function
+ * allows the driver the opportunity to perform
+ * validation and additional setup of the wqe.
  *
  * Returns 0 on success, -EINVAL on failure
  *
  */
-int hfi1_check_send_wqe(struct rvt_qp *qp,
-			struct rvt_swqe *wqe)
+int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
 {
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 	struct rvt_ah *ah;
+	struct hfi1_pportdata *ppd;
+	struct hfi1_devdata *dd;
 
 	switch (qp->ibqp.qp_type) {
 	case IB_QPT_RC:
 	case IB_QPT_UC:
 		if (wqe->length > 0x80000000U)
 			return -EINVAL;
+		if (wqe->length > qp->pmtu)
+			*call_send = false;
 		break;
 	case IB_QPT_SMI:
-		ah = ibah_to_rvtah(wqe->ud_wr.ah);
-		if (wqe->length > (1 << ah->log_pmtu))
+		/*
+		 * SM packets should exclusively use VL15 and their SL is
+		 * ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah
+		 * is created, SL is 0 in most cases and as a result some
+		 * fields (vl and pmtu) in ah may not be set correctly,
+		 * depending on the SL2SC and SC2VL tables at the time.
+		 */
+		ppd = ppd_from_ibp(ibp);
+		dd = dd_from_ppd(ppd);
+		if (wqe->length > dd->vld[15].mtu)
 			return -EINVAL;
 		break;
 	case IB_QPT_GSI:
@@ -321,7 +340,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
 	default:
 		break;
 	}
-	return wqe->length <= piothreshold;
+	return 0;
 }
 
 /**
@@ -333,7 +352,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
  * It is only used in the post send, which doesn't hold
  * the s_lock.
  */
-void _hfi1_schedule_send(struct rvt_qp *qp)
+bool _hfi1_schedule_send(struct rvt_qp *qp)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_ibport *ibp =
@@ -341,10 +360,10 @@ void _hfi1_schedule_send(struct rvt_qp *qp)
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 
-	iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
-			priv->s_sde ?
-			priv->s_sde->cpu :
-			cpumask_first(cpumask_of_node(dd->node)));
+	return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
+			       priv->s_sde ?
+			       priv->s_sde->cpu :
+			       cpumask_first(cpumask_of_node(dd->node)));
 }
 
 static void qp_pio_drain(struct rvt_qp *qp)
@@ -372,12 +391,32 @@ static void qp_pio_drain(struct rvt_qp *qp)
  *
  * This schedules qp progress and caller should hold
  * the s_lock.
+ * @return true if the first leg is scheduled;
+ * false if the first leg is not scheduled.
  */
-void hfi1_schedule_send(struct rvt_qp *qp)
+bool hfi1_schedule_send(struct rvt_qp *qp)
 {
 	lockdep_assert_held(&qp->s_lock);
-	if (hfi1_send_ok(qp))
+	if (hfi1_send_ok(qp)) {
 		_hfi1_schedule_send(qp);
+		return true;
+	}
+	if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
+		iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
+				IOWAIT_PENDING_IB);
+	return false;
+}
+
+static void hfi1_qp_schedule(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+	bool ret;
+
+	if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
+		ret = hfi1_schedule_send(qp);
+		if (ret)
+			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
+	}
 }
 
 void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
@@ -388,16 +427,22 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
 	if (qp->s_flags & flag) {
 		qp->s_flags &= ~flag;
 		trace_hfi1_qpwakeup(qp, flag);
-		hfi1_schedule_send(qp);
+		hfi1_qp_schedule(qp);
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 	/* Notify hfi1_destroy_qp() if it is waiting. */
 	rvt_put_qp(qp);
 }
 
+void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
+{
+	if (iowait_set_work_flag(wait) == IOWAIT_IB_SE)
+		qp->s_flags &= ~RVT_S_BUSY;
+}
+
 static int iowait_sleep(
 	struct sdma_engine *sde,
-	struct iowait *wait,
+	struct iowait_work *wait,
 	struct sdma_txreq *stx,
 	uint seq,
 	bool pkts_sent)
@@ -438,7 +483,7 @@ static int iowait_sleep(
 			rvt_get_qp(qp);
 		}
 		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~RVT_S_BUSY;
+		hfi1_qp_unbusy(qp, wait);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		ret = -EBUSY;
 	} else {
@@ -637,6 +682,7 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
 		&priv->s_iowait,
 		1,
 		_hfi1_do_send,
+		NULL,
 		iowait_sleep,
 		iowait_wakeup,
 		iowait_sdma_drained);
@@ -686,7 +732,7 @@ void stop_send_queue(struct rvt_qp *qp)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 
-	cancel_work_sync(&priv->s_iowait.iowork);
+	iowait_cancel_work(&priv->s_iowait);
 }
 
 void quiesce_qp(struct rvt_qp *qp)
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index 078cff7560b6..7adb6dff6813 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -58,18 +58,6 @@ extern unsigned int hfi1_qp_table_size;
 extern const struct rvt_operation_params hfi1_post_parms[];
 
 /*
- * Send if not busy or waiting for I/O and either
- * a RC response is pending or we can process send work requests.
- */
-static inline int hfi1_send_ok(struct rvt_qp *qp)
-{
-	return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
-		(verbs_txreq_queued(qp) ||
-		(qp->s_flags & RVT_S_RESP_PENDING) ||
-		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
-}
-
-/*
  * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK
  *
  * HFI1_S_AHG_VALID - ahg header valid on chip
@@ -90,6 +78,20 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
 #define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
 
 /*
+ * Send if not busy or waiting for I/O and either
+ * a RC response is pending or we can process send work requests.
+ */
+static inline int hfi1_send_ok(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	return !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)) &&
+		(verbs_txreq_queued(iowait_get_ib_work(&priv->s_iowait)) ||
+		(qp->s_flags & RVT_S_RESP_PENDING) ||
+		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
+}
+
+/*
  * free_ahg - clear ahg from QP
  */
 static inline void clear_ahg(struct rvt_qp *qp)
@@ -129,8 +131,8 @@ struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5);
 
 void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter);
 
-void _hfi1_schedule_send(struct rvt_qp *qp);
-void hfi1_schedule_send(struct rvt_qp *qp);
+bool _hfi1_schedule_send(struct rvt_qp *qp);
+bool hfi1_schedule_send(struct rvt_qp *qp);
 
 void hfi1_migrate_qp(struct rvt_qp *qp);
 
@@ -150,4 +152,5 @@ void quiesce_qp(struct rvt_qp *qp);
 u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
 int mtu_to_path_mtu(u32 mtu);
 void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl);
+void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait);
 #endif /* _QP_H */
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 9bd63abb2dfe..188aa4f686a0 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -309,7 +309,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 		}
 		clear_ahg(qp);
 		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
-		hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
+		rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
 			IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
 		/* will get called again */
 		goto done_free_tx;
@@ -378,9 +378,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 						wqe->wr.ex.invalidate_rkey);
 					local_ops = 1;
 				}
-				hfi1_send_complete(qp, wqe,
-						   err ? IB_WC_LOC_PROT_ERR
-						       : IB_WC_SUCCESS);
+				rvt_send_complete(qp, wqe,
+						  err ? IB_WC_LOC_PROT_ERR
+						      : IB_WC_SUCCESS);
 				if (local_ops)
 					atomic_dec(&qp->local_ops_pending);
 				goto done_free_tx;
@@ -1043,7 +1043,7 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
 			hfi1_migrate_qp(qp);
 			qp->s_retry = qp->s_retry_cnt;
 		} else if (qp->s_last == qp->s_acked) {
-			hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+			rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
 			rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			return;
 		} else { /* need to handle delayed completion */
@@ -1468,7 +1468,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 			ibp->rvp.n_other_naks++;
 class_b:
 			if (qp->s_last == qp->s_acked) {
-				hfi1_send_complete(qp, wqe, status);
+				rvt_send_complete(qp, wqe, status);
 				rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			}
 			break;
@@ -1644,7 +1644,8 @@ read_middle:
 		qp->s_rdma_read_len -= pmtu;
 		update_last_psn(qp, psn);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
-		hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, false, false);
+		rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+			     data, pmtu, false, false);
 		goto bail;
 
 	case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1684,7 +1685,8 @@ read_last:
 		if (unlikely(tlen != qp->s_rdma_read_len))
 			goto ack_len_err;
 		aeth = be32_to_cpu(ohdr->u.aeth);
-		hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, false, false);
+		rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+			     data, tlen, false, false);
 		WARN_ON(qp->s_rdma_read_sge.num_sge);
 		(void)do_rc_ack(qp, aeth, psn,
 				 OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
@@ -1704,7 +1706,7 @@ ack_len_err:
 	status = IB_WC_LOC_LEN_ERR;
 ack_err:
 	if (qp->s_last == qp->s_acked) {
-		hfi1_send_complete(qp, wqe, status);
+		rvt_send_complete(qp, wqe, status);
 		rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 	}
 ack_done:
@@ -2144,7 +2146,7 @@ send_middle:
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len))
 			goto nack_inv;
-		hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
+		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
 		break;
 
 	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -2200,7 +2202,7 @@ send_last:
 		wc.byte_len = tlen + qp->r_rcv_len;
 		if (unlikely(wc.byte_len > qp->r_len))
 			goto nack_inv;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, true, copy_last);
+		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, copy_last);
 		rvt_put_ss(&qp->r_sge);
 		qp->r_msn++;
 		if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 5f56f3c1b4c4..7fb317c711df 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -156,333 +156,6 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet)
 }
 
 /**
- * ruc_loopback - handle UC and RC loopback requests
- * @sqp: the sending QP
- *
- * This is called from hfi1_do_send() to
- * forward a WQE addressed to the same HFI.
- * Note that although we are single threaded due to the send engine, we still
- * have to protect against post_send().  We don't have to worry about
- * receive interrupts since this is a connected protocol and all packets
- * will pass through here.
- */
-static void ruc_loopback(struct rvt_qp *sqp)
-{
-	struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
-	struct rvt_qp *qp;
-	struct rvt_swqe *wqe;
-	struct rvt_sge *sge;
-	unsigned long flags;
-	struct ib_wc wc;
-	u64 sdata;
-	atomic64_t *maddr;
-	enum ib_wc_status send_status;
-	bool release;
-	int ret;
-	bool copy_last = false;
-	int local_ops = 0;
-
-	rcu_read_lock();
-
-	/*
-	 * Note that we check the responder QP state after
-	 * checking the requester's state.
-	 */
-	qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
-			    sqp->remote_qpn);
-
-	spin_lock_irqsave(&sqp->s_lock, flags);
-
-	/* Return if we are already busy processing a work request. */
-	if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) ||
-	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
-		goto unlock;
-
-	sqp->s_flags |= RVT_S_BUSY;
-
-again:
-	if (sqp->s_last == READ_ONCE(sqp->s_head))
-		goto clr_busy;
-	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
-
-	/* Return if it is not OK to start a new work request. */
-	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
-			goto clr_busy;
-		/* We are in the error state, flush the work request. */
-		send_status = IB_WC_WR_FLUSH_ERR;
-		goto flush_send;
-	}
-
-	/*
-	 * We can rely on the entry not changing without the s_lock
-	 * being held until we update s_last.
-	 * We increment s_cur to indicate s_last is in progress.
-	 */
-	if (sqp->s_last == sqp->s_cur) {
-		if (++sqp->s_cur >= sqp->s_size)
-			sqp->s_cur = 0;
-	}
-	spin_unlock_irqrestore(&sqp->s_lock, flags);
-
-	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
-	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
-		ibp->rvp.n_pkt_drops++;
-		/*
-		 * For RC, the requester would timeout and retry so
-		 * shortcut the timeouts and just signal too many retries.
-		 */
-		if (sqp->ibqp.qp_type == IB_QPT_RC)
-			send_status = IB_WC_RETRY_EXC_ERR;
-		else
-			send_status = IB_WC_SUCCESS;
-		goto serr;
-	}
-
-	memset(&wc, 0, sizeof(wc));
-	send_status = IB_WC_SUCCESS;
-
-	release = true;
-	sqp->s_sge.sge = wqe->sg_list[0];
-	sqp->s_sge.sg_list = wqe->sg_list + 1;
-	sqp->s_sge.num_sge = wqe->wr.num_sge;
-	sqp->s_len = wqe->length;
-	switch (wqe->wr.opcode) {
-	case IB_WR_REG_MR:
-		goto send_comp;
-
-	case IB_WR_LOCAL_INV:
-		if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
-			if (rvt_invalidate_rkey(sqp,
-						wqe->wr.ex.invalidate_rkey))
-				send_status = IB_WC_LOC_PROT_ERR;
-			local_ops = 1;
-		}
-		goto send_comp;
-
-	case IB_WR_SEND_WITH_INV:
-		if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
-			wc.wc_flags = IB_WC_WITH_INVALIDATE;
-			wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
-		}
-		goto send;
-
-	case IB_WR_SEND_WITH_IMM:
-		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.ex.imm_data = wqe->wr.ex.imm_data;
-		/* FALLTHROUGH */
-	case IB_WR_SEND:
-send:
-		ret = rvt_get_rwqe(qp, false);
-		if (ret < 0)
-			goto op_err;
-		if (!ret)
-			goto rnr_nak;
-		break;
-
-	case IB_WR_RDMA_WRITE_WITH_IMM:
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
-			goto inv_err;
-		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.ex.imm_data = wqe->wr.ex.imm_data;
-		ret = rvt_get_rwqe(qp, true);
-		if (ret < 0)
-			goto op_err;
-		if (!ret)
-			goto rnr_nak;
-		/* skip copy_last set and qp_access_flags recheck */
-		goto do_write;
-	case IB_WR_RDMA_WRITE:
-		copy_last = rvt_is_user_qp(qp);
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
-			goto inv_err;
-do_write:
-		if (wqe->length == 0)
-			break;
-		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
-					  wqe->rdma_wr.remote_addr,
-					  wqe->rdma_wr.rkey,
-					  IB_ACCESS_REMOTE_WRITE)))
-			goto acc_err;
-		qp->r_sge.sg_list = NULL;
-		qp->r_sge.num_sge = 1;
-		qp->r_sge.total_len = wqe->length;
-		break;
-
-	case IB_WR_RDMA_READ:
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
-			goto inv_err;
-		if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
-					  wqe->rdma_wr.remote_addr,
-					  wqe->rdma_wr.rkey,
-					  IB_ACCESS_REMOTE_READ)))
-			goto acc_err;
-		release = false;
-		sqp->s_sge.sg_list = NULL;
-		sqp->s_sge.num_sge = 1;
-		qp->r_sge.sge = wqe->sg_list[0];
-		qp->r_sge.sg_list = wqe->sg_list + 1;
-		qp->r_sge.num_sge = wqe->wr.num_sge;
-		qp->r_sge.total_len = wqe->length;
-		break;
-
-	case IB_WR_ATOMIC_CMP_AND_SWP:
-	case IB_WR_ATOMIC_FETCH_AND_ADD:
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
-			goto inv_err;
-		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
-					  wqe->atomic_wr.remote_addr,
-					  wqe->atomic_wr.rkey,
-					  IB_ACCESS_REMOTE_ATOMIC)))
-			goto acc_err;
-		/* Perform atomic OP and save result. */
-		maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
-		sdata = wqe->atomic_wr.compare_add;
-		*(u64 *)sqp->s_sge.sge.vaddr =
-			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
-			(u64)atomic64_add_return(sdata, maddr) - sdata :
-			(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
-				      sdata, wqe->atomic_wr.swap);
-		rvt_put_mr(qp->r_sge.sge.mr);
-		qp->r_sge.num_sge = 0;
-		goto send_comp;
-
-	default:
-		send_status = IB_WC_LOC_QP_OP_ERR;
-		goto serr;
-	}
-
-	sge = &sqp->s_sge.sge;
-	while (sqp->s_len) {
-		u32 len = sqp->s_len;
-
-		if (len > sge->length)
-			len = sge->length;
-		if (len > sge->sge_length)
-			len = sge->sge_length;
-		WARN_ON_ONCE(len == 0);
-		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
-		sge->vaddr += len;
-		sge->length -= len;
-		sge->sge_length -= len;
-		if (sge->sge_length == 0) {
-			if (!release)
-				rvt_put_mr(sge->mr);
-			if (--sqp->s_sge.num_sge)
-				*sge = *sqp->s_sge.sg_list++;
-		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= RVT_SEGSZ) {
-				if (++sge->m >= sge->mr->mapsz)
-					break;
-				sge->n = 0;
-			}
-			sge->vaddr =
-				sge->mr->map[sge->m]->segs[sge->n].vaddr;
-			sge->length =
-				sge->mr->map[sge->m]->segs[sge->n].length;
-		}
-		sqp->s_len -= len;
-	}
-	if (release)
-		rvt_put_ss(&qp->r_sge);
-
-	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
-		goto send_comp;
-
-	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-	else
-		wc.opcode = IB_WC_RECV;
-	wc.wr_id = qp->r_wr_id;
-	wc.status = IB_WC_SUCCESS;
-	wc.byte_len = wqe->length;
-	wc.qp = &qp->ibqp;
-	wc.src_qp = qp->remote_qpn;
-	wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
-	wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
-	wc.port_num = 1;
-	/* Signal completion event if the solicited bit is set. */
-	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
-		     wqe->wr.send_flags & IB_SEND_SOLICITED);
-
-send_comp:
-	spin_lock_irqsave(&sqp->s_lock, flags);
-	ibp->rvp.n_loop_pkts++;
-flush_send:
-	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
-	hfi1_send_complete(sqp, wqe, send_status);
-	if (local_ops) {
-		atomic_dec(&sqp->local_ops_pending);
-		local_ops = 0;
-	}
-	goto again;
-
-rnr_nak:
-	/* Handle RNR NAK */
-	if (qp->ibqp.qp_type == IB_QPT_UC)
-		goto send_comp;
-	ibp->rvp.n_rnr_naks++;
-	/*
-	 * Note: we don't need the s_lock held since the BUSY flag
-	 * makes this single threaded.
-	 */
-	if (sqp->s_rnr_retry == 0) {
-		send_status = IB_WC_RNR_RETRY_EXC_ERR;
-		goto serr;
-	}
-	if (sqp->s_rnr_retry_cnt < 7)
-		sqp->s_rnr_retry--;
-	spin_lock_irqsave(&sqp->s_lock, flags);
-	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
-		goto clr_busy;
-	rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
-				IB_AETH_CREDIT_SHIFT);
-	goto clr_busy;
-
-op_err:
-	send_status = IB_WC_REM_OP_ERR;
-	wc.status = IB_WC_LOC_QP_OP_ERR;
-	goto err;
-
-inv_err:
-	send_status = IB_WC_REM_INV_REQ_ERR;
-	wc.status = IB_WC_LOC_QP_OP_ERR;
-	goto err;
-
-acc_err:
-	send_status = IB_WC_REM_ACCESS_ERR;
-	wc.status = IB_WC_LOC_PROT_ERR;
-err:
-	/* responder goes to error state */
-	rvt_rc_error(qp, wc.status);
-
-serr:
-	spin_lock_irqsave(&sqp->s_lock, flags);
-	hfi1_send_complete(sqp, wqe, send_status);
-	if (sqp->ibqp.qp_type == IB_QPT_RC) {
-		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
-
-		sqp->s_flags &= ~RVT_S_BUSY;
-		spin_unlock_irqrestore(&sqp->s_lock, flags);
-		if (lastwqe) {
-			struct ib_event ev;
-
-			ev.device = sqp->ibqp.device;
-			ev.element.qp = &sqp->ibqp;
-			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
-		}
-		goto done;
-	}
-clr_busy:
-	sqp->s_flags &= ~RVT_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&sqp->s_lock, flags);
-done:
-	rcu_read_unlock();
-}
-
-/**
  * hfi1_make_grh - construct a GRH header
  * @ibp: a pointer to the IB port
  * @hdr: a pointer to the GRH header being constructed
@@ -825,8 +498,8 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp)
 
 void _hfi1_do_send(struct work_struct *work)
 {
-	struct iowait *wait = container_of(work, struct iowait, iowork);
-	struct rvt_qp *qp = iowait_to_qp(wait);
+	struct iowait_work *w = container_of(work, struct iowait_work, iowork);
+	struct rvt_qp *qp = iowait_to_qp(w->iow);
 
 	hfi1_do_send(qp, true);
 }
@@ -850,6 +523,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 	ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ps.ppd = ppd_from_ibp(ps.ibp);
 	ps.in_thread = in_thread;
+	ps.wait = iowait_get_ib_work(&priv->s_iowait);
 
 	trace_hfi1_rc_do_send(qp, in_thread);
 
@@ -858,7 +532,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 		if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
 				   ~((1 << ps.ppd->lmc) - 1)) ==
 				  ps.ppd->lid)) {
-			ruc_loopback(qp);
+			rvt_ruc_loopback(qp);
 			return;
 		}
 		make_req = hfi1_make_rc_req;
@@ -868,7 +542,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 		if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
 				   ~((1 << ps.ppd->lmc) - 1)) ==
 				  ps.ppd->lid)) {
-			ruc_loopback(qp);
+			rvt_ruc_loopback(qp);
 			return;
 		}
 		make_req = hfi1_make_uc_req;
@@ -883,6 +557,8 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 
 	/* Return if we are already busy processing a work request. */
 	if (!hfi1_send_ok(qp)) {
+		if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
+			iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
 		spin_unlock_irqrestore(&qp->s_lock, ps.flags);
 		return;
 	}
@@ -896,7 +572,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 	ps.pkts_sent = false;
 
 	/* insure a pre-built packet is handled  */
-	ps.s_txreq = get_waiting_verbs_txreq(qp);
+	ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
 	do {
 		/* Check for a constructed packet to be sent. */
 		if (ps.s_txreq) {
@@ -907,6 +583,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 			 */
 			if (hfi1_verbs_send(qp, &ps))
 				return;
+
 			/* allow other tasks to run */
 			if (schedule_send_yield(qp, &ps))
 				return;
@@ -917,44 +594,3 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 	iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
 	spin_unlock_irqrestore(&qp->s_lock, ps.flags);
 }
-
-/*
- * This should be called with s_lock held.
- */
-void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
-			enum ib_wc_status status)
-{
-	u32 old_last, last;
-
-	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
-		return;
-
-	last = qp->s_last;
-	old_last = last;
-	trace_hfi1_qp_send_completion(qp, wqe, last);
-	if (++last >= qp->s_size)
-		last = 0;
-	trace_hfi1_qp_send_completion(qp, wqe, last);
-	qp->s_last = last;
-	/* See post_send() */
-	barrier();
-	rvt_put_swqe(wqe);
-	if (qp->ibqp.qp_type == IB_QPT_UD ||
-	    qp->ibqp.qp_type == IB_QPT_SMI ||
-	    qp->ibqp.qp_type == IB_QPT_GSI)
-		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
-
-	rvt_qp_swqe_complete(qp,
-			     wqe,
-			     ib_hfi1_wc_opcode[wqe->wr.opcode],
-			     status);
-
-	if (qp->s_acked == old_last)
-		qp->s_acked = last;
-	if (qp->s_cur == old_last)
-		qp->s_cur = last;
-	if (qp->s_tail == old_last)
-		qp->s_tail = last;
-	if (qp->state == IB_QPS_SQD && last == qp->s_cur)
-		qp->s_draining = 0;
-}
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 88e326d6cc49..891d2386d1ca 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -378,7 +378,7 @@ static inline void complete_tx(struct sdma_engine *sde,
 	__sdma_txclean(sde->dd, tx);
 	if (complete)
 		(*complete)(tx, res);
-	if (wait && iowait_sdma_dec(wait))
+	if (iowait_sdma_dec(wait))
 		iowait_drain_wakeup(wait);
 }
 
@@ -1758,7 +1758,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
 	struct iowait *wait, *nw;
 	struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
 	uint i, n = 0, seq, max_idx = 0;
-	struct sdma_txreq *stx;
 	struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
 	u8 max_starved_cnt = 0;
 
@@ -1779,19 +1778,13 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
 					nw,
 					&sde->dmawait,
 					list) {
-				u16 num_desc = 0;
+				u32 num_desc;
 
 				if (!wait->wakeup)
 					continue;
 				if (n == ARRAY_SIZE(waits))
 					break;
-				if (!list_empty(&wait->tx_head)) {
-					stx = list_first_entry(
-						&wait->tx_head,
-						struct sdma_txreq,
-						list);
-					num_desc = stx->num_desc;
-				}
+				num_desc = iowait_get_all_desc(wait);
 				if (num_desc > avail)
 					break;
 				avail -= num_desc;
@@ -2346,7 +2339,7 @@ static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
  */
 static int sdma_check_progress(
 	struct sdma_engine *sde,
-	struct iowait *wait,
+	struct iowait_work *wait,
 	struct sdma_txreq *tx,
 	bool pkts_sent)
 {
@@ -2356,12 +2349,12 @@ static int sdma_check_progress(
 	if (tx->num_desc <= sde->desc_avail)
 		return -EAGAIN;
 	/* pulse the head_lock */
-	if (wait && wait->sleep) {
+	if (wait && iowait_ioww_to_iow(wait)->sleep) {
 		unsigned seq;
 
 		seq = raw_seqcount_begin(
 			(const seqcount_t *)&sde->head_lock.seqcount);
-		ret = wait->sleep(sde, wait, tx, seq, pkts_sent);
+		ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent);
 		if (ret == -EAGAIN)
 			sde->desc_avail = sdma_descq_freecnt(sde);
 	} else {
@@ -2373,7 +2366,7 @@ static int sdma_check_progress(
 /**
  * sdma_send_txreq() - submit a tx req to ring
  * @sde: sdma engine to use
- * @wait: wait structure to use when full (may be NULL)
+ * @wait: SE wait structure to use when full (may be NULL)
  * @tx: sdma_txreq to submit
  * @pkts_sent: has any packet been sent yet?
  *
@@ -2386,7 +2379,7 @@ static int sdma_check_progress(
  * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
  */
 int sdma_send_txreq(struct sdma_engine *sde,
-		    struct iowait *wait,
+		    struct iowait_work *wait,
 		    struct sdma_txreq *tx,
 		    bool pkts_sent)
 {
@@ -2397,7 +2390,7 @@ int sdma_send_txreq(struct sdma_engine *sde,
 	/* user should have supplied entire packet */
 	if (unlikely(tx->tlen))
 		return -EINVAL;
-	tx->wait = wait;
+	tx->wait = iowait_ioww_to_iow(wait);
 	spin_lock_irqsave(&sde->tail_lock, flags);
 retry:
 	if (unlikely(!__sdma_running(sde)))
@@ -2406,14 +2399,14 @@ retry:
 		goto nodesc;
 	tail = submit_tx(sde, tx);
 	if (wait)
-		iowait_sdma_inc(wait);
+		iowait_sdma_inc(iowait_ioww_to_iow(wait));
 	sdma_update_tail(sde, tail);
 unlock:
 	spin_unlock_irqrestore(&sde->tail_lock, flags);
 	return ret;
 unlock_noconn:
 	if (wait)
-		iowait_sdma_inc(wait);
+		iowait_sdma_inc(iowait_ioww_to_iow(wait));
 	tx->next_descq_idx = 0;
 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
 	tx->sn = sde->tail_sn++;
@@ -2422,10 +2415,7 @@ unlock_noconn:
 	spin_lock(&sde->flushlist_lock);
 	list_add_tail(&tx->list, &sde->flushlist);
 	spin_unlock(&sde->flushlist_lock);
-	if (wait) {
-		wait->tx_count++;
-		wait->count += tx->num_desc;
-	}
+	iowait_inc_wait_count(wait, tx->num_desc);
 	schedule_work(&sde->flush_worker);
 	ret = -ECOMM;
 	goto unlock;
@@ -2442,9 +2432,9 @@ nodesc:
 /**
  * sdma_send_txlist() - submit a list of tx req to ring
  * @sde: sdma engine to use
- * @wait: wait structure to use when full (may be NULL)
+ * @wait: SE wait structure to use when full (may be NULL)
  * @tx_list: list of sdma_txreqs to submit
- * @count: pointer to a u32 which, after return will contain the total number of
+ * @count: pointer to a u16 which, after return will contain the total number of
  *         sdma_txreqs removed from the tx_list. This will include sdma_txreqs
  *         whose SDMA descriptors are submitted to the ring and the sdma_txreqs
  *         which are added to SDMA engine flush list if the SDMA engine state is
@@ -2467,8 +2457,8 @@ nodesc:
  * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
  * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
  */
-int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
-		     struct list_head *tx_list, u32 *count_out)
+int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
+		     struct list_head *tx_list, u16 *count_out)
 {
 	struct sdma_txreq *tx, *tx_next;
 	int ret = 0;
@@ -2479,7 +2469,7 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
 	spin_lock_irqsave(&sde->tail_lock, flags);
 retry:
 	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
-		tx->wait = wait;
+		tx->wait = iowait_ioww_to_iow(wait);
 		if (unlikely(!__sdma_running(sde)))
 			goto unlock_noconn;
 		if (unlikely(tx->num_desc > sde->desc_avail))
@@ -2500,8 +2490,9 @@ retry:
 update_tail:
 	total_count = submit_count + flush_count;
 	if (wait) {
-		iowait_sdma_add(wait, total_count);
-		iowait_starve_clear(submit_count > 0, wait);
+		iowait_sdma_add(iowait_ioww_to_iow(wait), total_count);
+		iowait_starve_clear(submit_count > 0,
+				    iowait_ioww_to_iow(wait));
 	}
 	if (tail != INVALID_TAIL)
 		sdma_update_tail(sde, tail);
@@ -2511,7 +2502,7 @@ update_tail:
 unlock_noconn:
 	spin_lock(&sde->flushlist_lock);
 	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
-		tx->wait = wait;
+		tx->wait = iowait_ioww_to_iow(wait);
 		list_del_init(&tx->list);
 		tx->next_descq_idx = 0;
 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
@@ -2520,10 +2511,7 @@ unlock_noconn:
 #endif
 		list_add_tail(&tx->list, &sde->flushlist);
 		flush_count++;
-		if (wait) {
-			wait->tx_count++;
-			wait->count += tx->num_desc;
-		}
+		iowait_inc_wait_count(wait, tx->num_desc);
 	}
 	spin_unlock(&sde->flushlist_lock);
 	schedule_work(&sde->flush_worker);
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 46c775f255d1..6dc63d7c5685 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -1,7 +1,7 @@
 #ifndef _HFI1_SDMA_H
 #define _HFI1_SDMA_H
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -62,16 +62,6 @@
 /* Hardware limit for SDMA packet size */
 #define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)
 
-#define SDMA_TXREQ_S_OK        0
-#define SDMA_TXREQ_S_SENDERROR 1
-#define SDMA_TXREQ_S_ABORTED   2
-#define SDMA_TXREQ_S_SHUTDOWN  3
-
-/* flags bits */
-#define SDMA_TXREQ_F_URGENT       0x0001
-#define SDMA_TXREQ_F_AHG_COPY     0x0002
-#define SDMA_TXREQ_F_USE_AHG      0x0004
-
 #define SDMA_MAP_NONE          0
 #define SDMA_MAP_SINGLE        1
 #define SDMA_MAP_PAGE          2
@@ -415,6 +405,7 @@ struct sdma_engine {
 	struct list_head flushlist;
 	struct cpumask cpu_mask;
 	struct kobject kobj;
+	u32 msix_intr;
 };
 
 int sdma_init(struct hfi1_devdata *dd, u8 port);
@@ -849,16 +840,16 @@ static inline int sdma_txadd_kvaddr(
 			dd, SDMA_MAP_SINGLE, tx, addr, len);
 }
 
-struct iowait;
+struct iowait_work;
 
 int sdma_send_txreq(struct sdma_engine *sde,
-		    struct iowait *wait,
+		    struct iowait_work *wait,
 		    struct sdma_txreq *tx,
 		    bool pkts_sent);
 int sdma_send_txlist(struct sdma_engine *sde,
-		     struct iowait *wait,
+		     struct iowait_work *wait,
 		     struct list_head *tx_list,
-		     u32 *count);
+		     u16 *count_out);
 
 int sdma_ahg_alloc(struct sdma_engine *sde);
 void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 25e867393463..2be513d4c9da 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -494,17 +494,18 @@ static struct kobj_type hfi1_vl2mtu_ktype = {
  * Start of per-unit (or driver, in some cases, but replicated
  * per unit) functions (these get a device *)
  */
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
-			char *buf)
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+			   char *buf)
 {
 	struct hfi1_ibdev *dev =
 		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 
 	return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
 }
+static DEVICE_ATTR_RO(hw_rev);
 
-static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
-			char *buf)
+static ssize_t board_id_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
 		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
@@ -517,8 +518,9 @@ static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
 		ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
 	return ret;
 }
+static DEVICE_ATTR_RO(board_id);
 
-static ssize_t show_boardversion(struct device *device,
+static ssize_t boardversion_show(struct device *device,
 				 struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
@@ -528,8 +530,9 @@ static ssize_t show_boardversion(struct device *device,
 	/* The string printed here is already newline-terminated. */
 	return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
 }
+static DEVICE_ATTR_RO(boardversion);
 
-static ssize_t show_nctxts(struct device *device,
+static ssize_t nctxts_show(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
@@ -546,8 +549,9 @@ static ssize_t show_nctxts(struct device *device,
 			 min(dd->num_user_contexts,
 			     (u32)dd->sc_sizes[SC_USER].count));
 }
+static DEVICE_ATTR_RO(nctxts);
 
-static ssize_t show_nfreectxts(struct device *device,
+static ssize_t nfreectxts_show(struct device *device,
 			       struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
@@ -557,8 +561,9 @@ static ssize_t show_nfreectxts(struct device *device,
 	/* Return the number of free user ports (contexts) available. */
 	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
 }
+static DEVICE_ATTR_RO(nfreectxts);
 
-static ssize_t show_serial(struct device *device,
+static ssize_t serial_show(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
@@ -567,8 +572,9 @@ static ssize_t show_serial(struct device *device,
 
 	return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
 }
+static DEVICE_ATTR_RO(serial);
 
-static ssize_t store_chip_reset(struct device *device,
+static ssize_t chip_reset_store(struct device *device,
 				struct device_attribute *attr, const char *buf,
 				size_t count)
 {
@@ -586,6 +592,7 @@ static ssize_t store_chip_reset(struct device *device,
 bail:
 	return ret < 0 ? ret : count;
 }
+static DEVICE_ATTR_WO(chip_reset);
 
 /*
  * Convert the reported temperature from an integer (reported in
@@ -598,7 +605,7 @@ bail:
 /*
  * Dump tempsense values, in decimal, to ease shell-scripts.
  */
-static ssize_t show_tempsense(struct device *device,
+static ssize_t tempsense_show(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
@@ -622,6 +629,7 @@ static ssize_t show_tempsense(struct device *device,
 	}
 	return ret;
 }
+static DEVICE_ATTR_RO(tempsense);
 
 /*
  * end of per-unit (or driver, in some cases, but replicated
@@ -629,24 +637,20 @@ static ssize_t show_tempsense(struct device *device,
  */
 
 /* start of per-unit file structures and support code */
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_hfi, NULL);
-static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL);
-static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL);
-static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
-static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
-static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
-static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
-
-static struct device_attribute *hfi1_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_board_id,
-	&dev_attr_nctxts,
-	&dev_attr_nfreectxts,
-	&dev_attr_serial,
-	&dev_attr_boardversion,
-	&dev_attr_tempsense,
-	&dev_attr_chip_reset,
+static struct attribute *hfi1_attributes[] = {
+	&dev_attr_hw_rev.attr,
+	&dev_attr_board_id.attr,
+	&dev_attr_nctxts.attr,
+	&dev_attr_nfreectxts.attr,
+	&dev_attr_serial.attr,
+	&dev_attr_boardversion.attr,
+	&dev_attr_tempsense.attr,
+	&dev_attr_chip_reset.attr,
+	NULL,
+};
+
+const struct attribute_group ib_hfi1_attr_group = {
+	.attrs = hfi1_attributes,
 };
 
 int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
@@ -832,12 +836,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
 	struct device *class_dev = &dev->dev;
 	int i, j, ret;
 
-	for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
-		ret = device_create_file(&dev->dev, hfi1_attributes[i]);
-		if (ret)
-			goto bail;
-	}
-
 	for (i = 0; i < dd->num_sdma; i++) {
 		ret = kobject_init_and_add(&dd->per_sdma[i].kobj,
 					   &sde_ktype, &class_dev->kobj,
@@ -855,9 +853,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
 
 	return 0;
 bail:
-	for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
-		device_remove_file(&dev->dev, hfi1_attributes[i]);
-
 	for (i = 0; i < dd->num_sdma; i++)
 		kobject_del(&dd->per_sdma[i].kobj);
 
diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h
index 8540463ef3f7..84458f1325e1 100644
--- a/drivers/infiniband/hw/hfi1/trace.h
+++ b/drivers/infiniband/hw/hfi1/trace.h
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -62,3 +62,4 @@ __print_symbolic(etype,                         \
 #include "trace_rx.h"
 #include "trace_tx.h"
 #include "trace_mmu.h"
+#include "trace_iowait.h"
diff --git a/drivers/infiniband/hw/hfi1/trace_iowait.h b/drivers/infiniband/hw/hfi1/trace_iowait.h
new file mode 100644
index 000000000000..27f4334ece2b
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_iowait.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#if !defined(__HFI1_TRACE_IOWAIT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_IOWAIT_H
+
+#include <linux/tracepoint.h>
+#include "iowait.h"
+#include "verbs.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_iowait
+
+DECLARE_EVENT_CLASS(hfi1_iowait_template,
+		    TP_PROTO(struct iowait *wait, u32 flag),
+		    TP_ARGS(wait, flag),
+		    TP_STRUCT__entry(/* entry */
+			    __field(unsigned long, addr)
+			    __field(unsigned long, flags)
+			    __field(u32, flag)
+			    __field(u32, qpn)
+			    ),
+		    TP_fast_assign(/* assign */
+			    __entry->addr = (unsigned long)wait;
+			    __entry->flags = wait->flags;
+			    __entry->flag = (1 << flag);
+			    __entry->qpn = iowait_to_qp(wait)->ibqp.qp_num;
+			    ),
+		    TP_printk(/* print */
+			    "iowait 0x%lx qp %u flags 0x%lx flag 0x%x",
+			    __entry->addr,
+			    __entry->qpn,
+			    __entry->flags,
+			    __entry->flag
+			    )
+	);
+
+DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_set,
+	     TP_PROTO(struct iowait *wait, u32 flag),
+	     TP_ARGS(wait, flag));
+
+DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_clear,
+	     TP_PROTO(struct iowait *wait, u32 flag),
+	     TP_ARGS(wait, flag));
+
+#endif /* __HFI1_TRACE_IOWAIT_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_iowait
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index e254dcec6f64..6aca0c5a7f97 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -88,7 +88,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 		}
 		clear_ahg(qp);
 		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
-		hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+		rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
 		goto done_free_tx;
 	}
 
@@ -140,7 +140,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 					qp, wqe->wr.ex.invalidate_rkey);
 				local_ops = 1;
 			}
-			hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
+			rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
 							: IB_WC_SUCCESS);
 			if (local_ops)
 				atomic_dec(&qp->local_ops_pending);
@@ -426,7 +426,7 @@ send_first:
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len))
 			goto rewind;
-		hfi1_copy_sge(&qp->r_sge, data, pmtu, false, false);
+		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
 		break;
 
 	case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -449,7 +449,7 @@ send_last:
 		if (unlikely(wc.byte_len > qp->r_len))
 			goto rewind;
 		wc.opcode = IB_WC_RECV;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, false, false);
+		rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
 		rvt_put_ss(&qp->s_rdma_read_sge);
 last_imm:
 		wc.wr_id = qp->r_wr_id;
@@ -523,7 +523,7 @@ rdma_first:
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len))
 			goto drop;
-		hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
+		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
 		break;
 
 	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -550,7 +550,7 @@ rdma_last_imm:
 		}
 		wc.byte_len = qp->r_len;
 		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
+		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
 		rvt_put_ss(&qp->r_sge);
 		goto last_imm;
 
@@ -564,7 +564,7 @@ rdma_last:
 		tlen -= (hdrsize + extra_bytes);
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 			goto drop;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
+		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
 		rvt_put_ss(&qp->r_sge);
 		break;
 
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 70d39fc450a1..4baa8f4d49de 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -210,8 +210,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
 		}
 
 		hfi1_make_grh(ibp, &grh, &grd, 0, 0);
-		hfi1_copy_sge(&qp->r_sge, &grh,
-			      sizeof(grh), true, false);
+		rvt_copy_sge(qp, &qp->r_sge, &grh,
+			     sizeof(grh), true, false);
 		wc.wc_flags |= IB_WC_GRH;
 	} else {
 		rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
@@ -228,7 +228,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
 		if (len > sge->sge_length)
 			len = sge->sge_length;
 		WARN_ON_ONCE(len == 0);
-		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, true, false);
+		rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
 		sge->vaddr += len;
 		sge->length -= len;
 		sge->sge_length -= len;
@@ -518,7 +518,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 			goto bail;
 		}
 		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
-		hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+		rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
 		goto done_free_tx;
 	}
 
@@ -560,7 +560,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 			ud_loopback(qp, wqe);
 			spin_lock_irqsave(&qp->s_lock, tflags);
 			ps->flags = tflags;
-			hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);
+			rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
 			goto done_free_tx;
 		}
 	}
@@ -1019,8 +1019,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 		goto drop;
 	}
 	if (packet->grh) {
-		hfi1_copy_sge(&qp->r_sge, packet->grh,
-			      sizeof(struct ib_grh), true, false);
+		rvt_copy_sge(qp, &qp->r_sge, packet->grh,
+			     sizeof(struct ib_grh), true, false);
 		wc.wc_flags |= IB_WC_GRH;
 	} else if (packet->etype == RHF_RCV_TYPE_BYPASS) {
 		struct ib_grh grh;
@@ -1030,14 +1030,14 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 		 * out when creating 16B, add back the GRH here.
 		 */
 		hfi1_make_ext_grh(packet, &grh, slid, dlid);
-		hfi1_copy_sge(&qp->r_sge, &grh,
-			      sizeof(struct ib_grh), true, false);
+		rvt_copy_sge(qp, &qp->r_sge, &grh,
+			     sizeof(struct ib_grh), true, false);
 		wc.wc_flags |= IB_WC_GRH;
 	} else {
 		rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
 	}
-	hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
-		      true, false);
+	rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
+		     true, false);
 	rvt_put_ss(&qp->r_sge);
 	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		return;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 5c88706121c1..3f0aadccd9f6 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -76,8 +76,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
 
 static unsigned initial_pkt_count = 8;
 
-static int user_sdma_send_pkts(struct user_sdma_request *req,
-			       unsigned maxpkts);
+static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
 static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
 static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
 static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
@@ -101,7 +100,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
 
 static int defer_packet_queue(
 	struct sdma_engine *sde,
-	struct iowait *wait,
+	struct iowait_work *wait,
 	struct sdma_txreq *txreq,
 	uint seq,
 	bool pkts_sent);
@@ -124,13 +123,13 @@ static struct mmu_rb_ops sdma_rb_ops = {
 
 static int defer_packet_queue(
 	struct sdma_engine *sde,
-	struct iowait *wait,
+	struct iowait_work *wait,
 	struct sdma_txreq *txreq,
 	uint seq,
 	bool pkts_sent)
 {
 	struct hfi1_user_sdma_pkt_q *pq =
-		container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
+		container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
 	struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
 	struct user_sdma_txreq *tx =
 		container_of(txreq, struct user_sdma_txreq, txreq);
@@ -187,13 +186,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
 	pq->ctxt = uctxt->ctxt;
 	pq->subctxt = fd->subctxt;
 	pq->n_max_reqs = hfi1_sdma_comp_ring_size;
-	pq->state = SDMA_PKT_Q_INACTIVE;
 	atomic_set(&pq->n_reqs, 0);
 	init_waitqueue_head(&pq->wait);
 	atomic_set(&pq->n_locked, 0);
 	pq->mm = fd->mm;
 
-	iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
+	iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
 		    activate_packet_queue, NULL);
 	pq->reqidx = 0;
 
@@ -276,7 +274,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
 		/* Wait until all requests have been freed. */
 		wait_event_interruptible(
 			pq->wait,
-			(READ_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE));
+			!atomic_read(&pq->n_reqs));
 		kfree(pq->reqs);
 		kfree(pq->req_in_use);
 		kmem_cache_destroy(pq->txreq_cache);
@@ -312,6 +310,13 @@ static u8 dlid_to_selector(u16 dlid)
 	return mapping[hash];
 }
 
+/**
+ * hfi1_user_sdma_process_request() - Process and start a user sdma request
+ * @fd: valid file descriptor
+ * @iovec: array of io vectors to process
+ * @dim: overall iovec array size
+ * @count: number of io vector array entries processed
+ */
 int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 				   struct iovec *iovec, unsigned long dim,
 				   unsigned long *count)
@@ -328,7 +333,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 	u8 opcode, sc, vl;
 	u16 pkey;
 	u32 slid;
-	int req_queued = 0;
 	u16 dlid;
 	u32 selector;
 
@@ -392,7 +396,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 	req->data_len  = 0;
 	req->pq = pq;
 	req->cq = cq;
-	req->status = -1;
 	req->ahg_idx = -1;
 	req->iov_idx = 0;
 	req->sent = 0;
@@ -400,12 +403,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 	req->seqcomp = 0;
 	req->seqsubmitted = 0;
 	req->tids = NULL;
-	req->done = 0;
 	req->has_error = 0;
 	INIT_LIST_HEAD(&req->txps);
 
 	memcpy(&req->info, &info, sizeof(info));
 
+	/* The request is initialized, count it */
+	atomic_inc(&pq->n_reqs);
+
 	if (req_opcode(info.ctrl) == EXPECTED) {
 		/* expected must have a TID info and at least one data vector */
 		if (req->data_iovs < 2) {
@@ -500,7 +505,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 		ret = pin_vector_pages(req, &req->iovs[i]);
 		if (ret) {
 			req->data_iovs = i;
-			req->status = ret;
 			goto free_req;
 		}
 		req->data_len += req->iovs[i].iov.iov_len;
@@ -561,23 +565,11 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 		req->ahg_idx = sdma_ahg_alloc(req->sde);
 
 	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
-	atomic_inc(&pq->n_reqs);
-	req_queued = 1;
+	pq->state = SDMA_PKT_Q_ACTIVE;
 	/* Send the first N packets in the request to buy us some time */
 	ret = user_sdma_send_pkts(req, pcount);
-	if (unlikely(ret < 0 && ret != -EBUSY)) {
-		req->status = ret;
+	if (unlikely(ret < 0 && ret != -EBUSY))
 		goto free_req;
-	}
-
-	/*
-	 * It is possible that the SDMA engine would have processed all the
-	 * submitted packets by the time we get here. Therefore, only set
-	 * packet queue state to ACTIVE if there are still uncompleted
-	 * requests.
-	 */
-	if (atomic_read(&pq->n_reqs))
-		xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
 
 	/*
 	 * This is a somewhat blocking send implementation.
@@ -588,14 +580,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 	while (req->seqsubmitted != req->info.npkts) {
 		ret = user_sdma_send_pkts(req, pcount);
 		if (ret < 0) {
-			if (ret != -EBUSY) {
-				req->status = ret;
-				WRITE_ONCE(req->has_error, 1);
-				if (READ_ONCE(req->seqcomp) ==
-				    req->seqsubmitted - 1)
-					goto free_req;
-				return ret;
-			}
+			if (ret != -EBUSY)
+				goto free_req;
 			wait_event_interruptible_timeout(
 				pq->busy.wait_dma,
 				(pq->state == SDMA_PKT_Q_ACTIVE),
@@ -606,10 +592,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 	*count += idx;
 	return 0;
 free_req:
-	user_sdma_free_request(req, true);
-	if (req_queued)
+	/*
+	 * If the submitted seqsubmitted == npkts, the completion routine
+	 * controls the final state.  If sequbmitted < npkts, wait for any
+	 * outstanding packets to finish before cleaning up.
+	 */
+	if (req->seqsubmitted < req->info.npkts) {
+		if (req->seqsubmitted)
+			wait_event(pq->busy.wait_dma,
+				   (req->seqcomp == req->seqsubmitted - 1));
+		user_sdma_free_request(req, true);
 		pq_update(pq);
-	set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
+		set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
+	}
 	return ret;
 }
 
@@ -760,9 +755,10 @@ static int user_sdma_txadd(struct user_sdma_request *req,
 	return ret;
 }
 
-static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
+static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
 {
-	int ret = 0, count;
+	int ret = 0;
+	u16 count;
 	unsigned npkts = 0;
 	struct user_sdma_txreq *tx = NULL;
 	struct hfi1_user_sdma_pkt_q *pq = NULL;
@@ -864,8 +860,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 
 				changes = set_txreq_header_ahg(req, tx,
 							       datalen);
-				if (changes < 0)
+				if (changes < 0) {
+					ret = changes;
 					goto free_tx;
+				}
 			}
 		} else {
 			ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
@@ -914,10 +912,11 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 		npkts++;
 	}
 dosend:
-	ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
+	ret = sdma_send_txlist(req->sde,
+			       iowait_get_ib_work(&pq->busy),
+			       &req->txps, &count);
 	req->seqsubmitted += count;
 	if (req->seqsubmitted == req->info.npkts) {
-		WRITE_ONCE(req->done, 1);
 		/*
 		 * The txreq has already been submitted to the HW queue
 		 * so we can free the AHG entry now. Corruption will not
@@ -1365,11 +1364,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
 	return idx;
 }
 
-/*
- * SDMA tx request completion callback. Called when the SDMA progress
- * state machine gets notification that the SDMA descriptors for this
- * tx request have been processed by the DMA engine. Called in
- * interrupt context.
+/**
+ * user_sdma_txreq_cb() - SDMA tx request completion callback.
+ * @txreq: valid sdma tx request
+ * @status: success/failure of request
+ *
+ * Called when the SDMA progress state machine gets notification that
+ * the SDMA descriptors for this tx request have been processed by the
+ * DMA engine. Called in interrupt context.
+ * Only do work on completed sequences.
  */
 static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 {
@@ -1378,7 +1381,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 	struct user_sdma_request *req;
 	struct hfi1_user_sdma_pkt_q *pq;
 	struct hfi1_user_sdma_comp_q *cq;
-	u16 idx;
+	enum hfi1_sdma_comp_state state = COMPLETE;
 
 	if (!tx->req)
 		return;
@@ -1391,39 +1394,25 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 		SDMA_DBG(req, "SDMA completion with error %d",
 			 status);
 		WRITE_ONCE(req->has_error, 1);
+		state = ERROR;
 	}
 
 	req->seqcomp = tx->seqnum;
 	kmem_cache_free(pq->txreq_cache, tx);
-	tx = NULL;
-
-	idx = req->info.comp_idx;
-	if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
-		if (req->seqcomp == req->info.npkts - 1) {
-			req->status = 0;
-			user_sdma_free_request(req, false);
-			pq_update(pq);
-			set_comp_state(pq, cq, idx, COMPLETE, 0);
-		}
-	} else {
-		if (status != SDMA_TXREQ_S_OK)
-			req->status = status;
-		if (req->seqcomp == (READ_ONCE(req->seqsubmitted) - 1) &&
-		    (READ_ONCE(req->done) ||
-		     READ_ONCE(req->has_error))) {
-			user_sdma_free_request(req, false);
-			pq_update(pq);
-			set_comp_state(pq, cq, idx, ERROR, req->status);
-		}
-	}
+
+	/* sequence isn't complete?  We are done */
+	if (req->seqcomp != req->info.npkts - 1)
+		return;
+
+	user_sdma_free_request(req, false);
+	set_comp_state(pq, cq, req->info.comp_idx, state, status);
+	pq_update(pq);
 }
 
 static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
 {
-	if (atomic_dec_and_test(&pq->n_reqs)) {
-		xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
+	if (atomic_dec_and_test(&pq->n_reqs))
 		wake_up(&pq->wait);
-	}
 }
 
 static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
@@ -1448,6 +1437,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
 		if (!node)
 			continue;
 
+		req->iovs[i].node = NULL;
+
 		if (unpin)
 			hfi1_mmu_rb_remove(req->pq->handler,
 					   &node->rb);
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index d2bc77f75253..14dfd757dafd 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -105,9 +105,10 @@ static inline int ahg_header_set(u32 *arr, int idx, size_t array_size,
 #define TXREQ_FLAGS_REQ_ACK   BIT(0)      /* Set the ACK bit in the header */
 #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
 
-#define SDMA_PKT_Q_INACTIVE BIT(0)
-#define SDMA_PKT_Q_ACTIVE   BIT(1)
-#define SDMA_PKT_Q_DEFERRED BIT(2)
+enum pkt_q_sdma_state {
+	SDMA_PKT_Q_ACTIVE,
+	SDMA_PKT_Q_DEFERRED,
+};
 
 /*
  * Maximum retry attempts to submit a TX request
@@ -133,7 +134,7 @@ struct hfi1_user_sdma_pkt_q {
 	struct user_sdma_request *reqs;
 	unsigned long *req_in_use;
 	struct iowait busy;
-	unsigned state;
+	enum pkt_q_sdma_state state;
 	wait_queue_head_t wait;
 	unsigned long unpinned;
 	struct mmu_rb_handler *handler;
@@ -203,14 +204,12 @@ struct user_sdma_request {
 	s8 ahg_idx;
 
 	/* Writeable fields shared with interrupt */
-	u64 seqcomp ____cacheline_aligned_in_smp;
-	u64 seqsubmitted;
-	/* status of the last txreq completed */
-	int status;
+	u16 seqcomp ____cacheline_aligned_in_smp;
+	u16 seqsubmitted;
 
 	/* Send side fields */
 	struct list_head txps ____cacheline_aligned_in_smp;
-	u64 seqnum;
+	u16 seqnum;
 	/*
 	 * KDETH.OFFSET (TID) field
 	 * The offset can cover multiple packets, depending on the
@@ -228,7 +227,6 @@ struct user_sdma_request {
 	u16 tididx;
 	/* progress index moving along the iovs array */
 	u8 iov_idx;
-	u8 done;
 	u8 has_error;
 
 	struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
@@ -248,7 +246,7 @@ struct user_sdma_txreq {
 	struct user_sdma_request *req;
 	u16 flags;
 	unsigned int busycount;
-	u64 seqnum;
+	u16 seqnum;
 };
 
 int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index a7c586a5589d..48e11e510358 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -129,8 +129,6 @@ unsigned short piothreshold = 256;
 module_param(piothreshold, ushort, S_IRUGO);
 MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
 
-#define COPY_CACHELESS 1
-#define COPY_ADAPTIVE  2
 static unsigned int sge_copy_mode;
 module_param(sge_copy_mode, uint, S_IRUGO);
 MODULE_PARM_DESC(sge_copy_mode,
@@ -151,159 +149,13 @@ static int pio_wait(struct rvt_qp *qp,
 /* 16B trailing buffer */
 static const u8 trail_buf[MAX_16B_PADDING];
 
-static uint wss_threshold;
+static uint wss_threshold = 80;
 module_param(wss_threshold, uint, S_IRUGO);
 MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
 static uint wss_clean_period = 256;
 module_param(wss_clean_period, uint, S_IRUGO);
 MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
 
-/* memory working set size */
-struct hfi1_wss {
-	unsigned long *entries;
-	atomic_t total_count;
-	atomic_t clean_counter;
-	atomic_t clean_entry;
-
-	int threshold;
-	int num_entries;
-	long pages_mask;
-};
-
-static struct hfi1_wss wss;
-
-int hfi1_wss_init(void)
-{
-	long llc_size;
-	long llc_bits;
-	long table_size;
-	long table_bits;
-
-	/* check for a valid percent range - default to 80 if none or invalid */
-	if (wss_threshold < 1 || wss_threshold > 100)
-		wss_threshold = 80;
-	/* reject a wildly large period */
-	if (wss_clean_period > 1000000)
-		wss_clean_period = 256;
-	/* reject a zero period */
-	if (wss_clean_period == 0)
-		wss_clean_period = 1;
-
-	/*
-	 * Calculate the table size - the next power of 2 larger than the
-	 * LLC size.  LLC size is in KiB.
-	 */
-	llc_size = wss_llc_size() * 1024;
-	table_size = roundup_pow_of_two(llc_size);
-
-	/* one bit per page in rounded up table */
-	llc_bits = llc_size / PAGE_SIZE;
-	table_bits = table_size / PAGE_SIZE;
-	wss.pages_mask = table_bits - 1;
-	wss.num_entries = table_bits / BITS_PER_LONG;
-
-	wss.threshold = (llc_bits * wss_threshold) / 100;
-	if (wss.threshold == 0)
-		wss.threshold = 1;
-
-	atomic_set(&wss.clean_counter, wss_clean_period);
-
-	wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries),
-			      GFP_KERNEL);
-	if (!wss.entries) {
-		hfi1_wss_exit();
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-void hfi1_wss_exit(void)
-{
-	/* coded to handle partially initialized and repeat callers */
-	kfree(wss.entries);
-	wss.entries = NULL;
-}
-
-/*
- * Advance the clean counter.  When the clean period has expired,
- * clean an entry.
- *
- * This is implemented in atomics to avoid locking.  Because multiple
- * variables are involved, it can be racy which can lead to slightly
- * inaccurate information.  Since this is only a heuristic, this is
- * OK.  Any innaccuracies will clean themselves out as the counter
- * advances.  That said, it is unlikely the entry clean operation will
- * race - the next possible racer will not start until the next clean
- * period.
- *
- * The clean counter is implemented as a decrement to zero.  When zero
- * is reached an entry is cleaned.
- */
-static void wss_advance_clean_counter(void)
-{
-	int entry;
-	int weight;
-	unsigned long bits;
-
-	/* become the cleaner if we decrement the counter to zero */
-	if (atomic_dec_and_test(&wss.clean_counter)) {
-		/*
-		 * Set, not add, the clean period.  This avoids an issue
-		 * where the counter could decrement below the clean period.
-		 * Doing a set can result in lost decrements, slowing the
-		 * clean advance.  Since this a heuristic, this possible
-		 * slowdown is OK.
-		 *
-		 * An alternative is to loop, advancing the counter by a
-		 * clean period until the result is > 0. However, this could
-		 * lead to several threads keeping another in the clean loop.
-		 * This could be mitigated by limiting the number of times
-		 * we stay in the loop.
-		 */
-		atomic_set(&wss.clean_counter, wss_clean_period);
-
-		/*
-		 * Uniquely grab the entry to clean and move to next.
-		 * The current entry is always the lower bits of
-		 * wss.clean_entry.  The table size, wss.num_entries,
-		 * is always a power-of-2.
-		 */
-		entry = (atomic_inc_return(&wss.clean_entry) - 1)
-			& (wss.num_entries - 1);
-
-		/* clear the entry and count the bits */
-		bits = xchg(&wss.entries[entry], 0);
-		weight = hweight64((u64)bits);
-		/* only adjust the contended total count if needed */
-		if (weight)
-			atomic_sub(weight, &wss.total_count);
-	}
-}
-
-/*
- * Insert the given address into the working set array.
- */
-static void wss_insert(void *address)
-{
-	u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask;
-	u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
-	u32 nr = page & (BITS_PER_LONG - 1);
-
-	if (!test_and_set_bit(nr, &wss.entries[entry]))
-		atomic_inc(&wss.total_count);
-
-	wss_advance_clean_counter();
-}
-
-/*
- * Is the working set larger than the threshold?
- */
-static inline bool wss_exceeds_threshold(void)
-{
-	return atomic_read(&wss.total_count) >= wss.threshold;
-}
-
 /*
  * Translate ib_wr_opcode into ib_wc_opcode.
  */
@@ -438,79 +290,6 @@ static const u32 pio_opmask[BIT(3)] = {
  */
 __be64 ib_hfi1_sys_image_guid;
 
-/**
- * hfi1_copy_sge - copy data to SGE memory
- * @ss: the SGE state
- * @data: the data to copy
- * @length: the length of the data
- * @release: boolean to release MR
- * @copy_last: do a separate copy of the last 8 bytes
- */
-void hfi1_copy_sge(
-	struct rvt_sge_state *ss,
-	void *data, u32 length,
-	bool release,
-	bool copy_last)
-{
-	struct rvt_sge *sge = &ss->sge;
-	int i;
-	bool in_last = false;
-	bool cacheless_copy = false;
-
-	if (sge_copy_mode == COPY_CACHELESS) {
-		cacheless_copy = length >= PAGE_SIZE;
-	} else if (sge_copy_mode == COPY_ADAPTIVE) {
-		if (length >= PAGE_SIZE) {
-			/*
-			 * NOTE: this *assumes*:
-			 * o The first vaddr is the dest.
-			 * o If multiple pages, then vaddr is sequential.
-			 */
-			wss_insert(sge->vaddr);
-			if (length >= (2 * PAGE_SIZE))
-				wss_insert(sge->vaddr + PAGE_SIZE);
-
-			cacheless_copy = wss_exceeds_threshold();
-		} else {
-			wss_advance_clean_counter();
-		}
-	}
-	if (copy_last) {
-		if (length > 8) {
-			length -= 8;
-		} else {
-			copy_last = false;
-			in_last = true;
-		}
-	}
-
-again:
-	while (length) {
-		u32 len = rvt_get_sge_length(sge, length);
-
-		WARN_ON_ONCE(len == 0);
-		if (unlikely(in_last)) {
-			/* enforce byte transfer ordering */
-			for (i = 0; i < len; i++)
-				((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
-		} else if (cacheless_copy) {
-			cacheless_memcpy(sge->vaddr, data, len);
-		} else {
-			memcpy(sge->vaddr, data, len);
-		}
-		rvt_update_sge(ss, len, release);
-		data += len;
-		length -= len;
-	}
-
-	if (copy_last) {
-		copy_last = false;
-		in_last = true;
-		length = 8;
-		goto again;
-	}
-}
-
 /*
  * Make sure the QP is ready and able to accept the given opcode.
  */
@@ -713,7 +492,7 @@ static void verbs_sdma_complete(
 
 	spin_lock(&qp->s_lock);
 	if (tx->wqe) {
-		hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
+		rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
 	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
 		struct hfi1_opa_header *hdr;
 
@@ -737,7 +516,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
 	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		write_seqlock(&dev->iowait_lock);
 		list_add_tail(&ps->s_txreq->txreq.list,
-			      &priv->s_iowait.tx_head);
+			      &ps->wait->tx_head);
 		if (list_empty(&priv->s_iowait.list)) {
 			if (list_empty(&dev->memwait))
 				mod_timer(&dev->mem_timer, jiffies + 1);
@@ -748,7 +527,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
 			rvt_get_qp(qp);
 		}
 		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~RVT_S_BUSY;
+		hfi1_qp_unbusy(qp, ps->wait);
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -950,8 +729,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 		if (unlikely(ret))
 			goto bail_build;
 	}
-	ret =  sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq,
-			       ps->pkts_sent);
+	ret =  sdma_send_txreq(tx->sde, ps->wait, &tx->txreq, ps->pkts_sent);
 	if (unlikely(ret < 0)) {
 		if (ret == -ECOMM)
 			goto bail_ecomm;
@@ -1001,7 +779,7 @@ static int pio_wait(struct rvt_qp *qp,
 	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		write_seqlock(&dev->iowait_lock);
 		list_add_tail(&ps->s_txreq->txreq.list,
-			      &priv->s_iowait.tx_head);
+			      &ps->wait->tx_head);
 		if (list_empty(&priv->s_iowait.list)) {
 			struct hfi1_ibdev *dev = &dd->verbs_dev;
 			int was_empty;
@@ -1020,7 +798,7 @@ static int pio_wait(struct rvt_qp *qp,
 				hfi1_sc_wantpiobuf_intr(sc, 1);
 		}
 		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~RVT_S_BUSY;
+		hfi1_qp_unbusy(qp, ps->wait);
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1160,7 +938,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 pio_bail:
 	if (qp->s_wqe) {
 		spin_lock_irqsave(&qp->s_lock, flags);
-		hfi1_send_complete(qp, qp->s_wqe, wc_status);
+		rvt_send_complete(qp, qp->s_wqe, wc_status);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
 		spin_lock_irqsave(&qp->s_lock, flags);
@@ -1367,7 +1145,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 			hfi1_cdbg(PIO, "%s() Failed. Completing with err",
 				  __func__);
 			spin_lock_irqsave(&qp->s_lock, flags);
-			hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
+			rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 		}
 		return -EINVAL;
@@ -1943,7 +1721,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
 	dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
 	dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
 	dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
-	dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
+	dd->verbs_dev.rdi.driver_f.setup_wqe = hfi1_setup_wqe;
 	dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup =
 						hfi1_comp_vect_mappings_lookup;
 
@@ -1956,10 +1734,16 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
 	dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size;
 	dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
 	dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);
+	dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
+	dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
+	dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
 
 	/* post send table */
 	dd->verbs_dev.rdi.post_parms = hfi1_post_parms;
 
+	/* opcode translation table */
+	dd->verbs_dev.rdi.wc_opcode = ib_hfi1_wc_opcode;
+
 	ppd = dd->pport;
 	for (i = 0; i < dd->num_pports; i++, ppd++)
 		rvt_init_port(&dd->verbs_dev.rdi,
@@ -1967,6 +1751,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
 			      i,
 			      ppd->pkeys);
 
+	rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev,
+				    &ib_hfi1_attr_group);
+
 	ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1);
 	if (ret)
 		goto err_verbs_txreq;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index a4d06502f06d..64c9054db5f3 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -166,11 +166,13 @@ struct hfi1_qp_priv {
  * This structure is used to hold commonly lookedup and computed values during
  * the send engine progress.
  */
+struct iowait_work;
 struct hfi1_pkt_state {
 	struct hfi1_ibdev *dev;
 	struct hfi1_ibport *ibp;
 	struct hfi1_pportdata *ppd;
 	struct verbs_txreq *s_txreq;
+	struct iowait_work *wait;
 	unsigned long flags;
 	unsigned long timeout;
 	unsigned long timeout_int;
@@ -247,7 +249,7 @@ static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
 	return container_of(rdi, struct hfi1_ibdev, rdi);
 }
 
-static inline struct rvt_qp *iowait_to_qp(struct  iowait *s_iowait)
+static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
 {
 	struct hfi1_qp_priv *priv;
 
@@ -313,9 +315,6 @@ void hfi1_put_txreq(struct verbs_txreq *tx);
 
 int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 
-void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
-		   bool release, bool copy_last);
-
 void hfi1_cnp_rcv(struct hfi1_packet *packet);
 
 void hfi1_uc_rcv(struct hfi1_packet *packet);
@@ -343,7 +342,8 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
 void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
 		    int attr_mask, struct ib_udata *udata);
 void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
-int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+		   bool *call_send);
 
 extern const u32 rc_only_opcode;
 extern const u32 uc_only_opcode;
@@ -363,9 +363,6 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp);
 
 void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
 
-void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
-			enum ib_wc_status status);
-
 void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn);
 
 int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
@@ -390,28 +387,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			u64 pbc);
 
-int hfi1_wss_init(void);
-void hfi1_wss_exit(void);
-
-/* platform specific: return the lowest level cache (llc) size, in KiB */
-static inline int wss_llc_size(void)
-{
-	/* assume that the boot CPU value is universal for all CPUs */
-	return boot_cpu_data.x86_cache_size;
-}
-
-/* platform specific: cacheless copy */
-static inline void cacheless_memcpy(void *dst, void *src, size_t n)
-{
-	/*
-	 * Use the only available X64 cacheless copy.  Add a __user cast
-	 * to quiet sparse.  The src agument is already in the kernel so
-	 * there are no security issues.  The extra fault recovery machinery
-	 * is not invoked.
-	 */
-	__copy_user_nocache(dst, (void __user *)src, n, 0);
-}
-
 static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr)
 {
 	return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ);
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 1c19bbc764b2..2a77af26a231 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -102,22 +102,19 @@ static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx)
 	return &tx->txreq;
 }
 
-static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp)
+static inline struct verbs_txreq *get_waiting_verbs_txreq(struct iowait_work *w)
 {
 	struct sdma_txreq *stx;
-	struct hfi1_qp_priv *priv = qp->priv;
 
-	stx = iowait_get_txhead(&priv->s_iowait);
+	stx = iowait_get_txhead(w);
 	if (stx)
 		return container_of(stx, struct verbs_txreq, txreq);
 	return NULL;
 }
 
-static inline bool verbs_txreq_queued(struct rvt_qp *qp)
+static inline bool verbs_txreq_queued(struct iowait_work *w)
 {
-	struct hfi1_qp_priv *priv = qp->priv;
-
-	return iowait_packet_queued(&priv->s_iowait);
+	return iowait_packet_queued(w);
 }
 
 void hfi1_put_txreq(struct verbs_txreq *tx);
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index c643d80c5a53..c9876d9e3cb9 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -120,7 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
 	uctxt->seq_cnt = 1;
 	uctxt->is_vnic = true;
 
-	hfi1_set_vnic_msix_info(uctxt);
+	msix_request_rcd_irq(uctxt);
 
 	hfi1_stats.sps_ctxts++;
 	dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
@@ -135,8 +135,6 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
 	dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
 	flush_wc();
 
-	hfi1_reset_vnic_msix_info(uctxt);
-
 	/*
 	 * Disable receive context and interrupt available, reset all
 	 * RcvCtxtCtrl bits to default values.
@@ -148,6 +146,10 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
 		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
 		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
 
+	/* msix_intr will always be > 0, only clean up if this is true */
+	if (uctxt->msix_intr)
+		msix_free_irq(dd, uctxt->msix_intr);
+
 	uctxt->event_flags = 0;
 
 	hfi1_clear_tids(uctxt);
@@ -626,7 +628,7 @@ static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
 	idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);
 
 	/* ensure irqs see the change */
-	hfi1_vnic_synchronize_irq(dd);
+	msix_vnic_synchronize_irq(dd);
 
 	/* remove unread skbs */
 	for (i = 0; i < vinfo->num_rx_q; i++) {
@@ -690,8 +692,6 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
 		rc = hfi1_vnic_txreq_init(dd);
 		if (rc)
 			goto txreq_fail;
-
-		dd->vnic.msix_idx = dd->first_dyn_msix_idx;
 	}
 
 	for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index c3c96c5869ed..97bd940a056a 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2017 Intel Corporation.
+ * Copyright(c) 2017 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -198,8 +198,8 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
 		goto free_desc;
 	tx->retry_count = 0;
 
-	ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq,
-			      vnic_sdma->pkts_sent);
+	ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait),
+			      &tx->txreq, vnic_sdma->pkts_sent);
 	/* When -ECOMM, sdma callback will be called with ABORT status */
 	if (unlikely(ret && unlikely(ret != -ECOMM)))
 		goto free_desc;
@@ -230,13 +230,13 @@ tx_err:
  * become available.
  */
 static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
-				struct iowait *wait,
+				struct iowait_work *wait,
 				struct sdma_txreq *txreq,
 				uint seq,
 				bool pkts_sent)
 {
 	struct hfi1_vnic_sdma *vnic_sdma =
-		container_of(wait, struct hfi1_vnic_sdma, wait);
+		container_of(wait->iow, struct hfi1_vnic_sdma, wait);
 	struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
 	struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
 
@@ -247,7 +247,7 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
 	vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
 	write_seqlock(&dev->iowait_lock);
 	if (list_empty(&vnic_sdma->wait.list))
-		iowait_queue(pkts_sent, wait, &sde->dmawait);
+		iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
 	write_sequnlock(&dev->iowait_lock);
 	return -EBUSY;
 }
@@ -285,7 +285,8 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
 	for (i = 0; i < vinfo->num_tx_q; i++) {
 		struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i];
 
-		iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep,
+		iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
+			    hfi1_vnic_sdma_sleep,
 			    hfi1_vnic_sdma_wakeup, NULL);
 		vnic_sdma->sde = &vinfo->dd->per_sdma[i];
 		vnic_sdma->dd = vinfo->dd;
@@ -295,10 +296,12 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
 
 		/* Add a free descriptor watermark for wakeups */
 		if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) {
+			struct iowait_work *work;
+
 			INIT_LIST_HEAD(&vnic_sdma->stx.list);
 			vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK;
-			list_add_tail(&vnic_sdma->stx.list,
-				      &vnic_sdma->wait.tx_head);
+			work = iowait_get_ib_work(&vnic_sdma->wait);
+			list_add_tail(&vnic_sdma->stx.list, &work->tx_head);
 		}
 	}
 }
diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig
index fddb5fdf92de..21c2100b2ea9 100644
--- a/drivers/infiniband/hw/hns/Kconfig
+++ b/drivers/infiniband/hw/hns/Kconfig
@@ -1,6 +1,7 @@
 config INFINIBAND_HNS
 	tristate "HNS RoCE Driver"
 	depends on NET_VENDOR_HISILICON
+	depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
 	depends on ARM64 || (COMPILE_TEST && 64BIT)
 	---help---
 	  This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 0d96c5bb38cd..9990dc9eb96a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -49,6 +49,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
 	struct hns_roce_ah *ah;
 	u16 vlan_tag = 0xffff;
 	const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+	bool vlan_en = false;
 
 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
 	if (!ah)
@@ -58,8 +59,10 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
 	memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
 
 	gid_attr = ah_attr->grh.sgid_attr;
-	if (is_vlan_dev(gid_attr->ndev))
+	if (is_vlan_dev(gid_attr->ndev)) {
 		vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
+		vlan_en = true;
+	}
 
 	if (vlan_tag < 0x1000)
 		vlan_tag |= (rdma_ah_get_sl(ah_attr) &
@@ -71,6 +74,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
 				     HNS_ROCE_PORT_NUM_SHIFT));
 	ah->av.gid_index = grh->sgid_index;
 	ah->av.vlan = cpu_to_le16(vlan_tag);
+	ah->av.vlan_en = vlan_en;
 	dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index,
 		ah->av.vlan);
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 9a24fd0ee3e7..d39bdfdb5de9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -88,8 +88,11 @@
 #define BITMAP_RR				1
 
 #define MR_TYPE_MR				0x00
+#define MR_TYPE_FRMR				0x01
 #define MR_TYPE_DMA				0x03
 
+#define HNS_ROCE_FRMR_MAX_PA			512
+
 #define PKEY_ID					0xffff
 #define GUID_LEN				8
 #define NODE_DESC_SIZE				64
@@ -193,6 +196,9 @@ enum {
 	HNS_ROCE_CAP_FLAG_RQ_INLINE		= BIT(2),
 	HNS_ROCE_CAP_FLAG_RECORD_DB		= BIT(3),
 	HNS_ROCE_CAP_FLAG_SQ_RECORD_DB		= BIT(4),
+	HNS_ROCE_CAP_FLAG_MW			= BIT(7),
+	HNS_ROCE_CAP_FLAG_FRMR                  = BIT(8),
+	HNS_ROCE_CAP_FLAG_ATOMIC		= BIT(10),
 };
 
 enum hns_roce_mtt_type {
@@ -219,19 +225,11 @@ struct hns_roce_uar {
 	unsigned long	logic_idx;
 };
 
-struct hns_roce_vma_data {
-	struct list_head list;
-	struct vm_area_struct *vma;
-	struct mutex *vma_list_mutex;
-};
-
 struct hns_roce_ucontext {
 	struct ib_ucontext	ibucontext;
 	struct hns_roce_uar	uar;
 	struct list_head	page_list;
 	struct mutex		page_mutex;
-	struct list_head	vma_list;
-	struct mutex		vma_list_mutex;
 };
 
 struct hns_roce_pd {
@@ -293,6 +291,16 @@ struct hns_roce_mtt {
 	enum hns_roce_mtt_type	mtt_type;
 };
 
+struct hns_roce_mw {
+	struct ib_mw		ibmw;
+	u32			pdn;
+	u32			rkey;
+	int			enabled; /* MW's active status */
+	u32			pbl_hop_num;
+	u32			pbl_ba_pg_sz;
+	u32			pbl_buf_pg_sz;
+};
+
 /* Only support 4K page size for mr register */
 #define MR_SIZE_4K 0
 
@@ -304,6 +312,7 @@ struct hns_roce_mr {
 	u32			key; /* Key of MR */
 	u32			pd;   /* PD num of MR */
 	u32			access;/* Access permission of MR */
+	u32			npages;
 	int			enabled; /* MR's active status */
 	int			type;	/* MR's register type */
 	u64			*pbl_buf;/* MR's PBL space */
@@ -457,6 +466,7 @@ struct hns_roce_av {
 	u8          dgid[HNS_ROCE_GID_SIZE];
 	u8          mac[6];
 	__le16      vlan;
+	bool	    vlan_en;
 };
 
 struct hns_roce_ah {
@@ -656,6 +666,7 @@ struct hns_roce_eq_table {
 };
 
 struct hns_roce_caps {
+	u64		fw_ver;
 	u8		num_ports;
 	int		gid_table_len[HNS_ROCE_MAX_PORTS];
 	int		pkey_table_len[HNS_ROCE_MAX_PORTS];
@@ -665,7 +676,9 @@ struct hns_roce_caps {
 	u32		max_sq_sg;	/* 2 */
 	u32		max_sq_inline;	/* 32 */
 	u32		max_rq_sg;	/* 2 */
+	u32		max_extend_sg;
 	int		num_qps;	/* 256k */
+	int             reserved_qps;
 	u32		max_wqes;	/* 16k */
 	u32		max_sq_desc_sz;	/* 64 */
 	u32		max_rq_desc_sz;	/* 64 */
@@ -738,6 +751,7 @@ struct hns_roce_work {
 	struct hns_roce_dev *hr_dev;
 	struct work_struct work;
 	u32 qpn;
+	u32 cqn;
 	int event_type;
 	int sub_type;
 };
@@ -764,6 +778,8 @@ struct hns_roce_hw {
 				struct hns_roce_mr *mr, int flags, u32 pdn,
 				int mr_access_flags, u64 iova, u64 size,
 				void *mb_buf);
+	int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr);
+	int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
 	void (*write_cqc)(struct hns_roce_dev *hr_dev,
 			  struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
 			  dma_addr_t dma_handle, int nent, u32 vector);
@@ -863,6 +879,11 @@ static inline struct hns_roce_mr *to_hr_mr(struct ib_mr *ibmr)
 	return container_of(ibmr, struct hns_roce_mr, ibmr);
 }
 
+static inline struct hns_roce_mw *to_hr_mw(struct ib_mw *ibmw)
+{
+	return container_of(ibmw, struct hns_roce_mw, ibmw);
+}
+
 static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp)
 {
 	return container_of(ibqp, struct hns_roce_qp, ibqp);
@@ -968,12 +989,20 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length,
 			   u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
 			   struct ib_udata *udata);
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+				u32 max_num_sg);
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+		       unsigned int *sg_offset);
 int hns_roce_dereg_mr(struct ib_mr *ibmr);
 int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
 		       struct hns_roce_cmd_mailbox *mailbox,
 		       unsigned long mpt_index);
 unsigned long key_to_hw_index(u32 key);
 
+struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
+				struct ib_udata *udata);
+int hns_roce_dealloc_mw(struct ib_mw *ibmw);
+
 void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
 		       struct hns_roce_buf *buf);
 int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 081aa91fc162..ca05810c92dc 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -731,7 +731,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
 	cq_init_attr.comp_vector	= 0;
 	cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL);
 	if (IS_ERR(cq)) {
-		dev_err(dev, "Create cq for reseved loop qp failed!");
+		dev_err(dev, "Create cq for reserved loop qp failed!");
 		return -ENOMEM;
 	}
 	free_mr->mr_free_cq = to_hr_cq(cq);
@@ -744,7 +744,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
 
 	pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL);
 	if (IS_ERR(pd)) {
-		dev_err(dev, "Create pd for reseved loop qp failed!");
+		dev_err(dev, "Create pd for reserved loop qp failed!");
 		ret = -ENOMEM;
 		goto alloc_pd_failed;
 	}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 0218c0f8c2a7..a4c62ae23a9a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -54,6 +54,59 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
 	dseg->len  = cpu_to_le32(sg->length);
 }
 
+static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+			 struct hns_roce_wqe_frmr_seg *fseg,
+			 const struct ib_reg_wr *wr)
+{
+	struct hns_roce_mr *mr = to_hr_mr(wr->mr);
+
+	/* use ib_access_flags */
+	roce_set_bit(rc_sq_wqe->byte_4,
+		     V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
+		     wr->access & IB_ACCESS_MW_BIND ? 1 : 0);
+	roce_set_bit(rc_sq_wqe->byte_4,
+		     V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
+		     wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
+	roce_set_bit(rc_sq_wqe->byte_4,
+		     V2_RC_FRMR_WQE_BYTE_4_RR_S,
+		     wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0);
+	roce_set_bit(rc_sq_wqe->byte_4,
+		     V2_RC_FRMR_WQE_BYTE_4_RW_S,
+		     wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
+	roce_set_bit(rc_sq_wqe->byte_4,
+		     V2_RC_FRMR_WQE_BYTE_4_LW_S,
+		     wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
+
+	/* Data structure reuse may lead to confusion */
+	rc_sq_wqe->msg_len = cpu_to_le32(mr->pbl_ba & 0xffffffff);
+	rc_sq_wqe->inv_key = cpu_to_le32(mr->pbl_ba >> 32);
+
+	rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff);
+	rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32);
+	rc_sq_wqe->rkey = cpu_to_le32(wr->key);
+	rc_sq_wqe->va = cpu_to_le64(wr->mr->iova);
+
+	fseg->pbl_size = cpu_to_le32(mr->pbl_size);
+	roce_set_field(fseg->mode_buf_pg_sz,
+		       V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
+		       V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S,
+		       mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+	roce_set_bit(fseg->mode_buf_pg_sz,
+		     V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
+}
+
+static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
+			   const struct ib_atomic_wr *wr)
+{
+	if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+		aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
+		aseg->cmp_data  = cpu_to_le64(wr->compare_add);
+	} else {
+		aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
+		aseg->cmp_data  = 0;
+	}
+}
+
 static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
 			   unsigned int *sge_ind)
 {
@@ -121,6 +174,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 		}
 
 		if (wr->opcode == IB_WR_RDMA_READ) {
+			*bad_wr =  wr;
 			dev_err(hr_dev->dev, "Not support inline data!\n");
 			return -EINVAL;
 		}
@@ -179,6 +233,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 	struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
 	struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
 	struct hns_roce_qp *qp = to_hr_qp(ibqp);
+	struct hns_roce_wqe_frmr_seg *fseg;
 	struct device *dev = hr_dev->dev;
 	struct hns_roce_v2_db sq_db;
 	struct ib_qp_attr attr;
@@ -191,6 +246,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 	int attr_mask;
 	u32 tmp_len;
 	int ret = 0;
+	u32 hr_op;
 	u8 *smac;
 	int nreq;
 	int i;
@@ -356,6 +412,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 				       V2_UD_SEND_WQE_BYTE_40_PORTN_S,
 				       qp->port);
 
+			roce_set_bit(ud_sq_wqe->byte_40,
+				     V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
+				     ah->av.vlan_en ? 1 : 0);
 			roce_set_field(ud_sq_wqe->byte_48,
 				       V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
 				       V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S,
@@ -406,99 +465,100 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 			roce_set_bit(rc_sq_wqe->byte_4,
 				     V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
 
+			wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
 			switch (wr->opcode) {
 			case IB_WR_RDMA_READ:
-				roce_set_field(rc_sq_wqe->byte_4,
-					       V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-					       V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
-					       HNS_ROCE_V2_WQE_OP_RDMA_READ);
+				hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ;
 				rc_sq_wqe->rkey =
 					cpu_to_le32(rdma_wr(wr)->rkey);
 				rc_sq_wqe->va =
 					cpu_to_le64(rdma_wr(wr)->remote_addr);
 				break;
 			case IB_WR_RDMA_WRITE:
-				roce_set_field(rc_sq_wqe->byte_4,
-					       V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-					       V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
-					       HNS_ROCE_V2_WQE_OP_RDMA_WRITE);
+				hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE;
 				rc_sq_wqe->rkey =
 					cpu_to_le32(rdma_wr(wr)->rkey);
 				rc_sq_wqe->va =
 					cpu_to_le64(rdma_wr(wr)->remote_addr);
 				break;
 			case IB_WR_RDMA_WRITE_WITH_IMM:
-				roce_set_field(rc_sq_wqe->byte_4,
-				       V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-				       V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
-				       HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM);
+				hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM;
 				rc_sq_wqe->rkey =
 					cpu_to_le32(rdma_wr(wr)->rkey);
 				rc_sq_wqe->va =
 					cpu_to_le64(rdma_wr(wr)->remote_addr);
 				break;
 			case IB_WR_SEND:
-				roce_set_field(rc_sq_wqe->byte_4,
-					       V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-					       V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
-					       HNS_ROCE_V2_WQE_OP_SEND);
+				hr_op = HNS_ROCE_V2_WQE_OP_SEND;
 				break;
 			case IB_WR_SEND_WITH_INV:
-				roce_set_field(rc_sq_wqe->byte_4,
-				       V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-				       V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
-				       HNS_ROCE_V2_WQE_OP_SEND_WITH_INV);
+				hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_INV;
 				break;
 			case IB_WR_SEND_WITH_IMM:
-				roce_set_field(rc_sq_wqe->byte_4,
-					      V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-					      V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
-					      HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM);
+				hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM;
 				break;
 			case IB_WR_LOCAL_INV:
-				roce_set_field(rc_sq_wqe->byte_4,
-					       V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-					       V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
-					       HNS_ROCE_V2_WQE_OP_LOCAL_INV);
+				hr_op = HNS_ROCE_V2_WQE_OP_LOCAL_INV;
+				roce_set_bit(rc_sq_wqe->byte_4,
+					       V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
+				rc_sq_wqe->inv_key =
+					    cpu_to_le32(wr->ex.invalidate_rkey);
+				break;
+			case IB_WR_REG_MR:
+				hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR;
+				fseg = wqe;
+				set_frmr_seg(rc_sq_wqe, fseg, reg_wr(wr));
 				break;
 			case IB_WR_ATOMIC_CMP_AND_SWP:
-				roce_set_field(rc_sq_wqe->byte_4,
-					  V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-					  V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
-					  HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP);
+				hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
+				rc_sq_wqe->rkey =
+					cpu_to_le32(atomic_wr(wr)->rkey);
+				rc_sq_wqe->va =
+					cpu_to_le64(atomic_wr(wr)->remote_addr);
 				break;
 			case IB_WR_ATOMIC_FETCH_AND_ADD:
-				roce_set_field(rc_sq_wqe->byte_4,
-					 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-					 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
-					 HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD);
+				hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD;
+				rc_sq_wqe->rkey =
+					cpu_to_le32(atomic_wr(wr)->rkey);
+				rc_sq_wqe->va =
+					cpu_to_le64(atomic_wr(wr)->remote_addr);
 				break;
 			case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
-				roce_set_field(rc_sq_wqe->byte_4,
-				      V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-				      V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
-				      HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP);
+				hr_op =
+				       HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP;
 				break;
 			case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
-				roce_set_field(rc_sq_wqe->byte_4,
-				     V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-				     V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
-				     HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD);
+				hr_op =
+				      HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD;
 				break;
 			default:
-				roce_set_field(rc_sq_wqe->byte_4,
-					       V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-					       V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
-					       HNS_ROCE_V2_WQE_OP_MASK);
+				hr_op = HNS_ROCE_V2_WQE_OP_MASK;
 				break;
 			}
 
-			wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
+			roce_set_field(rc_sq_wqe->byte_4,
+				       V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+				       V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op);
+
+			if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+			    wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+				struct hns_roce_v2_wqe_data_seg *dseg;
+
+				dseg = wqe;
+				set_data_seg_v2(dseg, wr->sg_list);
+				wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
+				set_atomic_seg(wqe, atomic_wr(wr));
+				roce_set_field(rc_sq_wqe->byte_16,
+					       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+					       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
+					       wr->num_sge);
+			} else if (wr->opcode != IB_WR_REG_MR) {
+				ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
+							wqe, &sge_ind, bad_wr);
+				if (ret)
+					goto out;
+			}
 
-			ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe,
-						&sge_ind, bad_wr);
-			if (ret)
-				goto out;
 			ind++;
 		} else {
 			dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
@@ -935,7 +995,24 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
 
 	resp = (struct hns_roce_query_version *)desc.data;
 	hr_dev->hw_rev = le32_to_cpu(resp->rocee_hw_version);
-	hr_dev->vendor_id = le32_to_cpu(resp->rocee_vendor_id);
+	hr_dev->vendor_id = hr_dev->pci_dev->vendor;
+
+	return 0;
+}
+
+static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_query_fw_info *resp;
+	struct hns_roce_cmq_desc desc;
+	int ret;
+
+	hns_roce_cmq_setup_basic_desc(&desc, HNS_QUERY_FW_VER, true);
+	ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+	if (ret)
+		return ret;
+
+	resp = (struct hns_roce_query_fw_info *)desc.data;
+	hr_dev->caps.fw_ver = (u64)(le32_to_cpu(resp->fw_ver));
 
 	return 0;
 }
@@ -1158,6 +1235,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 
 	ret = hns_roce_cmq_query_hw_info(hr_dev);
 	if (ret) {
+		dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n",
+			ret);
+		return ret;
+	}
+
+	ret = hns_roce_query_fw_ver(hr_dev);
+	if (ret) {
 		dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n",
 			ret);
 		return ret;
@@ -1185,14 +1269,16 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 		return ret;
 	}
 
-	hr_dev->vendor_part_id = 0;
-	hr_dev->sys_image_guid = 0;
+
+	hr_dev->vendor_part_id = hr_dev->pci_dev->device;
+	hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
 
 	caps->num_qps		= HNS_ROCE_V2_MAX_QP_NUM;
 	caps->max_wqes		= HNS_ROCE_V2_MAX_WQE_NUM;
 	caps->num_cqs		= HNS_ROCE_V2_MAX_CQ_NUM;
 	caps->max_cqes		= HNS_ROCE_V2_MAX_CQE_NUM;
 	caps->max_sq_sg		= HNS_ROCE_V2_MAX_SQ_SGE_NUM;
+	caps->max_extend_sg	= HNS_ROCE_V2_MAX_EXTEND_SGE_NUM;
 	caps->max_rq_sg		= HNS_ROCE_V2_MAX_RQ_SGE_NUM;
 	caps->max_sq_inline	= HNS_ROCE_V2_MAX_SQ_INLINE;
 	caps->num_uars		= HNS_ROCE_V2_UAR_NUM;
@@ -1222,6 +1308,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 	caps->reserved_mrws	= 1;
 	caps->reserved_uars	= 0;
 	caps->reserved_cqs	= 0;
+	caps->reserved_qps	= HNS_ROCE_V2_RSV_QPS;
 
 	caps->qpc_ba_pg_sz	= 0;
 	caps->qpc_buf_pg_sz	= 0;
@@ -1255,6 +1342,11 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 				  HNS_ROCE_CAP_FLAG_RQ_INLINE |
 				  HNS_ROCE_CAP_FLAG_RECORD_DB |
 				  HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
+
+	if (hr_dev->pci_dev->revision == 0x21)
+		caps->flags |= HNS_ROCE_CAP_FLAG_MW |
+			       HNS_ROCE_CAP_FLAG_FRMR;
+
 	caps->pkey_table_len[0] = 1;
 	caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
 	caps->ceqe_depth	= HNS_ROCE_V2_COMP_EQE_NUM;
@@ -1262,6 +1354,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 	caps->local_ca_ack_delay = 0;
 	caps->max_mtu = IB_MTU_4096;
 
+	if (hr_dev->pci_dev->revision == 0x21)
+		caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC;
+
 	ret = hns_roce_v2_set_bt(hr_dev);
 	if (ret)
 		dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
@@ -1690,10 +1785,11 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
 
 	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0);
 	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
-	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 0);
+	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
 	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S,
 		     (mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
-	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, 0);
+	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S,
+		     mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
 	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
 		     (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
 	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
@@ -1817,6 +1913,88 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
 	return 0;
 }
 
+static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
+{
+	struct hns_roce_v2_mpt_entry *mpt_entry;
+
+	mpt_entry = mb_buf;
+	memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+	roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
+		       V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
+	roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
+		       V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1);
+	roce_set_field(mpt_entry->byte_4_pd_hop_st,
+		       V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
+		       V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
+		       mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
+	roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
+		       V2_MPT_BYTE_4_PD_S, mr->pd);
+
+	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1);
+	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
+	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
+
+	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1);
+	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
+	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
+	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
+
+	mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
+
+	mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
+	roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
+		       V2_MPT_BYTE_48_PBL_BA_H_S,
+		       upper_32_bits(mr->pbl_ba >> 3));
+
+	roce_set_field(mpt_entry->byte_64_buf_pa1,
+		       V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
+		       V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
+		       mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+
+	return 0;
+}
+
+static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
+{
+	struct hns_roce_v2_mpt_entry *mpt_entry;
+
+	mpt_entry = mb_buf;
+	memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+	roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
+		       V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
+	roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
+		       V2_MPT_BYTE_4_PD_S, mw->pdn);
+	roce_set_field(mpt_entry->byte_4_pd_hop_st,
+		       V2_MPT_BYTE_4_PBL_HOP_NUM_M,
+		       V2_MPT_BYTE_4_PBL_HOP_NUM_S,
+		       mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ?
+		       0 : mw->pbl_hop_num);
+	roce_set_field(mpt_entry->byte_4_pd_hop_st,
+		       V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
+		       V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
+		       mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
+
+	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
+	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
+
+	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
+	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1);
+	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
+	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S,
+		     mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1);
+
+	roce_set_field(mpt_entry->byte_64_buf_pa1,
+		       V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
+		       V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
+		       mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+
+	mpt_entry->lkey = cpu_to_le32(mw->rkey);
+
+	return 0;
+}
+
 static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
 {
 	return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
@@ -2274,6 +2452,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 		wc->src_qp = (u8)roce_get_field(cqe->byte_32,
 						V2_CQE_BYTE_32_RMT_QPN_M,
 						V2_CQE_BYTE_32_RMT_QPN_S);
+		wc->slid = 0;
 		wc->wc_flags |= (roce_get_bit(cqe->byte_32,
 					      V2_CQE_BYTE_32_GRH_S) ?
 					      IB_WC_GRH : 0);
@@ -2287,7 +2466,14 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 		wc->smac[5] = roce_get_field(cqe->byte_28,
 					     V2_CQE_BYTE_28_SMAC_5_M,
 					     V2_CQE_BYTE_28_SMAC_5_S);
-		wc->vlan_id = 0xffff;
+		if (roce_get_bit(cqe->byte_28, V2_CQE_BYTE_28_VID_VLD_S)) {
+			wc->vlan_id = (u16)roce_get_field(cqe->byte_28,
+							  V2_CQE_BYTE_28_VID_M,
+							  V2_CQE_BYTE_28_VID_S);
+		} else {
+			wc->vlan_id = 0xffff;
+		}
+
 		wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
 		wc->network_hdr_type = roce_get_field(cqe->byte_28,
 						    V2_CQE_BYTE_28_PORT_TYPE_M,
@@ -2589,21 +2775,16 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 	roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_TX_ERR_S, 0);
 	roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_RX_ERR_S, 0);
 
-	roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_MAPID_M,
-		       V2_QPC_BYTE_60_MAPID_S, 0);
+	roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_TEMPID_M,
+		       V2_QPC_BYTE_60_TEMPID_S, 0);
 
-	roce_set_bit(qpc_mask->byte_60_qpst_mapid,
-		     V2_QPC_BYTE_60_INNER_MAP_IND_S, 0);
-	roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_MAP_IND_S,
-		     0);
-	roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_RQ_MAP_IND_S,
-		     0);
-	roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_EXT_MAP_IND_S,
-		     0);
-	roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_RLS_IND_S,
-		     0);
-	roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_EXT_IND_S,
-		     0);
+	roce_set_field(qpc_mask->byte_60_qpst_tempid,
+		       V2_QPC_BYTE_60_SCC_TOKEN_M, V2_QPC_BYTE_60_SCC_TOKEN_S,
+		       0);
+	roce_set_bit(qpc_mask->byte_60_qpst_tempid,
+		     V2_QPC_BYTE_60_SQ_DB_DOING_S, 0);
+	roce_set_bit(qpc_mask->byte_60_qpst_tempid,
+		     V2_QPC_BYTE_60_RQ_DB_DOING_S, 0);
 	roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0);
 	roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0);
 
@@ -2685,7 +2866,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 	roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M,
 		       V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0);
 
-	roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RSVD_RAQ_MAP_S, 0);
+	roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S,
+		     0);
 	roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M,
 		       V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S, 0);
 	roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M,
@@ -2694,8 +2876,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 	roce_set_field(qpc_mask->byte_144_raq,
 		       V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M,
 		       V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S, 0);
-	roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S,
-		     0);
 	roce_set_field(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_CREDIT_M,
 		       V2_QPC_BYTE_144_RAQ_CREDIT_S, 0);
 	roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RESP_RTY_FLG_S, 0);
@@ -2721,14 +2901,12 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 		       V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M,
 		       V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S, 0);
 
-	roce_set_field(context->byte_168_irrl_idx,
-		       V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
-		       V2_QPC_BYTE_168_SQ_SHIFT_BAK_S,
-		       ilog2((unsigned int)hr_qp->sq.wqe_cnt));
-	roce_set_field(qpc_mask->byte_168_irrl_idx,
-		       V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
-		       V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0);
-
+	roce_set_bit(qpc_mask->byte_168_irrl_idx,
+		     V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S, 0);
+	roce_set_bit(qpc_mask->byte_168_irrl_idx,
+		     V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S, 0);
+	roce_set_bit(qpc_mask->byte_168_irrl_idx,
+		     V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S, 0);
 	roce_set_bit(qpc_mask->byte_168_irrl_idx,
 		     V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S, 0);
 	roce_set_bit(qpc_mask->byte_168_irrl_idx,
@@ -2746,6 +2924,9 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 	roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S,
 		     0);
 
+	roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
+	roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 0);
+
 	roce_set_field(qpc_mask->byte_176_msg_pktn,
 		       V2_QPC_BYTE_176_MSG_USE_PKTN_M,
 		       V2_QPC_BYTE_176_MSG_USE_PKTN_S, 0);
@@ -2790,6 +2971,13 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 		       V2_QPC_BYTE_232_IRRL_SGE_IDX_M,
 		       V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0);
 
+	roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_SO_LP_VLD_S,
+		     0);
+	roce_set_bit(qpc_mask->byte_232_irrl_sge,
+		     V2_QPC_BYTE_232_FENCE_LP_VLD_S, 0);
+	roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_IRRL_LP_VLD_S,
+		     0);
+
 	qpc_mask->irrl_cur_sge_offset = 0;
 
 	roce_set_field(qpc_mask->byte_240_irrl_tail,
@@ -2955,13 +3143,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
 		roce_set_field(qpc_mask->byte_56_dqpn_err,
 			       V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0);
 	}
-	roce_set_field(context->byte_168_irrl_idx,
-		       V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
-		       V2_QPC_BYTE_168_SQ_SHIFT_BAK_S,
-		       ilog2((unsigned int)hr_qp->sq.wqe_cnt));
-	roce_set_field(qpc_mask->byte_168_irrl_idx,
-		       V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
-		       V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0);
 }
 
 static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
@@ -3271,13 +3452,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
 	 * we should set all bits of the relevant fields in context mask to
 	 * 0 at the same time, else set them to 0x1.
 	 */
-	roce_set_field(context->byte_60_qpst_mapid,
-		       V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M,
-		       V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, attr->retry_cnt);
-	roce_set_field(qpc_mask->byte_60_qpst_mapid,
-		       V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M,
-		       V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, 0);
-
 	context->sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
 	roce_set_field(context->byte_168_irrl_idx,
 		       V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
@@ -3538,6 +3712,17 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 			memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN);
 		}
 
+		if (is_vlan_dev(gid_attr->ndev)) {
+			roce_set_bit(context->byte_76_srqn_op_en,
+				     V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
+			roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+				     V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0);
+			roce_set_bit(context->byte_168_irrl_idx,
+				     V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1);
+			roce_set_bit(qpc_mask->byte_168_irrl_idx,
+				     V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0);
+		}
+
 		roce_set_field(context->byte_24_mtu_tc,
 			       V2_QPC_BYTE_24_VLAN_ID_M,
 			       V2_QPC_BYTE_24_VLAN_ID_S, vlan);
@@ -3584,8 +3769,15 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 			       V2_QPC_BYTE_24_HOP_LIMIT_M,
 			       V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
 
-		roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
-			       V2_QPC_BYTE_24_TC_S, grh->traffic_class);
+		if (hr_dev->pci_dev->revision == 0x21 &&
+		    gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+			roce_set_field(context->byte_24_mtu_tc,
+				       V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
+				       grh->traffic_class >> 2);
+		else
+			roce_set_field(context->byte_24_mtu_tc,
+				       V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
+				       grh->traffic_class);
 		roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
 			       V2_QPC_BYTE_24_TC_S, 0);
 		roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
@@ -3606,9 +3798,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 		set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
 
 	/* Every status migrate must change state */
-	roce_set_field(context->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M,
+	roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
 		       V2_QPC_BYTE_60_QP_ST_S, new_state);
-	roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M,
+	roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
 		       V2_QPC_BYTE_60_QP_ST_S, 0);
 
 	/* SW pass context to HW */
@@ -3728,7 +3920,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 		goto out;
 	}
 
-	state = roce_get_field(context->byte_60_qpst_mapid,
+	state = roce_get_field(context->byte_60_qpst_tempid,
 			       V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S);
 	tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
 	if (tmp_qp_state == -1) {
@@ -3995,13 +4187,103 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
 {
 	struct hns_roce_work *irq_work =
 				container_of(work, struct hns_roce_work, work);
+	struct device *dev = irq_work->hr_dev->dev;
 	u32 qpn = irq_work->qpn;
+	u32 cqn = irq_work->cqn;
 
 	switch (irq_work->event_type) {
+	case HNS_ROCE_EVENT_TYPE_PATH_MIG:
+		dev_info(dev, "Path migrated succeeded.\n");
+		break;
+	case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
+		dev_warn(dev, "Path migration failed.\n");
+		break;
+	case HNS_ROCE_EVENT_TYPE_COMM_EST:
+		dev_info(dev, "Communication established.\n");
+		break;
+	case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+		dev_warn(dev, "Send queue drained.\n");
+		break;
 	case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+		dev_err(dev, "Local work queue catastrophic error.\n");
+		hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+		switch (irq_work->sub_type) {
+		case HNS_ROCE_LWQCE_QPC_ERROR:
+			dev_err(dev, "QP %d, QPC error.\n", qpn);
+			break;
+		case HNS_ROCE_LWQCE_MTU_ERROR:
+			dev_err(dev, "QP %d, MTU error.\n", qpn);
+			break;
+		case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
+			dev_err(dev, "QP %d, WQE BA addr error.\n", qpn);
+			break;
+		case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
+			dev_err(dev, "QP %d, WQE addr error.\n", qpn);
+			break;
+		case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
+			dev_err(dev, "QP %d, WQE shift error.\n", qpn);
+			break;
+		default:
+			dev_err(dev, "Unhandled sub_event type %d.\n",
+				irq_work->sub_type);
+			break;
+		}
+		break;
 	case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+		dev_err(dev, "Invalid request local work queue error.\n");
+		hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+		break;
 	case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+		dev_err(dev, "Local access violation work queue error.\n");
 		hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+		switch (irq_work->sub_type) {
+		case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
+			dev_err(dev, "QP %d, R_key violation.\n", qpn);
+			break;
+		case HNS_ROCE_LAVWQE_LENGTH_ERROR:
+			dev_err(dev, "QP %d, length error.\n", qpn);
+			break;
+		case HNS_ROCE_LAVWQE_VA_ERROR:
+			dev_err(dev, "QP %d, VA error.\n", qpn);
+			break;
+		case HNS_ROCE_LAVWQE_PD_ERROR:
+			dev_err(dev, "QP %d, PD error.\n", qpn);
+			break;
+		case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
+			dev_err(dev, "QP %d, rw acc error.\n", qpn);
+			break;
+		case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
+			dev_err(dev, "QP %d, key state error.\n", qpn);
+			break;
+		case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
+			dev_err(dev, "QP %d, MR operation error.\n", qpn);
+			break;
+		default:
+			dev_err(dev, "Unhandled sub_event type %d.\n",
+				irq_work->sub_type);
+			break;
+		}
+		break;
+	case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+		dev_warn(dev, "SRQ limit reach.\n");
+		break;
+	case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+		dev_warn(dev, "SRQ last wqe reach.\n");
+		break;
+	case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+		dev_err(dev, "SRQ catas error.\n");
+		break;
+	case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+		dev_err(dev, "CQ 0x%x access err.\n", cqn);
+		break;
+	case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+		dev_warn(dev, "CQ 0x%x overflow\n", cqn);
+		break;
+	case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
+		dev_warn(dev, "DB overflow.\n");
+		break;
+	case HNS_ROCE_EVENT_TYPE_FLR:
+		dev_warn(dev, "Function level reset.\n");
 		break;
 	default:
 		break;
@@ -4011,7 +4293,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
 }
 
 static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
-				      struct hns_roce_eq *eq, u32 qpn)
+				      struct hns_roce_eq *eq,
+				      u32 qpn, u32 cqn)
 {
 	struct hns_roce_work *irq_work;
 
@@ -4022,6 +4305,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
 	INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle);
 	irq_work->hr_dev = hr_dev;
 	irq_work->qpn = qpn;
+	irq_work->cqn = cqn;
 	irq_work->event_type = eq->event_type;
 	irq_work->sub_type = eq->sub_type;
 	queue_work(hr_dev->irq_workq, &(irq_work->work));
@@ -4058,124 +4342,6 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
 	hns_roce_write64_k(doorbell, eq->doorbell);
 }
 
-static void hns_roce_v2_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
-						  struct hns_roce_aeqe *aeqe,
-						  u32 qpn)
-{
-	struct device *dev = hr_dev->dev;
-	int sub_type;
-
-	dev_warn(dev, "Local work queue catastrophic error.\n");
-	sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
-				  HNS_ROCE_V2_AEQE_SUB_TYPE_S);
-	switch (sub_type) {
-	case HNS_ROCE_LWQCE_QPC_ERROR:
-		dev_warn(dev, "QP %d, QPC error.\n", qpn);
-		break;
-	case HNS_ROCE_LWQCE_MTU_ERROR:
-		dev_warn(dev, "QP %d, MTU error.\n", qpn);
-		break;
-	case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
-		dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
-		break;
-	case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
-		dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
-		break;
-	case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
-		dev_warn(dev, "QP %d, WQE shift error.\n", qpn);
-		break;
-	default:
-		dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
-		break;
-	}
-}
-
-static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
-					    struct hns_roce_aeqe *aeqe, u32 qpn)
-{
-	struct device *dev = hr_dev->dev;
-	int sub_type;
-
-	dev_warn(dev, "Local access violation work queue error.\n");
-	sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
-				  HNS_ROCE_V2_AEQE_SUB_TYPE_S);
-	switch (sub_type) {
-	case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
-		dev_warn(dev, "QP %d, R_key violation.\n", qpn);
-		break;
-	case HNS_ROCE_LAVWQE_LENGTH_ERROR:
-		dev_warn(dev, "QP %d, length error.\n", qpn);
-		break;
-	case HNS_ROCE_LAVWQE_VA_ERROR:
-		dev_warn(dev, "QP %d, VA error.\n", qpn);
-		break;
-	case HNS_ROCE_LAVWQE_PD_ERROR:
-		dev_err(dev, "QP %d, PD error.\n", qpn);
-		break;
-	case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
-		dev_warn(dev, "QP %d, rw acc error.\n", qpn);
-		break;
-	case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
-		dev_warn(dev, "QP %d, key state error.\n", qpn);
-		break;
-	case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
-		dev_warn(dev, "QP %d, MR operation error.\n", qpn);
-		break;
-	default:
-		dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
-		break;
-	}
-}
-
-static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
-				      struct hns_roce_aeqe *aeqe,
-				      int event_type, u32 qpn)
-{
-	struct device *dev = hr_dev->dev;
-
-	switch (event_type) {
-	case HNS_ROCE_EVENT_TYPE_COMM_EST:
-		dev_warn(dev, "Communication established.\n");
-		break;
-	case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
-		dev_warn(dev, "Send queue drained.\n");
-		break;
-	case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
-		hns_roce_v2_wq_catas_err_handle(hr_dev, aeqe, qpn);
-		break;
-	case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
-		dev_warn(dev, "Invalid request local work queue error.\n");
-		break;
-	case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
-		hns_roce_v2_local_wq_access_err_handle(hr_dev, aeqe, qpn);
-		break;
-	default:
-		break;
-	}
-
-	hns_roce_qp_event(hr_dev, qpn, event_type);
-}
-
-static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev,
-				      struct hns_roce_aeqe *aeqe,
-				      int event_type, u32 cqn)
-{
-	struct device *dev = hr_dev->dev;
-
-	switch (event_type) {
-	case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
-		dev_warn(dev, "CQ 0x%x access err.\n", cqn);
-		break;
-	case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
-		dev_warn(dev, "CQ 0x%x overflow\n", cqn);
-		break;
-	default:
-		break;
-	}
-
-	hns_roce_cq_event(hr_dev, cqn, event_type);
-}
-
 static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
 {
 	u32 buf_chk_sz;
@@ -4251,31 +4417,23 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 
 		switch (event_type) {
 		case HNS_ROCE_EVENT_TYPE_PATH_MIG:
-			dev_warn(dev, "Path migrated succeeded.\n");
-			break;
 		case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
-			dev_warn(dev, "Path migration failed.\n");
-			break;
 		case HNS_ROCE_EVENT_TYPE_COMM_EST:
 		case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
 		case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
 		case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
 		case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
-			hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type,
-						  qpn);
+			hns_roce_qp_event(hr_dev, qpn, event_type);
 			break;
 		case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
 		case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
 		case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
-			dev_warn(dev, "SRQ not support.\n");
 			break;
 		case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
 		case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
-			hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type,
-						  cqn);
+			hns_roce_cq_event(hr_dev, cqn, event_type);
 			break;
 		case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
-			dev_warn(dev, "DB overflow.\n");
 			break;
 		case HNS_ROCE_EVENT_TYPE_MB:
 			hns_roce_cmd_event(hr_dev,
@@ -4284,10 +4442,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 					le64_to_cpu(aeqe->event.cmd.out_param));
 			break;
 		case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
-			dev_warn(dev, "CEQ overflow.\n");
 			break;
 		case HNS_ROCE_EVENT_TYPE_FLR:
-			dev_warn(dev, "Function level reset.\n");
 			break;
 		default:
 			dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n",
@@ -4304,7 +4460,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 			dev_warn(dev, "cons_index overflow, set back to 0.\n");
 			eq->cons_index = 0;
 		}
-		hns_roce_v2_init_irq_work(hr_dev, eq, qpn);
+		hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn);
 	}
 
 	set_eq_cons_index_v2(eq);
@@ -5125,6 +5281,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
 		create_singlethread_workqueue("hns_roce_irq_workqueue");
 	if (!hr_dev->irq_workq) {
 		dev_err(dev, "Create irq workqueue failed!\n");
+		ret = -ENOMEM;
 		goto err_request_irq_fail;
 	}
 
@@ -5195,6 +5352,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
 	.set_mac = hns_roce_v2_set_mac,
 	.write_mtpt = hns_roce_v2_write_mtpt,
 	.rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt,
+	.frmr_write_mtpt = hns_roce_v2_frmr_write_mtpt,
+	.mw_write_mtpt = hns_roce_v2_mw_write_mtpt,
 	.write_cqc = hns_roce_v2_write_cqc,
 	.set_hem = hns_roce_v2_set_hem,
 	.clear_hem = hns_roce_v2_clear_hem,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 14aa308befef..8bc820635bbd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -50,6 +50,7 @@
 #define HNS_ROCE_V2_MAX_CQE_NUM			0x10000
 #define HNS_ROCE_V2_MAX_RQ_SGE_NUM		0x100
 #define HNS_ROCE_V2_MAX_SQ_SGE_NUM		0xff
+#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM		0x200000
 #define HNS_ROCE_V2_MAX_SQ_INLINE		0x20
 #define HNS_ROCE_V2_UAR_NUM			256
 #define HNS_ROCE_V2_PHY_UAR_NUM			1
@@ -78,6 +79,7 @@
 #define HNS_ROCE_INVALID_LKEY			0x100
 #define HNS_ROCE_CMQ_TX_TIMEOUT			30000
 #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE	2
+#define HNS_ROCE_V2_RSV_QPS			8
 
 #define HNS_ROCE_CONTEXT_HOP_NUM		1
 #define HNS_ROCE_MTT_HOP_NUM			1
@@ -201,6 +203,7 @@ enum {
 
 /* CMQ command */
 enum hns_roce_opcode_type {
+	HNS_QUERY_FW_VER				= 0x0001,
 	HNS_ROCE_OPC_QUERY_HW_VER			= 0x8000,
 	HNS_ROCE_OPC_CFG_GLOBAL_PARAM			= 0x8001,
 	HNS_ROCE_OPC_ALLOC_PF_RES			= 0x8004,
@@ -324,6 +327,7 @@ struct hns_roce_v2_cq_context {
 
 enum{
 	V2_MPT_ST_VALID = 0x1,
+	V2_MPT_ST_FREE	= 0x2,
 };
 
 enum hns_roce_v2_qp_state {
@@ -350,7 +354,7 @@ struct hns_roce_v2_qp_context {
 	__le32	dmac;
 	__le32	byte_52_udpspn_dmac;
 	__le32	byte_56_dqpn_err;
-	__le32	byte_60_qpst_mapid;
+	__le32	byte_60_qpst_tempid;
 	__le32	qkey_xrcd;
 	__le32	byte_68_rq_db;
 	__le32	rq_db_record_addr;
@@ -492,26 +496,15 @@ struct hns_roce_v2_qp_context {
 #define	V2_QPC_BYTE_56_LP_PKTN_INI_S 28
 #define V2_QPC_BYTE_56_LP_PKTN_INI_M GENMASK(31, 28)
 
-#define	V2_QPC_BYTE_60_MAPID_S 0
-#define V2_QPC_BYTE_60_MAPID_M GENMASK(12, 0)
+#define	V2_QPC_BYTE_60_TEMPID_S 0
+#define V2_QPC_BYTE_60_TEMPID_M GENMASK(7, 0)
 
-#define	V2_QPC_BYTE_60_INNER_MAP_IND_S 13
+#define V2_QPC_BYTE_60_SCC_TOKEN_S 8
+#define V2_QPC_BYTE_60_SCC_TOKEN_M GENMASK(26, 8)
 
-#define	V2_QPC_BYTE_60_SQ_MAP_IND_S 14
+#define	V2_QPC_BYTE_60_SQ_DB_DOING_S 27
 
-#define	V2_QPC_BYTE_60_RQ_MAP_IND_S 15
-
-#define	V2_QPC_BYTE_60_TEMPID_S 16
-#define V2_QPC_BYTE_60_TEMPID_M  GENMASK(22, 16)
-
-#define	V2_QPC_BYTE_60_EXT_MAP_IND_S 23
-
-#define	V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S 24
-#define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M GENMASK(26, 24)
-
-#define V2_QPC_BYTE_60_SQ_RLS_IND_S 27
-
-#define	V2_QPC_BYTE_60_SQ_EXT_IND_S 28
+#define	V2_QPC_BYTE_60_RQ_DB_DOING_S 28
 
 #define	V2_QPC_BYTE_60_QP_ST_S 29
 #define V2_QPC_BYTE_60_QP_ST_M GENMASK(31, 29)
@@ -534,6 +527,7 @@ struct hns_roce_v2_qp_context {
 
 #define	V2_QPC_BYTE_76_RQIE_S 28
 
+#define	V2_QPC_BYTE_76_RQ_VLAN_EN_S 30
 #define	V2_QPC_BYTE_80_RX_CQN_S 0
 #define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)
 
@@ -588,7 +582,7 @@ struct hns_roce_v2_qp_context {
 #define	V2_QPC_BYTE_140_RR_MAX_S 12
 #define V2_QPC_BYTE_140_RR_MAX_M GENMASK(14, 12)
 
-#define	V2_QPC_BYTE_140_RSVD_RAQ_MAP_S 15
+#define	V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S 15
 
 #define	V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S 16
 #define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M GENMASK(23, 16)
@@ -599,8 +593,6 @@ struct hns_roce_v2_qp_context {
 #define	V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S 0
 #define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M GENMASK(23, 0)
 
-#define V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S 24
-
 #define V2_QPC_BYTE_144_RAQ_CREDIT_S 25
 #define V2_QPC_BYTE_144_RAQ_CREDIT_M GENMASK(29, 25)
 
@@ -637,9 +629,10 @@ struct hns_roce_v2_qp_context {
 #define	V2_QPC_BYTE_168_LP_SGEN_INI_S 22
 #define V2_QPC_BYTE_168_LP_SGEN_INI_M GENMASK(23, 22)
 
-#define	V2_QPC_BYTE_168_SQ_SHIFT_BAK_S 24
-#define V2_QPC_BYTE_168_SQ_SHIFT_BAK_M GENMASK(27, 24)
-
+#define V2_QPC_BYTE_168_SQ_VLAN_EN_S 24
+#define V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S 25
+#define V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S 26
+#define V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S 27
 #define	V2_QPC_BYTE_168_IRRL_IDX_LSB_S 28
 #define V2_QPC_BYTE_168_IRRL_IDX_LSB_M GENMASK(31, 28)
 
@@ -725,6 +718,10 @@ struct hns_roce_v2_qp_context {
 #define	V2_QPC_BYTE_232_IRRL_SGE_IDX_S 20
 #define V2_QPC_BYTE_232_IRRL_SGE_IDX_M GENMASK(28, 20)
 
+#define V2_QPC_BYTE_232_SO_LP_VLD_S 29
+#define V2_QPC_BYTE_232_FENCE_LP_VLD_S 30
+#define V2_QPC_BYTE_232_IRRL_LP_VLD_S 31
+
 #define	V2_QPC_BYTE_240_IRRL_TAIL_REAL_S 0
 #define V2_QPC_BYTE_240_IRRL_TAIL_REAL_M GENMASK(7, 0)
 
@@ -743,6 +740,9 @@ struct hns_roce_v2_qp_context {
 #define	V2_QPC_BYTE_244_RNR_CNT_S 27
 #define V2_QPC_BYTE_244_RNR_CNT_M GENMASK(29, 27)
 
+#define V2_QPC_BYTE_244_LCL_OP_FLG_S 30
+#define V2_QPC_BYTE_244_IRRL_RD_FLG_S 31
+
 #define	V2_QPC_BYTE_248_IRRL_PSN_S 0
 #define V2_QPC_BYTE_248_IRRL_PSN_M GENMASK(23, 0)
 
@@ -818,6 +818,11 @@ struct hns_roce_v2_cqe {
 #define	V2_CQE_BYTE_28_PORT_TYPE_S 16
 #define V2_CQE_BYTE_28_PORT_TYPE_M GENMASK(17, 16)
 
+#define V2_CQE_BYTE_28_VID_S 18
+#define V2_CQE_BYTE_28_VID_M GENMASK(29, 18)
+
+#define V2_CQE_BYTE_28_VID_VLD_S 30
+
 #define	V2_CQE_BYTE_32_RMT_QPN_S 0
 #define V2_CQE_BYTE_32_RMT_QPN_M GENMASK(23, 0)
 
@@ -878,8 +883,19 @@ struct hns_roce_v2_mpt_entry {
 
 #define V2_MPT_BYTE_8_LW_EN_S 7
 
+#define V2_MPT_BYTE_8_MW_CNT_S 8
+#define V2_MPT_BYTE_8_MW_CNT_M GENMASK(31, 8)
+
+#define V2_MPT_BYTE_12_FRE_S 0
+
 #define V2_MPT_BYTE_12_PA_S 1
 
+#define V2_MPT_BYTE_12_MR_MW_S 4
+
+#define V2_MPT_BYTE_12_BPD_S 5
+
+#define V2_MPT_BYTE_12_BQP_S 6
+
 #define V2_MPT_BYTE_12_INNER_PA_VLD_S 7
 
 #define V2_MPT_BYTE_12_MW_BIND_QPN_S 8
@@ -988,6 +1004,8 @@ struct hns_roce_v2_ud_send_wqe {
 #define	V2_UD_SEND_WQE_BYTE_40_PORTN_S 24
 #define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24)
 
+#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30
+
 #define	V2_UD_SEND_WQE_BYTE_40_LBI_S 31
 
 #define	V2_UD_SEND_WQE_DMAC_0_S 0
@@ -1042,6 +1060,16 @@ struct hns_roce_v2_rc_send_wqe {
 
 #define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
 
+#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19
+
+#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20
+
+#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21
+
+#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22
+
+#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23
+
 #define	V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
 #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
 
@@ -1051,6 +1079,16 @@ struct hns_roce_v2_rc_send_wqe {
 #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
 #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
 
+struct hns_roce_wqe_frmr_seg {
+	__le32	pbl_size;
+	__le32	mode_buf_pg_sz;
+};
+
+#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S	4
+#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M	GENMASK(7, 4)
+
+#define V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S 8
+
 struct hns_roce_v2_wqe_data_seg {
 	__le32    len;
 	__le32    lkey;
@@ -1068,6 +1106,11 @@ struct hns_roce_query_version {
 	__le32 rsv[5];
 };
 
+struct hns_roce_query_fw_info {
+	__le32 fw_ver;
+	__le32 rsv[5];
+};
+
 struct hns_roce_cfg_llm_a {
 	__le32 base_addr_l;
 	__le32 base_addr_h;
@@ -1564,4 +1607,9 @@ struct hns_roce_eq_context {
 #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
 #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
 
+struct hns_roce_wqe_atomic_seg {
+	__le64          fetchadd_swap_data;
+	__le64          cmp_data;
+};
+
 #endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index c5cae9a38c04..1b3ee514f2ef 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -196,6 +196,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
 
 	memset(props, 0, sizeof(*props));
 
+	props->fw_ver = hr_dev->caps.fw_ver;
 	props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid);
 	props->max_mr_size = (u64)(~(0ULL));
 	props->page_size_cap = hr_dev->caps.page_size_cap;
@@ -215,7 +216,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
 	props->max_pd = hr_dev->caps.num_pds;
 	props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma;
 	props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma;
-	props->atomic_cap = IB_ATOMIC_NONE;
+	props->atomic_cap = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_ATOMIC ?
+			    IB_ATOMIC_HCA : IB_ATOMIC_NONE;
 	props->max_pkeys = 1;
 	props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
 
@@ -344,8 +346,6 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
 	if (ret)
 		goto error_fail_uar_alloc;
 
-	INIT_LIST_HEAD(&context->vma_list);
-	mutex_init(&context->vma_list_mutex);
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
 		INIT_LIST_HEAD(&context->page_list);
 		mutex_init(&context->page_mutex);
@@ -376,76 +376,34 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	return 0;
 }
 
-static void hns_roce_vma_open(struct vm_area_struct *vma)
-{
-	vma->vm_ops = NULL;
-}
-
-static void hns_roce_vma_close(struct vm_area_struct *vma)
-{
-	struct hns_roce_vma_data *vma_data;
-
-	vma_data = (struct hns_roce_vma_data *)vma->vm_private_data;
-	vma_data->vma = NULL;
-	mutex_lock(vma_data->vma_list_mutex);
-	list_del(&vma_data->list);
-	mutex_unlock(vma_data->vma_list_mutex);
-	kfree(vma_data);
-}
-
-static const struct vm_operations_struct hns_roce_vm_ops = {
-	.open = hns_roce_vma_open,
-	.close = hns_roce_vma_close,
-};
-
-static int hns_roce_set_vma_data(struct vm_area_struct *vma,
-				 struct hns_roce_ucontext *context)
-{
-	struct list_head *vma_head = &context->vma_list;
-	struct hns_roce_vma_data *vma_data;
-
-	vma_data = kzalloc(sizeof(*vma_data), GFP_KERNEL);
-	if (!vma_data)
-		return -ENOMEM;
-
-	vma_data->vma = vma;
-	vma_data->vma_list_mutex = &context->vma_list_mutex;
-	vma->vm_private_data = vma_data;
-	vma->vm_ops = &hns_roce_vm_ops;
-
-	mutex_lock(&context->vma_list_mutex);
-	list_add(&vma_data->list, vma_head);
-	mutex_unlock(&context->vma_list_mutex);
-
-	return 0;
-}
-
 static int hns_roce_mmap(struct ib_ucontext *context,
 			 struct vm_area_struct *vma)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
 
-	if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0)
-		return -EINVAL;
+	switch (vma->vm_pgoff) {
+	case 0:
+		return rdma_user_mmap_io(context, vma,
+					 to_hr_ucontext(context)->uar.pfn,
+					 PAGE_SIZE,
+					 pgprot_noncached(vma->vm_page_prot));
+
+	/* vm_pgoff: 1 -- TPTR */
+	case 1:
+		if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size)
+			return -EINVAL;
+		/*
+		 * FIXME: using io_remap_pfn_range on the dma address returned
+		 * by dma_alloc_coherent is totally wrong.
+		 */
+		return rdma_user_mmap_io(context, vma,
+					 hr_dev->tptr_dma_addr >> PAGE_SHIFT,
+					 hr_dev->tptr_size,
+					 vma->vm_page_prot);
 
-	if (vma->vm_pgoff == 0) {
-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-		if (io_remap_pfn_range(vma, vma->vm_start,
-				       to_hr_ucontext(context)->uar.pfn,
-				       PAGE_SIZE, vma->vm_page_prot))
-			return -EAGAIN;
-	} else if (vma->vm_pgoff == 1 && hr_dev->tptr_dma_addr &&
-		   hr_dev->tptr_size) {
-		/* vm_pgoff: 1 -- TPTR */
-		if (io_remap_pfn_range(vma, vma->vm_start,
-				       hr_dev->tptr_dma_addr >> PAGE_SHIFT,
-				       hr_dev->tptr_size,
-				       vma->vm_page_prot))
-			return -EAGAIN;
-	} else
+	default:
 		return -EINVAL;
-
-	return hns_roce_set_vma_data(vma, to_hr_ucontext(context));
+	}
 }
 
 static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
@@ -471,21 +429,6 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
 
 static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext)
 {
-	struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
-	struct hns_roce_vma_data *vma_data, *n;
-	struct vm_area_struct *vma;
-
-	mutex_lock(&context->vma_list_mutex);
-	list_for_each_entry_safe(vma_data, n, &context->vma_list, list) {
-		vma = vma_data->vma;
-		zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
-
-		vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
-		vma->vm_ops = NULL;
-		list_del(&vma_data->list);
-		kfree(vma_data);
-	}
-	mutex_unlock(&context->vma_list_mutex);
 }
 
 static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
@@ -508,7 +451,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
 	spin_lock_init(&iboe->lock);
 
 	ib_dev = &hr_dev->ib_dev;
-	strlcpy(ib_dev->name, "hns_%d", IB_DEVICE_NAME_MAX);
 
 	ib_dev->owner			= THIS_MODULE;
 	ib_dev->node_type		= RDMA_NODE_IB_CA;
@@ -584,12 +526,27 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
 		ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR);
 	}
 
+	/* MW */
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) {
+		ib_dev->alloc_mw = hns_roce_alloc_mw;
+		ib_dev->dealloc_mw = hns_roce_dealloc_mw;
+		ib_dev->uverbs_cmd_mask |=
+					(1ULL << IB_USER_VERBS_CMD_ALLOC_MW) |
+					(1ULL << IB_USER_VERBS_CMD_DEALLOC_MW);
+	}
+
+	/* FRMR */
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) {
+		ib_dev->alloc_mr		= hns_roce_alloc_mr;
+		ib_dev->map_mr_sg		= hns_roce_map_mr_sg;
+	}
+
 	/* OTHERS */
 	ib_dev->get_port_immutable	= hns_roce_port_immutable;
 	ib_dev->disassociate_ucontext	= hns_roce_disassociate_ucontext;
 
 	ib_dev->driver_id = RDMA_DRIVER_HNS;
-	ret = ib_register_device(ib_dev, NULL);
+	ret = ib_register_device(ib_dev, "hns_%d", NULL);
 	if (ret) {
 		dev_err(dev, "ib_register_device failed!\n");
 		return ret;
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index eb26a5f6fc58..521ad2aa3a4e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -329,7 +329,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
 	u64 bt_idx;
 	u64 size;
 
-	mhop_num = hr_dev->caps.pbl_hop_num;
+	mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
 	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
 	pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
 
@@ -351,7 +351,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
 
 		mr->pbl_size = npages;
 		mr->pbl_ba = mr->pbl_dma_addr;
-		mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
+		mr->pbl_hop_num = mhop_num;
 		mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
 		mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
 		return 0;
@@ -511,7 +511,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
 	mr->key = hw_index_to_key(index);	/* MR key */
 
 	if (size == ~0ull) {
-		mr->type = MR_TYPE_DMA;
 		mr->pbl_buf = NULL;
 		mr->pbl_dma_addr = 0;
 		/* PBL multi-hop addressing parameters */
@@ -522,7 +521,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
 		mr->pbl_l1_dma_addr = NULL;
 		mr->pbl_l0_dma_addr = 0;
 	} else {
-		mr->type = MR_TYPE_MR;
 		if (!hr_dev->caps.pbl_hop_num) {
 			mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
 							 &(mr->pbl_dma_addr),
@@ -548,9 +546,9 @@ static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
 	u32 mhop_num;
 	u64 bt_idx;
 
-	npages = ib_umem_page_count(mr->umem);
+	npages = mr->pbl_size;
 	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
-	mhop_num = hr_dev->caps.pbl_hop_num;
+	mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;
 
 	if (mhop_num == HNS_ROCE_HOP_NUM_0)
 		return;
@@ -636,7 +634,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
 	}
 
 	if (mr->size != ~0ULL) {
-		npages = ib_umem_page_count(mr->umem);
+		if (mr->type == MR_TYPE_MR)
+			npages = ib_umem_page_count(mr->umem);
 
 		if (!hr_dev->caps.pbl_hop_num)
 			dma_free_coherent(dev, (unsigned int)(npages * 8),
@@ -674,7 +673,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
 		goto err_table;
 	}
 
-	ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
+	if (mr->type != MR_TYPE_FRMR)
+		ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
+	else
+		ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
 	if (ret) {
 		dev_err(dev, "Write mtpt fail!\n");
 		goto err_page;
@@ -855,6 +857,8 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
 	if (mr == NULL)
 		return  ERR_PTR(-ENOMEM);
 
+	mr->type = MR_TYPE_DMA;
+
 	/* Allocate memory region key */
 	ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
 				~0ULL, acc, 0, mr);
@@ -1031,6 +1035,8 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		}
 	}
 
+	mr->type = MR_TYPE_MR;
+
 	ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
 				access_flags, n, mr);
 	if (ret)
@@ -1201,3 +1207,193 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr)
 
 	return ret;
 }
+
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+				u32 max_num_sg)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+	struct device *dev = hr_dev->dev;
+	struct hns_roce_mr *mr;
+	u64 length;
+	u32 page_size;
+	int ret;
+
+	page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
+	length = max_num_sg * page_size;
+
+	if (mr_type != IB_MR_TYPE_MEM_REG)
+		return ERR_PTR(-EINVAL);
+
+	if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
+		dev_err(dev, "max_num_sg larger than %d\n",
+			HNS_ROCE_FRMR_MAX_PA);
+		return ERR_PTR(-EINVAL);
+	}
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	mr->type = MR_TYPE_FRMR;
+
+	/* Allocate memory region key */
+	ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
+				0, max_num_sg, mr);
+	if (ret)
+		goto err_free;
+
+	ret = hns_roce_mr_enable(hr_dev, mr);
+	if (ret)
+		goto err_mr;
+
+	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+	mr->umem = NULL;
+
+	return &mr->ibmr;
+
+err_mr:
+	hns_roce_mr_free(to_hr_dev(pd->device), mr);
+
+err_free:
+	kfree(mr);
+	return ERR_PTR(ret);
+}
+
+static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
+{
+	struct hns_roce_mr *mr = to_hr_mr(ibmr);
+
+	mr->pbl_buf[mr->npages++] = cpu_to_le64(addr);
+
+	return 0;
+}
+
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+		       unsigned int *sg_offset)
+{
+	struct hns_roce_mr *mr = to_hr_mr(ibmr);
+
+	mr->npages = 0;
+
+	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
+}
+
+static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
+			     struct hns_roce_mw *mw)
+{
+	struct device *dev = hr_dev->dev;
+	int ret;
+
+	if (mw->enabled) {
+		ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey)
+					 & (hr_dev->caps.num_mtpts - 1));
+		if (ret)
+			dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret);
+
+		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
+				   key_to_hw_index(mw->rkey));
+	}
+
+	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
+			     key_to_hw_index(mw->rkey), BITMAP_NO_RR);
+}
+
+static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
+			      struct hns_roce_mw *mw)
+{
+	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
+	struct hns_roce_cmd_mailbox *mailbox;
+	struct device *dev = hr_dev->dev;
+	unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
+	int ret;
+
+	/* prepare HEM entry memory */
+	ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
+	if (ret)
+		return ret;
+
+	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+	if (IS_ERR(mailbox)) {
+		ret = PTR_ERR(mailbox);
+		goto err_table;
+	}
+
+	ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
+	if (ret) {
+		dev_err(dev, "MW write mtpt fail!\n");
+		goto err_page;
+	}
+
+	ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
+				 mtpt_idx & (hr_dev->caps.num_mtpts - 1));
+	if (ret) {
+		dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret);
+		goto err_page;
+	}
+
+	mw->enabled = 1;
+
+	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+	return 0;
+
+err_page:
+	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+err_table:
+	hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
+
+	return ret;
+}
+
+struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+				struct ib_udata *udata)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
+	struct hns_roce_mw *mw;
+	unsigned long index = 0;
+	int ret;
+
+	mw = kmalloc(sizeof(*mw), GFP_KERNEL);
+	if (!mw)
+		return ERR_PTR(-ENOMEM);
+
+	/* Allocate a key for mw from bitmap */
+	ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
+	if (ret)
+		goto err_bitmap;
+
+	mw->rkey = hw_index_to_key(index);
+
+	mw->ibmw.rkey = mw->rkey;
+	mw->ibmw.type = type;
+	mw->pdn = to_hr_pd(ib_pd)->pdn;
+	mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
+	mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
+	mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
+
+	ret = hns_roce_mw_enable(hr_dev, mw);
+	if (ret)
+		goto err_mw;
+
+	return &mw->ibmw;
+
+err_mw:
+	hns_roce_mw_free(hr_dev, mw);
+
+err_bitmap:
+	kfree(mw);
+
+	return ERR_PTR(ret);
+}
+
+int hns_roce_dealloc_mw(struct ib_mw *ibmw)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
+	struct hns_roce_mw *mw = to_hr_mw(ibmw);
+
+	hns_roce_mw_free(hr_dev, mw);
+	kfree(mw);
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index efb7e961ca65..5ebf481a39d9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -31,6 +31,7 @@
  * SOFTWARE.
  */
 
+#include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <rdma/ib_addr.h>
 #include <rdma/ib_umem.h>
@@ -343,6 +344,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
 {
 	u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
 	u8 max_sq_stride = ilog2(roundup_sq_stride);
+	u32 ex_sge_num;
 	u32 page_size;
 	u32 max_cnt;
 
@@ -372,7 +374,18 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
 	if (hr_qp->sq.max_gs > 2)
 		hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
 							(hr_qp->sq.max_gs - 2));
+
+	if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision == 0x20)) {
+		if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
+			dev_err(hr_dev->dev,
+				"The extended sge cnt error! sge_cnt=%d\n",
+				hr_qp->sge.sge_cnt);
+			return -EINVAL;
+		}
+	}
+
 	hr_qp->sge.sge_shift = 4;
+	ex_sge_num = hr_qp->sge.sge_cnt;
 
 	/* Get buf size, SQ and RQ  are aligned to page_szie */
 	if (hr_dev->caps.max_sq_sg <= 2) {
@@ -386,6 +399,8 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
 					     hr_qp->sq.wqe_shift), PAGE_SIZE);
 	} else {
 		page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
+		hr_qp->sge.sge_cnt =
+		       max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num);
 		hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
 					     hr_qp->rq.wqe_shift), page_size) +
 				   HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
@@ -394,7 +409,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
 					     hr_qp->sq.wqe_shift), page_size);
 
 		hr_qp->sq.offset = 0;
-		if (hr_qp->sge.sge_cnt) {
+		if (ex_sge_num) {
 			hr_qp->sge.offset = HNS_ROCE_ALOGN_UP(
 							(hr_qp->sq.wqe_cnt <<
 							hr_qp->sq.wqe_shift),
@@ -465,6 +480,14 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
 		hr_qp->sge.sge_shift = 4;
 	}
 
+	if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) {
+		if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
+			dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
+				hr_qp->sge.sge_cnt);
+			return -EINVAL;
+		}
+	}
+
 	/* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
 	page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
 	hr_qp->sq.offset = 0;
@@ -472,6 +495,8 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
 				 page_size);
 
 	if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) {
+		hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift),
+					(u32)hr_qp->sge.sge_cnt);
 		hr_qp->sge.offset = size;
 		size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt <<
 					  hr_qp->sge.sge_shift, page_size);
@@ -952,8 +977,8 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		}
 	}
 
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
-				IB_LINK_LAYER_ETHERNET)) {
+	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+				attr_mask)) {
 		dev_err(dev, "ib_modify_qp_is_ok failed\n");
 		goto out;
 	}
@@ -1106,14 +1131,20 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
 {
 	struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
 	int reserved_from_top = 0;
+	int reserved_from_bot;
 	int ret;
 
 	spin_lock_init(&qp_table->lock);
 	INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC);
 
-	/* A port include two SQP, six port total 12 */
+	/* In hw v1, a port include two SQP, six ports total 12 */
+	if (hr_dev->caps.max_sq_sg <= 2)
+		reserved_from_bot = SQP_NUM;
+	else
+		reserved_from_bot = hr_dev->caps.reserved_qps;
+
 	ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps,
-				   hr_dev->caps.num_qps - 1, SQP_NUM,
+				   hr_dev->caps.num_qps - 1, reserved_from_bot,
 				   reserved_from_top);
 	if (ret) {
 		dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n",
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 423818a7d333..771eb6bd0785 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -1689,7 +1689,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
 	unsigned long flags;
 
 	rtnl_lock();
-	for_each_netdev_rcu(&init_net, ip_dev) {
+	for_each_netdev(&init_net, ip_dev) {
 		if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) &&
 		      (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
 		     (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index e2e6c74a7452..102875872bea 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2135,10 +2135,10 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
 }
 
 /**
- * i40iw_show_rev
+ * hw_rev_show
  */
-static ssize_t i40iw_show_rev(struct device *dev,
-			      struct device_attribute *attr, char *buf)
+static ssize_t hw_rev_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
 {
 	struct i40iw_ib_device *iwibdev = container_of(dev,
 						       struct i40iw_ib_device,
@@ -2147,34 +2147,37 @@ static ssize_t i40iw_show_rev(struct device *dev,
 
 	return sprintf(buf, "%x\n", hw_rev);
 }
+static DEVICE_ATTR_RO(hw_rev);
 
 /**
- * i40iw_show_hca
+ * hca_type_show
  */
-static ssize_t i40iw_show_hca(struct device *dev,
-			      struct device_attribute *attr, char *buf)
+static ssize_t hca_type_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "I40IW\n");
 }
+static DEVICE_ATTR_RO(hca_type);
 
 /**
- * i40iw_show_board
+ * board_id_show
  */
-static ssize_t i40iw_show_board(struct device *dev,
-				struct device_attribute *attr,
-				char *buf)
+static ssize_t board_id_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%.*s\n", 32, "I40IW Board ID");
 }
+static DEVICE_ATTR_RO(board_id);
 
-static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL);
+static struct attribute *i40iw_dev_attributes[] = {
+	&dev_attr_hw_rev.attr,
+	&dev_attr_hca_type.attr,
+	&dev_attr_board_id.attr,
+	NULL
+};
 
-static struct device_attribute *i40iw_dev_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type,
-	&dev_attr_board_id
+static const struct attribute_group i40iw_attr_group = {
+	.attrs = i40iw_dev_attributes,
 };
 
 /**
@@ -2752,7 +2755,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
 		i40iw_pr_err("iwdev == NULL\n");
 		return NULL;
 	}
-	strlcpy(iwibdev->ibdev.name, "i40iw%d", IB_DEVICE_NAME_MAX);
 	iwibdev->ibdev.owner = THIS_MODULE;
 	iwdev->iwibdev = iwibdev;
 	iwibdev->iwdev = iwdev;
@@ -2851,20 +2853,6 @@ void i40iw_port_ibevent(struct i40iw_device *iwdev)
 }
 
 /**
- * i40iw_unregister_rdma_device - unregister of iwarp from IB
- * @iwibdev: rdma device ptr
- */
-static void i40iw_unregister_rdma_device(struct i40iw_ib_device *iwibdev)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i)
-		device_remove_file(&iwibdev->ibdev.dev,
-				   i40iw_dev_attributes[i]);
-	ib_unregister_device(&iwibdev->ibdev);
-}
-
-/**
  * i40iw_destroy_rdma_device - destroy rdma device and free resources
  * @iwibdev: IB device ptr
  */
@@ -2873,7 +2861,7 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
 	if (!iwibdev)
 		return;
 
-	i40iw_unregister_rdma_device(iwibdev);
+	ib_unregister_device(&iwibdev->ibdev);
 	kfree(iwibdev->ibdev.iwcm);
 	iwibdev->ibdev.iwcm = NULL;
 	wait_event_timeout(iwibdev->iwdev->close_wq,
@@ -2888,32 +2876,19 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
  */
 int i40iw_register_rdma_device(struct i40iw_device *iwdev)
 {
-	int i, ret;
+	int ret;
 	struct i40iw_ib_device *iwibdev;
 
 	iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
 	if (!iwdev->iwibdev)
 		return -ENOMEM;
 	iwibdev = iwdev->iwibdev;
-
+	rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
 	iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW;
-	ret = ib_register_device(&iwibdev->ibdev, NULL);
+	ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", NULL);
 	if (ret)
 		goto error;
 
-	for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i) {
-		ret =
-		    device_create_file(&iwibdev->ibdev.dev,
-				       i40iw_dev_attributes[i]);
-		if (ret) {
-			while (i > 0) {
-				i--;
-				device_remove_file(&iwibdev->ibdev.dev, i40iw_dev_attributes[i]);
-			}
-			ib_unregister_device(&iwibdev->ibdev);
-			goto error;
-		}
-	}
 	return 0;
 error:
 	kfree(iwdev->iwibdev->ibdev.iwcm);
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
index db4aa13ebae0..d1de3285fd88 100644
--- a/drivers/infiniband/hw/mlx4/Kconfig
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -1,6 +1,7 @@
 config MLX4_INFINIBAND
 	tristate "Mellanox ConnectX HCA support"
 	depends on NETDEVICES && ETHERNET && PCI && INET
+	depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
 	depends on MAY_USE_DEVLINK
 	select NET_VENDOR_MELLANOX
 	select MLX4_CORE
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index e5466d786bb1..8942f5f7f04d 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -807,15 +807,17 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 	int err;
 	struct ib_port_attr pattr;
 
-	if (in_wc && in_wc->qp->qp_num) {
-		pr_debug("received MAD: slid:%d sqpn:%d "
-			"dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n",
-			in_wc->slid, in_wc->src_qp,
-			in_wc->dlid_path_bits,
-			in_wc->qp->qp_num,
-			in_wc->wc_flags,
-			in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
-			be16_to_cpu(in_mad->mad_hdr.attr_id));
+	if (in_wc && in_wc->qp) {
+		pr_debug("received MAD: port:%d slid:%d sqpn:%d "
+			 "dlid_bits:%d dqpn:%d wc_flags:0x%x tid:%016llx cls:%x mtd:%x atr:%x\n",
+			 port_num,
+			 in_wc->slid, in_wc->src_qp,
+			 in_wc->dlid_path_bits,
+			 in_wc->qp->qp_num,
+			 in_wc->wc_flags,
+			 be64_to_cpu(in_mad->mad_hdr.tid),
+			 in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
+			 be16_to_cpu(in_mad->mad_hdr.attr_id));
 		if (in_wc->wc_flags & IB_WC_GRH) {
 			pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
 				 be64_to_cpu(in_grh->sgid.global.subnet_prefix),
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0bbeaaae47e0..0def2323459c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1140,144 +1140,50 @@ static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	return 0;
 }
 
-static void  mlx4_ib_vma_open(struct vm_area_struct *area)
-{
-	/* vma_open is called when a new VMA is created on top of our VMA.
-	 * This is done through either mremap flow or split_vma (usually due
-	 * to mlock, madvise, munmap, etc.). We do not support a clone of the
-	 * vma, as this VMA is strongly hardware related. Therefore we set the
-	 * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
-	 * calling us again and trying to do incorrect actions. We assume that
-	 * the original vma size is exactly a single page that there will be no
-	 * "splitting" operations on.
-	 */
-	area->vm_ops = NULL;
-}
-
-static void  mlx4_ib_vma_close(struct vm_area_struct *area)
-{
-	struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
-
-	/* It's guaranteed that all VMAs opened on a FD are closed before the
-	 * file itself is closed, therefore no sync is needed with the regular
-	 * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync
-	 * with accessing the vma as part of mlx4_ib_disassociate_ucontext.
-	 * The close operation is usually called under mm->mmap_sem except when
-	 * process is exiting.  The exiting case is handled explicitly as part
-	 * of mlx4_ib_disassociate_ucontext.
-	 */
-	mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
-				area->vm_private_data;
-
-	/* set the vma context pointer to null in the mlx4_ib driver's private
-	 * data to protect against a race condition in mlx4_ib_dissassociate_ucontext().
-	 */
-	mlx4_ib_vma_priv_data->vma = NULL;
-}
-
-static const struct vm_operations_struct mlx4_ib_vm_ops = {
-	.open = mlx4_ib_vma_open,
-	.close = mlx4_ib_vma_close
-};
-
 static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
 {
-	int i;
-	struct vm_area_struct *vma;
-	struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
-
-	/* need to protect from a race on closing the vma as part of
-	 * mlx4_ib_vma_close().
-	 */
-	for (i = 0; i < HW_BAR_COUNT; i++) {
-		vma = context->hw_bar_info[i].vma;
-		if (!vma)
-			continue;
-
-		zap_vma_ptes(context->hw_bar_info[i].vma,
-			     context->hw_bar_info[i].vma->vm_start, PAGE_SIZE);
-
-		context->hw_bar_info[i].vma->vm_flags &=
-			~(VM_SHARED | VM_MAYSHARE);
-		/* context going to be destroyed, should not access ops any more */
-		context->hw_bar_info[i].vma->vm_ops = NULL;
-	}
-}
-
-static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
-				 struct mlx4_ib_vma_private_data *vma_private_data)
-{
-	vma_private_data->vma = vma;
-	vma->vm_private_data = vma_private_data;
-	vma->vm_ops =  &mlx4_ib_vm_ops;
 }
 
 static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 {
 	struct mlx4_ib_dev *dev = to_mdev(context->device);
-	struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
 
-	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
-		return -EINVAL;
-
-	if (vma->vm_pgoff == 0) {
-		/* We prevent double mmaping on same context */
-		if (mucontext->hw_bar_info[HW_BAR_DB].vma)
-			return -EINVAL;
-
-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-		if (io_remap_pfn_range(vma, vma->vm_start,
-				       to_mucontext(context)->uar.pfn,
-				       PAGE_SIZE, vma->vm_page_prot))
-			return -EAGAIN;
-
-		mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
+	switch (vma->vm_pgoff) {
+	case 0:
+		return rdma_user_mmap_io(context, vma,
+					 to_mucontext(context)->uar.pfn,
+					 PAGE_SIZE,
+					 pgprot_noncached(vma->vm_page_prot));
 
-	} else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
-		/* We prevent double mmaping on same context */
-		if (mucontext->hw_bar_info[HW_BAR_BF].vma)
+	case 1:
+		if (dev->dev->caps.bf_reg_size == 0)
 			return -EINVAL;
+		return rdma_user_mmap_io(
+			context, vma,
+			to_mucontext(context)->uar.pfn +
+				dev->dev->caps.num_uars,
+			PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot));
 
-		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-
-		if (io_remap_pfn_range(vma, vma->vm_start,
-				       to_mucontext(context)->uar.pfn +
-				       dev->dev->caps.num_uars,
-				       PAGE_SIZE, vma->vm_page_prot))
-			return -EAGAIN;
-
-		mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
-
-	} else if (vma->vm_pgoff == 3) {
+	case 3: {
 		struct mlx4_clock_params params;
 		int ret;
 
-		/* We prevent double mmaping on same context */
-		if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
-			return -EINVAL;
-
 		ret = mlx4_get_internal_clock_params(dev->dev, &params);
-
 		if (ret)
 			return ret;
 
-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-		if (io_remap_pfn_range(vma, vma->vm_start,
-				       (pci_resource_start(dev->dev->persist->pdev,
-							   params.bar) +
-					params.offset)
-				       >> PAGE_SHIFT,
-				       PAGE_SIZE, vma->vm_page_prot))
-			return -EAGAIN;
-
-		mlx4_ib_set_vma_data(vma,
-				     &mucontext->hw_bar_info[HW_BAR_CLOCK]);
-	} else {
-		return -EINVAL;
+		return rdma_user_mmap_io(
+			context, vma,
+			(pci_resource_start(dev->dev->persist->pdev,
+					    params.bar) +
+			 params.offset) >>
+				PAGE_SHIFT,
+			PAGE_SIZE, pgprot_noncached(vma->vm_page_prot));
 	}
 
-	return 0;
+	default:
+		return -EINVAL;
+	}
 }
 
 static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
@@ -2133,39 +2039,43 @@ out:
 	return err;
 }
 
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
-			char *buf)
+static ssize_t hca_type_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
 {
 	struct mlx4_ib_dev *dev =
 		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
 	return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
 }
+static DEVICE_ATTR_RO(hca_type);
 
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
-			char *buf)
+static ssize_t hw_rev_show(struct device *device,
+			   struct device_attribute *attr, char *buf)
 {
 	struct mlx4_ib_dev *dev =
 		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
 	return sprintf(buf, "%x\n", dev->dev->rev_id);
 }
+static DEVICE_ATTR_RO(hw_rev);
 
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
-			  char *buf)
+static ssize_t board_id_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
 {
 	struct mlx4_ib_dev *dev =
 		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
 	return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
 		       dev->dev->board_id);
 }
+static DEVICE_ATTR_RO(board_id);
 
-static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
+static struct attribute *mlx4_class_attributes[] = {
+	&dev_attr_hw_rev.attr,
+	&dev_attr_hca_type.attr,
+	&dev_attr_board_id.attr,
+	NULL
+};
 
-static struct device_attribute *mlx4_class_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type,
-	&dev_attr_board_id
+static const struct attribute_group mlx4_attr_group = {
+	.attrs = mlx4_class_attributes,
 };
 
 struct diag_counter {
@@ -2636,7 +2546,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->dev = dev;
 	ibdev->bond_next_port	= 0;
 
-	strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
 	ibdev->ib_dev.owner		= THIS_MODULE;
 	ibdev->ib_dev.node_type		= RDMA_NODE_IB_CA;
 	ibdev->ib_dev.local_dma_lkey	= dev->caps.reserved_lkey;
@@ -2898,8 +2807,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	if (mlx4_ib_alloc_diag_counters(ibdev))
 		goto err_steer_free_bitmap;
 
+	rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group);
 	ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4;
-	if (ib_register_device(&ibdev->ib_dev, NULL))
+	if (ib_register_device(&ibdev->ib_dev, "mlx4_%d", NULL))
 		goto err_diag_counters;
 
 	if (mlx4_ib_mad_init(ibdev))
@@ -2922,12 +2832,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 			goto err_notif;
 	}
 
-	for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
-		if (device_create_file(&ibdev->ib_dev.dev,
-				       mlx4_class_attributes[j]))
-			goto err_notif;
-	}
-
 	ibdev->ib_active = true;
 	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
 		devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i),
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 81ffc007e0a1..d844831179cf 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -673,7 +673,7 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
 			if (!list_empty(&group->pending_list))
 				req = list_first_entry(&group->pending_list,
 						struct mcast_req, group_list);
-			if ((method == IB_MGMT_METHOD_GET_RESP)) {
+			if (method == IB_MGMT_METHOD_GET_RESP) {
 					if (req) {
 						send_reply_to_slave(req->func, group, &req->sa_mad, status);
 						--group->func[req->func].num_pend_reqs;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index e10dccc7958f..8850dfc3826d 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -80,16 +80,11 @@ enum hw_bar_type {
 	HW_BAR_COUNT
 };
 
-struct mlx4_ib_vma_private_data {
-	struct vm_area_struct *vma;
-};
-
 struct mlx4_ib_ucontext {
 	struct ib_ucontext	ibucontext;
 	struct mlx4_uar		uar;
 	struct list_head	db_page_list;
 	struct mutex		db_page_mutex;
-	struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT];
 	struct list_head	wqn_ranges_list;
 	struct mutex		wqn_ranges_mutex; /* protect wqn_ranges_list */
 };
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 6dd3cd2c2f80..0711ca1dfb8f 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2629,7 +2629,6 @@ enum {
 static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 			      int attr_mask, struct ib_udata *udata)
 {
-	enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
 	struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
 	struct mlx4_ib_qp *qp = to_mqp(ibqp);
 	enum ib_qp_state cur_state, new_state;
@@ -2639,13 +2638,8 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
 	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
 
-	if (cur_state != new_state || cur_state != IB_QPS_RESET) {
-		int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
-		ll = rdma_port_get_link_layer(&dev->ib_dev, port);
-	}
-
 	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-				attr_mask, ll)) {
+				attr_mask)) {
 		pr_debug("qpn 0x%x: invalid attribute mask specified "
 			 "for transition %d to %d. qp_type %d,"
 			 " attr_mask 0x%x\n",
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index e219093d2764..752bdd536130 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -818,9 +818,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
 	if (!mlx4_is_master(dev->dev))
 		return 0;
 
-	dev->iov_parent =
-		kobject_create_and_add("iov",
-				       kobject_get(dev->ib_dev.ports_parent->parent));
+	dev->iov_parent = kobject_create_and_add("iov", &dev->ib_dev.dev.kobj);
 	if (!dev->iov_parent) {
 		ret = -ENOMEM;
 		goto err;
@@ -850,7 +848,6 @@ err_add_entries:
 err_ports:
 	kobject_put(dev->iov_parent);
 err:
-	kobject_put(dev->ib_dev.ports_parent->parent);
 	pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret);
 	return ret;
 }
@@ -886,5 +883,4 @@ void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device)
 	kobject_put(device->ports_parent);
 	kobject_put(device->iov_parent);
 	kobject_put(device->iov_parent);
-	kobject_put(device->ib_dev.ports_parent->parent);
 }
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index c84fef9a8a08..ca060a2e2b36 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -197,3 +197,132 @@ int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
 	return  mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT,
 				     0, 0);
 }
+
+void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_tir_in)]   = {};
+	u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {};
+
+	MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
+	MLX5_SET(destroy_tir_in, in, tirn, tirn);
+	MLX5_SET(destroy_tir_in, in, uid, uid);
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_tis_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0};
+
+	MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
+	MLX5_SET(destroy_tis_in, in, tisn, tisn);
+	MLX5_SET(destroy_tis_in, in, uid, uid);
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)]   = {};
+	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {};
+
+	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
+	MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
+	MLX5_SET(destroy_rqt_in, in, uid, uid);
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
+				    u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0};
+	int err;
+
+	MLX5_SET(alloc_transport_domain_in, in, opcode,
+		 MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
+
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (!err)
+		*tdn = MLX5_GET(alloc_transport_domain_out, out,
+				transport_domain);
+
+	return err;
+}
+
+void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
+				       u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0};
+
+	MLX5_SET(dealloc_transport_domain_in, in, opcode,
+		 MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+	MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
+{
+	u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)]   = {};
+
+	MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
+	MLX5_SET(dealloc_pd_in, in, pd, pdn);
+	MLX5_SET(dealloc_pd_in, in, uid, uid);
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+			u32 qpn, u16 uid)
+{
+	u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)]   = {};
+	void *gid;
+
+	MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
+	MLX5_SET(attach_to_mcg_in, in, qpn, qpn);
+	MLX5_SET(attach_to_mcg_in, in, uid, uid);
+	gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
+	memcpy(gid, mgid, sizeof(*mgid));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+			u32 qpn, u16 uid)
+{
+	u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)]   = {};
+	void *gid;
+
+	MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+	MLX5_SET(detach_from_mcg_in, in, qpn, qpn);
+	MLX5_SET(detach_from_mcg_in, in, uid, uid);
+	gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
+	memcpy(gid, mgid, sizeof(*mgid));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid)
+{
+	u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)]   = {};
+	int err;
+
+	MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD);
+	MLX5_SET(alloc_xrcd_in, in, uid, uid);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (!err)
+		*xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd);
+	return err;
+}
+
+int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid)
+{
+	u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)]   = {};
+
+	MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+	MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn);
+	MLX5_SET(dealloc_xrcd_in, in, uid, uid);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 88cbb1c41703..c03c56455534 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -47,4 +47,18 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
 int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
 			 u64 length, u32 alignment);
 int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length);
+void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
+void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
+void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
+void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid);
+int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
+				    u16 uid);
+void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
+				       u16 uid);
+int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+			u32 qpn, u16 uid);
+int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+			u32 qpn, u16 uid);
+int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
+int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
 #endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index cca1820802b8..7d769b5538b4 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -874,6 +874,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 		cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD;
 	}
 
+	MLX5_SET(create_cq_in, *cqb, uid, to_mucontext(context)->devx_uid);
 	return 0;
 
 err_cqb:
@@ -1454,7 +1455,7 @@ ex:
 	return err;
 }
 
-int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
+int mlx5_ib_get_cqe_size(struct ib_cq *ibcq)
 {
 	struct mlx5_ib_cq *cq;
 
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 66dc337e49a7..61aab7c0c513 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -19,7 +19,7 @@
 #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
 struct devx_obj {
 	struct mlx5_core_dev	*mdev;
-	u32			obj_id;
+	u64			obj_id;
 	u32			dinlen; /* destroy inbox length */
 	u32			dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
 };
@@ -45,13 +45,14 @@ static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file)
 	return to_mucontext(ib_uverbs_get_ucontext(file));
 }
 
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev)
 {
 	u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
 	u64 general_obj_types;
 	void *hdr;
 	int err;
+	u16 uid;
 
 	hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr);
 
@@ -60,9 +61,6 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *contex
 	    !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM))
 		return -EINVAL;
 
-	if (!capable(CAP_NET_RAW))
-		return -EPERM;
-
 	MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
 	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX);
 
@@ -70,19 +68,18 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *contex
 	if (err)
 		return err;
 
-	context->devx_uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
-	return 0;
+	uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	return uid;
 }
 
-void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
-			  struct mlx5_ib_ucontext *context)
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
 {
 	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0};
 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
 
 	MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
 	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX);
-	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, context->devx_uid);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, uid);
 
 	mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
 }
@@ -109,150 +106,218 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
 	}
 }
 
+/*
+ * As the obj_id in the firmware is not globally unique the object type
+ * must be considered upon checking for a valid object id.
+ * For that the opcode of the creator command is encoded as part of the obj_id.
+ */
+static u64 get_enc_obj_id(u16 opcode, u32 obj_id)
+{
+	return ((u64)opcode << 32) | obj_id;
+}
+
 static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
 {
 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
-	u32 obj_id;
+	u64 obj_id;
 
 	switch (opcode) {
 	case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
 	case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
-		obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_GENERAL_OBJECT,
+					MLX5_GET(general_obj_in_cmd_hdr, in,
+						 obj_id));
 		break;
 	case MLX5_CMD_OP_QUERY_MKEY:
-		obj_id = MLX5_GET(query_mkey_in, in, mkey_index);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_MKEY,
+					MLX5_GET(query_mkey_in, in,
+						 mkey_index));
 		break;
 	case MLX5_CMD_OP_QUERY_CQ:
-		obj_id = MLX5_GET(query_cq_in, in, cqn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+					MLX5_GET(query_cq_in, in, cqn));
 		break;
 	case MLX5_CMD_OP_MODIFY_CQ:
-		obj_id = MLX5_GET(modify_cq_in, in, cqn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+					MLX5_GET(modify_cq_in, in, cqn));
 		break;
 	case MLX5_CMD_OP_QUERY_SQ:
-		obj_id = MLX5_GET(query_sq_in, in, sqn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+					MLX5_GET(query_sq_in, in, sqn));
 		break;
 	case MLX5_CMD_OP_MODIFY_SQ:
-		obj_id = MLX5_GET(modify_sq_in, in, sqn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+					MLX5_GET(modify_sq_in, in, sqn));
 		break;
 	case MLX5_CMD_OP_QUERY_RQ:
-		obj_id = MLX5_GET(query_rq_in, in, rqn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+					MLX5_GET(query_rq_in, in, rqn));
 		break;
 	case MLX5_CMD_OP_MODIFY_RQ:
-		obj_id = MLX5_GET(modify_rq_in, in, rqn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+					MLX5_GET(modify_rq_in, in, rqn));
 		break;
 	case MLX5_CMD_OP_QUERY_RMP:
-		obj_id = MLX5_GET(query_rmp_in, in, rmpn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
+					MLX5_GET(query_rmp_in, in, rmpn));
 		break;
 	case MLX5_CMD_OP_MODIFY_RMP:
-		obj_id = MLX5_GET(modify_rmp_in, in, rmpn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
+					MLX5_GET(modify_rmp_in, in, rmpn));
 		break;
 	case MLX5_CMD_OP_QUERY_RQT:
-		obj_id = MLX5_GET(query_rqt_in, in, rqtn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+					MLX5_GET(query_rqt_in, in, rqtn));
 		break;
 	case MLX5_CMD_OP_MODIFY_RQT:
-		obj_id = MLX5_GET(modify_rqt_in, in, rqtn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+					MLX5_GET(modify_rqt_in, in, rqtn));
 		break;
 	case MLX5_CMD_OP_QUERY_TIR:
-		obj_id = MLX5_GET(query_tir_in, in, tirn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+					MLX5_GET(query_tir_in, in, tirn));
 		break;
 	case MLX5_CMD_OP_MODIFY_TIR:
-		obj_id = MLX5_GET(modify_tir_in, in, tirn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+					MLX5_GET(modify_tir_in, in, tirn));
 		break;
 	case MLX5_CMD_OP_QUERY_TIS:
-		obj_id = MLX5_GET(query_tis_in, in, tisn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+					MLX5_GET(query_tis_in, in, tisn));
 		break;
 	case MLX5_CMD_OP_MODIFY_TIS:
-		obj_id = MLX5_GET(modify_tis_in, in, tisn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+					MLX5_GET(modify_tis_in, in, tisn));
 		break;
 	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
-		obj_id = MLX5_GET(query_flow_table_in, in, table_id);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
+					MLX5_GET(query_flow_table_in, in,
+						 table_id));
 		break;
 	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
-		obj_id = MLX5_GET(modify_flow_table_in, in, table_id);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
+					MLX5_GET(modify_flow_table_in, in,
+						 table_id));
 		break;
 	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
-		obj_id = MLX5_GET(query_flow_group_in, in, group_id);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_GROUP,
+					MLX5_GET(query_flow_group_in, in,
+						 group_id));
 		break;
 	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
-		obj_id = MLX5_GET(query_fte_in, in, flow_index);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
+					MLX5_GET(query_fte_in, in,
+						 flow_index));
 		break;
 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
-		obj_id = MLX5_GET(set_fte_in, in, flow_index);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
+					MLX5_GET(set_fte_in, in, flow_index));
 		break;
 	case MLX5_CMD_OP_QUERY_Q_COUNTER:
-		obj_id = MLX5_GET(query_q_counter_in, in, counter_set_id);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_Q_COUNTER,
+					MLX5_GET(query_q_counter_in, in,
+						 counter_set_id));
 		break;
 	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
-		obj_id = MLX5_GET(query_flow_counter_in, in, flow_counter_id);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_FLOW_COUNTER,
+					MLX5_GET(query_flow_counter_in, in,
+						 flow_counter_id));
 		break;
 	case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
-		obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT,
+					MLX5_GET(general_obj_in_cmd_hdr, in,
+						 obj_id));
 		break;
 	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
-		obj_id = MLX5_GET(query_scheduling_element_in, in,
-				  scheduling_element_id);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
+					MLX5_GET(query_scheduling_element_in,
+						 in, scheduling_element_id));
 		break;
 	case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
-		obj_id = MLX5_GET(modify_scheduling_element_in, in,
-				  scheduling_element_id);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
+					MLX5_GET(modify_scheduling_element_in,
+						 in, scheduling_element_id));
 		break;
 	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
-		obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT,
+					MLX5_GET(add_vxlan_udp_dport_in, in,
+						 vxlan_udp_port));
 		break;
 	case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
-		obj_id = MLX5_GET(query_l2_table_entry_in, in, table_index);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
+					MLX5_GET(query_l2_table_entry_in, in,
+						 table_index));
 		break;
 	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
-		obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
+					MLX5_GET(set_l2_table_entry_in, in,
+						 table_index));
 		break;
 	case MLX5_CMD_OP_QUERY_QP:
-		obj_id = MLX5_GET(query_qp_in, in, qpn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+					MLX5_GET(query_qp_in, in, qpn));
 		break;
 	case MLX5_CMD_OP_RST2INIT_QP:
-		obj_id = MLX5_GET(rst2init_qp_in, in, qpn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+					MLX5_GET(rst2init_qp_in, in, qpn));
 		break;
 	case MLX5_CMD_OP_INIT2RTR_QP:
-		obj_id = MLX5_GET(init2rtr_qp_in, in, qpn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+					MLX5_GET(init2rtr_qp_in, in, qpn));
 		break;
 	case MLX5_CMD_OP_RTR2RTS_QP:
-		obj_id = MLX5_GET(rtr2rts_qp_in, in, qpn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+					MLX5_GET(rtr2rts_qp_in, in, qpn));
 		break;
 	case MLX5_CMD_OP_RTS2RTS_QP:
-		obj_id = MLX5_GET(rts2rts_qp_in, in, qpn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+					MLX5_GET(rts2rts_qp_in, in, qpn));
 		break;
 	case MLX5_CMD_OP_SQERR2RTS_QP:
-		obj_id = MLX5_GET(sqerr2rts_qp_in, in, qpn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+					MLX5_GET(sqerr2rts_qp_in, in, qpn));
 		break;
 	case MLX5_CMD_OP_2ERR_QP:
-		obj_id = MLX5_GET(qp_2err_in, in, qpn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+					MLX5_GET(qp_2err_in, in, qpn));
 		break;
 	case MLX5_CMD_OP_2RST_QP:
-		obj_id = MLX5_GET(qp_2rst_in, in, qpn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+					MLX5_GET(qp_2rst_in, in, qpn));
 		break;
 	case MLX5_CMD_OP_QUERY_DCT:
-		obj_id = MLX5_GET(query_dct_in, in, dctn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+					MLX5_GET(query_dct_in, in, dctn));
 		break;
 	case MLX5_CMD_OP_QUERY_XRQ:
-		obj_id = MLX5_GET(query_xrq_in, in, xrqn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
+					MLX5_GET(query_xrq_in, in, xrqn));
 		break;
 	case MLX5_CMD_OP_QUERY_XRC_SRQ:
-		obj_id = MLX5_GET(query_xrc_srq_in, in, xrc_srqn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
+					MLX5_GET(query_xrc_srq_in, in,
+						 xrc_srqn));
 		break;
 	case MLX5_CMD_OP_ARM_XRC_SRQ:
-		obj_id = MLX5_GET(arm_xrc_srq_in, in, xrc_srqn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
+					MLX5_GET(arm_xrc_srq_in, in, xrc_srqn));
 		break;
 	case MLX5_CMD_OP_QUERY_SRQ:
-		obj_id = MLX5_GET(query_srq_in, in, srqn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SRQ,
+					MLX5_GET(query_srq_in, in, srqn));
 		break;
 	case MLX5_CMD_OP_ARM_RQ:
-		obj_id = MLX5_GET(arm_rq_in, in, srq_number);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+					MLX5_GET(arm_rq_in, in, srq_number));
 		break;
 	case MLX5_CMD_OP_DRAIN_DCT:
 	case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
-		obj_id = MLX5_GET(drain_dct_in, in, dctn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+					MLX5_GET(drain_dct_in, in, dctn));
 		break;
 	case MLX5_CMD_OP_ARM_XRQ:
-		obj_id = MLX5_GET(arm_xrq_in, in, xrqn);
+		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
+					MLX5_GET(arm_xrq_in, in, xrqn));
 		break;
 	default:
 		return false;
@@ -264,11 +329,102 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
 	return false;
 }
 
-static bool devx_is_obj_create_cmd(const void *in)
+static void devx_set_umem_valid(const void *in)
 {
 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
 
 	switch (opcode) {
+	case MLX5_CMD_OP_CREATE_MKEY:
+		MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
+		break;
+	case MLX5_CMD_OP_CREATE_CQ:
+	{
+		void *cqc;
+
+		MLX5_SET(create_cq_in, in, cq_umem_valid, 1);
+		cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+		MLX5_SET(cqc, cqc, dbr_umem_valid, 1);
+		break;
+	}
+	case MLX5_CMD_OP_CREATE_QP:
+	{
+		void *qpc;
+
+		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+		MLX5_SET(qpc, qpc, dbr_umem_valid, 1);
+		MLX5_SET(create_qp_in, in, wq_umem_valid, 1);
+		break;
+	}
+
+	case MLX5_CMD_OP_CREATE_RQ:
+	{
+		void *rqc, *wq;
+
+		rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+		wq  = MLX5_ADDR_OF(rqc, rqc, wq);
+		MLX5_SET(wq, wq, dbr_umem_valid, 1);
+		MLX5_SET(wq, wq, wq_umem_valid, 1);
+		break;
+	}
+
+	case MLX5_CMD_OP_CREATE_SQ:
+	{
+		void *sqc, *wq;
+
+		sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+		wq = MLX5_ADDR_OF(sqc, sqc, wq);
+		MLX5_SET(wq, wq, dbr_umem_valid, 1);
+		MLX5_SET(wq, wq, wq_umem_valid, 1);
+		break;
+	}
+
+	case MLX5_CMD_OP_MODIFY_CQ:
+		MLX5_SET(modify_cq_in, in, cq_umem_valid, 1);
+		break;
+
+	case MLX5_CMD_OP_CREATE_RMP:
+	{
+		void *rmpc, *wq;
+
+		rmpc = MLX5_ADDR_OF(create_rmp_in, in, ctx);
+		wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+		MLX5_SET(wq, wq, dbr_umem_valid, 1);
+		MLX5_SET(wq, wq, wq_umem_valid, 1);
+		break;
+	}
+
+	case MLX5_CMD_OP_CREATE_XRQ:
+	{
+		void *xrqc, *wq;
+
+		xrqc = MLX5_ADDR_OF(create_xrq_in, in, xrq_context);
+		wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
+		MLX5_SET(wq, wq, dbr_umem_valid, 1);
+		MLX5_SET(wq, wq, wq_umem_valid, 1);
+		break;
+	}
+
+	case MLX5_CMD_OP_CREATE_XRC_SRQ:
+	{
+		void *xrc_srqc;
+
+		MLX5_SET(create_xrc_srq_in, in, xrc_srq_umem_valid, 1);
+		xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, in,
+					xrc_srq_context_entry);
+		MLX5_SET(xrc_srqc, xrc_srqc, dbr_umem_valid, 1);
+		break;
+	}
+
+	default:
+		return;
+	}
+}
+
+static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
+{
+	*opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+	switch (*opcode) {
 	case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
 	case MLX5_CMD_OP_CREATE_MKEY:
 	case MLX5_CMD_OP_CREATE_CQ:
@@ -385,12 +541,49 @@ static bool devx_is_obj_query_cmd(const void *in)
 	}
 }
 
+static bool devx_is_whitelist_cmd(void *in)
+{
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+	switch (opcode) {
+	case MLX5_CMD_OP_QUERY_HCA_CAP:
+	case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
+{
+	if (devx_is_whitelist_cmd(cmd_in)) {
+		struct mlx5_ib_dev *dev;
+
+		if (c->devx_uid)
+			return c->devx_uid;
+
+		dev = to_mdev(c->ibucontext.device);
+		if (dev->devx_whitelist_uid)
+			return dev->devx_whitelist_uid;
+
+		return -EOPNOTSUPP;
+	}
+
+	if (!c->devx_uid)
+		return -EINVAL;
+
+	if (!capable(CAP_NET_RAW))
+		return -EPERM;
+
+	return c->devx_uid;
+}
 static bool devx_is_general_cmd(void *in)
 {
 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
 
 	switch (opcode) {
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
+	case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
 	case MLX5_CMD_OP_QUERY_VPORT_STATE:
 	case MLX5_CMD_OP_QUERY_ADAPTER:
 	case MLX5_CMD_OP_QUERY_ISSI:
@@ -498,14 +691,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
 					MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
 	void *cmd_out;
 	int err;
+	int uid;
 
 	c = devx_ufile2uctx(file);
 	if (IS_ERR(c))
 		return PTR_ERR(c);
 	dev = to_mdev(c->ibucontext.device);
 
-	if (!c->devx_uid)
-		return -EPERM;
+	uid = devx_get_uid(c, cmd_in);
+	if (uid < 0)
+		return uid;
 
 	/* Only white list of some general HCA commands are allowed for this method. */
 	if (!devx_is_general_cmd(cmd_in))
@@ -515,7 +710,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
 	if (IS_ERR(cmd_out))
 		return PTR_ERR(cmd_out);
 
-	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
 	err = mlx5_cmd_exec(dev->mdev, cmd_in,
 			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
 			    cmd_out, cmd_out_len);
@@ -726,11 +921,15 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
 	struct devx_obj *obj;
 	int err;
+	int uid;
+	u32 obj_id;
+	u16 opcode;
 
-	if (!c->devx_uid)
-		return -EPERM;
+	uid = devx_get_uid(c, cmd_in);
+	if (uid < 0)
+		return uid;
 
-	if (!devx_is_obj_create_cmd(cmd_in))
+	if (!devx_is_obj_create_cmd(cmd_in, &opcode))
 		return -EINVAL;
 
 	cmd_out = uverbs_zalloc(attrs, cmd_out_len);
@@ -741,7 +940,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
 	if (!obj)
 		return -ENOMEM;
 
-	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+	devx_set_umem_valid(cmd_in);
+
 	err = mlx5_cmd_exec(dev->mdev, cmd_in,
 			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN),
 			    cmd_out, cmd_out_len);
@@ -750,13 +951,15 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
 
 	uobj->object = obj;
 	obj->mdev = dev->mdev;
-	devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj->obj_id);
+	devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
+				   &obj_id);
 	WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
 
 	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
 	if (err)
 		goto obj_destroy;
 
+	obj->obj_id = get_enc_obj_id(opcode, obj_id);
 	return 0;
 
 obj_destroy:
@@ -778,9 +981,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
 	struct devx_obj *obj = uobj->object;
 	void *cmd_out;
 	int err;
+	int uid;
 
-	if (!c->devx_uid)
-		return -EPERM;
+	uid = devx_get_uid(c, cmd_in);
+	if (uid < 0)
+		return uid;
 
 	if (!devx_is_obj_modify_cmd(cmd_in))
 		return -EINVAL;
@@ -792,7 +997,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
 	if (IS_ERR(cmd_out))
 		return PTR_ERR(cmd_out);
 
-	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+	devx_set_umem_valid(cmd_in);
+
 	err = mlx5_cmd_exec(obj->mdev, cmd_in,
 			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
 			    cmd_out, cmd_out_len);
@@ -815,9 +1022,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
 	struct devx_obj *obj = uobj->object;
 	void *cmd_out;
 	int err;
+	int uid;
 
-	if (!c->devx_uid)
-		return -EPERM;
+	uid = devx_get_uid(c, cmd_in);
+	if (uid < 0)
+		return uid;
 
 	if (!devx_is_obj_query_cmd(cmd_in))
 		return -EINVAL;
@@ -829,7 +1038,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
 	if (IS_ERR(cmd_out))
 		return PTR_ERR(cmd_out);
 
-	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
 	err = mlx5_cmd_exec(obj->mdev, cmd_in,
 			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
 			    cmd_out, cmd_out_len);
@@ -928,6 +1137,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
 	int err;
 
 	if (!c->devx_uid)
+		return -EINVAL;
+
+	if (!capable(CAP_NET_RAW))
 		return -EPERM;
 
 	obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index 1a29f47f836e..f86cdcafdafc 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -7,7 +7,9 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/uverbs_types.h>
 #include <rdma/uverbs_ioctl.h>
+#include <rdma/uverbs_std_types.h>
 #include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
 #include <rdma/ib_umem.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/fs.h>
@@ -16,6 +18,24 @@
 #define UVERBS_MODULE_NAME mlx5_ib
 #include <rdma/uverbs_named_ioctl.h>
 
+static int
+mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
+			     enum mlx5_flow_namespace_type *namespace)
+{
+	switch (table_type) {
+	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
+		*namespace = MLX5_FLOW_NAMESPACE_BYPASS;
+		break;
+	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
+		*namespace = MLX5_FLOW_NAMESPACE_EGRESS;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
 	[MLX5_IB_FLOW_TYPE_NORMAL] = {
 		.type = UVERBS_ATTR_TYPE_PTR_IN,
@@ -38,11 +58,15 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
 	},
 };
 
+#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
 	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
+	struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
 	struct mlx5_ib_flow_handler *flow_handler;
 	struct mlx5_ib_flow_matcher *fs_matcher;
+	struct ib_uobject **arr_flow_actions;
+	struct ib_uflow_resources *uflow_res;
 	void *devx_obj;
 	int dest_id, dest_type;
 	void *cmd_in;
@@ -52,6 +76,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
 	struct ib_uobject *uobj =
 		uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
 	struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
+	int len, ret, i;
 
 	if (!capable(CAP_NET_RAW))
 		return -EPERM;
@@ -61,7 +86,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
 	dest_qp = uverbs_attr_is_valid(attrs,
 				       MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
 
-	if ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))
+	fs_matcher = uverbs_attr_get_obj(attrs,
+					 MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
+	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS &&
+	    ((dest_devx && dest_qp) || (!dest_devx && !dest_qp)))
+		return -EINVAL;
+
+	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS &&
+	    (dest_devx || dest_qp))
 		return -EINVAL;
 
 	if (dest_devx) {
@@ -75,7 +107,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
 		 */
 		if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type))
 			return -EINVAL;
-	} else {
+	} else if (dest_qp) {
 		struct mlx5_ib_qp *mqp;
 
 		qp = uverbs_attr_get_obj(attrs,
@@ -92,6 +124,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
 		else
 			dest_id = mqp->raw_packet_qp.rq.tirn;
 		dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+	} else {
+		dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
 	}
 
 	if (dev->rep)
@@ -101,16 +135,48 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
 		attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
 	inlen = uverbs_attr_get_len(attrs,
 				    MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
-	fs_matcher = uverbs_attr_get_obj(attrs,
-					 MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
-	flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, cmd_in, inlen,
+
+	uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
+	if (!uflow_res)
+		return -ENOMEM;
+
+	len = uverbs_attr_get_uobjs_arr(attrs,
+		MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
+	for (i = 0; i < len; i++) {
+		struct mlx5_ib_flow_action *maction =
+			to_mflow_act(arr_flow_actions[i]->object);
+
+		ret = parse_flow_flow_action(maction, false, &flow_act);
+		if (ret)
+			goto err_out;
+		flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
+				   arr_flow_actions[i]->object);
+	}
+
+	ret = uverbs_copy_from(&flow_act.flow_tag, attrs,
+			       MLX5_IB_ATTR_CREATE_FLOW_TAG);
+	if (!ret) {
+		if (flow_act.flow_tag >= BIT(24)) {
+			ret = -EINVAL;
+			goto err_out;
+		}
+		flow_act.flags |= FLOW_ACT_HAS_TAG;
+	}
+
+	flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act,
+					       cmd_in, inlen,
 					       dest_id, dest_type);
-	if (IS_ERR(flow_handler))
-		return PTR_ERR(flow_handler);
+	if (IS_ERR(flow_handler)) {
+		ret = PTR_ERR(flow_handler);
+		goto err_out;
+	}
 
-	ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev);
+	ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
 
 	return 0;
+err_out:
+	ib_uverbs_flow_resources_free(uflow_res);
+	return ret;
 }
 
 static int flow_matcher_cleanup(struct ib_uobject *uobject,
@@ -134,12 +200,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
 		attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
 	struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
 	struct mlx5_ib_flow_matcher *obj;
+	u32 flags;
 	int err;
 
 	obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
 	if (!obj)
 		return -ENOMEM;
 
+	obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
 	obj->mask_len = uverbs_attr_get_len(
 		attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
 	err = uverbs_copy_from(&obj->matcher_mask,
@@ -165,6 +233,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
 	if (err)
 		goto end;
 
+	err = uverbs_get_flags32(&flags, attrs,
+				 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+				 IB_FLOW_ATTR_FLAGS_EGRESS);
+	if (err)
+		goto end;
+
+	if (flags) {
+		err = mlx5_ib_ft_type_to_namespace(
+			MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, &obj->ns_type);
+		if (err)
+			goto end;
+	}
+
 	uobj->object = obj;
 	obj->mdev = dev->mdev;
 	atomic_set(&obj->usecnt, 0);
@@ -175,6 +256,248 @@ end:
 	return err;
 }
 
+void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
+{
+	switch (maction->flow_action_raw.sub_type) {
+	case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
+		mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
+					   maction->flow_action_raw.action_id);
+		break;
+	case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
+		mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
+			maction->flow_action_raw.action_id);
+		break;
+	case MLX5_IB_FLOW_ACTION_DECAP:
+		break;
+	default:
+		break;
+	}
+}
+
+static struct ib_flow_action *
+mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
+			     enum mlx5_ib_uapi_flow_table_type ft_type,
+			     u8 num_actions, void *in)
+{
+	enum mlx5_flow_namespace_type namespace;
+	struct mlx5_ib_flow_action *maction;
+	int ret;
+
+	ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+	if (ret)
+		return ERR_PTR(-EINVAL);
+
+	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+	if (!maction)
+		return ERR_PTR(-ENOMEM);
+
+	ret = mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in,
+				       &maction->flow_action_raw.action_id);
+
+	if (ret) {
+		kfree(maction);
+		return ERR_PTR(ret);
+	}
+	maction->flow_action_raw.sub_type =
+		MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
+	maction->flow_action_raw.dev = dev;
+
+	return &maction->ib_action;
+}
+
+static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
+{
+	return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+					 max_modify_header_actions) ||
+	       MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, max_modify_header_actions);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
+	struct ib_uverbs_file *file,
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
+	struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
+	enum mlx5_ib_uapi_flow_table_type ft_type;
+	struct ib_flow_action *action;
+	size_t num_actions;
+	void *in;
+	int len;
+	int ret;
+
+	if (!mlx5_ib_modify_header_supported(mdev))
+		return -EOPNOTSUPP;
+
+	in = uverbs_attr_get_alloced_ptr(attrs,
+		MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
+	len = uverbs_attr_get_len(attrs,
+		MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
+
+	if (len % MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto))
+		return -EINVAL;
+
+	ret = uverbs_get_const(&ft_type, attrs,
+			       MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
+	if (ret)
+		return ret;
+
+	num_actions = len / MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto),
+	action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
+	if (IS_ERR(action))
+		return PTR_ERR(action);
+
+	uverbs_flow_action_fill_action(action, uobj, uobj->context->device,
+				       IB_FLOW_ACTION_UNSPECIFIED);
+
+	return 0;
+}
+
+static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
+						      u8 packet_reformat_type,
+						      u8 ft_type)
+{
+	switch (packet_reformat_type) {
+	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+			return MLX5_CAP_FLOWTABLE(ibdev->mdev,
+						  encap_general_header);
+		break;
+	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+			return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
+				reformat_l2_to_l3_tunnel);
+		break;
+	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+			return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
+				reformat_l3_tunnel_to_l2);
+		break;
+	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
+		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+			return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
+		break;
+	default:
+		break;
+	}
+
+	return false;
+}
+
+static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
+{
+	switch (dv_prt) {
+	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+		*prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
+		break;
+	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+		*prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+		break;
+	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+		*prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int mlx5_ib_flow_action_create_packet_reformat_ctx(
+	struct mlx5_ib_dev *dev,
+	struct mlx5_ib_flow_action *maction,
+	u8 ft_type, u8 dv_prt,
+	void *in, size_t len)
+{
+	enum mlx5_flow_namespace_type namespace;
+	u8 prm_prt;
+	int ret;
+
+	ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+	if (ret)
+		return ret;
+
+	ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
+	if (ret)
+		return ret;
+
+	ret = mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
+					 in, namespace,
+					 &maction->flow_action_raw.action_id);
+	if (ret)
+		return ret;
+
+	maction->flow_action_raw.sub_type =
+		MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
+	maction->flow_action_raw.dev = dev;
+
+	return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
+	struct ib_uverbs_file *file,
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+		MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
+	struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
+	enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
+	enum mlx5_ib_uapi_flow_table_type ft_type;
+	struct mlx5_ib_flow_action *maction;
+	int ret;
+
+	ret = uverbs_get_const(&ft_type, attrs,
+			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
+	if (ret)
+		return ret;
+
+	ret = uverbs_get_const(&dv_prt, attrs,
+			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
+	if (ret)
+		return ret;
+
+	if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
+		return -EOPNOTSUPP;
+
+	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+	if (!maction)
+		return -ENOMEM;
+
+	if (dv_prt ==
+	    MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
+		maction->flow_action_raw.sub_type =
+			MLX5_IB_FLOW_ACTION_DECAP;
+		maction->flow_action_raw.dev = mdev;
+	} else {
+		void *in;
+		int len;
+
+		in = uverbs_attr_get_alloced_ptr(attrs,
+			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+		if (IS_ERR(in)) {
+			ret = PTR_ERR(in);
+			goto free_maction;
+		}
+
+		len = uverbs_attr_get_len(attrs,
+			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+
+		ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
+			maction, ft_type, dv_prt, in, len);
+		if (ret)
+			goto free_maction;
+	}
+
+	uverbs_flow_action_fill_action(&maction->ib_action, uobj,
+				       uobj->context->device,
+				       IB_FLOW_ACTION_UNSPECIFIED);
+	return 0;
+
+free_maction:
+	kfree(maction);
+	return ret;
+}
+
 DECLARE_UVERBS_NAMED_METHOD(
 	MLX5_IB_METHOD_CREATE_FLOW,
 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
@@ -195,7 +518,15 @@ DECLARE_UVERBS_NAMED_METHOD(
 			UVERBS_ACCESS_READ),
 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
 			MLX5_IB_OBJECT_DEVX_OBJ,
-			UVERBS_ACCESS_READ));
+			UVERBS_ACCESS_READ),
+	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
+			     UVERBS_OBJECT_FLOW_ACTION,
+			     UVERBS_ACCESS_READ, 1,
+			     MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
+			     UA_OPTIONAL),
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_OPTIONAL));
 
 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
 	MLX5_IB_METHOD_DESTROY_FLOW,
@@ -210,6 +541,44 @@ ADD_UVERBS_METHODS(mlx5_ib_fs,
 		   &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
 
 DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
+			UVERBS_OBJECT_FLOW_ACTION,
+			UVERBS_ACCESS_NEW,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+			   UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
+				   set_action_in_add_action_in_auto)),
+			   UA_MANDATORY,
+			   UA_ALLOC_AND_COPY),
+	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
+			     enum mlx5_ib_uapi_flow_table_type,
+			     UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
+			UVERBS_OBJECT_FLOW_ACTION,
+			UVERBS_ACCESS_NEW,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
+			   UVERBS_ATTR_MIN_SIZE(1),
+			   UA_ALLOC_AND_COPY,
+			   UA_OPTIONAL),
+	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
+			     enum mlx5_ib_uapi_flow_action_packet_reformat_type,
+			     UA_MANDATORY),
+	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
+			     enum mlx5_ib_uapi_flow_table_type,
+			     UA_MANDATORY));
+
+ADD_UVERBS_METHODS(
+	mlx5_ib_flow_actions,
+	UVERBS_OBJECT_FLOW_ACTION,
+	&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
+	&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
+
+DECLARE_UVERBS_NAMED_METHOD(
 	MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
 			MLX5_IB_OBJECT_FLOW_MATCHER,
@@ -224,7 +593,10 @@ DECLARE_UVERBS_NAMED_METHOD(
 			    UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
 			   UVERBS_ATTR_TYPE(u8),
-			   UA_MANDATORY));
+			   UA_MANDATORY),
+	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+			     enum ib_flow_flags,
+			     UA_OPTIONAL));
 
 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
 	MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
@@ -247,6 +619,7 @@ int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
 
 	root[i++] = &flow_objects;
 	root[i++] = &mlx5_ib_fs;
+	root[i++] = &mlx5_ib_flow_actions;
 
 	return i;
 }
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 35a0e04c38f2..584ff2ea7810 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -39,9 +39,6 @@ static const struct mlx5_ib_profile rep_profile = {
 	STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
 		     mlx5_ib_stage_post_ib_reg_umr_init,
 		     NULL),
-	STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
-		     mlx5_ib_stage_class_attr_init,
-		     NULL),
 };
 
 static int
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index af32899bb72a..e9c428071df3 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1571,14 +1571,57 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
 			mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
 }
 
-static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
+int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
+{
+	int err = 0;
+
+	mutex_lock(&dev->lb.mutex);
+	if (td)
+		dev->lb.user_td++;
+	if (qp)
+		dev->lb.qps++;
+
+	if (dev->lb.user_td == 2 ||
+	    dev->lb.qps == 1) {
+		if (!dev->lb.enabled) {
+			err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
+			dev->lb.enabled = true;
+		}
+	}
+
+	mutex_unlock(&dev->lb.mutex);
+
+	return err;
+}
+
+void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
+{
+	mutex_lock(&dev->lb.mutex);
+	if (td)
+		dev->lb.user_td--;
+	if (qp)
+		dev->lb.qps--;
+
+	if (dev->lb.user_td == 1 &&
+	    dev->lb.qps == 0) {
+		if (dev->lb.enabled) {
+			mlx5_nic_vport_update_local_lb(dev->mdev, false);
+			dev->lb.enabled = false;
+		}
+	}
+
+	mutex_unlock(&dev->lb.mutex);
+}
+
+static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn,
+					  u16 uid)
 {
 	int err;
 
 	if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
 		return 0;
 
-	err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
+	err = mlx5_cmd_alloc_transport_domain(dev->mdev, tdn, uid);
 	if (err)
 		return err;
 
@@ -1587,35 +1630,23 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
 	     !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
 		return err;
 
-	mutex_lock(&dev->lb_mutex);
-	dev->user_td++;
-
-	if (dev->user_td == 2)
-		err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
-
-	mutex_unlock(&dev->lb_mutex);
-	return err;
+	return mlx5_ib_enable_lb(dev, true, false);
 }
 
-static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
+static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
+					     u16 uid)
 {
 	if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
 		return;
 
-	mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
+	mlx5_cmd_dealloc_transport_domain(dev->mdev, tdn, uid);
 
 	if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
 	    (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
 	     !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
 		return;
 
-	mutex_lock(&dev->lb_mutex);
-	dev->user_td--;
-
-	if (dev->user_td < 2)
-		mlx5_nic_vport_update_local_lb(dev->mdev, false);
-
-	mutex_unlock(&dev->lb_mutex);
+	mlx5_ib_disable_lb(dev, true, false);
 }
 
 static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
@@ -1727,30 +1758,24 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
 #endif
 
-	err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
-	if (err)
-		goto out_uars;
-
 	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
-		/* Block DEVX on Infiniband as of SELinux */
-		if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) {
-			err = -EPERM;
-			goto out_td;
-		}
-
-		err = mlx5_ib_devx_create(dev, context);
-		if (err)
-			goto out_td;
+		err = mlx5_ib_devx_create(dev);
+		if (err < 0)
+			goto out_uars;
+		context->devx_uid = err;
 	}
 
+	err = mlx5_ib_alloc_transport_domain(dev, &context->tdn,
+					     context->devx_uid);
+	if (err)
+		goto out_devx;
+
 	if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
 		err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
 		if (err)
 			goto out_mdev;
 	}
 
-	INIT_LIST_HEAD(&context->vma_private_list);
-	mutex_init(&context->vma_private_list_mutex);
 	INIT_LIST_HEAD(&context->db_page_list);
 	mutex_init(&context->db_page_mutex);
 
@@ -1826,13 +1851,21 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	context->lib_caps = req.lib_caps;
 	print_lib_caps(dev, context->lib_caps);
 
+	if (mlx5_lag_is_active(dev->mdev)) {
+		u8 port = mlx5_core_native_port_num(dev->mdev);
+
+		atomic_set(&context->tx_port_affinity,
+			   atomic_add_return(
+				   1, &dev->roce[port].tx_port_affinity));
+	}
+
 	return &context->ibucontext;
 
 out_mdev:
+	mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
+out_devx:
 	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
-		mlx5_ib_devx_destroy(dev, context);
-out_td:
-	mlx5_ib_dealloc_transport_domain(dev, context->tdn);
+		mlx5_ib_devx_destroy(dev, context->devx_uid);
 
 out_uars:
 	deallocate_uars(dev, context);
@@ -1855,11 +1888,18 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
 	struct mlx5_bfreg_info *bfregi;
 
-	if (context->devx_uid)
-		mlx5_ib_devx_destroy(dev, context);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	/* All umem's must be destroyed before destroying the ucontext. */
+	mutex_lock(&ibcontext->per_mm_list_lock);
+	WARN_ON(!list_empty(&ibcontext->per_mm_list));
+	mutex_unlock(&ibcontext->per_mm_list_lock);
+#endif
 
 	bfregi = &context->bfregi;
-	mlx5_ib_dealloc_transport_domain(dev, context->tdn);
+	mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
+
+	if (context->devx_uid)
+		mlx5_ib_devx_destroy(dev, context->devx_uid);
 
 	deallocate_uars(dev, context);
 	kfree(bfregi->sys_pages);
@@ -1900,94 +1940,9 @@ static int get_extended_index(unsigned long offset)
 	return get_arg(offset) | ((offset >> 16) & 0xff) << 8;
 }
 
-static void  mlx5_ib_vma_open(struct vm_area_struct *area)
-{
-	/* vma_open is called when a new VMA is created on top of our VMA.  This
-	 * is done through either mremap flow or split_vma (usually due to
-	 * mlock, madvise, munmap, etc.) We do not support a clone of the VMA,
-	 * as this VMA is strongly hardware related.  Therefore we set the
-	 * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
-	 * calling us again and trying to do incorrect actions.  We assume that
-	 * the original VMA size is exactly a single page, and therefore all
-	 * "splitting" operation will not happen to it.
-	 */
-	area->vm_ops = NULL;
-}
-
-static void  mlx5_ib_vma_close(struct vm_area_struct *area)
-{
-	struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;
-
-	/* It's guaranteed that all VMAs opened on a FD are closed before the
-	 * file itself is closed, therefore no sync is needed with the regular
-	 * closing flow. (e.g. mlx5 ib_dealloc_ucontext)
-	 * However need a sync with accessing the vma as part of
-	 * mlx5_ib_disassociate_ucontext.
-	 * The close operation is usually called under mm->mmap_sem except when
-	 * process is exiting.
-	 * The exiting case is handled explicitly as part of
-	 * mlx5_ib_disassociate_ucontext.
-	 */
-	mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;
-
-	/* setting the vma context pointer to null in the mlx5_ib driver's
-	 * private data, to protect a race condition in
-	 * mlx5_ib_disassociate_ucontext().
-	 */
-	mlx5_ib_vma_priv_data->vma = NULL;
-	mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
-	list_del(&mlx5_ib_vma_priv_data->list);
-	mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
-	kfree(mlx5_ib_vma_priv_data);
-}
-
-static const struct vm_operations_struct mlx5_ib_vm_ops = {
-	.open = mlx5_ib_vma_open,
-	.close = mlx5_ib_vma_close
-};
-
-static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
-				struct mlx5_ib_ucontext *ctx)
-{
-	struct mlx5_ib_vma_private_data *vma_prv;
-	struct list_head *vma_head = &ctx->vma_private_list;
-
-	vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
-	if (!vma_prv)
-		return -ENOMEM;
-
-	vma_prv->vma = vma;
-	vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex;
-	vma->vm_private_data = vma_prv;
-	vma->vm_ops =  &mlx5_ib_vm_ops;
-
-	mutex_lock(&ctx->vma_private_list_mutex);
-	list_add(&vma_prv->list, vma_head);
-	mutex_unlock(&ctx->vma_private_list_mutex);
-
-	return 0;
-}
 
 static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
 {
-	struct vm_area_struct *vma;
-	struct mlx5_ib_vma_private_data *vma_private, *n;
-	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
-
-	mutex_lock(&context->vma_private_list_mutex);
-	list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
-				 list) {
-		vma = vma_private->vma;
-		zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
-		/* context going to be destroyed, should
-		 * not access ops any more.
-		 */
-		vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
-		vma->vm_ops = NULL;
-		list_del(&vma_private->list);
-		kfree(vma_private);
-	}
-	mutex_unlock(&context->vma_private_list_mutex);
 }
 
 static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
@@ -2010,9 +1965,6 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
 					struct vm_area_struct *vma,
 					struct mlx5_ib_ucontext *context)
 {
-	phys_addr_t pfn;
-	int err;
-
 	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
 		return -EINVAL;
 
@@ -2025,13 +1977,8 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
 	if (!dev->mdev->clock_info_page)
 		return -EOPNOTSUPP;
 
-	pfn = page_to_pfn(dev->mdev->clock_info_page);
-	err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
-			      vma->vm_page_prot);
-	if (err)
-		return err;
-
-	return mlx5_ib_set_vma_data(vma, context);
+	return rdma_user_mmap_page(&context->ibucontext, vma,
+				   dev->mdev->clock_info_page, PAGE_SIZE);
 }
 
 static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
@@ -2121,21 +2068,15 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
 	pfn = uar_index2pfn(dev, uar_index);
 	mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
 
-	vma->vm_page_prot = prot;
-	err = io_remap_pfn_range(vma, vma->vm_start, pfn,
-				 PAGE_SIZE, vma->vm_page_prot);
+	err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE,
+				prot);
 	if (err) {
 		mlx5_ib_err(dev,
-			    "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n",
+			    "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n",
 			    err, mmap_cmd2str(cmd));
-		err = -EAGAIN;
 		goto err;
 	}
 
-	err = mlx5_ib_set_vma_data(vma, context);
-	if (err)
-		goto err;
-
 	if (dyn_uar)
 		bfregi->sys_pages[idx] = uar_index;
 	return 0;
@@ -2160,7 +2101,6 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	size_t map_size = vma->vm_end - vma->vm_start;
 	u32 npages = map_size >> PAGE_SHIFT;
 	phys_addr_t pfn;
-	pgprot_t prot;
 
 	if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
 	    page_idx + npages)
@@ -2170,14 +2110,8 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	      MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
 	      PAGE_SHIFT) +
 	      page_idx;
-	prot = pgprot_writecombine(vma->vm_page_prot);
-	vma->vm_page_prot = prot;
-
-	if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size,
-			       vma->vm_page_prot))
-		return -EAGAIN;
-
-	return mlx5_ib_set_vma_data(vma, mctx);
+	return rdma_user_mmap_io(context, vma, pfn, map_size,
+				 pgprot_writecombine(vma->vm_page_prot));
 }
 
 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@ -2318,21 +2252,30 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
 	struct mlx5_ib_alloc_pd_resp resp;
 	struct mlx5_ib_pd *pd;
 	int err;
+	u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(alloc_pd_in)]   = {};
+	u16 uid = 0;
 
 	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
 	if (!pd)
 		return ERR_PTR(-ENOMEM);
 
-	err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
+	uid = context ? to_mucontext(context)->devx_uid : 0;
+	MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
+	MLX5_SET(alloc_pd_in, in, uid, uid);
+	err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in),
+			    out, sizeof(out));
 	if (err) {
 		kfree(pd);
 		return ERR_PTR(err);
 	}
 
+	pd->pdn = MLX5_GET(alloc_pd_out, out, pd);
+	pd->uid = uid;
 	if (context) {
 		resp.pdn = pd->pdn;
 		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
-			mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
+			mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid);
 			kfree(pd);
 			return ERR_PTR(-EFAULT);
 		}
@@ -2346,7 +2289,7 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
 	struct mlx5_ib_dev *mdev = to_mdev(pd->device);
 	struct mlx5_ib_pd *mpd = to_mpd(pd);
 
-	mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
+	mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
 	kfree(mpd);
 
 	return 0;
@@ -2452,20 +2395,50 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
 		   offsetof(typeof(filter), field) -\
 		   sizeof(filter.field))
 
-static int parse_flow_flow_action(const union ib_flow_spec *ib_spec,
-				  const struct ib_flow_attr *flow_attr,
-				  struct mlx5_flow_act *action)
+int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
+			   bool is_egress,
+			   struct mlx5_flow_act *action)
 {
-	struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act);
 
 	switch (maction->ib_action.type) {
 	case IB_FLOW_ACTION_ESP:
+		if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+				      MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
+			return -EINVAL;
 		/* Currently only AES_GCM keymat is supported by the driver */
 		action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
-		action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ?
+		action->action |= is_egress ?
 			MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
 			MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
 		return 0;
+	case IB_FLOW_ACTION_UNSPECIFIED:
+		if (maction->flow_action_raw.sub_type ==
+		    MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
+			if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+				return -EINVAL;
+			action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+			action->modify_id = maction->flow_action_raw.action_id;
+			return 0;
+		}
+		if (maction->flow_action_raw.sub_type ==
+		    MLX5_IB_FLOW_ACTION_DECAP) {
+			if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+				return -EINVAL;
+			action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+			return 0;
+		}
+		if (maction->flow_action_raw.sub_type ==
+		    MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
+			if (action->action &
+			    MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
+				return -EINVAL;
+			action->action |=
+				MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+			action->reformat_id =
+				maction->flow_action_raw.action_id;
+			return 0;
+		}
+		/* fall through */
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -2802,7 +2775,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 		action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
 		break;
 	case IB_FLOW_SPEC_ACTION_HANDLE:
-		ret = parse_flow_flow_action(ib_spec, flow_attr, action);
+		ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
+			flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
 		if (ret)
 			return ret;
 		break;
@@ -2883,7 +2857,7 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
 	 * rules would be supported, always return VALID_SPEC_NA.
 	 */
 	if (!is_crypto)
-		return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA;
+		return VALID_SPEC_NA;
 
 	return is_crypto && is_ipsec &&
 		(!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ?
@@ -3026,14 +3000,15 @@ enum flow_table_type {
 static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
 					   struct mlx5_ib_flow_prio *prio,
 					   int priority,
-					   int num_entries, int num_groups)
+					   int num_entries, int num_groups,
+					   u32 flags)
 {
 	struct mlx5_flow_table *ft;
 
 	ft = mlx5_create_auto_grouped_flow_table(ns, priority,
 						 num_entries,
 						 num_groups,
-						 0, 0);
+						 0, flags);
 	if (IS_ERR(ft))
 		return ERR_CAST(ft);
 
@@ -3053,26 +3028,43 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 	int max_table_size;
 	int num_entries;
 	int num_groups;
+	u32 flags = 0;
 	int priority;
 
 	max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
 						       log_max_ft_size));
 	if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
-		if (ft_type == MLX5_IB_FT_TX)
-			priority = 0;
-		else if (flow_is_multicast_only(flow_attr) &&
-			 !dont_trap)
+		enum mlx5_flow_namespace_type fn_type;
+
+		if (flow_is_multicast_only(flow_attr) &&
+		    !dont_trap)
 			priority = MLX5_IB_FLOW_MCAST_PRIO;
 		else
 			priority = ib_prio_to_core_prio(flow_attr->priority,
 							dont_trap);
-		ns = mlx5_get_flow_namespace(dev->mdev,
-					     ft_type == MLX5_IB_FT_TX ?
-					     MLX5_FLOW_NAMESPACE_EGRESS :
-					     MLX5_FLOW_NAMESPACE_BYPASS);
+		if (ft_type == MLX5_IB_FT_RX) {
+			fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
+			prio = &dev->flow_db->prios[priority];
+			if (!dev->rep &&
+			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
+				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+			if (!dev->rep &&
+			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+					reformat_l3_tunnel_to_l2))
+				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+		} else {
+			max_table_size =
+				BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+							      log_max_ft_size));
+			fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
+			prio = &dev->flow_db->egress_prios[priority];
+			if (!dev->rep &&
+			    MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
+				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+		}
+		ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
 		num_entries = MLX5_FS_MAX_ENTRIES;
 		num_groups = MLX5_FS_MAX_TYPES;
-		prio = &dev->flow_db->prios[priority];
 	} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
 		   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
 		ns = mlx5_get_flow_namespace(dev->mdev,
@@ -3104,7 +3096,8 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 
 	ft = prio->flow_table;
 	if (!ft)
-		return _get_prio(ns, prio, priority, num_entries, num_groups);
+		return _get_prio(ns, prio, priority, num_entries, num_groups,
+				 flags);
 
 	return prio;
 }
@@ -3271,6 +3264,9 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 	if (!is_valid_attr(dev->mdev, flow_attr))
 		return ERR_PTR(-EINVAL);
 
+	if (dev->rep && is_egress)
+		return ERR_PTR(-EINVAL);
+
 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
 	if (!handler || !spec) {
@@ -3661,34 +3657,54 @@ free_ucmd:
 	return ERR_PTR(err);
 }
 
-static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev,
-						 int priority, bool mcast)
+static struct mlx5_ib_flow_prio *
+_get_flow_table(struct mlx5_ib_dev *dev,
+		struct mlx5_ib_flow_matcher *fs_matcher,
+		bool mcast)
 {
-	int max_table_size;
 	struct mlx5_flow_namespace *ns = NULL;
 	struct mlx5_ib_flow_prio *prio;
+	int max_table_size;
+	u32 flags = 0;
+	int priority;
+
+	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
+		max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+					log_max_ft_size));
+		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
+			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+					      reformat_l3_tunnel_to_l2))
+			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+	} else { /* Can only be MLX5_FLOW_NAMESPACE_EGRESS */
+		max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+					log_max_ft_size));
+		if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
+			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+	}
 
-	max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
-			     log_max_ft_size));
 	if (max_table_size < MLX5_FS_MAX_ENTRIES)
 		return ERR_PTR(-ENOMEM);
 
 	if (mcast)
 		priority = MLX5_IB_FLOW_MCAST_PRIO;
 	else
-		priority = ib_prio_to_core_prio(priority, false);
+		priority = ib_prio_to_core_prio(fs_matcher->priority, false);
 
-	ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS);
+	ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
 	if (!ns)
 		return ERR_PTR(-ENOTSUPP);
 
-	prio = &dev->flow_db->prios[priority];
+	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
+		prio = &dev->flow_db->prios[priority];
+	else
+		prio = &dev->flow_db->egress_prios[priority];
 
 	if (prio->flow_table)
 		return prio;
 
 	return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES,
-			 MLX5_FS_MAX_TYPES);
+			 MLX5_FS_MAX_TYPES, flags);
 }
 
 static struct mlx5_ib_flow_handler *
@@ -3696,10 +3712,10 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
 		      struct mlx5_ib_flow_prio *ft_prio,
 		      struct mlx5_flow_destination *dst,
 		      struct mlx5_ib_flow_matcher  *fs_matcher,
+		      struct mlx5_flow_act *flow_act,
 		      void *cmd_in, int inlen)
 {
 	struct mlx5_ib_flow_handler *handler;
-	struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
 	struct mlx5_flow_spec *spec;
 	struct mlx5_flow_table *ft = ft_prio->flow_table;
 	int err = 0;
@@ -3718,9 +3734,8 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
 	       fs_matcher->mask_len);
 	spec->match_criteria_enable = fs_matcher->match_criteria_enable;
 
-	flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	handler->rule = mlx5_add_flow_rules(ft, spec,
-					    &flow_act, dst, 1);
+					    flow_act, dst, 1);
 
 	if (IS_ERR(handler->rule)) {
 		err = PTR_ERR(handler->rule);
@@ -3782,12 +3797,12 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
 struct mlx5_ib_flow_handler *
 mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
 			struct mlx5_ib_flow_matcher *fs_matcher,
+			struct mlx5_flow_act *flow_act,
 			void *cmd_in, int inlen, int dest_id,
 			int dest_type)
 {
 	struct mlx5_flow_destination *dst;
 	struct mlx5_ib_flow_prio *ft_prio;
-	int priority = fs_matcher->priority;
 	struct mlx5_ib_flow_handler *handler;
 	bool mcast;
 	int err;
@@ -3805,7 +3820,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
 	mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
 	mutex_lock(&dev->flow_db->lock);
 
-	ft_prio = _get_flow_table(dev, priority, mcast);
+	ft_prio = _get_flow_table(dev, fs_matcher, mcast);
 	if (IS_ERR(ft_prio)) {
 		err = PTR_ERR(ft_prio);
 		goto unlock;
@@ -3814,13 +3829,18 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
 	if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
 		dst->type = dest_type;
 		dst->tir_num = dest_id;
-	} else {
+		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	} else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
 		dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
 		dst->ft_num = dest_id;
+		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	} else {
+		dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
 	}
 
-	handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in,
-					inlen);
+	handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
+					cmd_in, inlen);
 
 	if (IS_ERR(handler)) {
 		err = PTR_ERR(handler);
@@ -3998,6 +4018,9 @@ static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
 		 */
 		mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
 		break;
+	case IB_FLOW_ACTION_UNSPECIFIED:
+		mlx5_ib_destroy_flow_action_raw(maction);
+		break;
 	default:
 		WARN_ON(true);
 		break;
@@ -4012,13 +4035,17 @@ static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
 	struct mlx5_ib_qp *mqp = to_mqp(ibqp);
 	int err;
+	u16 uid;
+
+	uid = ibqp->pd ?
+		to_mpd(ibqp->pd)->uid : 0;
 
 	if (mqp->flags & MLX5_IB_QP_UNDERLAY) {
 		mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
 		return -EOPNOTSUPP;
 	}
 
-	err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
+	err = mlx5_cmd_attach_mcg(dev->mdev, gid, ibqp->qp_num, uid);
 	if (err)
 		mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
 			     ibqp->qp_num, gid->raw);
@@ -4030,8 +4057,11 @@ static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
 	int err;
+	u16 uid;
 
-	err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
+	uid = ibqp->pd ?
+		to_mpd(ibqp->pd)->uid : 0;
+	err = mlx5_cmd_detach_mcg(dev->mdev, gid, ibqp->qp_num, uid);
 	if (err)
 		mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
 			     ibqp->qp_num, gid->raw);
@@ -4052,16 +4082,17 @@ static int init_node_data(struct mlx5_ib_dev *dev)
 	return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
 }
 
-static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
-			     char *buf)
+static ssize_t fw_pages_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
 {
 	struct mlx5_ib_dev *dev =
 		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
 
 	return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
 }
+static DEVICE_ATTR_RO(fw_pages);
 
-static ssize_t show_reg_pages(struct device *device,
+static ssize_t reg_pages_show(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
 	struct mlx5_ib_dev *dev =
@@ -4069,44 +4100,47 @@ static ssize_t show_reg_pages(struct device *device,
 
 	return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
 }
+static DEVICE_ATTR_RO(reg_pages);
 
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
-			char *buf)
+static ssize_t hca_type_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
 {
 	struct mlx5_ib_dev *dev =
 		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
 	return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
 }
+static DEVICE_ATTR_RO(hca_type);
 
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
-			char *buf)
+static ssize_t hw_rev_show(struct device *device,
+			   struct device_attribute *attr, char *buf)
 {
 	struct mlx5_ib_dev *dev =
 		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
 	return sprintf(buf, "%x\n", dev->mdev->rev_id);
 }
+static DEVICE_ATTR_RO(hw_rev);
 
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
-			  char *buf)
+static ssize_t board_id_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
 {
 	struct mlx5_ib_dev *dev =
 		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
 	return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
 		       dev->mdev->board_id);
 }
+static DEVICE_ATTR_RO(board_id);
 
-static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
-static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
-static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
+static struct attribute *mlx5_class_attributes[] = {
+	&dev_attr_hw_rev.attr,
+	&dev_attr_hca_type.attr,
+	&dev_attr_board_id.attr,
+	&dev_attr_fw_pages.attr,
+	&dev_attr_reg_pages.attr,
+	NULL,
+};
 
-static struct device_attribute *mlx5_class_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type,
-	&dev_attr_board_id,
-	&dev_attr_fw_pages,
-	&dev_attr_reg_pages,
+static const struct attribute_group mlx5_attr_group = {
+	.attrs = mlx5_class_attributes,
 };
 
 static void pkey_change_handler(struct work_struct *work)
@@ -5631,7 +5665,6 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
-	const char *name;
 	int err;
 	int i;
 
@@ -5664,12 +5697,6 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 	if (mlx5_use_mad_ifc(dev))
 		get_ext_port_caps(dev);
 
-	if (!mlx5_lag_is_active(mdev))
-		name = "mlx5_%d";
-	else
-		name = "mlx5_bond_%d";
-
-	strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX);
 	dev->ib_dev.owner		= THIS_MODULE;
 	dev->ib_dev.node_type		= RDMA_NODE_IB_CA;
 	dev->ib_dev.local_dma_lkey	= 0 /* not supported for now */;
@@ -5876,7 +5903,7 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 	if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
 	    (MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) ||
 	     MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
-		mutex_init(&dev->lb_mutex);
+		mutex_init(&dev->lb.mutex);
 
 	return 0;
 }
@@ -6083,7 +6110,14 @@ static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev)
 
 int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
 {
-	return ib_register_device(&dev->ib_dev, NULL);
+	const char *name;
+
+	rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group);
+	if (!mlx5_lag_is_active(dev->mdev))
+		name = "mlx5_%d";
+	else
+		name = "mlx5_bond_%d";
+	return ib_register_device(&dev->ib_dev, name, NULL);
 }
 
 void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
@@ -6113,21 +6147,6 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
 	cancel_delay_drop(dev);
 }
 
-int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
-{
-	int err;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
-		err = device_create_file(&dev->ib_dev.dev,
-					 mlx5_class_attributes[i]);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
 static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
 {
 	mlx5_ib_register_vport_reps(dev);
@@ -6151,6 +6170,8 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
 			profile->stage[stage].cleanup(dev);
 	}
 
+	if (dev->devx_whitelist_uid)
+		mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
 	ib_dealloc_device((struct ib_device *)dev);
 }
 
@@ -6159,8 +6180,7 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
 {
 	int err;
 	int i;
-
-	printk_once(KERN_INFO "%s", mlx5_version);
+	int uid;
 
 	for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
 		if (profile->stage[i].init) {
@@ -6170,6 +6190,10 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
 		}
 	}
 
+	uid = mlx5_ib_devx_create(dev);
+	if (uid > 0)
+		dev->devx_whitelist_uid = uid;
+
 	dev->profile = profile;
 	dev->ib_active = true;
 
@@ -6230,9 +6254,6 @@ static const struct mlx5_ib_profile pf_profile = {
 	STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
 		     mlx5_ib_stage_delay_drop_init,
 		     mlx5_ib_stage_delay_drop_cleanup),
-	STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
-		     mlx5_ib_stage_class_attr_init,
-		     NULL),
 };
 
 static const struct mlx5_ib_profile nic_rep_profile = {
@@ -6275,9 +6296,6 @@ static const struct mlx5_ib_profile nic_rep_profile = {
 	STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
 		     mlx5_ib_stage_post_ib_reg_umr_init,
 		     NULL),
-	STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
-		     mlx5_ib_stage_class_attr_init,
-		     NULL),
 	STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
 		     mlx5_ib_stage_rep_reg_init,
 		     mlx5_ib_stage_rep_reg_cleanup),
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index f3dbd75a0a96..549234988bb4 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -57,7 +57,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
 	int entry;
 	unsigned long page_shift = umem->page_shift;
 
-	if (umem->odp_data) {
+	if (umem->is_odp) {
 		*ncont = ib_umem_page_count(umem);
 		*count = *ncont << (page_shift - PAGE_SHIFT);
 		*shift = page_shift;
@@ -152,14 +152,13 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 	struct scatterlist *sg;
 	int entry;
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	const bool odp = umem->odp_data != NULL;
-
-	if (odp) {
+	if (umem->is_odp) {
 		WARN_ON(shift != 0);
 		WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
 
 		for (i = 0; i < num_pages; ++i) {
-			dma_addr_t pa = umem->odp_data->dma_list[offset + i];
+			dma_addr_t pa =
+				to_ib_umem_odp(umem)->dma_list[offset + i];
 
 			pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
 		}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 289c18db2611..b651a7a6fde9 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -39,8 +39,10 @@
 #include <rdma/ib_smi.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cq.h>
+#include <linux/mlx5/fs.h>
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/srq.h>
+#include <linux/mlx5/fs.h>
 #include <linux/types.h>
 #include <linux/mlx5/transobj.h>
 #include <rdma/ib_user_verbs.h>
@@ -48,17 +50,17 @@
 #include <rdma/uverbs_ioctl.h>
 #include <rdma/mlx5_user_ioctl_cmds.h>
 
-#define mlx5_ib_dbg(dev, format, arg...)				\
-pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,	\
-	 __LINE__, current->pid, ##arg)
+#define mlx5_ib_dbg(_dev, format, arg...)                                      \
+	dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__,      \
+		__LINE__, current->pid, ##arg)
 
-#define mlx5_ib_err(dev, format, arg...)				\
-pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,	\
-	__LINE__, current->pid, ##arg)
+#define mlx5_ib_err(_dev, format, arg...)                                      \
+	dev_err(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__,      \
+		__LINE__, current->pid, ##arg)
 
-#define mlx5_ib_warn(dev, format, arg...)				\
-pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,	\
-	__LINE__, current->pid, ##arg)
+#define mlx5_ib_warn(_dev, format, arg...)                                     \
+	dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__,     \
+		 __LINE__, current->pid, ##arg)
 
 #define field_avail(type, fld, sz) (offsetof(type, fld) +		\
 				    sizeof(((type *)0)->fld) <= (sz))
@@ -114,13 +116,6 @@ enum {
 	MLX5_MEMIC_BASE_SIZE	= 1 << MLX5_MEMIC_BASE_ALIGN,
 };
 
-struct mlx5_ib_vma_private_data {
-	struct list_head list;
-	struct vm_area_struct *vma;
-	/* protect vma_private_list add/del */
-	struct mutex *vma_private_list_mutex;
-};
-
 struct mlx5_ib_ucontext {
 	struct ib_ucontext	ibucontext;
 	struct list_head	db_page_list;
@@ -132,13 +127,12 @@ struct mlx5_ib_ucontext {
 	u8			cqe_version;
 	/* Transport Domain number */
 	u32			tdn;
-	struct list_head	vma_private_list;
-	/* protect vma_private_list add/del */
-	struct mutex		vma_private_list_mutex;
 
 	u64			lib_caps;
 	DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
 	u16			devx_uid;
+	/* For RoCE LAG TX affinity */
+	atomic_t		tx_port_affinity;
 };
 
 static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -149,6 +143,13 @@ static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibuconte
 struct mlx5_ib_pd {
 	struct ib_pd		ibpd;
 	u32			pdn;
+	u16			uid;
+};
+
+enum {
+	MLX5_IB_FLOW_ACTION_MODIFY_HEADER,
+	MLX5_IB_FLOW_ACTION_PACKET_REFORMAT,
+	MLX5_IB_FLOW_ACTION_DECAP,
 };
 
 #define MLX5_IB_FLOW_MCAST_PRIO		(MLX5_BY_PASS_NUM_PRIOS - 1)
@@ -180,6 +181,7 @@ struct mlx5_ib_flow_matcher {
 	struct mlx5_ib_match_params matcher_mask;
 	int			mask_len;
 	enum mlx5_ib_flow_type	flow_type;
+	enum mlx5_flow_namespace_type ns_type;
 	u16			priority;
 	struct mlx5_core_dev	*mdev;
 	atomic_t		usecnt;
@@ -188,6 +190,7 @@ struct mlx5_ib_flow_matcher {
 
 struct mlx5_ib_flow_db {
 	struct mlx5_ib_flow_prio	prios[MLX5_IB_NUM_FLOW_FT];
+	struct mlx5_ib_flow_prio	egress_prios[MLX5_IB_NUM_FLOW_FT];
 	struct mlx5_ib_flow_prio	sniffer[MLX5_IB_NUM_SNIFFER_FTS];
 	struct mlx5_ib_flow_prio	egress[MLX5_IB_NUM_EGRESS_FTS];
 	struct mlx5_flow_table		*lag_demux_ft;
@@ -322,6 +325,7 @@ enum {
 struct mlx5_ib_rwq_ind_table {
 	struct ib_rwq_ind_table ib_rwq_ind_tbl;
 	u32			rqtn;
+	u16			uid;
 };
 
 struct mlx5_ib_ubuffer {
@@ -428,7 +432,7 @@ struct mlx5_ib_qp {
 	struct list_head	cq_send_list;
 	struct mlx5_rate_limit	rl;
 	u32                     underlay_qpn;
-	bool			tunnel_offload_en;
+	u32			flags_en;
 	/* storage for qp sub type when core qp type is IB_QPT_DRIVER */
 	enum ib_qp_type		qp_sub_type;
 };
@@ -536,6 +540,7 @@ struct mlx5_ib_srq {
 struct mlx5_ib_xrcd {
 	struct ib_xrcd		ibxrcd;
 	u32			xrcdn;
+	u16			uid;
 };
 
 enum mlx5_ib_mtt_access_flags {
@@ -700,7 +705,7 @@ struct mlx5_roce {
 	rwlock_t		netdev_lock;
 	struct net_device	*netdev;
 	struct notifier_block	nb;
-	atomic_t		next_port;
+	atomic_t		tx_port_affinity;
 	enum ib_port_state last_port_state;
 	struct mlx5_ib_dev	*dev;
 	u8			native_port_num;
@@ -815,6 +820,11 @@ struct mlx5_ib_flow_action {
 			u64			    ib_flags;
 			struct mlx5_accel_esp_xfrm *ctx;
 		} esp_aes_gcm;
+		struct {
+			struct mlx5_ib_dev *dev;
+			u32 sub_type;
+			u32 action_id;
+		} flow_action_raw;
 	};
 };
 
@@ -859,9 +869,20 @@ to_mcounters(struct ib_counters *ibcntrs)
 	return container_of(ibcntrs, struct mlx5_ib_mcounters, ibcntrs);
 }
 
+int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
+			   bool is_egress,
+			   struct mlx5_flow_act *action);
+struct mlx5_ib_lb_state {
+	/* protect the user_td */
+	struct mutex		mutex;
+	u32			user_td;
+	int			qps;
+	bool			enabled;
+};
+
 struct mlx5_ib_dev {
 	struct ib_device		ib_dev;
-	const struct uverbs_object_tree_def *driver_trees[6];
+	const struct uverbs_object_tree_def *driver_trees[7];
 	struct mlx5_core_dev		*mdev;
 	struct mlx5_roce		roce[MLX5_MAX_PORTS];
 	int				num_ports;
@@ -900,13 +921,12 @@ struct mlx5_ib_dev {
 	const struct mlx5_ib_profile	*profile;
 	struct mlx5_eswitch_rep		*rep;
 
-	/* protect the user_td */
-	struct mutex		lb_mutex;
-	u32			user_td;
+	struct mlx5_ib_lb_state		lb;
 	u8			umr_fence;
 	struct list_head	ib_dev_list;
 	u64			sys_image_guid;
 	struct mlx5_memic	memic;
+	u16			devx_whitelist_uid;
 };
 
 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1017,6 +1037,8 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
 int mlx5_ib_destroy_srq(struct ib_srq *srq);
 int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
 			  const struct ib_recv_wr **bad_wr);
+int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
+void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
 struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 				struct ib_qp_init_attr *init_attr,
 				struct ib_udata *udata);
@@ -1106,7 +1128,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 			  int page_shift, __be64 *pas, int access_flags);
 void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
-int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
+int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
 
@@ -1141,7 +1163,7 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
 int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
 int __init mlx5_ib_odp_init(void);
 void mlx5_ib_odp_cleanup(void);
-void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
+void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 			      unsigned long end);
 void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
 void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
@@ -1180,7 +1202,6 @@ void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev);
 int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev);
 void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev);
 int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev);
-int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev);
 void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
 		      const struct mlx5_ib_profile *profile,
 		      int stage);
@@ -1229,22 +1250,20 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
 				  u8 port_num);
 
 #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
-			struct mlx5_ib_ucontext *context);
-void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
-			  struct mlx5_ib_ucontext *context);
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev);
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
 const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
 struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
 	struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
-	void *cmd_in, int inlen, int dest_id, int dest_type);
+	struct mlx5_flow_act *flow_act, void *cmd_in, int inlen,
+	int dest_id, int dest_type);
 bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
 int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
+void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
 #else
 static inline int
-mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
-		    struct mlx5_ib_ucontext *context) { return -EOPNOTSUPP; };
-static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
-					struct mlx5_ib_ucontext *context) {}
+mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { return -EOPNOTSUPP; };
+static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
 static inline const struct uverbs_object_tree_def *
 mlx5_ib_get_devx_tree(void) { return NULL; }
 static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
@@ -1257,6 +1276,11 @@ mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
 {
 	return 0;
 }
+static inline void
+mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
+{
+	return;
+};
 #endif
 static inline void init_query_mad(struct ib_smp *mad)
 {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index e22314837645..9b195d65a13e 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -98,7 +98,7 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 static void update_odp_mr(struct mlx5_ib_mr *mr)
 {
-	if (mr->umem->odp_data) {
+	if (mr->umem->is_odp) {
 		/*
 		 * This barrier prevents the compiler from moving the
 		 * setting of umem->odp_data->private to point to our
@@ -107,7 +107,7 @@ static void update_odp_mr(struct mlx5_ib_mr *mr)
 		 * handle invalidations.
 		 */
 		smp_wmb();
-		mr->umem->odp_data->private = mr;
+		to_ib_umem_odp(mr->umem)->private = mr;
 		/*
 		 * Make sure we will see the new
 		 * umem->odp_data->private value in the invalidation
@@ -691,7 +691,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 		init_completion(&ent->compl);
 		INIT_WORK(&ent->work, cache_work_func);
 		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
-		queue_work(cache->wq, &ent->work);
 
 		if (i > MR_CACHE_LAST_STD_ENTRY) {
 			mlx5_odp_init_mr_cache_entry(ent);
@@ -711,6 +710,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 			ent->limit = dev->mdev->profile->mr_cache[i].limit;
 		else
 			ent->limit = 0;
+		queue_work(cache->wq, &ent->work);
 	}
 
 	err = mlx5_mr_cache_debugfs_init(dev);
@@ -1627,14 +1627,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	struct ib_umem *umem = mr->umem;
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	if (umem && umem->odp_data) {
+	if (umem && umem->is_odp) {
+		struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem);
+
 		/* Prevent new page faults from succeeding */
 		mr->live = 0;
 		/* Wait for all running page-fault handlers to finish. */
 		synchronize_srcu(&dev->mr_srcu);
 		/* Destroy all page mappings */
-		if (umem->odp_data->page_list)
-			mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
+		if (umem_odp->page_list)
+			mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem),
 						 ib_umem_end(umem));
 		else
 			mlx5_ib_free_implicit_mr(mr);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index d216e0d2921d..b04eb6775326 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -61,13 +61,21 @@ static int check_parent(struct ib_umem_odp *odp,
 	return mr && mr->parent == parent && !odp->dying;
 }
 
+struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr)
+{
+	if (WARN_ON(!mr || !mr->umem || !mr->umem->is_odp))
+		return NULL;
+
+	return to_ib_umem_odp(mr->umem)->per_mm;
+}
+
 static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
 {
 	struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent;
-	struct ib_ucontext *ctx = odp->umem->context;
+	struct ib_ucontext_per_mm *per_mm = odp->per_mm;
 	struct rb_node *rb;
 
-	down_read(&ctx->umem_rwsem);
+	down_read(&per_mm->umem_rwsem);
 	while (1) {
 		rb = rb_next(&odp->interval_tree.rb);
 		if (!rb)
@@ -79,19 +87,19 @@ static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
 not_found:
 	odp = NULL;
 end:
-	up_read(&ctx->umem_rwsem);
+	up_read(&per_mm->umem_rwsem);
 	return odp;
 }
 
-static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx,
-				      u64 start, u64 length,
+static struct ib_umem_odp *odp_lookup(u64 start, u64 length,
 				      struct mlx5_ib_mr *parent)
 {
+	struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(parent);
 	struct ib_umem_odp *odp;
 	struct rb_node *rb;
 
-	down_read(&ctx->umem_rwsem);
-	odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length);
+	down_read(&per_mm->umem_rwsem);
+	odp = rbt_ib_umem_lookup(&per_mm->umem_tree, start, length);
 	if (!odp)
 		goto end;
 
@@ -102,13 +110,13 @@ static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx,
 		if (!rb)
 			goto not_found;
 		odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
-		if (ib_umem_start(odp->umem) > start + length)
+		if (ib_umem_start(&odp->umem) > start + length)
 			goto not_found;
 	}
 not_found:
 	odp = NULL;
 end:
-	up_read(&ctx->umem_rwsem);
+	up_read(&per_mm->umem_rwsem);
 	return odp;
 }
 
@@ -116,7 +124,6 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
 			   size_t nentries, struct mlx5_ib_mr *mr, int flags)
 {
 	struct ib_pd *pd = mr->ibmr.pd;
-	struct ib_ucontext *ctx = pd->uobject->context;
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct ib_umem_odp *odp;
 	unsigned long va;
@@ -131,13 +138,13 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
 		return;
 	}
 
-	odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE,
-			     nentries * MLX5_IMR_MTT_SIZE, mr);
+	odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE,
+			 nentries * MLX5_IMR_MTT_SIZE, mr);
 
 	for (i = 0; i < nentries; i++, pklm++) {
 		pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
 		va = (offset + i) * MLX5_IMR_MTT_SIZE;
-		if (odp && odp->umem->address == va) {
+		if (odp && odp->umem.address == va) {
 			struct mlx5_ib_mr *mtt = odp->private;
 
 			pklm->key = cpu_to_be32(mtt->ibmr.lkey);
@@ -153,13 +160,13 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
 static void mr_leaf_free_action(struct work_struct *work)
 {
 	struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
-	int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT;
+	int idx = ib_umem_start(&odp->umem) >> MLX5_IMR_MTT_SHIFT;
 	struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
 
 	mr->parent = NULL;
 	synchronize_srcu(&mr->dev->mr_srcu);
 
-	ib_umem_release(odp->umem);
+	ib_umem_release(&odp->umem);
 	if (imr->live)
 		mlx5_ib_update_xlt(imr, idx, 1, 0,
 				   MLX5_IB_UPD_XLT_INDIRECT |
@@ -170,22 +177,24 @@ static void mr_leaf_free_action(struct work_struct *work)
 		wake_up(&imr->q_leaf_free);
 }
 
-void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
+void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 			      unsigned long end)
 {
 	struct mlx5_ib_mr *mr;
 	const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT /
 				    sizeof(struct mlx5_mtt)) - 1;
 	u64 idx = 0, blk_start_idx = 0;
+	struct ib_umem *umem;
 	int in_block = 0;
 	u64 addr;
 
-	if (!umem || !umem->odp_data) {
+	if (!umem_odp) {
 		pr_err("invalidation called on NULL umem or non-ODP umem\n");
 		return;
 	}
+	umem = &umem_odp->umem;
 
-	mr = umem->odp_data->private;
+	mr = umem_odp->private;
 
 	if (!mr || !mr->ibmr.pd)
 		return;
@@ -208,7 +217,7 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
 		 * estimate the cost of another UMR vs. the cost of bigger
 		 * UMR.
 		 */
-		if (umem->odp_data->dma_list[idx] &
+		if (umem_odp->dma_list[idx] &
 		    (ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) {
 			if (!in_block) {
 				blk_start_idx = idx;
@@ -237,13 +246,13 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
 	 * needed.
 	 */
 
-	ib_umem_odp_unmap_dma_pages(umem, start, end);
+	ib_umem_odp_unmap_dma_pages(umem_odp, start, end);
 
 	if (unlikely(!umem->npages && mr->parent &&
-		     !umem->odp_data->dying)) {
-		WRITE_ONCE(umem->odp_data->dying, 1);
+		     !umem_odp->dying)) {
+		WRITE_ONCE(umem_odp->dying, 1);
 		atomic_inc(&mr->parent->num_leaf_free);
-		schedule_work(&umem->odp_data->work);
+		schedule_work(&umem_odp->work);
 	}
 }
 
@@ -366,16 +375,15 @@ fail:
 static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
 						u64 io_virt, size_t bcnt)
 {
-	struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context;
 	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
 	struct ib_umem_odp *odp, *result = NULL;
+	struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
 	u64 addr = io_virt & MLX5_IMR_MTT_MASK;
 	int nentries = 0, start_idx = 0, ret;
 	struct mlx5_ib_mr *mtt;
-	struct ib_umem *umem;
 
-	mutex_lock(&mr->umem->odp_data->umem_mutex);
-	odp = odp_lookup(ctx, addr, 1, mr);
+	mutex_lock(&odp_mr->umem_mutex);
+	odp = odp_lookup(addr, 1, mr);
 
 	mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
 		    io_virt, bcnt, addr, odp);
@@ -385,22 +393,23 @@ next_mr:
 		if (nentries)
 			nentries++;
 	} else {
-		umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE);
-		if (IS_ERR(umem)) {
-			mutex_unlock(&mr->umem->odp_data->umem_mutex);
-			return ERR_CAST(umem);
+		odp = ib_alloc_odp_umem(odp_mr->per_mm, addr,
+					MLX5_IMR_MTT_SIZE);
+		if (IS_ERR(odp)) {
+			mutex_unlock(&odp_mr->umem_mutex);
+			return ERR_CAST(odp);
 		}
 
-		mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags);
+		mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0,
+					mr->access_flags);
 		if (IS_ERR(mtt)) {
-			mutex_unlock(&mr->umem->odp_data->umem_mutex);
-			ib_umem_release(umem);
+			mutex_unlock(&odp_mr->umem_mutex);
+			ib_umem_release(&odp->umem);
 			return ERR_CAST(mtt);
 		}
 
-		odp = umem->odp_data;
 		odp->private = mtt;
-		mtt->umem = umem;
+		mtt->umem = &odp->umem;
 		mtt->mmkey.iova = addr;
 		mtt->parent = mr;
 		INIT_WORK(&odp->work, mr_leaf_free_action);
@@ -417,7 +426,7 @@ next_mr:
 	addr += MLX5_IMR_MTT_SIZE;
 	if (unlikely(addr < io_virt + bcnt)) {
 		odp = odp_next(odp);
-		if (odp && odp->umem->address != addr)
+		if (odp && odp->umem.address != addr)
 			odp = NULL;
 		goto next_mr;
 	}
@@ -432,7 +441,7 @@ next_mr:
 		}
 	}
 
-	mutex_unlock(&mr->umem->odp_data->umem_mutex);
+	mutex_unlock(&odp_mr->umem_mutex);
 	return result;
 }
 
@@ -460,36 +469,36 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 	return imr;
 }
 
-static int mr_leaf_free(struct ib_umem *umem, u64 start,
-			u64 end, void *cookie)
+static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end,
+			void *cookie)
 {
-	struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie;
+	struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;
+	struct ib_umem *umem = &umem_odp->umem;
 
 	if (mr->parent != imr)
 		return 0;
 
-	ib_umem_odp_unmap_dma_pages(umem,
-				    ib_umem_start(umem),
+	ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
 				    ib_umem_end(umem));
 
-	if (umem->odp_data->dying)
+	if (umem_odp->dying)
 		return 0;
 
-	WRITE_ONCE(umem->odp_data->dying, 1);
+	WRITE_ONCE(umem_odp->dying, 1);
 	atomic_inc(&imr->num_leaf_free);
-	schedule_work(&umem->odp_data->work);
+	schedule_work(&umem_odp->work);
 
 	return 0;
 }
 
 void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
 {
-	struct ib_ucontext *ctx = imr->ibmr.pd->uobject->context;
+	struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr);
 
-	down_read(&ctx->umem_rwsem);
-	rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX,
+	down_read(&per_mm->umem_rwsem);
+	rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0, ULLONG_MAX,
 				      mr_leaf_free, true, imr);
-	up_read(&ctx->umem_rwsem);
+	up_read(&per_mm->umem_rwsem);
 
 	wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
 }
@@ -497,6 +506,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
 static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 			u64 io_virt, size_t bcnt, u32 *bytes_mapped)
 {
+	struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
 	u64 access_mask = ODP_READ_ALLOWED_BIT;
 	int npages = 0, page_shift, np;
 	u64 start_idx, page_mask;
@@ -505,7 +515,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 	size_t size;
 	int ret;
 
-	if (!mr->umem->odp_data->page_list) {
+	if (!odp_mr->page_list) {
 		odp = implicit_mr_get_data(mr, io_virt, bcnt);
 
 		if (IS_ERR(odp))
@@ -513,11 +523,11 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 		mr = odp->private;
 
 	} else {
-		odp = mr->umem->odp_data;
+		odp = odp_mr;
 	}
 
 next_mr:
-	size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
+	size = min_t(size_t, bcnt, ib_umem_end(&odp->umem) - io_virt);
 
 	page_shift = mr->umem->page_shift;
 	page_mask = ~(BIT(page_shift) - 1);
@@ -533,7 +543,7 @@ next_mr:
 	 */
 	smp_rmb();
 
-	ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
+	ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size,
 					access_mask, current_seq);
 
 	if (ret < 0)
@@ -542,7 +552,8 @@ next_mr:
 	np = ret;
 
 	mutex_lock(&odp->umem_mutex);
-	if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+	if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem),
+					current_seq)) {
 		/*
 		 * No need to check whether the MTTs really belong to
 		 * this MR, since ib_umem_odp_map_dma_pages already
@@ -575,7 +586,7 @@ next_mr:
 
 		io_virt += size;
 		next = odp_next(odp);
-		if (unlikely(!next || next->umem->address != io_virt)) {
+		if (unlikely(!next || next->umem.address != io_virt)) {
 			mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
 				    io_virt, next);
 			return -EAGAIN;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index daf1eb84cd31..6841c0f9237f 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -37,6 +37,7 @@
 #include <linux/mlx5/fs.h>
 #include "mlx5_ib.h"
 #include "ib_rep.h"
+#include "cmd.h"
 
 /* not supported currently */
 static int wq_signature;
@@ -850,6 +851,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		goto err_umem;
 	}
 
+	MLX5_SET(create_qp_in, *in, uid, to_mpd(pd)->uid);
 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
 	if (ubuffer->umem)
 		mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
@@ -1051,7 +1053,8 @@ static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
 
 static int is_connected(enum ib_qp_type qp_type)
 {
-	if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC)
+	if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC ||
+	    qp_type == MLX5_IB_QPT_DCI)
 		return 1;
 
 	return 0;
@@ -1059,11 +1062,13 @@ static int is_connected(enum ib_qp_type qp_type)
 
 static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
 				    struct mlx5_ib_qp *qp,
-				    struct mlx5_ib_sq *sq, u32 tdn)
+				    struct mlx5_ib_sq *sq, u32 tdn,
+				    struct ib_pd *pd)
 {
 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
 	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
 
+	MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
 	MLX5_SET(tisc, tisc, transport_domain, tdn);
 	if (qp->flags & MLX5_IB_QP_UNDERLAY)
 		MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
@@ -1072,9 +1077,9 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
 }
 
 static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
-				      struct mlx5_ib_sq *sq)
+				      struct mlx5_ib_sq *sq, struct ib_pd *pd)
 {
-	mlx5_core_destroy_tis(dev->mdev, sq->tisn);
+	mlx5_cmd_destroy_tis(dev->mdev, sq->tisn, to_mpd(pd)->uid);
 }
 
 static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
@@ -1114,6 +1119,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
 		goto err_umem;
 	}
 
+	MLX5_SET(create_sq_in, in, uid, to_mpd(pd)->uid);
 	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
 	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
 	if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe))
@@ -1188,7 +1194,7 @@ static size_t get_rq_pas_size(void *qpc)
 
 static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
 				   struct mlx5_ib_rq *rq, void *qpin,
-				   size_t qpinlen)
+				   size_t qpinlen, struct ib_pd *pd)
 {
 	struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
 	__be64 *pas;
@@ -1209,6 +1215,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
 	if (!in)
 		return -ENOMEM;
 
+	MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
 	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
 	if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING))
 		MLX5_SET(rqc, rqc, vsd, 1);
@@ -1256,10 +1263,23 @@ static bool tunnel_offload_supported(struct mlx5_core_dev *dev)
 		 MLX5_CAP_ETH(dev, tunnel_stateless_geneve_rx));
 }
 
+static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+				      struct mlx5_ib_rq *rq,
+				      u32 qp_flags_en,
+				      struct ib_pd *pd)
+{
+	if (qp_flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+			   MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
+		mlx5_ib_disable_lb(dev, false, true);
+	mlx5_cmd_destroy_tir(dev->mdev, rq->tirn, to_mpd(pd)->uid);
+}
+
 static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
 				    struct mlx5_ib_rq *rq, u32 tdn,
-				    bool tunnel_offload_en)
+				    u32 *qp_flags_en,
+				    struct ib_pd *pd)
 {
+	u8 lb_flag = 0;
 	u32 *in;
 	void *tirc;
 	int inlen;
@@ -1270,33 +1290,45 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
 	if (!in)
 		return -ENOMEM;
 
+	MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
 	MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
 	MLX5_SET(tirc, tirc, transport_domain, tdn);
-	if (tunnel_offload_en)
+	if (*qp_flags_en & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
 		MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
 
-	if (dev->rep)
-		MLX5_SET(tirc, tirc, self_lb_block,
-			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
+	if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC)
+		lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+
+	if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
+		lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
+
+	if (dev->rep) {
+		lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+		*qp_flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
+	}
+
+	MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
 
 	err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
 
+	if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
+		err = mlx5_ib_enable_lb(dev, false, true);
+
+		if (err)
+			destroy_raw_packet_qp_tir(dev, rq, 0, pd);
+	}
 	kvfree(in);
 
 	return err;
 }
 
-static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
-				      struct mlx5_ib_rq *rq)
-{
-	mlx5_core_destroy_tir(dev->mdev, rq->tirn);
-}
-
 static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 				u32 *in, size_t inlen,
-				struct ib_pd *pd)
+				struct ib_pd *pd,
+				struct ib_udata *udata,
+				struct mlx5_ib_create_qp_resp *resp)
 {
 	struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
 	struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
@@ -1306,9 +1338,10 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
 	int err;
 	u32 tdn = mucontext->tdn;
+	u16 uid = to_mpd(pd)->uid;
 
 	if (qp->sq.wqe_cnt) {
-		err = create_raw_packet_qp_tis(dev, qp, sq, tdn);
+		err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd);
 		if (err)
 			return err;
 
@@ -1316,6 +1349,13 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 		if (err)
 			goto err_destroy_tis;
 
+		if (uid) {
+			resp->tisn = sq->tisn;
+			resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TISN;
+			resp->sqn = sq->base.mqp.qpn;
+			resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_SQN;
+		}
+
 		sq->base.container_mibqp = qp;
 		sq->base.mqp.event = mlx5_ib_qp_event;
 	}
@@ -1327,22 +1367,32 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 			rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
 		if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING)
 			rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
-		err = create_raw_packet_qp_rq(dev, rq, in, inlen);
+		err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd);
 		if (err)
 			goto err_destroy_sq;
 
-
-		err = create_raw_packet_qp_tir(dev, rq, tdn,
-					       qp->tunnel_offload_en);
+		err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en, pd);
 		if (err)
 			goto err_destroy_rq;
+
+		if (uid) {
+			resp->rqn = rq->base.mqp.qpn;
+			resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN;
+			resp->tirn = rq->tirn;
+			resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
+		}
 	}
 
 	qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
 						     rq->base.mqp.qpn;
+	err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp)));
+	if (err)
+		goto err_destroy_tir;
 
 	return 0;
 
+err_destroy_tir:
+	destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, pd);
 err_destroy_rq:
 	destroy_raw_packet_qp_rq(dev, rq);
 err_destroy_sq:
@@ -1350,7 +1400,7 @@ err_destroy_sq:
 		return err;
 	destroy_raw_packet_qp_sq(dev, sq);
 err_destroy_tis:
-	destroy_raw_packet_qp_tis(dev, sq);
+	destroy_raw_packet_qp_tis(dev, sq, pd);
 
 	return err;
 }
@@ -1363,13 +1413,13 @@ static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
 	struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
 
 	if (qp->rq.wqe_cnt) {
-		destroy_raw_packet_qp_tir(dev, rq);
+		destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, qp->ibqp.pd);
 		destroy_raw_packet_qp_rq(dev, rq);
 	}
 
 	if (qp->sq.wqe_cnt) {
 		destroy_raw_packet_qp_sq(dev, sq);
-		destroy_raw_packet_qp_tis(dev, sq);
+		destroy_raw_packet_qp_tis(dev, sq, qp->ibqp.pd);
 	}
 }
 
@@ -1387,7 +1437,11 @@ static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
 
 static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
 {
-	mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn);
+	if (qp->flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+			    MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
+		mlx5_ib_disable_lb(dev, false, true);
+	mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
+			     to_mpd(qp->ibqp.pd)->uid);
 }
 
 static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
@@ -1410,6 +1464,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	u32 tdn = mucontext->tdn;
 	struct mlx5_ib_create_qp_rss ucmd = {};
 	size_t required_cmd_sz;
+	u8 lb_flag = 0;
 
 	if (init_attr->qp_type != IB_QPT_RAW_PACKET)
 		return -EOPNOTSUPP;
@@ -1444,7 +1499,9 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 		return -EOPNOTSUPP;
 	}
 
-	if (ucmd.flags & ~MLX5_QP_FLAG_TUNNEL_OFFLOADS) {
+	if (ucmd.flags & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS |
+			   MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+			   MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) {
 		mlx5_ib_dbg(dev, "invalid flags\n");
 		return -EOPNOTSUPP;
 	}
@@ -1461,6 +1518,16 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 		return -EOPNOTSUPP;
 	}
 
+	if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->rep) {
+		lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+		qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
+	}
+
+	if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) {
+		lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
+		qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
+	}
+
 	err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
 	if (err) {
 		mlx5_ib_dbg(dev, "copy failed\n");
@@ -1472,6 +1539,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if (!in)
 		return -ENOMEM;
 
+	MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
 	MLX5_SET(tirc, tirc, disp_type,
 		 MLX5_TIRC_DISP_TYPE_INDIRECT);
@@ -1484,6 +1552,8 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
 		MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
 
+	MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
+
 	if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER)
 		hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
 	else
@@ -1580,26 +1650,141 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
 
 create_tir:
-	if (dev->rep)
-		MLX5_SET(tirc, tirc, self_lb_block,
-			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
-
 	err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
 
+	if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
+		err = mlx5_ib_enable_lb(dev, false, true);
+
+		if (err)
+			mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
+					     to_mpd(pd)->uid);
+	}
+
 	if (err)
 		goto err;
 
+	if (mucontext->devx_uid) {
+		resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
+		resp.tirn = qp->rss_qp.tirn;
+	}
+
+	err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
+	if (err)
+		goto err_copy;
+
 	kvfree(in);
 	/* qpn is reserved for that QP */
 	qp->trans_qp.base.mqp.qpn = 0;
 	qp->flags |= MLX5_IB_QP_RSS;
 	return 0;
 
+err_copy:
+	mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn, mucontext->devx_uid);
 err:
 	kvfree(in);
 	return err;
 }
 
+static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr,
+					 void *qpc)
+{
+	int rcqe_sz;
+
+	if (init_attr->qp_type == MLX5_IB_QPT_DCI)
+		return;
+
+	rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);
+
+	if (rcqe_sz == 128) {
+		MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
+		return;
+	}
+
+	if (init_attr->qp_type != MLX5_IB_QPT_DCT)
+		MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
+}
+
+static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
+					 struct ib_qp_init_attr *init_attr,
+					 struct mlx5_ib_create_qp *ucmd,
+					 void *qpc)
+{
+	enum ib_qp_type qpt = init_attr->qp_type;
+	int scqe_sz;
+	bool allow_scat_cqe = 0;
+
+	if (qpt == IB_QPT_UC || qpt == IB_QPT_UD)
+		return;
+
+	if (ucmd)
+		allow_scat_cqe = ucmd->flags & MLX5_QP_FLAG_ALLOW_SCATTER_CQE;
+
+	if (!allow_scat_cqe && init_attr->sq_sig_type != IB_SIGNAL_ALL_WR)
+		return;
+
+	scqe_sz = mlx5_ib_get_cqe_size(init_attr->send_cq);
+	if (scqe_sz == 128) {
+		MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
+		return;
+	}
+
+	if (init_attr->qp_type != MLX5_IB_QPT_DCI ||
+	    MLX5_CAP_GEN(dev->mdev, dc_req_scat_data_cqe))
+		MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
+}
+
+static int atomic_size_to_mode(int size_mask)
+{
+	/* driver does not support atomic_size > 256B
+	 * and does not know how to translate bigger sizes
+	 */
+	int supported_size_mask = size_mask & 0x1ff;
+	int log_max_size;
+
+	if (!supported_size_mask)
+		return -EOPNOTSUPP;
+
+	log_max_size = __fls(supported_size_mask);
+
+	if (log_max_size > 3)
+		return log_max_size;
+
+	return MLX5_ATOMIC_MODE_8B;
+}
+
+static int get_atomic_mode(struct mlx5_ib_dev *dev,
+			   enum ib_qp_type qp_type)
+{
+	u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
+	u8 atomic = MLX5_CAP_GEN(dev->mdev, atomic);
+	int atomic_mode = -EOPNOTSUPP;
+	int atomic_size_mask;
+
+	if (!atomic)
+		return -EOPNOTSUPP;
+
+	if (qp_type == MLX5_IB_QPT_DCT)
+		atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
+	else
+		atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
+
+	if ((atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_CMP_SWAP) ||
+	    (atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_FETCH_ADD))
+		atomic_mode = atomic_size_to_mode(atomic_size_mask);
+
+	if (atomic_mode <= 0 &&
+	    (atomic_operations & MLX5_ATOMIC_OPS_CMP_SWAP &&
+	     atomic_operations & MLX5_ATOMIC_OPS_FETCH_ADD))
+		atomic_mode = MLX5_ATOMIC_MODE_IB_COMP;
+
+	return atomic_mode;
+}
+
+static inline bool check_flags_mask(uint64_t input, uint64_t supported)
+{
+	return (input & ~supported) == 0;
+}
+
 static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 			    struct ib_qp_init_attr *init_attr,
 			    struct ib_udata *udata, struct mlx5_ib_qp *qp)
@@ -1697,20 +1882,47 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 			return -EFAULT;
 		}
 
+		if (!check_flags_mask(ucmd.flags,
+				      MLX5_QP_FLAG_SIGNATURE |
+					      MLX5_QP_FLAG_SCATTER_CQE |
+					      MLX5_QP_FLAG_TUNNEL_OFFLOADS |
+					      MLX5_QP_FLAG_BFREG_INDEX |
+					      MLX5_QP_FLAG_TYPE_DCT |
+					      MLX5_QP_FLAG_TYPE_DCI |
+					      MLX5_QP_FLAG_ALLOW_SCATTER_CQE))
+			return -EINVAL;
+
 		err = get_qp_user_index(to_mucontext(pd->uobject->context),
 					&ucmd, udata->inlen, &uidx);
 		if (err)
 			return err;
 
 		qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
-		qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
+		if (MLX5_CAP_GEN(dev->mdev, sctr_data_cqe))
+			qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
 		if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) {
 			if (init_attr->qp_type != IB_QPT_RAW_PACKET ||
 			    !tunnel_offload_supported(mdev)) {
 				mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n");
 				return -EOPNOTSUPP;
 			}
-			qp->tunnel_offload_en = true;
+			qp->flags_en |= MLX5_QP_FLAG_TUNNEL_OFFLOADS;
+		}
+
+		if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) {
+			if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+				mlx5_ib_dbg(dev, "Self-LB UC isn't supported\n");
+				return -EOPNOTSUPP;
+			}
+			qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
+		}
+
+		if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) {
+			if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+				mlx5_ib_dbg(dev, "Self-LB UM isn't supported\n");
+				return -EOPNOTSUPP;
+			}
+			qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
 		}
 
 		if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
@@ -1811,23 +2023,10 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		MLX5_SET(qpc, qpc, cd_slave_receive, 1);
 
 	if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
-		int rcqe_sz;
-		int scqe_sz;
-
-		rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq);
-		scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
-
-		if (rcqe_sz == 128)
-			MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
-		else
-			MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
-
-		if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) {
-			if (scqe_sz == 128)
-				MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
-			else
-				MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
-		}
+		configure_responder_scat_cqe(init_attr, qpc);
+		configure_requester_scat_cqe(dev, init_attr,
+					     (pd && pd->uobject) ? &ucmd : NULL,
+					     qpc);
 	}
 
 	if (qp->rq.wqe_cnt) {
@@ -1911,7 +2110,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	    qp->flags & MLX5_IB_QP_UNDERLAY) {
 		qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
 		raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
-		err = create_raw_packet_qp(dev, qp, in, inlen, pd);
+		err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata,
+					   &resp);
 	} else {
 		err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
 	}
@@ -2192,6 +2392,7 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
 		goto err_free;
 	}
 
+	MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid);
 	dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
 	qp->qp_sub_type = MLX5_IB_QPT_DCT;
 	MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn);
@@ -2200,6 +2401,9 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
 	MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key);
 	MLX5_SET(dctc, dctc, user_index, uidx);
 
+	if (ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE)
+		configure_responder_scat_cqe(attr, dctc);
+
 	qp->state = IB_QPS_RESET;
 
 	return &qp->ibqp;
@@ -2405,13 +2609,15 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp)
 	return 0;
 }
 
-static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr,
-				   int attr_mask)
+static int to_mlx5_access_flags(struct mlx5_ib_qp *qp,
+				const struct ib_qp_attr *attr,
+				int attr_mask, __be32 *hw_access_flags)
 {
-	u32 hw_access_flags = 0;
 	u8 dest_rd_atomic;
 	u32 access_flags;
 
+	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
+
 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
 		dest_rd_atomic = attr->max_dest_rd_atomic;
 	else
@@ -2426,13 +2632,25 @@ static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_att
 		access_flags &= IB_ACCESS_REMOTE_WRITE;
 
 	if (access_flags & IB_ACCESS_REMOTE_READ)
-		hw_access_flags |= MLX5_QP_BIT_RRE;
-	if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
-		hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX);
+		*hw_access_flags |= MLX5_QP_BIT_RRE;
+	if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	    qp->ibqp.qp_type == IB_QPT_RC) {
+		int atomic_mode;
+
+		atomic_mode = get_atomic_mode(dev, qp->ibqp.qp_type);
+		if (atomic_mode < 0)
+			return -EOPNOTSUPP;
+
+		*hw_access_flags |= MLX5_QP_BIT_RAE;
+		*hw_access_flags |= atomic_mode << MLX5_ATOMIC_MODE_OFFSET;
+	}
+
 	if (access_flags & IB_ACCESS_REMOTE_WRITE)
-		hw_access_flags |= MLX5_QP_BIT_RWE;
+		*hw_access_flags |= MLX5_QP_BIT_RWE;
+
+	*hw_access_flags = cpu_to_be32(*hw_access_flags);
 
-	return cpu_to_be32(hw_access_flags);
+	return 0;
 }
 
 enum {
@@ -2458,7 +2676,8 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
 }
 
 static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
-				      struct mlx5_ib_sq *sq, u8 sl)
+				      struct mlx5_ib_sq *sq, u8 sl,
+				      struct ib_pd *pd)
 {
 	void *in;
 	void *tisc;
@@ -2471,6 +2690,7 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
 		return -ENOMEM;
 
 	MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
+	MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
 
 	tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
 	MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
@@ -2483,7 +2703,8 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
 }
 
 static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
-					 struct mlx5_ib_sq *sq, u8 tx_affinity)
+					 struct mlx5_ib_sq *sq, u8 tx_affinity,
+					 struct ib_pd *pd)
 {
 	void *in;
 	void *tisc;
@@ -2496,6 +2717,7 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
 		return -ENOMEM;
 
 	MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1);
+	MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
 
 	tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
 	MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity);
@@ -2580,7 +2802,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
 		return modify_raw_packet_eth_prio(dev->mdev,
 						  &qp->raw_packet_qp.sq,
-						  sl & 0xf);
+						  sl & 0xf, qp->ibqp.pd);
 
 	return 0;
 }
@@ -2728,9 +2950,9 @@ static int ib_mask_to_mlx5_opt(int ib_mask)
 	return result;
 }
 
-static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
-				   struct mlx5_ib_rq *rq, int new_state,
-				   const struct mlx5_modify_raw_qp_param *raw_qp_param)
+static int modify_raw_packet_qp_rq(
+	struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, int new_state,
+	const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
 {
 	void *in;
 	void *rqc;
@@ -2743,6 +2965,7 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
 		return -ENOMEM;
 
 	MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+	MLX5_SET(modify_rq_in, in, uid, to_mpd(pd)->uid);
 
 	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
 	MLX5_SET(rqc, rqc, state, new_state);
@@ -2753,8 +2976,9 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
 				   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
 			MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
 		} else
-			pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n",
-				     dev->ib_dev.name);
+			dev_info_once(
+				&dev->ib_dev.dev,
+				"RAW PACKET QP counters are not supported on current FW\n");
 	}
 
 	err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in, inlen);
@@ -2768,10 +2992,9 @@ out:
 	return err;
 }
 
-static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
-				   struct mlx5_ib_sq *sq,
-				   int new_state,
-				   const struct mlx5_modify_raw_qp_param *raw_qp_param)
+static int modify_raw_packet_qp_sq(
+	struct mlx5_core_dev *dev, struct mlx5_ib_sq *sq, int new_state,
+	const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
 {
 	struct mlx5_ib_qp *ibqp = sq->base.container_mibqp;
 	struct mlx5_rate_limit old_rl = ibqp->rl;
@@ -2788,6 +3011,7 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
 	if (!in)
 		return -ENOMEM;
 
+	MLX5_SET(modify_sq_in, in, uid, to_mpd(pd)->uid);
 	MLX5_SET(modify_sq_in, in, sq_state, sq->state);
 
 	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
@@ -2890,7 +3114,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	}
 
 	if (modify_rq) {
-		err =  modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param);
+		err =  modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param,
+					       qp->ibqp.pd);
 		if (err)
 			return err;
 	}
@@ -2898,17 +3123,50 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if (modify_sq) {
 		if (tx_affinity) {
 			err = modify_raw_packet_tx_affinity(dev->mdev, sq,
-							    tx_affinity);
+							    tx_affinity,
+							    qp->ibqp.pd);
 			if (err)
 				return err;
 		}
 
-		return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, raw_qp_param);
+		return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state,
+					       raw_qp_param, qp->ibqp.pd);
 	}
 
 	return 0;
 }
 
+static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev,
+				    struct mlx5_ib_pd *pd,
+				    struct mlx5_ib_qp_base *qp_base,
+				    u8 port_num)
+{
+	struct mlx5_ib_ucontext *ucontext = NULL;
+	unsigned int tx_port_affinity;
+
+	if (pd && pd->ibpd.uobject && pd->ibpd.uobject->context)
+		ucontext = to_mucontext(pd->ibpd.uobject->context);
+
+	if (ucontext) {
+		tx_port_affinity = (unsigned int)atomic_add_return(
+					   1, &ucontext->tx_port_affinity) %
+					   MLX5_MAX_PORTS +
+				   1;
+		mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n",
+				tx_port_affinity, qp_base->mqp.qpn, ucontext);
+	} else {
+		tx_port_affinity =
+			(unsigned int)atomic_add_return(
+				1, &dev->roce[port_num].tx_port_affinity) %
+				MLX5_MAX_PORTS +
+			1;
+		mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n",
+				tx_port_affinity, qp_base->mqp.qpn);
+	}
+
+	return tx_port_affinity;
+}
+
 static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 			       const struct ib_qp_attr *attr, int attr_mask,
 			       enum ib_qp_state cur_state, enum ib_qp_state new_state,
@@ -2974,6 +3232,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 	if (!context)
 		return -ENOMEM;
 
+	pd = get_pd(qp);
 	context->flags = cpu_to_be32(mlx5_st << 16);
 
 	if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
@@ -3002,9 +3261,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 		    (ibqp->qp_type == IB_QPT_XRC_TGT)) {
 			if (mlx5_lag_is_active(dev->mdev)) {
 				u8 p = mlx5_core_native_port_num(dev->mdev);
-				tx_affinity = (unsigned int)atomic_add_return(1,
-						&dev->roce[p].next_port) %
-						MLX5_MAX_PORTS + 1;
+				tx_affinity = get_tx_affinity(dev, pd, base, p);
 				context->flags |= cpu_to_be32(tx_affinity << 24);
 			}
 		}
@@ -3062,7 +3319,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 			goto out;
 	}
 
-	pd = get_pd(qp);
 	get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
 		&send_cq, &recv_cq);
 
@@ -3092,8 +3348,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 				cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
 	}
 
-	if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
-		context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask);
+	if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
+		__be32 access_flags = 0;
+
+		err = to_mlx5_access_flags(qp, attr, attr_mask, &access_flags);
+		if (err)
+			goto out;
+
+		context->params2 |= access_flags;
+	}
 
 	if (attr_mask & IB_QP_MIN_RNR_TIMER)
 		context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
@@ -3243,7 +3506,9 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new
 	int req = IB_QP_STATE;
 	int opt = 0;
 
-	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+	if (new_state == IB_QPS_RESET) {
+		return is_valid_mask(attr_mask, req, opt);
+	} else if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
 		req |= IB_QP_PKEY_INDEX | IB_QP_PORT;
 		return is_valid_mask(attr_mask, req, opt);
 	} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
@@ -3307,10 +3572,14 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
 			MLX5_SET(dctc, dctc, rwe, 1);
 		if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
-			if (!mlx5_ib_dc_atomic_is_supported(dev))
+			int atomic_mode;
+
+			atomic_mode = get_atomic_mode(dev, MLX5_IB_QPT_DCT);
+			if (atomic_mode < 0)
 				return -EOPNOTSUPP;
+
+			MLX5_SET(dctc, dctc, atomic_mode, atomic_mode);
 			MLX5_SET(dctc, dctc, rae, 1);
-			MLX5_SET(dctc, dctc, atomic_mode, MLX5_ATOMIC_MODE_DCT_CX);
 		}
 		MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
 		MLX5_SET(dctc, dctc, port, attr->port_num);
@@ -3367,7 +3636,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	size_t required_cmd_sz;
 	int err = -EINVAL;
 	int port;
-	enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
 
 	if (ibqp->rwq_ind_tbl)
 		return -ENOSYS;
@@ -3413,7 +3681,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
 	if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
 		port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
-		ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
 	}
 
 	if (qp->flags & MLX5_IB_QP_UNDERLAY) {
@@ -3424,7 +3691,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		}
 	} else if (qp_type != MLX5_IB_QPT_REG_UMR &&
 		   qp_type != MLX5_IB_QPT_DCI &&
-		   !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) {
+		   !ib_modify_qp_is_ok(cur_state, new_state, qp_type,
+				       attr_mask)) {
 		mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
 			    cur_state, new_state, ibqp->qp_type, attr_mask);
 		goto out;
@@ -4371,6 +4639,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 	u8 next_fence = 0;
 	u8 fence;
 
+	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
+		     !drain)) {
+		*bad_wr = wr;
+		return -EIO;
+	}
+
 	if (unlikely(ibqp->qp_type == IB_QPT_GSI))
 		return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
 
@@ -4380,13 +4654,6 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
 
-	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
-		err = -EIO;
-		*bad_wr = wr;
-		nreq = 0;
-		goto out;
-	}
-
 	for (nreq = 0; wr; nreq++, wr = wr->next) {
 		if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
 			mlx5_ib_warn(dev, "\n");
@@ -4700,18 +4967,17 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 	int ind;
 	int i;
 
+	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
+		     !drain)) {
+		*bad_wr = wr;
+		return -EIO;
+	}
+
 	if (unlikely(ibqp->qp_type == IB_QPT_GSI))
 		return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
 
 	spin_lock_irqsave(&qp->rq.lock, flags);
 
-	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
-		err = -EIO;
-		*bad_wr = wr;
-		nreq = 0;
-		goto out;
-	}
-
 	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
 
 	for (nreq = 0; wr; nreq++, wr = wr->next) {
@@ -5175,6 +5441,7 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	struct mlx5_ib_xrcd *xrcd;
 	int err;
+	u16 uid;
 
 	if (!MLX5_CAP_GEN(dev->mdev, xrc))
 		return ERR_PTR(-ENOSYS);
@@ -5183,12 +5450,14 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
 	if (!xrcd)
 		return ERR_PTR(-ENOMEM);
 
-	err = mlx5_core_xrcd_alloc(dev->mdev, &xrcd->xrcdn);
+	uid = context ? to_mucontext(context)->devx_uid : 0;
+	err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid);
 	if (err) {
 		kfree(xrcd);
 		return ERR_PTR(-ENOMEM);
 	}
 
+	xrcd->uid = uid;
 	return &xrcd->ibxrcd;
 }
 
@@ -5196,9 +5465,10 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
 {
 	struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
 	u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
+	u16 uid =  to_mxrcd(xrcd)->uid;
 	int err;
 
-	err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn);
+	err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid);
 	if (err)
 		mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
 
@@ -5268,6 +5538,7 @@ static int  create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
 	if (!in)
 		return -ENOMEM;
 
+	MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
 	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
 	MLX5_SET(rqc,  rqc, mem_rq_type,
 		 MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
@@ -5443,8 +5714,7 @@ static int prepare_user_rq(struct ib_pd *pd,
 	err = create_user_rq(dev, pd, rwq, &ucmd);
 	if (err) {
 		mlx5_ib_dbg(dev, "err %d\n", err);
-		if (err)
-			return err;
+		return err;
 	}
 
 	rwq->user_index = ucmd.user_index;
@@ -5573,6 +5843,9 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
 	for (i = 0; i < sz; i++)
 		MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num);
 
+	rwq_ind_tbl->uid = to_mpd(init_attr->ind_tbl[0]->pd)->uid;
+	MLX5_SET(create_rqt_in, in, uid, rwq_ind_tbl->uid);
+
 	err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
 	kvfree(in);
 
@@ -5591,7 +5864,7 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
 	return &rwq_ind_tbl->ib_rwq_ind_tbl;
 
 err_copy:
-	mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
+	mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
 err:
 	kfree(rwq_ind_tbl);
 	return ERR_PTR(err);
@@ -5602,7 +5875,7 @@ int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
 	struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
 	struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
 
-	mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
+	mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
 
 	kfree(rwq_ind_tbl);
 	return 0;
@@ -5653,6 +5926,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
 	if (wq_state == IB_WQS_ERR)
 		wq_state = MLX5_RQC_STATE_ERR;
 	MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
+	MLX5_SET(modify_rq_in, in, uid, to_mpd(wq->pd)->uid);
 	MLX5_SET(rqc, rqc, state, wq_state);
 
 	if (wq_attr_mask & IB_WQ_FLAGS) {
@@ -5684,8 +5958,9 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
 			MLX5_SET(rqc, rqc, counter_set_id,
 				 dev->port->cnts.set_id);
 		} else
-			pr_info_once("%s: Receive WQ counters are not supported on current FW\n",
-				     dev->ib_dev.name);
+			dev_info_once(
+				&dev->ib_dev.dev,
+				"Receive WQ counters are not supported on current FW\n");
 	}
 
 	err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index d359fecf7a5b..d012e7dbcc38 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -144,6 +144,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 
 	in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	in->page_offset = offset;
+	in->uid = to_mpd(pd)->uid;
 	if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
 	    in->type != IB_SRQT_BASIC)
 		in->user_index = uidx;
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 093f7755c843..2e5dc0a67cfc 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -58,8 +58,9 @@ static int mthca_update_rate(struct mthca_dev *dev, u8 port_num)
 
 	ret = ib_query_port(&dev->ib_dev, port_num, tprops);
 	if (ret) {
-		printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n",
-		       ret, dev->ib_dev.name, port_num);
+		dev_warn(&dev->ib_dev.dev,
+			 "ib_query_port failed (%d) forport %d\n", ret,
+			 port_num);
 		goto out;
 	}
 
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index f3e80dec1334..92c49bff22bc 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -986,7 +986,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
 		goto err_free_dev;
 	}
 
-	if (mthca_cmd_init(mdev)) {
+	err = mthca_cmd_init(mdev);
+	if (err) {
 		mthca_err(mdev, "Failed to init command interface, aborting.\n");
 		goto err_free_dev;
 	}
@@ -1014,8 +1015,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
 
 	err = mthca_setup_hca(mdev);
 	if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) {
-		if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
-			pci_free_irq_vectors(pdev);
+		pci_free_irq_vectors(pdev);
 		mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X;
 
 		err = mthca_setup_hca(mdev);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 0d3473b4596e..691c6f048938 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1076,16 +1076,17 @@ static int mthca_unmap_fmr(struct list_head *fmr_list)
 	return err;
 }
 
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
-			char *buf)
+static ssize_t hw_rev_show(struct device *device,
+			   struct device_attribute *attr, char *buf)
 {
 	struct mthca_dev *dev =
 		container_of(device, struct mthca_dev, ib_dev.dev);
 	return sprintf(buf, "%x\n", dev->rev_id);
 }
+static DEVICE_ATTR_RO(hw_rev);
 
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
-			char *buf)
+static ssize_t hca_type_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
 {
 	struct mthca_dev *dev =
 		container_of(device, struct mthca_dev, ib_dev.dev);
@@ -1103,23 +1104,26 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
 		return sprintf(buf, "unknown\n");
 	}
 }
+static DEVICE_ATTR_RO(hca_type);
 
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
-			  char *buf)
+static ssize_t board_id_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
 {
 	struct mthca_dev *dev =
 		container_of(device, struct mthca_dev, ib_dev.dev);
 	return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
 }
+static DEVICE_ATTR_RO(board_id);
 
-static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
+static struct attribute *mthca_dev_attributes[] = {
+	&dev_attr_hw_rev.attr,
+	&dev_attr_hca_type.attr,
+	&dev_attr_board_id.attr,
+	NULL
+};
 
-static struct device_attribute *mthca_dev_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type,
-	&dev_attr_board_id
+static const struct attribute_group mthca_attr_group = {
+	.attrs = mthca_dev_attributes,
 };
 
 static int mthca_init_node_data(struct mthca_dev *dev)
@@ -1192,13 +1196,11 @@ static void get_dev_fw_str(struct ib_device *device, char *str)
 int mthca_register_device(struct mthca_dev *dev)
 {
 	int ret;
-	int i;
 
 	ret = mthca_init_node_data(dev);
 	if (ret)
 		return ret;
 
-	strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
 	dev->ib_dev.owner                = THIS_MODULE;
 
 	dev->ib_dev.uverbs_abi_ver	 = MTHCA_UVERBS_ABI_VERSION;
@@ -1296,20 +1298,12 @@ int mthca_register_device(struct mthca_dev *dev)
 
 	mutex_init(&dev->cap_mask_mutex);
 
+	rdma_set_device_sysfs_group(&dev->ib_dev, &mthca_attr_group);
 	dev->ib_dev.driver_id = RDMA_DRIVER_MTHCA;
-	ret = ib_register_device(&dev->ib_dev, NULL);
+	ret = ib_register_device(&dev->ib_dev, "mthca%d", NULL);
 	if (ret)
 		return ret;
 
-	for (i = 0; i < ARRAY_SIZE(mthca_dev_attributes); ++i) {
-		ret = device_create_file(&dev->ib_dev.dev,
-					 mthca_dev_attributes[i]);
-		if (ret) {
-			ib_unregister_device(&dev->ib_dev);
-			return ret;
-		}
-	}
-
 	mthca_start_catas_poll(dev);
 
 	return 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 3d37f2373d63..9d178ee3c96a 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -872,8 +872,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
 
 	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
 
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
-				IB_LINK_LAYER_UNSPECIFIED)) {
+	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+				attr_mask)) {
 		mthca_dbg(dev, "Bad QP transition (transport %d) "
 			  "%d->%d with attr 0x%08x\n",
 			  qp->transport, cur_state, new_state,
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 42b68aa999fc..e00add6d78ec 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -456,9 +456,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
 	void __iomem *mmio_regs = NULL;
 	u8 hw_rev;
 
-	assert(pcidev != NULL);
-	assert(ent != NULL);
-
 	printk(KERN_INFO PFX "NetEffect RNIC driver v%s loading. (%s)\n",
 			DRV_VERSION, pci_name(pcidev));
 
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index bedaa02749fb..a895fe980d10 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -149,18 +149,9 @@ do { \
 		printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \
 } while (0)
 
-#define assert(expr) \
-do { \
-	if (!(expr)) { \
-		printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \
-			   #expr, __FILE__, __func__, __LINE__); \
-	} \
-} while (0)
-
 #define NES_EVENT_TIMEOUT   1200000
 #else
 #define nes_debug(level, fmt, args...) no_printk(fmt, ##args)
-#define assert(expr)          do {} while (0)
 
 #define NES_EVENT_TIMEOUT   100000
 #endif
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index bd0675d8f298..5517e392bc01 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -1443,7 +1443,7 @@ static int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_inde
 		mdelay(1);
 		nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
 		temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-	} while ((temp_phy_data2 == temp_phy_data));
+	} while (temp_phy_data2 == temp_phy_data);
 
 	/* wait for tracking */
 	counter = 0;
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 61014e251555..16f33454c198 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -146,8 +146,6 @@ static int nes_netdev_open(struct net_device *netdev)
 	struct list_head *list_pos, *list_temp;
 	unsigned long flags;
 
-	assert(nesdev != NULL);
-
 	if (nesvnic->netdev_open == 1)
 		return 0;
 
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 6940c7215961..92d1cadd4cfd 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -687,7 +687,7 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
 	}
 
 	nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n",
-			nespd, nesvnic->nesibdev->ibdev.name);
+			nespd, dev_name(&nesvnic->nesibdev->ibdev.dev));
 
 	nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + nesadapter->base_pd;
 
@@ -2556,8 +2556,8 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
 /**
  * show_rev
  */
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
-			char *buf)
+static ssize_t hw_rev_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
 {
 	struct nes_ib_device *nesibdev =
 			container_of(dev, struct nes_ib_device, ibdev.dev);
@@ -2566,40 +2566,40 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
 	nes_debug(NES_DBG_INIT, "\n");
 	return sprintf(buf, "%x\n", nesvnic->nesdev->nesadapter->hw_rev);
 }
-
+static DEVICE_ATTR_RO(hw_rev);
 
 /**
  * show_hca
  */
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
-		        char *buf)
+static ssize_t hca_type_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
 {
 	nes_debug(NES_DBG_INIT, "\n");
 	return sprintf(buf, "NES020\n");
 }
-
+static DEVICE_ATTR_RO(hca_type);
 
 /**
  * show_board
  */
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
-			  char *buf)
+static ssize_t board_id_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
 {
 	nes_debug(NES_DBG_INIT, "\n");
 	return sprintf(buf, "%.*s\n", 32, "NES020 Board ID");
 }
+static DEVICE_ATTR_RO(board_id);
 
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-
-static struct device_attribute *nes_dev_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type,
-	&dev_attr_board_id
+static struct attribute *nes_dev_attributes[] = {
+	&dev_attr_hw_rev.attr,
+	&dev_attr_hca_type.attr,
+	&dev_attr_board_id.attr,
+	NULL
 };
 
+static const struct attribute_group nes_attr_group = {
+	.attrs = nes_dev_attributes,
+};
 
 /**
  * nes_query_qp
@@ -3640,7 +3640,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
 	if (nesibdev == NULL) {
 		return NULL;
 	}
-	strlcpy(nesibdev->ibdev.name, "nes%d", IB_DEVICE_NAME_MAX);
 	nesibdev->ibdev.owner = THIS_MODULE;
 
 	nesibdev->ibdev.node_type = RDMA_NODE_RNIC;
@@ -3795,10 +3794,11 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
 	struct nes_vnic *nesvnic = nesibdev->nesvnic;
 	struct nes_device *nesdev = nesvnic->nesdev;
 	struct nes_adapter *nesadapter = nesdev->nesadapter;
-	int i, ret;
+	int ret;
 
+	rdma_set_device_sysfs_group(&nesvnic->nesibdev->ibdev, &nes_attr_group);
 	nesvnic->nesibdev->ibdev.driver_id = RDMA_DRIVER_NES;
-	ret = ib_register_device(&nesvnic->nesibdev->ibdev, NULL);
+	ret = ib_register_device(&nesvnic->nesibdev->ibdev, "nes%d", NULL);
 	if (ret) {
 		return ret;
 	}
@@ -3809,19 +3809,6 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
 	nesibdev->max_qp = (nesadapter->max_qp-NES_FIRST_QPN) / nesadapter->port_count;
 	nesibdev->max_pd = nesadapter->max_pd / nesadapter->port_count;
 
-	for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) {
-		ret = device_create_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]);
-		if (ret) {
-			while (i > 0) {
-				i--;
-				device_remove_file(&nesibdev->ibdev.dev,
-						   nes_dev_attributes[i]);
-			}
-			ib_unregister_device(&nesibdev->ibdev);
-			return ret;
-		}
-	}
-
 	nesvnic->of_device_registered = 1;
 
 	return 0;
@@ -3834,15 +3821,9 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
 static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev)
 {
 	struct nes_vnic *nesvnic = nesibdev->nesvnic;
-	int i;
 
-	for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) {
-		device_remove_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]);
-	}
-
-	if (nesvnic->of_device_registered) {
+	if (nesvnic->of_device_registered)
 		ib_unregister_device(&nesibdev->ibdev);
-	}
 
 	nesvnic->of_device_registered = 0;
 }
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index e578281471af..241a57a07485 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -792,7 +792,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
 						     qp->srq->ibsrq.
 						     srq_context);
 	} else if (dev_event) {
-		pr_err("%s: Fatal event received\n", dev->ibdev.name);
+		dev_err(&dev->ibdev.dev, "Fatal event received\n");
 		ib_dispatch_event(&ib_evt);
 	}
 
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 7832ee3e0c84..873cc7f6fe61 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -114,9 +114,37 @@ static void get_dev_fw_str(struct ib_device *device, char *str)
 	snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", &dev->attr.fw_ver[0]);
 }
 
+/* OCRDMA sysfs interface */
+static ssize_t hw_rev_show(struct device *device,
+			   struct device_attribute *attr, char *buf)
+{
+	struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+	return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
+}
+static DEVICE_ATTR_RO(hw_rev);
+
+static ssize_t hca_type_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
+{
+	struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
+}
+static DEVICE_ATTR_RO(hca_type);
+
+static struct attribute *ocrdma_attributes[] = {
+	&dev_attr_hw_rev.attr,
+	&dev_attr_hca_type.attr,
+	NULL
+};
+
+static const struct attribute_group ocrdma_attr_group = {
+	.attrs = ocrdma_attributes,
+};
+
 static int ocrdma_register_device(struct ocrdma_dev *dev)
 {
-	strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX);
 	ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid);
 	BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
 	memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC,
@@ -213,8 +241,9 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
 		dev->ibdev.destroy_srq = ocrdma_destroy_srq;
 		dev->ibdev.post_srq_recv = ocrdma_post_srq_recv;
 	}
+	rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group);
 	dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA;
-	return ib_register_device(&dev->ibdev, NULL);
+	return ib_register_device(&dev->ibdev, "ocrdma%d", NULL);
 }
 
 static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
@@ -260,42 +289,9 @@ static void ocrdma_free_resources(struct ocrdma_dev *dev)
 	kfree(dev->cq_tbl);
 }
 
-/* OCRDMA sysfs interface */
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
-			char *buf)
-{
-	struct ocrdma_dev *dev = dev_get_drvdata(device);
-
-	return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
-}
-
-static ssize_t show_hca_type(struct device *device,
-			     struct device_attribute *attr, char *buf)
-{
-	struct ocrdma_dev *dev = dev_get_drvdata(device);
-
-	return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
-
-static struct device_attribute *ocrdma_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type
-};
-
-static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
-		device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
-}
-
 static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
 {
-	int status = 0, i;
+	int status = 0;
 	u8 lstate = 0;
 	struct ocrdma_dev *dev;
 
@@ -331,9 +327,6 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
 	if (!status)
 		ocrdma_update_link_state(dev, lstate);
 
-	for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
-		if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
-			goto sysfs_err;
 	/* Init stats */
 	ocrdma_add_port_stats(dev);
 	/* Interrupt Moderation */
@@ -348,8 +341,6 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
 		dev_name(&dev->nic_info.pdev->dev), dev->id);
 	return dev;
 
-sysfs_err:
-	ocrdma_remove_sysfiles(dev);
 alloc_err:
 	ocrdma_free_resources(dev);
 	ocrdma_cleanup_hw(dev);
@@ -376,7 +367,6 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
 	 * of the registered clients.
 	 */
 	cancel_delayed_work_sync(&dev->eqd_work);
-	ocrdma_remove_sysfiles(dev);
 	ib_unregister_device(&dev->ibdev);
 
 	ocrdma_rem_port_stats(dev);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 24d20a4aa262..290d776edf48 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -764,7 +764,8 @@ void ocrdma_add_port_stats(struct ocrdma_dev *dev)
 		return;
 
 	/* Create post stats base dir */
-	dev->dir = debugfs_create_dir(dev->ibdev.name, ocrdma_dbgfs_dir);
+	dev->dir =
+		debugfs_create_dir(dev_name(&dev->ibdev.dev), ocrdma_dbgfs_dir);
 	if (!dev->dir)
 		goto err;
 
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index c158ca9fde6d..06d2a7f3304c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -1480,8 +1480,7 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		new_qps = old_qps;
 	spin_unlock_irqrestore(&qp->q_lock, flags);
 
-	if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask,
-				IB_LINK_LAYER_ETHERNET)) {
+	if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
 		pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
 		       "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
 		       __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index a0af6d424aed..8d6ff9df49fe 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -133,6 +133,33 @@ static int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num,
 	return 0;
 }
 
+/* QEDR sysfs interface */
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+			   char *buf)
+{
+	struct qedr_dev *dev = dev_get_drvdata(device);
+
+	return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor);
+}
+static DEVICE_ATTR_RO(hw_rev);
+
+static ssize_t hca_type_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET");
+}
+static DEVICE_ATTR_RO(hca_type);
+
+static struct attribute *qedr_attributes[] = {
+	&dev_attr_hw_rev.attr,
+	&dev_attr_hca_type.attr,
+	NULL
+};
+
+static const struct attribute_group qedr_attr_group = {
+	.attrs = qedr_attributes,
+};
+
 static int qedr_iw_register_device(struct qedr_dev *dev)
 {
 	dev->ibdev.node_type = RDMA_NODE_RNIC;
@@ -170,8 +197,6 @@ static int qedr_register_device(struct qedr_dev *dev)
 {
 	int rc;
 
-	strlcpy(dev->ibdev.name, "qedr%d", IB_DEVICE_NAME_MAX);
-
 	dev->ibdev.node_guid = dev->attr.node_guid;
 	memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC));
 	dev->ibdev.owner = THIS_MODULE;
@@ -262,9 +287,9 @@ static int qedr_register_device(struct qedr_dev *dev)
 
 	dev->ibdev.get_link_layer = qedr_link_layer;
 	dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
-
+	rdma_set_device_sysfs_group(&dev->ibdev, &qedr_attr_group);
 	dev->ibdev.driver_id = RDMA_DRIVER_QEDR;
-	return ib_register_device(&dev->ibdev, NULL);
+	return ib_register_device(&dev->ibdev, "qedr%d", NULL);
 }
 
 /* This function allocates fast-path status block memory */
@@ -404,37 +429,6 @@ err1:
 	return rc;
 }
 
-/* QEDR sysfs interface */
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
-			char *buf)
-{
-	struct qedr_dev *dev = dev_get_drvdata(device);
-
-	return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor);
-}
-
-static ssize_t show_hca_type(struct device *device,
-			     struct device_attribute *attr, char *buf)
-{
-	return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET");
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
-
-static struct device_attribute *qedr_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type
-};
-
-static void qedr_remove_sysfiles(struct qedr_dev *dev)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
-		device_remove_file(&dev->ibdev.dev, qedr_attributes[i]);
-}
-
 static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev)
 {
 	int rc = pci_enable_atomic_ops_to_root(pdev,
@@ -855,7 +849,7 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
 {
 	struct qed_dev_rdma_info dev_info;
 	struct qedr_dev *dev;
-	int rc = 0, i;
+	int rc = 0;
 
 	dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev));
 	if (!dev) {
@@ -914,18 +908,12 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
 		goto reg_err;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
-		if (device_create_file(&dev->ibdev.dev, qedr_attributes[i]))
-			goto sysfs_err;
-
 	if (!test_and_set_bit(QEDR_ENET_STATE_BIT, &dev->enet_state))
 		qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ACTIVE);
 
 	DP_DEBUG(dev, QEDR_MSG_INIT, "qedr driver loaded successfully\n");
 	return dev;
 
-sysfs_err:
-	ib_unregister_device(&dev->ibdev);
 reg_err:
 	qedr_sync_free_irqs(dev);
 irq_err:
@@ -944,7 +932,6 @@ static void qedr_remove(struct qedr_dev *dev)
 	/* First unregister with stack to stop all the active traffic
 	 * of the registered clients.
 	 */
-	qedr_remove_sysfiles(dev);
 	ib_unregister_device(&dev->ibdev);
 
 	qedr_stop_hw(dev);
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index a2d708dceb8d..53bbe6b4e6e6 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -43,7 +43,7 @@
 #include "qedr_hsi_rdma.h"
 
 #define QEDR_NODE_DESC "QLogic 579xx RoCE HCA"
-#define DP_NAME(dev) ((dev)->ibdev.name)
+#define DP_NAME(_dev) dev_name(&(_dev)->ibdev.dev)
 #define IS_IWARP(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_IWARP)
 #define IS_ROCE(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_ROCE)
 
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
index 85578887421b..e1ac2fd60bb1 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
@@ -519,9 +519,9 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev,
 	}
 
 	if (ether_addr_equal(udh.eth.smac_h, udh.eth.dmac_h))
-		packet->tx_dest = QED_ROCE_LL2_TX_DEST_LB;
+		packet->tx_dest = QED_LL2_TX_DEST_LB;
 	else
-		packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW;
+		packet->tx_dest = QED_LL2_TX_DEST_NW;
 
 	packet->roce_mode = roce_mode;
 	memcpy(packet->header.vaddr, ud_header_buffer, header_size);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 8cc3df24e04e..82ee4b4a7084 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -1447,7 +1447,6 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
 	u64 pbl_base_addr, phy_prod_pair_addr;
 	struct ib_ucontext *ib_ctx = NULL;
 	struct qedr_srq_hwq_info *hw_srq;
-	struct qedr_ucontext *ctx = NULL;
 	u32 page_cnt, page_size;
 	struct qedr_srq *srq;
 	int rc = 0;
@@ -1473,7 +1472,6 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
 
 	if (udata && ibpd->uobject && ibpd->uobject->context) {
 		ib_ctx = ibpd->uobject->context;
-		ctx = get_qedr_ucontext(ib_ctx);
 
 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
 			DP_ERR(dev,
@@ -2240,8 +2238,7 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
 		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
-					ibqp->qp_type, attr_mask,
-					IB_LINK_LAYER_ETHERNET)) {
+					ibqp->qp_type, attr_mask)) {
 			DP_ERR(dev,
 			       "modify qp: invalid attribute mask=0x%x specified for\n"
 			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 3461df002f81..83d2349188db 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1390,13 +1390,13 @@ static inline u32 qib_get_hdrqtail(const struct qib_ctxtdata *rcd)
  */
 
 extern const char ib_qib_version[];
+extern const struct attribute_group qib_attr_group;
 
 int qib_device_create(struct qib_devdata *);
 void qib_device_remove(struct qib_devdata *);
 
 int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
 			  struct kobject *kobj);
-int qib_verbs_register_sysfs(struct qib_devdata *);
 void qib_verbs_unregister_sysfs(struct qib_devdata *);
 /* Hook for sysfs read of QSFP */
 extern int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len);
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 344e401915f7..a81905df2d0f 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -378,25 +378,22 @@ void qib_flush_qp_waiters(struct rvt_qp *qp)
  * qib_check_send_wqe - validate wr/wqe
  * @qp - The qp
  * @wqe - The built wqe
+ * @call_send - Determine if the send should be posted or scheduled
  *
- * validate wr/wqe.  This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
- *
- * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure
+ * Returns 0 on success, -EINVAL on failure
  */
 int qib_check_send_wqe(struct rvt_qp *qp,
-		       struct rvt_swqe *wqe)
+		       struct rvt_swqe *wqe, bool *call_send)
 {
 	struct rvt_ah *ah;
-	int ret = 0;
 
 	switch (qp->ibqp.qp_type) {
 	case IB_QPT_RC:
 	case IB_QPT_UC:
 		if (wqe->length > 0x80000000U)
 			return -EINVAL;
+		if (wqe->length > qp->pmtu)
+			*call_send = false;
 		break;
 	case IB_QPT_SMI:
 	case IB_QPT_GSI:
@@ -405,12 +402,12 @@ int qib_check_send_wqe(struct rvt_qp *qp,
 		if (wqe->length > (1 << ah->log_pmtu))
 			return -EINVAL;
 		/* progress hint */
-		ret = 1;
+		*call_send = true;
 		break;
 	default:
 		break;
 	}
-	return ret;
+	return 0;
 }
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index f35fdeb14347..6fa002940451 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -254,7 +254,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
 			goto bail;
 		}
 		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
-		qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
+		rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
 			IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
 		/* will get called again */
 		goto done;
@@ -838,7 +838,7 @@ void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
 			qib_migrate_qp(qp);
 			qp->s_retry = qp->s_retry_cnt;
 		} else if (qp->s_last == qp->s_acked) {
-			qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+			rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
 			rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			return;
 		} else /* XXX need to handle delayed completion */
@@ -1221,7 +1221,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 			ibp->rvp.n_other_naks++;
 class_b:
 			if (qp->s_last == qp->s_acked) {
-				qib_send_complete(qp, wqe, status);
+				rvt_send_complete(qp, wqe, status);
 				rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			}
 			break;
@@ -1425,7 +1425,8 @@ read_middle:
 		qp->s_rdma_read_len -= pmtu;
 		update_last_psn(qp, psn);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
-		qib_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
+		rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+			     data, pmtu, false, false);
 		goto bail;
 
 	case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1471,7 +1472,8 @@ read_last:
 		if (unlikely(tlen != qp->s_rdma_read_len))
 			goto ack_len_err;
 		aeth = be32_to_cpu(ohdr->u.aeth);
-		qib_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
+		rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+			     data, tlen, false, false);
 		WARN_ON(qp->s_rdma_read_sge.num_sge);
 		(void) do_rc_ack(qp, aeth, psn,
 				 OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
@@ -1490,7 +1492,7 @@ ack_len_err:
 	status = IB_WC_LOC_LEN_ERR;
 ack_err:
 	if (qp->s_last == qp->s_acked) {
-		qib_send_complete(qp, wqe, status);
+		rvt_send_complete(qp, wqe, status);
 		rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 	}
 ack_done:
@@ -1844,7 +1846,7 @@ send_middle:
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len))
 			goto nack_inv;
-		qib_copy_sge(&qp->r_sge, data, pmtu, 1);
+		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
 		break;
 
 	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -1890,7 +1892,7 @@ send_last:
 		wc.byte_len = tlen + qp->r_rcv_len;
 		if (unlikely(wc.byte_len > qp->r_len))
 			goto nack_inv;
-		qib_copy_sge(&qp->r_sge, data, tlen, 1);
+		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
 		rvt_put_ss(&qp->r_sge);
 		qp->r_msn++;
 		if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index f8a7de795beb..1fa21938f310 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -171,307 +171,6 @@ err:
 }
 
 /**
- * qib_ruc_loopback - handle UC and RC lookback requests
- * @sqp: the sending QP
- *
- * This is called from qib_do_send() to
- * forward a WQE addressed to the same HCA.
- * Note that although we are single threaded due to the tasklet, we still
- * have to protect against post_send().  We don't have to worry about
- * receive interrupts since this is a connected protocol and all packets
- * will pass through here.
- */
-static void qib_ruc_loopback(struct rvt_qp *sqp)
-{
-	struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
-	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
-	struct qib_devdata *dd = ppd->dd;
-	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
-	struct rvt_qp *qp;
-	struct rvt_swqe *wqe;
-	struct rvt_sge *sge;
-	unsigned long flags;
-	struct ib_wc wc;
-	u64 sdata;
-	atomic64_t *maddr;
-	enum ib_wc_status send_status;
-	int release;
-	int ret;
-
-	rcu_read_lock();
-	/*
-	 * Note that we check the responder QP state after
-	 * checking the requester's state.
-	 */
-	qp = rvt_lookup_qpn(rdi, &ibp->rvp, sqp->remote_qpn);
-	if (!qp)
-		goto done;
-
-	spin_lock_irqsave(&sqp->s_lock, flags);
-
-	/* Return if we are already busy processing a work request. */
-	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
-	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
-		goto unlock;
-
-	sqp->s_flags |= RVT_S_BUSY;
-
-again:
-	if (sqp->s_last == READ_ONCE(sqp->s_head))
-		goto clr_busy;
-	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
-
-	/* Return if it is not OK to start a new work reqeust. */
-	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
-			goto clr_busy;
-		/* We are in the error state, flush the work request. */
-		send_status = IB_WC_WR_FLUSH_ERR;
-		goto flush_send;
-	}
-
-	/*
-	 * We can rely on the entry not changing without the s_lock
-	 * being held until we update s_last.
-	 * We increment s_cur to indicate s_last is in progress.
-	 */
-	if (sqp->s_last == sqp->s_cur) {
-		if (++sqp->s_cur >= sqp->s_size)
-			sqp->s_cur = 0;
-	}
-	spin_unlock_irqrestore(&sqp->s_lock, flags);
-
-	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
-	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
-		ibp->rvp.n_pkt_drops++;
-		/*
-		 * For RC, the requester would timeout and retry so
-		 * shortcut the timeouts and just signal too many retries.
-		 */
-		if (sqp->ibqp.qp_type == IB_QPT_RC)
-			send_status = IB_WC_RETRY_EXC_ERR;
-		else
-			send_status = IB_WC_SUCCESS;
-		goto serr;
-	}
-
-	memset(&wc, 0, sizeof(wc));
-	send_status = IB_WC_SUCCESS;
-
-	release = 1;
-	sqp->s_sge.sge = wqe->sg_list[0];
-	sqp->s_sge.sg_list = wqe->sg_list + 1;
-	sqp->s_sge.num_sge = wqe->wr.num_sge;
-	sqp->s_len = wqe->length;
-	switch (wqe->wr.opcode) {
-	case IB_WR_SEND_WITH_IMM:
-		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.ex.imm_data = wqe->wr.ex.imm_data;
-		/* FALLTHROUGH */
-	case IB_WR_SEND:
-		ret = rvt_get_rwqe(qp, false);
-		if (ret < 0)
-			goto op_err;
-		if (!ret)
-			goto rnr_nak;
-		break;
-
-	case IB_WR_RDMA_WRITE_WITH_IMM:
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
-			goto inv_err;
-		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.ex.imm_data = wqe->wr.ex.imm_data;
-		ret = rvt_get_rwqe(qp, true);
-		if (ret < 0)
-			goto op_err;
-		if (!ret)
-			goto rnr_nak;
-		/* FALLTHROUGH */
-	case IB_WR_RDMA_WRITE:
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
-			goto inv_err;
-		if (wqe->length == 0)
-			break;
-		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
-					  wqe->rdma_wr.remote_addr,
-					  wqe->rdma_wr.rkey,
-					  IB_ACCESS_REMOTE_WRITE)))
-			goto acc_err;
-		qp->r_sge.sg_list = NULL;
-		qp->r_sge.num_sge = 1;
-		qp->r_sge.total_len = wqe->length;
-		break;
-
-	case IB_WR_RDMA_READ:
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
-			goto inv_err;
-		if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
-					  wqe->rdma_wr.remote_addr,
-					  wqe->rdma_wr.rkey,
-					  IB_ACCESS_REMOTE_READ)))
-			goto acc_err;
-		release = 0;
-		sqp->s_sge.sg_list = NULL;
-		sqp->s_sge.num_sge = 1;
-		qp->r_sge.sge = wqe->sg_list[0];
-		qp->r_sge.sg_list = wqe->sg_list + 1;
-		qp->r_sge.num_sge = wqe->wr.num_sge;
-		qp->r_sge.total_len = wqe->length;
-		break;
-
-	case IB_WR_ATOMIC_CMP_AND_SWP:
-	case IB_WR_ATOMIC_FETCH_AND_ADD:
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
-			goto inv_err;
-		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
-					  wqe->atomic_wr.remote_addr,
-					  wqe->atomic_wr.rkey,
-					  IB_ACCESS_REMOTE_ATOMIC)))
-			goto acc_err;
-		/* Perform atomic OP and save result. */
-		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-		sdata = wqe->atomic_wr.compare_add;
-		*(u64 *) sqp->s_sge.sge.vaddr =
-			(wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
-			(u64) atomic64_add_return(sdata, maddr) - sdata :
-			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-				      sdata, wqe->atomic_wr.swap);
-		rvt_put_mr(qp->r_sge.sge.mr);
-		qp->r_sge.num_sge = 0;
-		goto send_comp;
-
-	default:
-		send_status = IB_WC_LOC_QP_OP_ERR;
-		goto serr;
-	}
-
-	sge = &sqp->s_sge.sge;
-	while (sqp->s_len) {
-		u32 len = sqp->s_len;
-
-		if (len > sge->length)
-			len = sge->length;
-		if (len > sge->sge_length)
-			len = sge->sge_length;
-		BUG_ON(len == 0);
-		qib_copy_sge(&qp->r_sge, sge->vaddr, len, release);
-		sge->vaddr += len;
-		sge->length -= len;
-		sge->sge_length -= len;
-		if (sge->sge_length == 0) {
-			if (!release)
-				rvt_put_mr(sge->mr);
-			if (--sqp->s_sge.num_sge)
-				*sge = *sqp->s_sge.sg_list++;
-		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= RVT_SEGSZ) {
-				if (++sge->m >= sge->mr->mapsz)
-					break;
-				sge->n = 0;
-			}
-			sge->vaddr =
-				sge->mr->map[sge->m]->segs[sge->n].vaddr;
-			sge->length =
-				sge->mr->map[sge->m]->segs[sge->n].length;
-		}
-		sqp->s_len -= len;
-	}
-	if (release)
-		rvt_put_ss(&qp->r_sge);
-
-	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
-		goto send_comp;
-
-	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-	else
-		wc.opcode = IB_WC_RECV;
-	wc.wr_id = qp->r_wr_id;
-	wc.status = IB_WC_SUCCESS;
-	wc.byte_len = wqe->length;
-	wc.qp = &qp->ibqp;
-	wc.src_qp = qp->remote_qpn;
-	wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
-	wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
-	wc.port_num = 1;
-	/* Signal completion event if the solicited bit is set. */
-	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
-		     wqe->wr.send_flags & IB_SEND_SOLICITED);
-
-send_comp:
-	spin_lock_irqsave(&sqp->s_lock, flags);
-	ibp->rvp.n_loop_pkts++;
-flush_send:
-	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
-	qib_send_complete(sqp, wqe, send_status);
-	goto again;
-
-rnr_nak:
-	/* Handle RNR NAK */
-	if (qp->ibqp.qp_type == IB_QPT_UC)
-		goto send_comp;
-	ibp->rvp.n_rnr_naks++;
-	/*
-	 * Note: we don't need the s_lock held since the BUSY flag
-	 * makes this single threaded.
-	 */
-	if (sqp->s_rnr_retry == 0) {
-		send_status = IB_WC_RNR_RETRY_EXC_ERR;
-		goto serr;
-	}
-	if (sqp->s_rnr_retry_cnt < 7)
-		sqp->s_rnr_retry--;
-	spin_lock_irqsave(&sqp->s_lock, flags);
-	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
-		goto clr_busy;
-	rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
-				IB_AETH_CREDIT_SHIFT);
-	goto clr_busy;
-
-op_err:
-	send_status = IB_WC_REM_OP_ERR;
-	wc.status = IB_WC_LOC_QP_OP_ERR;
-	goto err;
-
-inv_err:
-	send_status = IB_WC_REM_INV_REQ_ERR;
-	wc.status = IB_WC_LOC_QP_OP_ERR;
-	goto err;
-
-acc_err:
-	send_status = IB_WC_REM_ACCESS_ERR;
-	wc.status = IB_WC_LOC_PROT_ERR;
-err:
-	/* responder goes to error state */
-	rvt_rc_error(qp, wc.status);
-
-serr:
-	spin_lock_irqsave(&sqp->s_lock, flags);
-	qib_send_complete(sqp, wqe, send_status);
-	if (sqp->ibqp.qp_type == IB_QPT_RC) {
-		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
-
-		sqp->s_flags &= ~RVT_S_BUSY;
-		spin_unlock_irqrestore(&sqp->s_lock, flags);
-		if (lastwqe) {
-			struct ib_event ev;
-
-			ev.device = sqp->ibqp.device;
-			ev.element.qp = &sqp->ibqp;
-			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
-		}
-		goto done;
-	}
-clr_busy:
-	sqp->s_flags &= ~RVT_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&sqp->s_lock, flags);
-done:
-	rcu_read_unlock();
-}
-
-/**
  * qib_make_grh - construct a GRH header
  * @ibp: a pointer to the IB port
  * @hdr: a pointer to the GRH header being constructed
@@ -573,7 +272,7 @@ void qib_do_send(struct rvt_qp *qp)
 	     qp->ibqp.qp_type == IB_QPT_UC) &&
 	    (rdma_ah_get_dlid(&qp->remote_ah_attr) &
 	     ~((1 << ppd->lmc) - 1)) == ppd->lid) {
-		qib_ruc_loopback(qp);
+		rvt_ruc_loopback(qp);
 		return;
 	}
 
@@ -613,42 +312,3 @@ void qib_do_send(struct rvt_qp *qp)
 
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
-
-/*
- * This should be called with s_lock held.
- */
-void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
-		       enum ib_wc_status status)
-{
-	u32 old_last, last;
-
-	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
-		return;
-
-	last = qp->s_last;
-	old_last = last;
-	if (++last >= qp->s_size)
-		last = 0;
-	qp->s_last = last;
-	/* See post_send() */
-	barrier();
-	rvt_put_swqe(wqe);
-	if (qp->ibqp.qp_type == IB_QPT_UD ||
-	    qp->ibqp.qp_type == IB_QPT_SMI ||
-	    qp->ibqp.qp_type == IB_QPT_GSI)
-		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
-
-	rvt_qp_swqe_complete(qp,
-			     wqe,
-			     ib_qib_wc_opcode[wqe->wr.opcode],
-			     status);
-
-	if (qp->s_acked == old_last)
-		qp->s_acked = last;
-	if (qp->s_cur == old_last)
-		qp->s_cur = last;
-	if (qp->s_tail == old_last)
-		qp->s_tail = last;
-	if (qp->state == IB_QPS_SQD && last == qp->s_cur)
-		qp->s_draining = 0;
-}
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index d0723d4aef5c..757d4c9d713d 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -651,7 +651,7 @@ unmap:
 		if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)
 			rvt_error_qp(qp, IB_WC_GENERAL_ERR);
 	} else if (qp->s_wqe)
-		qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
+		rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
 	spin_unlock(&qp->s_lock);
 	spin_unlock(&qp->r_lock);
 	/* return zero to process the next send work request */
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index ca2638d8f35e..1cf4ca3f23e3 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -551,17 +551,18 @@ static struct kobj_type qib_diagc_ktype = {
  * Start of per-unit (or driver, in some cases, but replicated
  * per unit) functions (these get a device *)
  */
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
-			char *buf)
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+			   char *buf)
 {
 	struct qib_ibdev *dev =
 		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 
 	return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
 }
+static DEVICE_ATTR_RO(hw_rev);
 
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
-			char *buf)
+static ssize_t hca_type_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
 		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
@@ -574,15 +575,18 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
 		ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
 	return ret;
 }
+static DEVICE_ATTR_RO(hca_type);
+static DEVICE_ATTR(board_id, 0444, hca_type_show, NULL);
 
-static ssize_t show_version(struct device *device,
+static ssize_t version_show(struct device *device,
 			    struct device_attribute *attr, char *buf)
 {
 	/* The string printed here is already newline-terminated. */
 	return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version);
 }
+static DEVICE_ATTR_RO(version);
 
-static ssize_t show_boardversion(struct device *device,
+static ssize_t boardversion_show(struct device *device,
 				 struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
@@ -592,9 +596,9 @@ static ssize_t show_boardversion(struct device *device,
 	/* The string printed here is already newline-terminated. */
 	return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
 }
+static DEVICE_ATTR_RO(boardversion);
 
-
-static ssize_t show_localbus_info(struct device *device,
+static ssize_t localbus_info_show(struct device *device,
 				  struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
@@ -604,9 +608,9 @@ static ssize_t show_localbus_info(struct device *device,
 	/* The string printed here is already newline-terminated. */
 	return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info);
 }
+static DEVICE_ATTR_RO(localbus_info);
 
-
-static ssize_t show_nctxts(struct device *device,
+static ssize_t nctxts_show(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
@@ -620,9 +624,10 @@ static ssize_t show_nctxts(struct device *device,
 			(dd->first_user_ctxt > dd->cfgctxts) ? 0 :
 			(dd->cfgctxts - dd->first_user_ctxt));
 }
+static DEVICE_ATTR_RO(nctxts);
 
-static ssize_t show_nfreectxts(struct device *device,
-			   struct device_attribute *attr, char *buf)
+static ssize_t nfreectxts_show(struct device *device,
+			       struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
 		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
@@ -631,8 +636,9 @@ static ssize_t show_nfreectxts(struct device *device,
 	/* Return the number of free user ports (contexts) available. */
 	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
 }
+static DEVICE_ATTR_RO(nfreectxts);
 
-static ssize_t show_serial(struct device *device,
+static ssize_t serial_show(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
@@ -644,8 +650,9 @@ static ssize_t show_serial(struct device *device,
 	strcat(buf, "\n");
 	return strlen(buf);
 }
+static DEVICE_ATTR_RO(serial);
 
-static ssize_t store_chip_reset(struct device *device,
+static ssize_t chip_reset_store(struct device *device,
 				struct device_attribute *attr, const char *buf,
 				size_t count)
 {
@@ -663,11 +670,12 @@ static ssize_t store_chip_reset(struct device *device,
 bail:
 	return ret < 0 ? ret : count;
 }
+static DEVICE_ATTR_WO(chip_reset);
 
 /*
  * Dump tempsense regs. in decimal, to ease shell-scripts.
  */
-static ssize_t show_tempsense(struct device *device,
+static ssize_t tempsense_show(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
@@ -695,6 +703,7 @@ static ssize_t show_tempsense(struct device *device,
 				*(signed char *)(regvals + 7));
 	return ret;
 }
+static DEVICE_ATTR_RO(tempsense);
 
 /*
  * end of per-unit (or driver, in some cases, but replicated
@@ -702,30 +711,23 @@ static ssize_t show_tempsense(struct device *device,
  */
 
 /* start of per-unit file structures and support code */
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(version, S_IRUGO, show_version, NULL);
-static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL);
-static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL);
-static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
-static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
-static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
-static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL);
-static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
-
-static struct device_attribute *qib_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type,
-	&dev_attr_board_id,
-	&dev_attr_version,
-	&dev_attr_nctxts,
-	&dev_attr_nfreectxts,
-	&dev_attr_serial,
-	&dev_attr_boardversion,
-	&dev_attr_tempsense,
-	&dev_attr_localbus_info,
-	&dev_attr_chip_reset,
+static struct attribute *qib_attributes[] = {
+	&dev_attr_hw_rev.attr,
+	&dev_attr_hca_type.attr,
+	&dev_attr_board_id.attr,
+	&dev_attr_version.attr,
+	&dev_attr_nctxts.attr,
+	&dev_attr_nfreectxts.attr,
+	&dev_attr_serial.attr,
+	&dev_attr_boardversion.attr,
+	&dev_attr_tempsense.attr,
+	&dev_attr_localbus_info.attr,
+	&dev_attr_chip_reset.attr,
+	NULL,
+};
+
+const struct attribute_group qib_attr_group = {
+	.attrs = qib_attributes,
 };
 
 int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
@@ -827,27 +829,6 @@ bail:
 }
 
 /*
- * Register and create our files in /sys/class/infiniband.
- */
-int qib_verbs_register_sysfs(struct qib_devdata *dd)
-{
-	struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
-	int i, ret;
-
-	for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) {
-		ret = device_create_file(&dev->dev, qib_attributes[i]);
-		if (ret)
-			goto bail;
-	}
-
-	return 0;
-bail:
-	for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i)
-		device_remove_file(&dev->dev, qib_attributes[i]);
-	return ret;
-}
-
-/*
  * Unregister and remove our files in /sys/class/infiniband.
  */
 void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 3e54bc11e0ae..30c70ad0f4bf 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -68,7 +68,7 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags)
 			goto bail;
 		}
 		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
-		qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+		rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
 		goto done;
 	}
 
@@ -359,7 +359,7 @@ send_first:
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len))
 			goto rewind;
-		qib_copy_sge(&qp->r_sge, data, pmtu, 0);
+		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
 		break;
 
 	case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -385,7 +385,7 @@ send_last:
 		if (unlikely(wc.byte_len > qp->r_len))
 			goto rewind;
 		wc.opcode = IB_WC_RECV;
-		qib_copy_sge(&qp->r_sge, data, tlen, 0);
+		rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
 		rvt_put_ss(&qp->s_rdma_read_sge);
 last_imm:
 		wc.wr_id = qp->r_wr_id;
@@ -449,7 +449,7 @@ rdma_first:
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len))
 			goto drop;
-		qib_copy_sge(&qp->r_sge, data, pmtu, 1);
+		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
 		break;
 
 	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -479,7 +479,7 @@ rdma_last_imm:
 		}
 		wc.byte_len = qp->r_len;
 		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-		qib_copy_sge(&qp->r_sge, data, tlen, 1);
+		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
 		rvt_put_ss(&qp->r_sge);
 		goto last_imm;
 
@@ -495,7 +495,7 @@ rdma_last:
 		tlen -= (hdrsize + pad + 4);
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 			goto drop;
-		qib_copy_sge(&qp->r_sge, data, tlen, 1);
+		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
 		rvt_put_ss(&qp->r_sge);
 		break;
 
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index f8d029a2390f..4d4c31ea4e2d 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -162,8 +162,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
 		const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr);
 
 		qib_make_grh(ibp, &grh, grd, 0, 0);
-		qib_copy_sge(&qp->r_sge, &grh,
-			     sizeof(grh), 1);
+		rvt_copy_sge(qp, &qp->r_sge, &grh,
+			     sizeof(grh), true, false);
 		wc.wc_flags |= IB_WC_GRH;
 	} else
 		rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
@@ -179,7 +179,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
 		if (len > sge->sge_length)
 			len = sge->sge_length;
 		BUG_ON(len == 0);
-		qib_copy_sge(&qp->r_sge, sge->vaddr, len, 1);
+		rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
 		sge->vaddr += len;
 		sge->length -= len;
 		sge->sge_length -= len;
@@ -260,7 +260,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
 			goto bail;
 		}
 		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
-		qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+		rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
 		goto done;
 	}
 
@@ -304,7 +304,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
 			qib_ud_loopback(qp, wqe);
 			spin_lock_irqsave(&qp->s_lock, tflags);
 			*flags = tflags;
-			qib_send_complete(qp, wqe, IB_WC_SUCCESS);
+			rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
 			goto done;
 		}
 	}
@@ -551,12 +551,13 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
 		goto drop;
 	}
 	if (has_grh) {
-		qib_copy_sge(&qp->r_sge, &hdr->u.l.grh,
-			     sizeof(struct ib_grh), 1);
+		rvt_copy_sge(qp, &qp->r_sge, &hdr->u.l.grh,
+			     sizeof(struct ib_grh), true, false);
 		wc.wc_flags |= IB_WC_GRH;
 	} else
 		rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
-	qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
+	rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
+		     true, false);
 	rvt_put_ss(&qp->r_sge);
 	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		return;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 41babbc0db58..4b0f5761a646 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -131,27 +131,6 @@ const enum ib_wc_opcode ib_qib_wc_opcode[] = {
  */
 __be64 ib_qib_sys_image_guid;
 
-/**
- * qib_copy_sge - copy data to SGE memory
- * @ss: the SGE state
- * @data: the data to copy
- * @length: the length of the data
- */
-void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release)
-{
-	struct rvt_sge *sge = &ss->sge;
-
-	while (length) {
-		u32 len = rvt_get_sge_length(sge, length);
-
-		WARN_ON_ONCE(len == 0);
-		memcpy(sge->vaddr, data, len);
-		rvt_update_sge(ss, len, release);
-		data += len;
-		length -= len;
-	}
-}
-
 /*
  * Count the number of DMA descriptors needed to send length bytes of data.
  * Don't modify the qib_sge_state to get the count.
@@ -752,7 +731,7 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
 
 	spin_lock(&qp->s_lock);
 	if (tx->wqe)
-		qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
+		rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
 	else if (qp->ibqp.qp_type == IB_QPT_RC) {
 		struct ib_header *hdr;
 
@@ -1025,7 +1004,7 @@ done:
 	}
 	if (qp->s_wqe) {
 		spin_lock_irqsave(&qp->s_lock, flags);
-		qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
+		rvt_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
 		spin_lock_irqsave(&qp->s_lock, flags);
@@ -1512,6 +1491,9 @@ static void qib_fill_device_attr(struct qib_devdata *dd)
 					rdi->dparms.props.max_mcast_grp;
 	/* post send table */
 	dd->verbs_dev.rdi.post_parms = qib_post_parms;
+
+	/* opcode translation table */
+	dd->verbs_dev.rdi.wc_opcode = ib_qib_wc_opcode;
 }
 
 /**
@@ -1588,7 +1570,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
 	dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
 	dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
 	dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
-	dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
+	dd->verbs_dev.rdi.driver_f.setup_wqe = qib_check_send_wqe;
 	dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
 	dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
 	dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
@@ -1631,6 +1613,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
 	dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id;
 	dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
 	dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;
+	dd->verbs_dev.rdi.dparms.sge_copy_mode = RVT_SGE_COPY_MEMCPY;
 
 	qib_fill_device_attr(dd);
 
@@ -1642,19 +1625,14 @@ int qib_register_ib_device(struct qib_devdata *dd)
 			      i,
 			      dd->rcd[ctxt]->pkeys);
 	}
+	rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &qib_attr_group);
 
 	ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB);
 	if (ret)
 		goto err_tx;
 
-	ret = qib_verbs_register_sysfs(dd);
-	if (ret)
-		goto err_class;
-
 	return ret;
 
-err_class:
-	rvt_unregister_device(&dd->verbs_dev.rdi);
 err_tx:
 	while (!list_empty(&dev->txreq_free)) {
 		struct list_head *l = dev->txreq_free.next;
@@ -1716,14 +1694,14 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
  * It is only used in post send, which doesn't hold
  * the s_lock.
  */
-void _qib_schedule_send(struct rvt_qp *qp)
+bool _qib_schedule_send(struct rvt_qp *qp)
 {
 	struct qib_ibport *ibp =
 		to_iport(qp->ibqp.device, qp->port_num);
 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 	struct qib_qp_priv *priv = qp->priv;
 
-	queue_work(ppd->qib_wq, &priv->s_work);
+	return queue_work(ppd->qib_wq, &priv->s_work);
 }
 
 /**
@@ -1733,8 +1711,9 @@ void _qib_schedule_send(struct rvt_qp *qp)
  * This schedules qp progress.  The s_lock
  * should be held.
  */
-void qib_schedule_send(struct rvt_qp *qp)
+bool qib_schedule_send(struct rvt_qp *qp)
 {
 	if (qib_send_ok(qp))
-		_qib_schedule_send(qp);
+		return _qib_schedule_send(qp);
+	return false;
 }
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 666613eef88f..a4426c24b0d1 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 - 2017 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012 - 2018 Intel Corporation.  All rights reserved.
  * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
@@ -223,8 +223,8 @@ static inline int qib_send_ok(struct rvt_qp *qp)
 		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
 }
 
-void _qib_schedule_send(struct rvt_qp *qp);
-void qib_schedule_send(struct rvt_qp *qp);
+bool _qib_schedule_send(struct rvt_qp *qp);
+bool qib_schedule_send(struct rvt_qp *qp);
 
 static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
 {
@@ -292,9 +292,6 @@ void qib_put_txreq(struct qib_verbs_txreq *tx);
 int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
 		   u32 hdrwords, struct rvt_sge_state *ss, u32 len);
 
-void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
-		  int release);
-
 void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
 		int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
 
@@ -303,7 +300,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
 
 int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
 
-int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+		       bool *call_send);
 
 struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
 
@@ -333,9 +331,6 @@ void _qib_do_send(struct work_struct *work);
 
 void qib_do_send(struct rvt_qp *qp);
 
-void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
-		       enum ib_wc_status status);
-
 void qib_send_rc_ack(struct rvt_qp *qp);
 
 int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags);
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c
index 92dc66cc2d50..a3115709fb03 100644
--- a/drivers/infiniband/hw/usnic/usnic_debugfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c
@@ -165,6 +165,5 @@ void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
 
 void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow)
 {
-	if (!IS_ERR_OR_NULL(qp_flow->dbgfs_dentry))
-		debugfs_remove(qp_flow->dbgfs_dentry);
+	debugfs_remove(qp_flow->dbgfs_dentry);
 }
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index f0538a460328..73bd00f8d2c8 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -76,7 +76,7 @@ static LIST_HEAD(usnic_ib_ibdev_list);
 static int usnic_ib_dump_vf_hdr(void *obj, char *buf, int buf_sz)
 {
 	struct usnic_ib_vf *vf = obj;
-	return scnprintf(buf, buf_sz, "PF: %s ", vf->pf->ib_dev.name);
+	return scnprintf(buf, buf_sz, "PF: %s ", dev_name(&vf->pf->ib_dev.dev));
 }
 /* End callback dump funcs */
 
@@ -138,7 +138,7 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
 	netdev = us_ibdev->netdev;
 	switch (event) {
 	case NETDEV_REBOOT:
-		usnic_info("PF Reset on %s\n", us_ibdev->ib_dev.name);
+		usnic_info("PF Reset on %s\n", dev_name(&us_ibdev->ib_dev.dev));
 		usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
 		ib_event.event = IB_EVENT_PORT_ERR;
 		ib_event.device = &us_ibdev->ib_dev;
@@ -151,7 +151,8 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
 		if (!us_ibdev->ufdev->link_up &&
 				netif_carrier_ok(netdev)) {
 			usnic_fwd_carrier_up(us_ibdev->ufdev);
-			usnic_info("Link UP on %s\n", us_ibdev->ib_dev.name);
+			usnic_info("Link UP on %s\n",
+				   dev_name(&us_ibdev->ib_dev.dev));
 			ib_event.event = IB_EVENT_PORT_ACTIVE;
 			ib_event.device = &us_ibdev->ib_dev;
 			ib_event.element.port_num = 1;
@@ -159,7 +160,8 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
 		} else if (us_ibdev->ufdev->link_up &&
 				!netif_carrier_ok(netdev)) {
 			usnic_fwd_carrier_down(us_ibdev->ufdev);
-			usnic_info("Link DOWN on %s\n", us_ibdev->ib_dev.name);
+			usnic_info("Link DOWN on %s\n",
+				   dev_name(&us_ibdev->ib_dev.dev));
 			usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
 			ib_event.event = IB_EVENT_PORT_ERR;
 			ib_event.device = &us_ibdev->ib_dev;
@@ -168,17 +170,17 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
 		} else {
 			usnic_dbg("Ignoring %s on %s\n",
 					netdev_cmd_to_name(event),
-					us_ibdev->ib_dev.name);
+					dev_name(&us_ibdev->ib_dev.dev));
 		}
 		break;
 	case NETDEV_CHANGEADDR:
 		if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr,
 				sizeof(us_ibdev->ufdev->mac))) {
 			usnic_dbg("Ignoring addr change on %s\n",
-					us_ibdev->ib_dev.name);
+				  dev_name(&us_ibdev->ib_dev.dev));
 		} else {
 			usnic_info(" %s old mac: %pM new mac: %pM\n",
-					us_ibdev->ib_dev.name,
+					dev_name(&us_ibdev->ib_dev.dev),
 					us_ibdev->ufdev->mac,
 					netdev->dev_addr);
 			usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr);
@@ -193,19 +195,19 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
 	case NETDEV_CHANGEMTU:
 		if (us_ibdev->ufdev->mtu != netdev->mtu) {
 			usnic_info("MTU Change on %s old: %u new: %u\n",
-					us_ibdev->ib_dev.name,
+					dev_name(&us_ibdev->ib_dev.dev),
 					us_ibdev->ufdev->mtu, netdev->mtu);
 			usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu);
 			usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
 		} else {
 			usnic_dbg("Ignoring MTU change on %s\n",
-					us_ibdev->ib_dev.name);
+				  dev_name(&us_ibdev->ib_dev.dev));
 		}
 		break;
 	default:
 		usnic_dbg("Ignoring event %s on %s",
 				netdev_cmd_to_name(event),
-				us_ibdev->ib_dev.name);
+				dev_name(&us_ibdev->ib_dev.dev));
 	}
 	mutex_unlock(&us_ibdev->usdev_lock);
 }
@@ -267,7 +269,7 @@ static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
 	default:
 		usnic_info("Ignoring event %s on %s",
 				netdev_cmd_to_name(event),
-				us_ibdev->ib_dev.name);
+				dev_name(&us_ibdev->ib_dev.dev));
 	}
 	mutex_unlock(&us_ibdev->usdev_lock);
 
@@ -364,7 +366,6 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
 	us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
 	us_ibdev->ib_dev.dev.parent = &dev->dev;
 	us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
-	strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX);
 
 	us_ibdev->ib_dev.uverbs_cmd_mask =
 		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -416,7 +417,9 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
 
 
 	us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC;
-	if (ib_register_device(&us_ibdev->ib_dev, NULL))
+	rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group);
+
+	if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d", NULL))
 		goto err_fwd_dealloc;
 
 	usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu);
@@ -437,9 +440,9 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
 	kref_init(&us_ibdev->vf_cnt);
 
 	usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n",
-			us_ibdev->ib_dev.name, netdev_name(us_ibdev->netdev),
-			us_ibdev->ufdev->mac, us_ibdev->ufdev->link_up,
-			us_ibdev->ufdev->mtu);
+		   dev_name(&us_ibdev->ib_dev.dev),
+		   netdev_name(us_ibdev->netdev), us_ibdev->ufdev->mac,
+		   us_ibdev->ufdev->link_up, us_ibdev->ufdev->mtu);
 	return us_ibdev;
 
 err_fwd_dealloc:
@@ -452,7 +455,7 @@ err_dealloc:
 
 static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev)
 {
-	usnic_info("Unregistering %s\n", us_ibdev->ib_dev.name);
+	usnic_info("Unregistering %s\n", dev_name(&us_ibdev->ib_dev.dev));
 	usnic_ib_sysfs_unregister_usdev(us_ibdev);
 	usnic_fwd_dev_free(us_ibdev->ufdev);
 	ib_unregister_device(&us_ibdev->ib_dev);
@@ -591,7 +594,7 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev,
 	mutex_unlock(&pf->usdev_lock);
 
 	usnic_info("Registering usnic VF %s into PF %s\n", pci_name(pdev),
-			pf->ib_dev.name);
+		   dev_name(&pf->ib_dev.dev));
 	usnic_ib_log_vf(vf);
 	return 0;
 
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index 4210ca14014d..a7e4b2ccfaf8 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -46,9 +46,8 @@
 #include "usnic_ib_sysfs.h"
 #include "usnic_log.h"
 
-static ssize_t usnic_ib_show_board(struct device *device,
-					struct device_attribute *attr,
-					char *buf)
+static ssize_t board_id_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
 {
 	struct usnic_ib_dev *us_ibdev =
 		container_of(device, struct usnic_ib_dev, ib_dev.dev);
@@ -60,13 +59,13 @@ static ssize_t usnic_ib_show_board(struct device *device,
 
 	return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id);
 }
+static DEVICE_ATTR_RO(board_id);
 
 /*
  * Report the configuration for this PF
  */
 static ssize_t
-usnic_ib_show_config(struct device *device, struct device_attribute *attr,
-			char *buf)
+config_show(struct device *device, struct device_attribute *attr, char *buf)
 {
 	struct usnic_ib_dev *us_ibdev;
 	char *ptr;
@@ -94,7 +93,7 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr,
 
 		n = scnprintf(ptr, left,
 			"%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:",
-			us_ibdev->ib_dev.name,
+			dev_name(&us_ibdev->ib_dev.dev),
 			busname,
 			PCI_SLOT(us_ibdev->pdev->devfn),
 			PCI_FUNC(us_ibdev->pdev->devfn),
@@ -119,17 +118,17 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr,
 		UPDATE_PTR_LEFT(n, ptr, left);
 	} else {
 		n = scnprintf(ptr, left, "%s: no VFs\n",
-				us_ibdev->ib_dev.name);
+				dev_name(&us_ibdev->ib_dev.dev));
 		UPDATE_PTR_LEFT(n, ptr, left);
 	}
 	mutex_unlock(&us_ibdev->usdev_lock);
 
 	return ptr - buf;
 }
+static DEVICE_ATTR_RO(config);
 
 static ssize_t
-usnic_ib_show_iface(struct device *device, struct device_attribute *attr,
-			char *buf)
+iface_show(struct device *device, struct device_attribute *attr, char *buf)
 {
 	struct usnic_ib_dev *us_ibdev;
 
@@ -138,10 +137,10 @@ usnic_ib_show_iface(struct device *device, struct device_attribute *attr,
 	return scnprintf(buf, PAGE_SIZE, "%s\n",
 			netdev_name(us_ibdev->netdev));
 }
+static DEVICE_ATTR_RO(iface);
 
 static ssize_t
-usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
-			char *buf)
+max_vf_show(struct device *device, struct device_attribute *attr, char *buf)
 {
 	struct usnic_ib_dev *us_ibdev;
 
@@ -150,10 +149,10 @@ usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
 	return scnprintf(buf, PAGE_SIZE, "%u\n",
 			kref_read(&us_ibdev->vf_cnt));
 }
+static DEVICE_ATTR_RO(max_vf);
 
 static ssize_t
-usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr,
-			char *buf)
+qp_per_vf_show(struct device *device, struct device_attribute *attr, char *buf)
 {
 	struct usnic_ib_dev *us_ibdev;
 	int qp_per_vf;
@@ -165,10 +164,10 @@ usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr,
 	return scnprintf(buf, PAGE_SIZE,
 				"%d\n", qp_per_vf);
 }
+static DEVICE_ATTR_RO(qp_per_vf);
 
 static ssize_t
-usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
-			char *buf)
+cq_per_vf_show(struct device *device, struct device_attribute *attr, char *buf)
 {
 	struct usnic_ib_dev *us_ibdev;
 
@@ -177,21 +176,20 @@ usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
 	return scnprintf(buf, PAGE_SIZE, "%d\n",
 			us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
 }
+static DEVICE_ATTR_RO(cq_per_vf);
+
+static struct attribute *usnic_class_attributes[] = {
+	&dev_attr_board_id.attr,
+	&dev_attr_config.attr,
+	&dev_attr_iface.attr,
+	&dev_attr_max_vf.attr,
+	&dev_attr_qp_per_vf.attr,
+	&dev_attr_cq_per_vf.attr,
+	NULL
+};
 
-static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL);
-static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL);
-static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL);
-static DEVICE_ATTR(max_vf, S_IRUGO, usnic_ib_show_max_vf, NULL);
-static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL);
-static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL);
-
-static struct device_attribute *usnic_class_attributes[] = {
-	&dev_attr_board_id,
-	&dev_attr_config,
-	&dev_attr_iface,
-	&dev_attr_max_vf,
-	&dev_attr_qp_per_vf,
-	&dev_attr_cq_per_vf,
+const struct attribute_group usnic_attr_group = {
+	.attrs = usnic_class_attributes,
 };
 
 struct qpn_attribute {
@@ -278,18 +276,6 @@ static struct kobj_type usnic_ib_qpn_type = {
 
 int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev)
 {
-	int i;
-	int err;
-	for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
-		err = device_create_file(&us_ibdev->ib_dev.dev,
-						usnic_class_attributes[i]);
-		if (err) {
-			usnic_err("Failed to create device file %d for %s eith err %d",
-				i, us_ibdev->ib_dev.name, err);
-			return -EINVAL;
-		}
-	}
-
 	/* create kernel object for looking at individual QPs */
 	kobject_get(&us_ibdev->ib_dev.dev.kobj);
 	us_ibdev->qpn_kobj = kobject_create_and_add("qpn",
@@ -304,12 +290,6 @@ int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev)
 
 void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev)
 {
-	int i;
-	for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
-		device_remove_file(&us_ibdev->ib_dev.dev,
-					usnic_class_attributes[i]);
-	}
-
 	kobject_put(us_ibdev->qpn_kobj);
 }
 
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
index 3d98e16cfeaf..b1f064cec850 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
@@ -41,4 +41,6 @@ void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev);
 void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp);
 void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp);
 
+extern const struct attribute_group usnic_attr_group;
+
 #endif /* !USNIC_IB_SYSFS_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 9973ac893635..0b91ff36768a 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -159,7 +159,8 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
 
 	err = ib_copy_to_udata(udata, &resp, sizeof(resp));
 	if (err) {
-		usnic_err("Failed to copy udata for %s", us_ibdev->ib_dev.name);
+		usnic_err("Failed to copy udata for %s",
+			  dev_name(&us_ibdev->ib_dev.dev));
 		return err;
 	}
 
@@ -197,7 +198,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
 			vnic = vf->vnic;
 			if (!usnic_vnic_check_room(vnic, res_spec)) {
 				usnic_dbg("Found used vnic %s from %s\n",
-						us_ibdev->ib_dev.name,
+						dev_name(&us_ibdev->ib_dev.dev),
 						pci_name(usnic_vnic_get_pdev(
 									vnic)));
 				qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev,
@@ -230,7 +231,8 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
 		spin_unlock(&vf->lock);
 	}
 
-	usnic_info("No free qp grp found on %s\n", us_ibdev->ib_dev.name);
+	usnic_info("No free qp grp found on %s\n",
+		   dev_name(&us_ibdev->ib_dev.dev));
 	return ERR_PTR(-ENOMEM);
 
 qp_grp_check:
@@ -471,7 +473,7 @@ struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
 	}
 
 	usnic_info("domain 0x%p allocated for context 0x%p and device %s\n",
-			pd, context, ibdev->name);
+		   pd, context, dev_name(&ibdev->dev));
 	return &pd->ibpd;
 }
 
@@ -508,20 +510,20 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
 	err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
 	if (err) {
 		usnic_err("%s: cannot copy udata for create_qp\n",
-				us_ibdev->ib_dev.name);
+			  dev_name(&us_ibdev->ib_dev.dev));
 		return ERR_PTR(-EINVAL);
 	}
 
 	err = create_qp_validate_user_data(cmd);
 	if (err) {
 		usnic_err("%s: Failed to validate user data\n",
-				us_ibdev->ib_dev.name);
+			  dev_name(&us_ibdev->ib_dev.dev));
 		return ERR_PTR(-EINVAL);
 	}
 
 	if (init_attr->qp_type != IB_QPT_UD) {
 		usnic_err("%s asked to make a non-UD QP: %d\n",
-				us_ibdev->ib_dev.name, init_attr->qp_type);
+			  dev_name(&us_ibdev->ib_dev.dev), init_attr->qp_type);
 		return ERR_PTR(-EINVAL);
 	}
 
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c
index e0a95538c364..82dd810bc000 100644
--- a/drivers/infiniband/hw/usnic/usnic_transport.c
+++ b/drivers/infiniband/hw/usnic/usnic_transport.c
@@ -121,7 +121,7 @@ void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num)
 	if (type == USNIC_TRANSPORT_ROCE_CUSTOM) {
 		spin_lock(&roce_bitmap_lock);
 		if (!port_num) {
-			usnic_err("Unreserved unvalid port num 0 for %s\n",
+			usnic_err("Unreserved invalid port num 0 for %s\n",
 					usnic_transport_to_str(type));
 			goto out_roce_custom;
 		}
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 9dd39daa602b..49275a548751 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -54,18 +54,6 @@ static struct workqueue_struct *usnic_uiom_wq;
 	((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] -	\
 	(void *) &((struct usnic_uiom_chunk *) 0)->page_list[0]))
 
-static void usnic_uiom_reg_account(struct work_struct *work)
-{
-	struct usnic_uiom_reg *umem = container_of(work,
-						struct usnic_uiom_reg, work);
-
-	down_write(&umem->mm->mmap_sem);
-	umem->mm->locked_vm -= umem->diff;
-	up_write(&umem->mm->mmap_sem);
-	mmput(umem->mm);
-	kfree(umem);
-}
-
 static int usnic_uiom_dma_fault(struct iommu_domain *domain,
 				struct device *dev,
 				unsigned long iova, int flags,
@@ -99,8 +87,9 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
 }
 
 static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
-				int dmasync, struct list_head *chunk_list)
+				int dmasync, struct usnic_uiom_reg *uiomr)
 {
+	struct list_head *chunk_list = &uiomr->chunk_list;
 	struct page **page_list;
 	struct scatterlist *sg;
 	struct usnic_uiom_chunk *chunk;
@@ -114,6 +103,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 	int flags;
 	dma_addr_t pa;
 	unsigned int gup_flags;
+	struct mm_struct *mm;
 
 	/*
 	 * If the combination of the addr and size requested for this memory
@@ -136,7 +126,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 
 	npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
 
-	down_write(&current->mm->mmap_sem);
+	uiomr->owning_mm = mm = current->mm;
+	down_write(&mm->mmap_sem);
 
 	locked = npages + current->mm->pinned_vm;
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -196,10 +187,12 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 out:
 	if (ret < 0)
 		usnic_uiom_put_pages(chunk_list, 0);
-	else
-		current->mm->pinned_vm = locked;
+	else {
+		mm->pinned_vm = locked;
+		mmgrab(uiomr->owning_mm);
+	}
 
-	up_write(&current->mm->mmap_sem);
+	up_write(&mm->mmap_sem);
 	free_page((unsigned long) page_list);
 	return ret;
 }
@@ -379,7 +372,7 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
 	uiomr->pd = pd;
 
 	err = usnic_uiom_get_pages(addr, size, writable, dmasync,
-					&uiomr->chunk_list);
+				   uiomr);
 	if (err) {
 		usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n",
 				vpn_start, vpn_last, err);
@@ -426,29 +419,39 @@ out_put_intervals:
 out_put_pages:
 	usnic_uiom_put_pages(&uiomr->chunk_list, 0);
 	spin_unlock(&pd->lock);
+	mmdrop(uiomr->owning_mm);
 out_free_uiomr:
 	kfree(uiomr);
 	return ERR_PTR(err);
 }
 
-void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
-			    struct ib_ucontext *ucontext)
+static void __usnic_uiom_release_tail(struct usnic_uiom_reg *uiomr)
 {
-	struct task_struct *task;
-	struct mm_struct *mm;
-	unsigned long diff;
+	mmdrop(uiomr->owning_mm);
+	kfree(uiomr);
+}
 
-	__usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
+static inline size_t usnic_uiom_num_pages(struct usnic_uiom_reg *uiomr)
+{
+	return PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+}
 
-	task = get_pid_task(ucontext->tgid, PIDTYPE_PID);
-	if (!task)
-		goto out;
-	mm = get_task_mm(task);
-	put_task_struct(task);
-	if (!mm)
-		goto out;
+static void usnic_uiom_release_defer(struct work_struct *work)
+{
+	struct usnic_uiom_reg *uiomr =
+		container_of(work, struct usnic_uiom_reg, work);
 
-	diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+	down_write(&uiomr->owning_mm->mmap_sem);
+	uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
+	up_write(&uiomr->owning_mm->mmap_sem);
+
+	__usnic_uiom_release_tail(uiomr);
+}
+
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
+			    struct ib_ucontext *context)
+{
+	__usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
 
 	/*
 	 * We may be called with the mm's mmap_sem already held.  This
@@ -456,25 +459,21 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
 	 * the last reference to our file and calls our release
 	 * method.  If there are memory regions to destroy, we'll end
 	 * up here and not be able to take the mmap_sem.  In that case
-	 * we defer the vm_locked accounting to the system workqueue.
+	 * we defer the vm_locked accounting to a workqueue.
 	 */
-	if (ucontext->closing) {
-		if (!down_write_trylock(&mm->mmap_sem)) {
-			INIT_WORK(&uiomr->work, usnic_uiom_reg_account);
-			uiomr->mm = mm;
-			uiomr->diff = diff;
-
+	if (context->closing) {
+		if (!down_write_trylock(&uiomr->owning_mm->mmap_sem)) {
+			INIT_WORK(&uiomr->work, usnic_uiom_release_defer);
 			queue_work(usnic_uiom_wq, &uiomr->work);
 			return;
 		}
-	} else
-		down_write(&mm->mmap_sem);
+	} else {
+		down_write(&uiomr->owning_mm->mmap_sem);
+	}
+	uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
+	up_write(&uiomr->owning_mm->mmap_sem);
 
-	mm->pinned_vm -= diff;
-	up_write(&mm->mmap_sem);
-	mmput(mm);
-out:
-	kfree(uiomr);
+	__usnic_uiom_release_tail(uiomr);
 }
 
 struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h
index 8c096acff123..b86a9731071b 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.h
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.h
@@ -71,8 +71,7 @@ struct usnic_uiom_reg {
 	int				writable;
 	struct list_head		chunk_list;
 	struct work_struct		work;
-	struct mm_struct		*mm;
-	unsigned long			diff;
+	struct mm_struct		*owning_mm;
 };
 
 struct usnic_uiom_chunk {
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index a5719899f49a..398443f43dc3 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -65,32 +65,36 @@ static struct workqueue_struct *event_wq;
 static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context);
 static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context);
 
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
-			char *buf)
+static ssize_t hca_type_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
 }
+static DEVICE_ATTR_RO(hca_type);
 
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
-			char *buf)
+static ssize_t hw_rev_show(struct device *device,
+			   struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%d\n", PVRDMA_REV_ID);
 }
+static DEVICE_ATTR_RO(hw_rev);
 
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
-			  char *buf)
+static ssize_t board_id_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%d\n", PVRDMA_BOARD_ID);
 }
+static DEVICE_ATTR_RO(board_id);
 
-static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,	   NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,	   NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
+static struct attribute *pvrdma_class_attributes[] = {
+	&dev_attr_hw_rev.attr,
+	&dev_attr_hca_type.attr,
+	&dev_attr_board_id.attr,
+	NULL,
+};
 
-static struct device_attribute *pvrdma_class_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type,
-	&dev_attr_board_id
+static const struct attribute_group pvrdma_attr_group = {
+	.attrs = pvrdma_class_attributes,
 };
 
 static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str)
@@ -160,9 +164,7 @@ static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev,
 static int pvrdma_register_device(struct pvrdma_dev *dev)
 {
 	int ret = -1;
-	int i = 0;
 
-	strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", IB_DEVICE_NAME_MAX);
 	dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
 	dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
 	dev->flags = 0;
@@ -266,24 +268,16 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
 	}
 	dev->ib_dev.driver_id = RDMA_DRIVER_VMW_PVRDMA;
 	spin_lock_init(&dev->srq_tbl_lock);
+	rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group);
 
-	ret = ib_register_device(&dev->ib_dev, NULL);
+	ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", NULL);
 	if (ret)
 		goto err_srq_free;
 
-	for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) {
-		ret = device_create_file(&dev->ib_dev.dev,
-					 pvrdma_class_attributes[i]);
-		if (ret)
-			goto err_class;
-	}
-
 	dev->ib_active = true;
 
 	return 0;
 
-err_class:
-	ib_unregister_device(&dev->ib_dev);
 err_srq_free:
 	kfree(dev->srq_tbl);
 err_qp_free:
@@ -735,7 +729,7 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
 
 	default:
 		dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
-			event, dev->ib_dev.name);
+			event, dev_name(&dev->ib_dev.dev));
 		break;
 	}
 }
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 60083c0363a5..cf22f57a9f0d 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -499,7 +499,7 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;
 
 	if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type,
-				attr_mask, IB_LINK_LAYER_ETHERNET)) {
+				attr_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig
index 98e798007f75..7df896a18d38 100644
--- a/drivers/infiniband/sw/rdmavt/Kconfig
+++ b/drivers/infiniband/sw/rdmavt/Kconfig
@@ -1,6 +1,6 @@
 config INFINIBAND_RDMAVT
 	tristate "RDMA verbs transport library"
-	depends on 64BIT && ARCH_DMA_ADDR_T_64BIT
+	depends on X86_64 && ARCH_DMA_ADDR_T_64BIT
 	depends on PCI
 	select DMA_VIRT_OPS
 	---help---
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 5ce403c6cddb..1735deb1a9d4 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -118,6 +118,187 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
 };
 EXPORT_SYMBOL(ib_rvt_state_ops);
 
+/* platform specific: return the last level cache (llc) size, in KiB */
+static int rvt_wss_llc_size(void)
+{
+	/* assume that the boot CPU value is universal for all CPUs */
+	return boot_cpu_data.x86_cache_size;
+}
+
+/* platform specific: cacheless copy */
+static void cacheless_memcpy(void *dst, void *src, size_t n)
+{
+	/*
+	 * Use the only available X64 cacheless copy.  Add a __user cast
+	 * to quiet sparse.  The src agument is already in the kernel so
+	 * there are no security issues.  The extra fault recovery machinery
+	 * is not invoked.
+	 */
+	__copy_user_nocache(dst, (void __user *)src, n, 0);
+}
+
+void rvt_wss_exit(struct rvt_dev_info *rdi)
+{
+	struct rvt_wss *wss = rdi->wss;
+
+	if (!wss)
+		return;
+
+	/* coded to handle partially initialized and repeat callers */
+	kfree(wss->entries);
+	wss->entries = NULL;
+	kfree(rdi->wss);
+	rdi->wss = NULL;
+}
+
+/**
+ * rvt_wss_init - Init wss data structures
+ *
+ * Return: 0 on success
+ */
+int rvt_wss_init(struct rvt_dev_info *rdi)
+{
+	unsigned int sge_copy_mode = rdi->dparms.sge_copy_mode;
+	unsigned int wss_threshold = rdi->dparms.wss_threshold;
+	unsigned int wss_clean_period = rdi->dparms.wss_clean_period;
+	long llc_size;
+	long llc_bits;
+	long table_size;
+	long table_bits;
+	struct rvt_wss *wss;
+	int node = rdi->dparms.node;
+
+	if (sge_copy_mode != RVT_SGE_COPY_ADAPTIVE) {
+		rdi->wss = NULL;
+		return 0;
+	}
+
+	rdi->wss = kzalloc_node(sizeof(*rdi->wss), GFP_KERNEL, node);
+	if (!rdi->wss)
+		return -ENOMEM;
+	wss = rdi->wss;
+
+	/* check for a valid percent range - default to 80 if none or invalid */
+	if (wss_threshold < 1 || wss_threshold > 100)
+		wss_threshold = 80;
+
+	/* reject a wildly large period */
+	if (wss_clean_period > 1000000)
+		wss_clean_period = 256;
+
+	/* reject a zero period */
+	if (wss_clean_period == 0)
+		wss_clean_period = 1;
+
+	/*
+	 * Calculate the table size - the next power of 2 larger than the
+	 * LLC size.  LLC size is in KiB.
+	 */
+	llc_size = rvt_wss_llc_size() * 1024;
+	table_size = roundup_pow_of_two(llc_size);
+
+	/* one bit per page in rounded up table */
+	llc_bits = llc_size / PAGE_SIZE;
+	table_bits = table_size / PAGE_SIZE;
+	wss->pages_mask = table_bits - 1;
+	wss->num_entries = table_bits / BITS_PER_LONG;
+
+	wss->threshold = (llc_bits * wss_threshold) / 100;
+	if (wss->threshold == 0)
+		wss->threshold = 1;
+
+	wss->clean_period = wss_clean_period;
+	atomic_set(&wss->clean_counter, wss_clean_period);
+
+	wss->entries = kcalloc_node(wss->num_entries, sizeof(*wss->entries),
+				    GFP_KERNEL, node);
+	if (!wss->entries) {
+		rvt_wss_exit(rdi);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Advance the clean counter.  When the clean period has expired,
+ * clean an entry.
+ *
+ * This is implemented in atomics to avoid locking.  Because multiple
+ * variables are involved, it can be racy which can lead to slightly
+ * inaccurate information.  Since this is only a heuristic, this is
+ * OK.  Any innaccuracies will clean themselves out as the counter
+ * advances.  That said, it is unlikely the entry clean operation will
+ * race - the next possible racer will not start until the next clean
+ * period.
+ *
+ * The clean counter is implemented as a decrement to zero.  When zero
+ * is reached an entry is cleaned.
+ */
+static void wss_advance_clean_counter(struct rvt_wss *wss)
+{
+	int entry;
+	int weight;
+	unsigned long bits;
+
+	/* become the cleaner if we decrement the counter to zero */
+	if (atomic_dec_and_test(&wss->clean_counter)) {
+		/*
+		 * Set, not add, the clean period.  This avoids an issue
+		 * where the counter could decrement below the clean period.
+		 * Doing a set can result in lost decrements, slowing the
+		 * clean advance.  Since this a heuristic, this possible
+		 * slowdown is OK.
+		 *
+		 * An alternative is to loop, advancing the counter by a
+		 * clean period until the result is > 0. However, this could
+		 * lead to several threads keeping another in the clean loop.
+		 * This could be mitigated by limiting the number of times
+		 * we stay in the loop.
+		 */
+		atomic_set(&wss->clean_counter, wss->clean_period);
+
+		/*
+		 * Uniquely grab the entry to clean and move to next.
+		 * The current entry is always the lower bits of
+		 * wss.clean_entry.  The table size, wss.num_entries,
+		 * is always a power-of-2.
+		 */
+		entry = (atomic_inc_return(&wss->clean_entry) - 1)
+			& (wss->num_entries - 1);
+
+		/* clear the entry and count the bits */
+		bits = xchg(&wss->entries[entry], 0);
+		weight = hweight64((u64)bits);
+		/* only adjust the contended total count if needed */
+		if (weight)
+			atomic_sub(weight, &wss->total_count);
+	}
+}
+
+/*
+ * Insert the given address into the working set array.
+ */
+static void wss_insert(struct rvt_wss *wss, void *address)
+{
+	u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss->pages_mask;
+	u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
+	u32 nr = page & (BITS_PER_LONG - 1);
+
+	if (!test_and_set_bit(nr, &wss->entries[entry]))
+		atomic_inc(&wss->total_count);
+
+	wss_advance_clean_counter(wss);
+}
+
+/*
+ * Is the working set larger than the threshold?
+ */
+static inline bool wss_exceeds_threshold(struct rvt_wss *wss)
+{
+	return atomic_read(&wss->total_count) >= wss->threshold;
+}
+
 static void get_map_page(struct rvt_qpn_table *qpt,
 			 struct rvt_qpn_map *map)
 {
@@ -1164,11 +1345,8 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	int lastwqe = 0;
 	int mig = 0;
 	int pmtu = 0; /* for gcc warning only */
-	enum rdma_link_layer link;
 	int opa_ah;
 
-	link = rdma_port_get_link_layer(ibqp->device, qp->port_num);
-
 	spin_lock_irq(&qp->r_lock);
 	spin_lock(&qp->s_hlock);
 	spin_lock(&qp->s_lock);
@@ -1179,7 +1357,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	opa_ah = rdma_cap_opa_ah(ibqp->device, qp->port_num);
 
 	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-				attr_mask, link))
+				attr_mask))
 		goto inval;
 
 	if (rdi->driver_f.check_modify_qp &&
@@ -1718,7 +1896,7 @@ static inline int rvt_qp_is_avail(
  */
 static int rvt_post_one_wr(struct rvt_qp *qp,
 			   const struct ib_send_wr *wr,
-			   int *call_send)
+			   bool *call_send)
 {
 	struct rvt_swqe *wqe;
 	u32 next;
@@ -1823,15 +2001,11 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
 		wqe->wr.num_sge = j;
 	}
 
-	/* general part of wqe valid - allow for driver checks */
-	if (rdi->driver_f.check_send_wqe) {
-		ret = rdi->driver_f.check_send_wqe(qp, wqe);
-		if (ret < 0)
-			goto bail_inval_free;
-		if (ret)
-			*call_send = ret;
-	}
-
+	/*
+	 * Calculate and set SWQE PSN values prior to handing it off
+	 * to the driver's check routine. This give the driver the
+	 * opportunity to adjust PSN values based on internal checks.
+	 */
 	log_pmtu = qp->log_pmtu;
 	if (qp->ibqp.qp_type != IB_QPT_UC &&
 	    qp->ibqp.qp_type != IB_QPT_RC) {
@@ -1856,8 +2030,18 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
 				(wqe->length ?
 					((wqe->length - 1) >> log_pmtu) :
 					0);
-		qp->s_next_psn = wqe->lpsn + 1;
 	}
+
+	/* general part of wqe valid - allow for driver checks */
+	if (rdi->driver_f.setup_wqe) {
+		ret = rdi->driver_f.setup_wqe(qp, wqe, call_send);
+		if (ret < 0)
+			goto bail_inval_free_ref;
+	}
+
+	if (!(rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL))
+		qp->s_next_psn = wqe->lpsn + 1;
+
 	if (unlikely(reserved_op)) {
 		wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
 		rvt_qp_wqe_reserve(qp, wqe);
@@ -1871,6 +2055,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
 
 	return 0;
 
+bail_inval_free_ref:
+	if (qp->ibqp.qp_type != IB_QPT_UC &&
+	    qp->ibqp.qp_type != IB_QPT_RC)
+		atomic_dec(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
 bail_inval_free:
 	/* release mr holds */
 	while (j) {
@@ -1897,7 +2085,7 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
 	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
 	unsigned long flags = 0;
-	int call_send;
+	bool call_send;
 	unsigned nreq = 0;
 	int err = 0;
 
@@ -1930,7 +2118,11 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 bail:
 	spin_unlock_irqrestore(&qp->s_hlock, flags);
 	if (nreq) {
-		if (call_send)
+		/*
+		 * Only call do_send if there is exactly one packet, and the
+		 * driver said it was ok.
+		 */
+		if (nreq == 1 && call_send)
 			rdi->driver_f.do_send(qp);
 		else
 			rdi->driver_f.schedule_send_no_lock(qp);
@@ -2465,3 +2657,454 @@ void rvt_qp_iter(struct rvt_dev_info *rdi,
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL(rvt_qp_iter);
+
+/*
+ * This should be called with s_lock held.
+ */
+void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
+		       enum ib_wc_status status)
+{
+	u32 old_last, last;
+	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
+		return;
+
+	last = qp->s_last;
+	old_last = last;
+	trace_rvt_qp_send_completion(qp, wqe, last);
+	if (++last >= qp->s_size)
+		last = 0;
+	trace_rvt_qp_send_completion(qp, wqe, last);
+	qp->s_last = last;
+	/* See post_send() */
+	barrier();
+	rvt_put_swqe(wqe);
+	if (qp->ibqp.qp_type == IB_QPT_UD ||
+	    qp->ibqp.qp_type == IB_QPT_SMI ||
+	    qp->ibqp.qp_type == IB_QPT_GSI)
+		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
+
+	rvt_qp_swqe_complete(qp,
+			     wqe,
+			     rdi->wc_opcode[wqe->wr.opcode],
+			     status);
+
+	if (qp->s_acked == old_last)
+		qp->s_acked = last;
+	if (qp->s_cur == old_last)
+		qp->s_cur = last;
+	if (qp->s_tail == old_last)
+		qp->s_tail = last;
+	if (qp->state == IB_QPS_SQD && last == qp->s_cur)
+		qp->s_draining = 0;
+}
+EXPORT_SYMBOL(rvt_send_complete);
+
+/**
+ * rvt_copy_sge - copy data to SGE memory
+ * @qp: associated QP
+ * @ss: the SGE state
+ * @data: the data to copy
+ * @length: the length of the data
+ * @release: boolean to release MR
+ * @copy_last: do a separate copy of the last 8 bytes
+ */
+void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
+		  void *data, u32 length,
+		  bool release, bool copy_last)
+{
+	struct rvt_sge *sge = &ss->sge;
+	int i;
+	bool in_last = false;
+	bool cacheless_copy = false;
+	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+	struct rvt_wss *wss = rdi->wss;
+	unsigned int sge_copy_mode = rdi->dparms.sge_copy_mode;
+
+	if (sge_copy_mode == RVT_SGE_COPY_CACHELESS) {
+		cacheless_copy = length >= PAGE_SIZE;
+	} else if (sge_copy_mode == RVT_SGE_COPY_ADAPTIVE) {
+		if (length >= PAGE_SIZE) {
+			/*
+			 * NOTE: this *assumes*:
+			 * o The first vaddr is the dest.
+			 * o If multiple pages, then vaddr is sequential.
+			 */
+			wss_insert(wss, sge->vaddr);
+			if (length >= (2 * PAGE_SIZE))
+				wss_insert(wss, (sge->vaddr + PAGE_SIZE));
+
+			cacheless_copy = wss_exceeds_threshold(wss);
+		} else {
+			wss_advance_clean_counter(wss);
+		}
+	}
+
+	if (copy_last) {
+		if (length > 8) {
+			length -= 8;
+		} else {
+			copy_last = false;
+			in_last = true;
+		}
+	}
+
+again:
+	while (length) {
+		u32 len = rvt_get_sge_length(sge, length);
+
+		WARN_ON_ONCE(len == 0);
+		if (unlikely(in_last)) {
+			/* enforce byte transfer ordering */
+			for (i = 0; i < len; i++)
+				((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
+		} else if (cacheless_copy) {
+			cacheless_memcpy(sge->vaddr, data, len);
+		} else {
+			memcpy(sge->vaddr, data, len);
+		}
+		rvt_update_sge(ss, len, release);
+		data += len;
+		length -= len;
+	}
+
+	if (copy_last) {
+		copy_last = false;
+		in_last = true;
+		length = 8;
+		goto again;
+	}
+}
+EXPORT_SYMBOL(rvt_copy_sge);
+
+/**
+ * ruc_loopback - handle UC and RC loopback requests
+ * @sqp: the sending QP
+ *
+ * This is called from rvt_do_send() to forward a WQE addressed to the same HFI
+ * Note that although we are single threaded due to the send engine, we still
+ * have to protect against post_send().  We don't have to worry about
+ * receive interrupts since this is a connected protocol and all packets
+ * will pass through here.
+ */
+void rvt_ruc_loopback(struct rvt_qp *sqp)
+{
+	struct rvt_ibport *rvp =  NULL;
+	struct rvt_dev_info *rdi = ib_to_rvt(sqp->ibqp.device);
+	struct rvt_qp *qp;
+	struct rvt_swqe *wqe;
+	struct rvt_sge *sge;
+	unsigned long flags;
+	struct ib_wc wc;
+	u64 sdata;
+	atomic64_t *maddr;
+	enum ib_wc_status send_status;
+	bool release;
+	int ret;
+	bool copy_last = false;
+	int local_ops = 0;
+
+	rcu_read_lock();
+	rvp = rdi->ports[sqp->port_num - 1];
+
+	/*
+	 * Note that we check the responder QP state after
+	 * checking the requester's state.
+	 */
+
+	qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), rvp,
+			    sqp->remote_qpn);
+
+	spin_lock_irqsave(&sqp->s_lock, flags);
+
+	/* Return if we are already busy processing a work request. */
+	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
+	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
+		goto unlock;
+
+	sqp->s_flags |= RVT_S_BUSY;
+
+again:
+	if (sqp->s_last == READ_ONCE(sqp->s_head))
+		goto clr_busy;
+	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
+
+	/* Return if it is not OK to start a new work request. */
+	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
+			goto clr_busy;
+		/* We are in the error state, flush the work request. */
+		send_status = IB_WC_WR_FLUSH_ERR;
+		goto flush_send;
+	}
+
+	/*
+	 * We can rely on the entry not changing without the s_lock
+	 * being held until we update s_last.
+	 * We increment s_cur to indicate s_last is in progress.
+	 */
+	if (sqp->s_last == sqp->s_cur) {
+		if (++sqp->s_cur >= sqp->s_size)
+			sqp->s_cur = 0;
+	}
+	spin_unlock_irqrestore(&sqp->s_lock, flags);
+
+	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
+	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
+		rvp->n_pkt_drops++;
+		/*
+		 * For RC, the requester would timeout and retry so
+		 * shortcut the timeouts and just signal too many retries.
+		 */
+		if (sqp->ibqp.qp_type == IB_QPT_RC)
+			send_status = IB_WC_RETRY_EXC_ERR;
+		else
+			send_status = IB_WC_SUCCESS;
+		goto serr;
+	}
+
+	memset(&wc, 0, sizeof(wc));
+	send_status = IB_WC_SUCCESS;
+
+	release = true;
+	sqp->s_sge.sge = wqe->sg_list[0];
+	sqp->s_sge.sg_list = wqe->sg_list + 1;
+	sqp->s_sge.num_sge = wqe->wr.num_sge;
+	sqp->s_len = wqe->length;
+	switch (wqe->wr.opcode) {
+	case IB_WR_REG_MR:
+		goto send_comp;
+
+	case IB_WR_LOCAL_INV:
+		if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
+			if (rvt_invalidate_rkey(sqp,
+						wqe->wr.ex.invalidate_rkey))
+				send_status = IB_WC_LOC_PROT_ERR;
+			local_ops = 1;
+		}
+		goto send_comp;
+
+	case IB_WR_SEND_WITH_INV:
+		if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
+			wc.wc_flags = IB_WC_WITH_INVALIDATE;
+			wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
+		}
+		goto send;
+
+	case IB_WR_SEND_WITH_IMM:
+		wc.wc_flags = IB_WC_WITH_IMM;
+		wc.ex.imm_data = wqe->wr.ex.imm_data;
+		/* FALLTHROUGH */
+	case IB_WR_SEND:
+send:
+		ret = rvt_get_rwqe(qp, false);
+		if (ret < 0)
+			goto op_err;
+		if (!ret)
+			goto rnr_nak;
+		break;
+
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+			goto inv_err;
+		wc.wc_flags = IB_WC_WITH_IMM;
+		wc.ex.imm_data = wqe->wr.ex.imm_data;
+		ret = rvt_get_rwqe(qp, true);
+		if (ret < 0)
+			goto op_err;
+		if (!ret)
+			goto rnr_nak;
+		/* skip copy_last set and qp_access_flags recheck */
+		goto do_write;
+	case IB_WR_RDMA_WRITE:
+		copy_last = rvt_is_user_qp(qp);
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+			goto inv_err;
+do_write:
+		if (wqe->length == 0)
+			break;
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
+					  wqe->rdma_wr.remote_addr,
+					  wqe->rdma_wr.rkey,
+					  IB_ACCESS_REMOTE_WRITE)))
+			goto acc_err;
+		qp->r_sge.sg_list = NULL;
+		qp->r_sge.num_sge = 1;
+		qp->r_sge.total_len = wqe->length;
+		break;
+
+	case IB_WR_RDMA_READ:
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+			goto inv_err;
+		if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
+					  wqe->rdma_wr.remote_addr,
+					  wqe->rdma_wr.rkey,
+					  IB_ACCESS_REMOTE_READ)))
+			goto acc_err;
+		release = false;
+		sqp->s_sge.sg_list = NULL;
+		sqp->s_sge.num_sge = 1;
+		qp->r_sge.sge = wqe->sg_list[0];
+		qp->r_sge.sg_list = wqe->sg_list + 1;
+		qp->r_sge.num_sge = wqe->wr.num_sge;
+		qp->r_sge.total_len = wqe->length;
+		break;
+
+	case IB_WR_ATOMIC_CMP_AND_SWP:
+	case IB_WR_ATOMIC_FETCH_AND_ADD:
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+			goto inv_err;
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+					  wqe->atomic_wr.remote_addr,
+					  wqe->atomic_wr.rkey,
+					  IB_ACCESS_REMOTE_ATOMIC)))
+			goto acc_err;
+		/* Perform atomic OP and save result. */
+		maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
+		sdata = wqe->atomic_wr.compare_add;
+		*(u64 *)sqp->s_sge.sge.vaddr =
+			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+			(u64)atomic64_add_return(sdata, maddr) - sdata :
+			(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
+				      sdata, wqe->atomic_wr.swap);
+		rvt_put_mr(qp->r_sge.sge.mr);
+		qp->r_sge.num_sge = 0;
+		goto send_comp;
+
+	default:
+		send_status = IB_WC_LOC_QP_OP_ERR;
+		goto serr;
+	}
+
+	sge = &sqp->s_sge.sge;
+	while (sqp->s_len) {
+		u32 len = sqp->s_len;
+
+		if (len > sge->length)
+			len = sge->length;
+		if (len > sge->sge_length)
+			len = sge->sge_length;
+		WARN_ON_ONCE(len == 0);
+		rvt_copy_sge(qp, &qp->r_sge, sge->vaddr,
+			     len, release, copy_last);
+		sge->vaddr += len;
+		sge->length -= len;
+		sge->sge_length -= len;
+		if (sge->sge_length == 0) {
+			if (!release)
+				rvt_put_mr(sge->mr);
+			if (--sqp->s_sge.num_sge)
+				*sge = *sqp->s_sge.sg_list++;
+		} else if (sge->length == 0 && sge->mr->lkey) {
+			if (++sge->n >= RVT_SEGSZ) {
+				if (++sge->m >= sge->mr->mapsz)
+					break;
+				sge->n = 0;
+			}
+			sge->vaddr =
+				sge->mr->map[sge->m]->segs[sge->n].vaddr;
+			sge->length =
+				sge->mr->map[sge->m]->segs[sge->n].length;
+		}
+		sqp->s_len -= len;
+	}
+	if (release)
+		rvt_put_ss(&qp->r_sge);
+
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
+		goto send_comp;
+
+	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+	else
+		wc.opcode = IB_WC_RECV;
+	wc.wr_id = qp->r_wr_id;
+	wc.status = IB_WC_SUCCESS;
+	wc.byte_len = wqe->length;
+	wc.qp = &qp->ibqp;
+	wc.src_qp = qp->remote_qpn;
+	wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
+	wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
+	wc.port_num = 1;
+	/* Signal completion event if the solicited bit is set. */
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+		     wqe->wr.send_flags & IB_SEND_SOLICITED);
+
+send_comp:
+	spin_lock_irqsave(&sqp->s_lock, flags);
+	rvp->n_loop_pkts++;
+flush_send:
+	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
+	rvt_send_complete(sqp, wqe, send_status);
+	if (local_ops) {
+		atomic_dec(&sqp->local_ops_pending);
+		local_ops = 0;
+	}
+	goto again;
+
+rnr_nak:
+	/* Handle RNR NAK */
+	if (qp->ibqp.qp_type == IB_QPT_UC)
+		goto send_comp;
+	rvp->n_rnr_naks++;
+	/*
+	 * Note: we don't need the s_lock held since the BUSY flag
+	 * makes this single threaded.
+	 */
+	if (sqp->s_rnr_retry == 0) {
+		send_status = IB_WC_RNR_RETRY_EXC_ERR;
+		goto serr;
+	}
+	if (sqp->s_rnr_retry_cnt < 7)
+		sqp->s_rnr_retry--;
+	spin_lock_irqsave(&sqp->s_lock, flags);
+	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
+		goto clr_busy;
+	rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
+				IB_AETH_CREDIT_SHIFT);
+	goto clr_busy;
+
+op_err:
+	send_status = IB_WC_REM_OP_ERR;
+	wc.status = IB_WC_LOC_QP_OP_ERR;
+	goto err;
+
+inv_err:
+	send_status = IB_WC_REM_INV_REQ_ERR;
+	wc.status = IB_WC_LOC_QP_OP_ERR;
+	goto err;
+
+acc_err:
+	send_status = IB_WC_REM_ACCESS_ERR;
+	wc.status = IB_WC_LOC_PROT_ERR;
+err:
+	/* responder goes to error state */
+	rvt_rc_error(qp, wc.status);
+
+serr:
+	spin_lock_irqsave(&sqp->s_lock, flags);
+	rvt_send_complete(sqp, wqe, send_status);
+	if (sqp->ibqp.qp_type == IB_QPT_RC) {
+		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+
+		sqp->s_flags &= ~RVT_S_BUSY;
+		spin_unlock_irqrestore(&sqp->s_lock, flags);
+		if (lastwqe) {
+			struct ib_event ev;
+
+			ev.device = sqp->ibqp.device;
+			ev.element.qp = &sqp->ibqp;
+			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
+		}
+		goto done;
+	}
+clr_busy:
+	sqp->s_flags &= ~RVT_S_BUSY;
+unlock:
+	spin_unlock_irqrestore(&sqp->s_lock, flags);
+done:
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(rvt_ruc_loopback);
diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h
index 264811fdc530..6d883972e0b8 100644
--- a/drivers/infiniband/sw/rdmavt/qp.h
+++ b/drivers/infiniband/sw/rdmavt/qp.h
@@ -66,4 +66,6 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 		  const struct ib_send_wr **bad_wr);
 int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
 		      const struct ib_recv_wr **bad_wr);
+int rvt_wss_init(struct rvt_dev_info *rdi);
+void rvt_wss_exit(struct rvt_dev_info *rdi);
 #endif          /* DEF_RVTQP_H */
diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h
index 0ef25fc49f25..d5df352eadb1 100644
--- a/drivers/infiniband/sw/rdmavt/trace_tx.h
+++ b/drivers/infiniband/sw/rdmavt/trace_tx.h
@@ -153,6 +153,48 @@ TRACE_EVENT(
 	)
 );
 
+TRACE_EVENT(
+	rvt_qp_send_completion,
+	TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 idx),
+	TP_ARGS(qp, wqe, idx),
+	TP_STRUCT__entry(
+		RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+		__field(struct rvt_swqe *, wqe)
+		__field(u64, wr_id)
+		__field(u32, qpn)
+		__field(u32, qpt)
+		__field(u32, length)
+		__field(u32, idx)
+		__field(u32, ssn)
+		__field(enum ib_wr_opcode, opcode)
+		__field(int, send_flags)
+	),
+	TP_fast_assign(
+		RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+		__entry->wqe = wqe;
+		__entry->wr_id = wqe->wr.wr_id;
+		__entry->qpn = qp->ibqp.qp_num;
+		__entry->qpt = qp->ibqp.qp_type;
+		__entry->length = wqe->length;
+		__entry->idx = idx;
+		__entry->ssn = wqe->ssn;
+		__entry->opcode = wqe->wr.opcode;
+		__entry->send_flags = wqe->wr.send_flags;
+	),
+	TP_printk(
+		"[%s] qpn 0x%x qpt %u wqe %p idx %u wr_id %llx length %u ssn %u opcode %x send_flags %x",
+		__get_str(dev),
+		__entry->qpn,
+		__entry->qpt,
+		__entry->wqe,
+		__entry->idx,
+		__entry->wr_id,
+		__entry->length,
+		__entry->ssn,
+		__entry->opcode,
+		__entry->send_flags
+	)
+);
 #endif /* __RVT_TRACE_TX_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 17e4abc067af..723d3daf2eba 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -774,6 +774,13 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
 		goto bail_no_mr;
 	}
 
+	/* Memory Working Set Size */
+	ret = rvt_wss_init(rdi);
+	if (ret) {
+		rvt_pr_err(rdi, "Error in WSS init.\n");
+		goto bail_mr;
+	}
+
 	/* Completion queues */
 	spin_lock_init(&rdi->n_cqs_lock);
 
@@ -828,10 +835,11 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
 
 	rdi->ibdev.driver_id = driver_id;
 	/* We are now good to announce we exist */
-	ret =  ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback);
+	ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev),
+				 rdi->driver_f.port_callback);
 	if (ret) {
 		rvt_pr_err(rdi, "Failed to register driver with ib core.\n");
-		goto bail_mr;
+		goto bail_wss;
 	}
 
 	rvt_create_mad_agents(rdi);
@@ -839,6 +847,8 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
 	rvt_pr_info(rdi, "Registration with rdmavt done.\n");
 	return ret;
 
+bail_wss:
+	rvt_wss_exit(rdi);
 bail_mr:
 	rvt_mr_exit(rdi);
 
@@ -862,6 +872,7 @@ void rvt_unregister_device(struct rvt_dev_info *rdi)
 	rvt_free_mad_agents(rdi);
 
 	ib_unregister_device(&rdi->ibdev);
+	rvt_wss_exit(rdi);
 	rvt_mr_exit(rdi);
 	rvt_qp_exit(rdi);
 }
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 10999fa69281..383e65c7bbc0 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -103,7 +103,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
 	rxe->attr.max_res_rd_atom		= RXE_MAX_RES_RD_ATOM;
 	rxe->attr.max_qp_init_rd_atom		= RXE_MAX_QP_INIT_RD_ATOM;
 	rxe->attr.max_ee_init_rd_atom		= RXE_MAX_EE_INIT_RD_ATOM;
-	rxe->attr.atomic_cap			= RXE_ATOMIC_CAP;
+	rxe->attr.atomic_cap			= IB_ATOMIC_HCA;
 	rxe->attr.max_ee			= RXE_MAX_EE;
 	rxe->attr.max_rdd			= RXE_MAX_RDD;
 	rxe->attr.max_mw			= RXE_MAX_MW;
@@ -128,9 +128,9 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
 /* initialize port attributes */
 static int rxe_init_port_param(struct rxe_port *port)
 {
-	port->attr.state		= RXE_PORT_STATE;
-	port->attr.max_mtu		= RXE_PORT_MAX_MTU;
-	port->attr.active_mtu		= RXE_PORT_ACTIVE_MTU;
+	port->attr.state		= IB_PORT_DOWN;
+	port->attr.max_mtu		= IB_MTU_4096;
+	port->attr.active_mtu		= IB_MTU_256;
 	port->attr.gid_tbl_len		= RXE_PORT_GID_TBL_LEN;
 	port->attr.port_cap_flags	= RXE_PORT_PORT_CAP_FLAGS;
 	port->attr.max_msg_sz		= RXE_PORT_MAX_MSG_SZ;
@@ -147,8 +147,7 @@ static int rxe_init_port_param(struct rxe_port *port)
 	port->attr.active_width		= RXE_PORT_ACTIVE_WIDTH;
 	port->attr.active_speed		= RXE_PORT_ACTIVE_SPEED;
 	port->attr.phys_state		= RXE_PORT_PHYS_STATE;
-	port->mtu_cap			=
-				ib_mtu_enum_to_int(RXE_PORT_ACTIVE_MTU);
+	port->mtu_cap			= ib_mtu_enum_to_int(IB_MTU_256);
 	port->subnet_prefix		= cpu_to_be64(RXE_PORT_SUBNET_PREFIX);
 
 	return 0;
@@ -300,7 +299,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
 	mtu = eth_mtu_int_to_enum(ndev_mtu);
 
 	/* Make sure that new MTU in range */
-	mtu = mtu ? min_t(enum ib_mtu, mtu, RXE_PORT_MAX_MTU) : IB_MTU_256;
+	mtu = mtu ? min_t(enum ib_mtu, mtu, IB_MTU_4096) : IB_MTU_256;
 
 	port->attr.active_mtu = mtu;
 	port->mtu_cap = ib_mtu_enum_to_int(mtu);
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 83311dd07019..ea089cb091ad 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -191,6 +191,7 @@ static inline void reset_retry_counters(struct rxe_qp *qp)
 {
 	qp->comp.retry_cnt = qp->attr.retry_cnt;
 	qp->comp.rnr_retry = qp->attr.rnr_retry;
+	qp->comp.started_retry = 0;
 }
 
 static inline enum comp_state check_psn(struct rxe_qp *qp,
@@ -253,6 +254,17 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
 		if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
 		    pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
+			/* read retries of partial data may restart from
+			 * read response first or response only.
+			 */
+			if ((pkt->psn == wqe->first_psn &&
+			     pkt->opcode ==
+			     IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) ||
+			    (wqe->first_psn == wqe->last_psn &&
+			     pkt->opcode ==
+			     IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY))
+				break;
+
 			return COMPST_ERROR;
 		}
 		break;
@@ -499,11 +511,11 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
 					   struct rxe_pkt_info *pkt,
 					   struct rxe_send_wqe *wqe)
 {
-	qp->comp.opcode = -1;
-
-	if (pkt) {
-		if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
-			qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+	if (pkt && wqe->state == wqe_state_pending) {
+		if (psn_compare(wqe->last_psn, qp->comp.psn) >= 0) {
+			qp->comp.psn = (wqe->last_psn + 1) & BTH_PSN_MASK;
+			qp->comp.opcode = -1;
+		}
 
 		if (qp->req.wait_psn) {
 			qp->req.wait_psn = 0;
@@ -676,6 +688,20 @@ int rxe_completer(void *arg)
 				goto exit;
 			}
 
+			/* if we've started a retry, don't start another
+			 * retry sequence, unless this is a timeout.
+			 */
+			if (qp->comp.started_retry &&
+			    !qp->comp.timeout_retry) {
+				if (pkt) {
+					rxe_drop_ref(pkt->qp);
+					kfree_skb(skb);
+					skb = NULL;
+				}
+
+				goto done;
+			}
+
 			if (qp->comp.retry_cnt > 0) {
 				if (qp->comp.retry_cnt != 7)
 					qp->comp.retry_cnt--;
@@ -692,6 +718,7 @@ int rxe_completer(void *arg)
 					rxe_counter_inc(rxe,
 							RXE_CNT_COMP_RETRY);
 					qp->req.need_retry = 1;
+					qp->comp.started_retry = 1;
 					rxe_run_task(&qp->req.task, 1);
 				}
 
@@ -701,7 +728,7 @@ int rxe_completer(void *arg)
 					skb = NULL;
 				}
 
-				goto exit;
+				goto done;
 
 			} else {
 				rxe_counter_inc(rxe, RXE_CNT_RETRY_EXCEEDED);
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 2ee4b08b00ea..a57276f2cb84 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -30,7 +30,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-
+#include <linux/vmalloc.h>
 #include "rxe.h"
 #include "rxe_loc.h"
 #include "rxe_queue.h"
@@ -97,7 +97,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
 	err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context,
 			   cq->queue->buf, cq->queue->buf_size, &cq->queue->ip);
 	if (err) {
-		kvfree(cq->queue->buf);
+		vfree(cq->queue->buf);
 		kfree(cq->queue);
 		return err;
 	}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 87d14f7ef21b..afd53f57a62b 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -144,8 +144,7 @@ void rxe_loopback(struct sk_buff *skb);
 int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb);
 struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
 				int paylen, struct rxe_pkt_info *pkt);
-int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
-		struct sk_buff *skb, u32 *crc);
+int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc);
 enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num);
 const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num);
 struct device *rxe_dma_device(struct rxe_dev *rxe);
@@ -196,7 +195,7 @@ static inline int qp_mtu(struct rxe_qp *qp)
 	if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
 		return qp->attr.path_mtu;
 	else
-		return RXE_PORT_MAX_MTU;
+		return IB_MTU_4096;
 }
 
 static inline int rcv_wqe_size(int max_sge)
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index dff605fdf60f..9d3916b93f23 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -573,33 +573,20 @@ struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
 	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
 	int index = key >> 8;
 
-	if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) {
-		mem = rxe_pool_get_index(&rxe->mr_pool, index);
-		if (!mem)
-			goto err1;
-	} else {
-		goto err1;
+	mem = rxe_pool_get_index(&rxe->mr_pool, index);
+	if (!mem)
+		return NULL;
+
+	if (unlikely((type == lookup_local && mem->lkey != key) ||
+		     (type == lookup_remote && mem->rkey != key) ||
+		     mem->pd != pd ||
+		     (access && !(access & mem->access)) ||
+		     mem->state != RXE_MEM_STATE_VALID)) {
+		rxe_drop_ref(mem);
+		mem = NULL;
 	}
 
-	if ((type == lookup_local && mem->lkey != key) ||
-	    (type == lookup_remote && mem->rkey != key))
-		goto err2;
-
-	if (mem->pd != pd)
-		goto err2;
-
-	if (access && !(access & mem->access))
-		goto err2;
-
-	if (mem->state != RXE_MEM_STATE_VALID)
-		goto err2;
-
 	return mem;
-
-err2:
-	rxe_drop_ref(mem);
-err1:
-	return NULL;
 }
 
 int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 8094cbaa54a9..40e82e0f6c2d 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -72,7 +72,7 @@ struct rxe_dev *get_rxe_by_name(const char *name)
 
 	spin_lock_bh(&dev_list_lock);
 	list_for_each_entry(rxe, &rxe_dev_list, list) {
-		if (!strcmp(name, rxe->ib_dev.name)) {
+		if (!strcmp(name, dev_name(&rxe->ib_dev.dev))) {
 			found = rxe;
 			break;
 		}
@@ -182,19 +182,11 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
 
 #endif
 
-static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
+static struct dst_entry *rxe_find_route(struct net_device *ndev,
 					struct rxe_qp *qp,
 					struct rxe_av *av)
 {
-	const struct ib_gid_attr *attr;
 	struct dst_entry *dst = NULL;
-	struct net_device *ndev;
-
-	attr = rdma_get_gid_attr(&rxe->ib_dev, qp->attr.port_num,
-				 av->grh.sgid_index);
-	if (IS_ERR(attr))
-		return NULL;
-	ndev = attr->ndev;
 
 	if (qp_type(qp) == IB_QPT_RC)
 		dst = sk_dst_get(qp->sk->sk);
@@ -229,7 +221,6 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
 			sk_dst_set(qp->sk->sk, dst);
 		}
 	}
-	rdma_put_gid_attr(attr);
 	return dst;
 }
 
@@ -377,8 +368,8 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
 	ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
 }
 
-static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
-		    struct sk_buff *skb, struct rxe_av *av)
+static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb,
+		    struct rxe_av *av)
 {
 	struct rxe_qp *qp = pkt->qp;
 	struct dst_entry *dst;
@@ -387,7 +378,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 	struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;
 	struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;
 
-	dst = rxe_find_route(rxe, qp, av);
+	dst = rxe_find_route(skb->dev, qp, av);
 	if (!dst) {
 		pr_err("Host not reachable\n");
 		return -EHOSTUNREACH;
@@ -396,8 +387,8 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 	if (!memcmp(saddr, daddr, sizeof(*daddr)))
 		pkt->mask |= RXE_LOOPBACK_MASK;
 
-	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
-			htons(ROCE_V2_UDP_DPORT));
+	prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
+			cpu_to_be16(ROCE_V2_UDP_DPORT));
 
 	prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,
 			 av->grh.traffic_class, av->grh.hop_limit, df, xnet);
@@ -406,15 +397,15 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 	return 0;
 }
 
-static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
-		    struct sk_buff *skb, struct rxe_av *av)
+static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb,
+		    struct rxe_av *av)
 {
 	struct rxe_qp *qp = pkt->qp;
 	struct dst_entry *dst;
 	struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
 	struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
 
-	dst = rxe_find_route(rxe, qp, av);
+	dst = rxe_find_route(skb->dev, qp, av);
 	if (!dst) {
 		pr_err("Host not reachable\n");
 		return -EHOSTUNREACH;
@@ -423,8 +414,8 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 	if (!memcmp(saddr, daddr, sizeof(*daddr)))
 		pkt->mask |= RXE_LOOPBACK_MASK;
 
-	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
-			htons(ROCE_V2_UDP_DPORT));
+	prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
+			cpu_to_be16(ROCE_V2_UDP_DPORT));
 
 	prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP,
 			 av->grh.traffic_class,
@@ -434,16 +425,15 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 	return 0;
 }
 
-int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
-		struct sk_buff *skb, u32 *crc)
+int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc)
 {
 	int err = 0;
 	struct rxe_av *av = rxe_get_av(pkt);
 
 	if (av->network_type == RDMA_NETWORK_IPV4)
-		err = prepare4(rxe, pkt, skb, av);
+		err = prepare4(pkt, skb, av);
 	else if (av->network_type == RDMA_NETWORK_IPV6)
-		err = prepare6(rxe, pkt, skb, av);
+		err = prepare6(pkt, skb, av);
 
 	*crc = rxe_icrc_hdr(pkt, skb);
 
@@ -501,11 +491,6 @@ void rxe_loopback(struct sk_buff *skb)
 	rxe_rcv(skb);
 }
 
-static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
-{
-	return rxe->port.port_guid == av->grh.dgid.global.interface_id;
-}
-
 struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
 				int paylen, struct rxe_pkt_info *pkt)
 {
@@ -625,7 +610,7 @@ void rxe_port_up(struct rxe_dev *rxe)
 	port->attr.phys_state = IB_PHYS_STATE_LINK_UP;
 
 	rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
-	pr_info("set %s active\n", rxe->ib_dev.name);
+	dev_info(&rxe->ib_dev.dev, "set active\n");
 }
 
 /* Caller must hold net_info_lock */
@@ -638,7 +623,7 @@ void rxe_port_down(struct rxe_dev *rxe)
 	port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN;
 
 	rxe_port_event(rxe, IB_EVENT_PORT_ERR);
-	pr_info("set %s down\n", rxe->ib_dev.name);
+	dev_info(&rxe->ib_dev.dev, "set down\n");
 }
 
 static int rxe_notify(struct notifier_block *not_blk,
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 4555510d86c4..bdea899a58ac 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -90,7 +90,6 @@ enum rxe_device_param {
 	RXE_MAX_RES_RD_ATOM		= 0x3f000,
 	RXE_MAX_QP_INIT_RD_ATOM		= 128,
 	RXE_MAX_EE_INIT_RD_ATOM		= 0,
-	RXE_ATOMIC_CAP			= 1,
 	RXE_MAX_EE			= 0,
 	RXE_MAX_RDD			= 0,
 	RXE_MAX_MW			= 0,
@@ -139,9 +138,6 @@ enum rxe_device_param {
 
 /* default/initial rxe port parameters */
 enum rxe_port_param {
-	RXE_PORT_STATE			= IB_PORT_DOWN,
-	RXE_PORT_MAX_MTU		= IB_MTU_4096,
-	RXE_PORT_ACTIVE_MTU		= IB_MTU_256,
 	RXE_PORT_GID_TBL_LEN		= 1024,
 	RXE_PORT_PORT_CAP_FLAGS		= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP,
 	RXE_PORT_MAX_MSG_SZ		= 0x800000,
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index b4a8acc7bb7d..36b53fb94a49 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -207,7 +207,7 @@ int rxe_pool_init(
 
 	kref_init(&pool->ref_cnt);
 
-	spin_lock_init(&pool->pool_lock);
+	rwlock_init(&pool->pool_lock);
 
 	if (rxe_type_info[type].flags & RXE_POOL_INDEX) {
 		err = rxe_pool_init_index(pool,
@@ -222,7 +222,7 @@ int rxe_pool_init(
 		pool->key_size = rxe_type_info[type].key_size;
 	}
 
-	pool->state = rxe_pool_valid;
+	pool->state = RXE_POOL_STATE_VALID;
 
 out:
 	return err;
@@ -232,7 +232,7 @@ static void rxe_pool_release(struct kref *kref)
 {
 	struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
 
-	pool->state = rxe_pool_invalid;
+	pool->state = RXE_POOL_STATE_INVALID;
 	kfree(pool->table);
 }
 
@@ -245,12 +245,12 @@ int rxe_pool_cleanup(struct rxe_pool *pool)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool->pool_lock, flags);
-	pool->state = rxe_pool_invalid;
+	write_lock_irqsave(&pool->pool_lock, flags);
+	pool->state = RXE_POOL_STATE_INVALID;
 	if (atomic_read(&pool->num_elem) > 0)
 		pr_warn("%s pool destroyed with unfree'd elem\n",
 			pool_name(pool));
-	spin_unlock_irqrestore(&pool->pool_lock, flags);
+	write_unlock_irqrestore(&pool->pool_lock, flags);
 
 	rxe_pool_put(pool);
 
@@ -336,10 +336,10 @@ void rxe_add_key(void *arg, void *key)
 	struct rxe_pool *pool = elem->pool;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool->pool_lock, flags);
+	write_lock_irqsave(&pool->pool_lock, flags);
 	memcpy((u8 *)elem + pool->key_offset, key, pool->key_size);
 	insert_key(pool, elem);
-	spin_unlock_irqrestore(&pool->pool_lock, flags);
+	write_unlock_irqrestore(&pool->pool_lock, flags);
 }
 
 void rxe_drop_key(void *arg)
@@ -348,9 +348,9 @@ void rxe_drop_key(void *arg)
 	struct rxe_pool *pool = elem->pool;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool->pool_lock, flags);
+	write_lock_irqsave(&pool->pool_lock, flags);
 	rb_erase(&elem->node, &pool->tree);
-	spin_unlock_irqrestore(&pool->pool_lock, flags);
+	write_unlock_irqrestore(&pool->pool_lock, flags);
 }
 
 void rxe_add_index(void *arg)
@@ -359,10 +359,10 @@ void rxe_add_index(void *arg)
 	struct rxe_pool *pool = elem->pool;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool->pool_lock, flags);
+	write_lock_irqsave(&pool->pool_lock, flags);
 	elem->index = alloc_index(pool);
 	insert_index(pool, elem);
-	spin_unlock_irqrestore(&pool->pool_lock, flags);
+	write_unlock_irqrestore(&pool->pool_lock, flags);
 }
 
 void rxe_drop_index(void *arg)
@@ -371,10 +371,10 @@ void rxe_drop_index(void *arg)
 	struct rxe_pool *pool = elem->pool;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool->pool_lock, flags);
+	write_lock_irqsave(&pool->pool_lock, flags);
 	clear_bit(elem->index - pool->min_index, pool->table);
 	rb_erase(&elem->node, &pool->tree);
-	spin_unlock_irqrestore(&pool->pool_lock, flags);
+	write_unlock_irqrestore(&pool->pool_lock, flags);
 }
 
 void *rxe_alloc(struct rxe_pool *pool)
@@ -384,13 +384,13 @@ void *rxe_alloc(struct rxe_pool *pool)
 
 	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
 
-	spin_lock_irqsave(&pool->pool_lock, flags);
-	if (pool->state != rxe_pool_valid) {
-		spin_unlock_irqrestore(&pool->pool_lock, flags);
+	read_lock_irqsave(&pool->pool_lock, flags);
+	if (pool->state != RXE_POOL_STATE_VALID) {
+		read_unlock_irqrestore(&pool->pool_lock, flags);
 		return NULL;
 	}
 	kref_get(&pool->ref_cnt);
-	spin_unlock_irqrestore(&pool->pool_lock, flags);
+	read_unlock_irqrestore(&pool->pool_lock, flags);
 
 	kref_get(&pool->rxe->ref_cnt);
 
@@ -436,9 +436,9 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
 	struct rxe_pool_entry *elem = NULL;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool->pool_lock, flags);
+	read_lock_irqsave(&pool->pool_lock, flags);
 
-	if (pool->state != rxe_pool_valid)
+	if (pool->state != RXE_POOL_STATE_VALID)
 		goto out;
 
 	node = pool->tree.rb_node;
@@ -450,15 +450,14 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
 			node = node->rb_left;
 		else if (elem->index < index)
 			node = node->rb_right;
-		else
+		else {
+			kref_get(&elem->ref_cnt);
 			break;
+		}
 	}
 
-	if (node)
-		kref_get(&elem->ref_cnt);
-
 out:
-	spin_unlock_irqrestore(&pool->pool_lock, flags);
+	read_unlock_irqrestore(&pool->pool_lock, flags);
 	return node ? elem : NULL;
 }
 
@@ -469,9 +468,9 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
 	int cmp;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool->pool_lock, flags);
+	read_lock_irqsave(&pool->pool_lock, flags);
 
-	if (pool->state != rxe_pool_valid)
+	if (pool->state != RXE_POOL_STATE_VALID)
 		goto out;
 
 	node = pool->tree.rb_node;
@@ -494,6 +493,6 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
 		kref_get(&elem->ref_cnt);
 
 out:
-	spin_unlock_irqrestore(&pool->pool_lock, flags);
+	read_unlock_irqrestore(&pool->pool_lock, flags);
 	return node ? elem : NULL;
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 47df28e43acf..aa4ba307097b 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -74,8 +74,8 @@ struct rxe_type_info {
 extern struct rxe_type_info rxe_type_info[];
 
 enum rxe_pool_state {
-	rxe_pool_invalid,
-	rxe_pool_valid,
+	RXE_POOL_STATE_INVALID,
+	RXE_POOL_STATE_VALID,
 };
 
 struct rxe_pool_entry {
@@ -90,7 +90,7 @@ struct rxe_pool_entry {
 
 struct rxe_pool {
 	struct rxe_dev		*rxe;
-	spinlock_t              pool_lock; /* pool spinlock */
+	rwlock_t		pool_lock; /* protects pool add/del/search */
 	size_t			elem_size;
 	struct kref		ref_cnt;
 	void			(*cleanup)(struct rxe_pool_entry *obj);
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index c58452daffc7..b9710907dac2 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -34,6 +34,7 @@
 #include <linux/skbuff.h>
 #include <linux/delay.h>
 #include <linux/sched.h>
+#include <linux/vmalloc.h>
 
 #include "rxe.h"
 #include "rxe_loc.h"
@@ -227,6 +228,16 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
 		return err;
 	qp->sk->sk->sk_user_data = qp;
 
+	/* pick a source UDP port number for this QP based on
+	 * the source QPN. this spreads traffic for different QPs
+	 * across different NIC RX queues (while using a single
+	 * flow for a given QP to maintain packet order).
+	 * the port number must be in the Dynamic Ports range
+	 * (0xc000 - 0xffff).
+	 */
+	qp->src_port = RXE_ROCE_V2_SPORT +
+		(hash_32_generic(qp_num(qp), 14) & 0x3fff);
+
 	qp->sq.max_wr		= init->cap.max_send_wr;
 	qp->sq.max_sge		= init->cap.max_send_sge;
 	qp->sq.max_inline	= init->cap.max_inline_data;
@@ -247,7 +258,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
 			   &qp->sq.queue->ip);
 
 	if (err) {
-		kvfree(qp->sq.queue->buf);
+		vfree(qp->sq.queue->buf);
 		kfree(qp->sq.queue);
 		return err;
 	}
@@ -300,7 +311,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
 				   qp->rq.queue->buf, qp->rq.queue->buf_size,
 				   &qp->rq.queue->ip);
 		if (err) {
-			kvfree(qp->rq.queue->buf);
+			vfree(qp->rq.queue->buf);
 			kfree(qp->rq.queue);
 			return err;
 		}
@@ -408,8 +419,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
 	enum ib_qp_state new_state = (mask & IB_QP_STATE) ?
 					attr->qp_state : cur_state;
 
-	if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask,
-				IB_LINK_LAYER_ETHERNET)) {
+	if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) {
 		pr_warn("invalid mask or state for qp\n");
 		goto err1;
 	}
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index d30dbac24583..5c29a1bb575a 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -122,7 +122,7 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 			set_bad_pkey_cntr(port);
 			goto err1;
 		}
-	} else if (qpn != 0) {
+	} else {
 		if (unlikely(!pkey_match(pkey,
 					 port->pkey_tbl[qp->attr.pkey_index]
 					))) {
@@ -134,7 +134,7 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 	}
 
 	if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) &&
-	    qpn != 0 && pkt->mask) {
+	    pkt->mask) {
 		u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey;
 
 		if (unlikely(deth_qkey(pkt) != qkey)) {
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 8be27238a86e..6c361d70d7cd 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -73,9 +73,6 @@ static void req_retry(struct rxe_qp *qp)
 	int npsn;
 	int first = 1;
 
-	wqe = queue_head(qp->sq.queue);
-	npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK;
-
 	qp->req.wqe_index	= consumer_index(qp->sq.queue);
 	qp->req.psn		= qp->comp.psn;
 	qp->req.opcode		= -1;
@@ -107,11 +104,17 @@ static void req_retry(struct rxe_qp *qp)
 		if (first) {
 			first = 0;
 
-			if (mask & WR_WRITE_OR_SEND_MASK)
+			if (mask & WR_WRITE_OR_SEND_MASK) {
+				npsn = (qp->comp.psn - wqe->first_psn) &
+					BTH_PSN_MASK;
 				retry_first_write_send(qp, wqe, mask, npsn);
+			}
 
-			if (mask & WR_READ_MASK)
+			if (mask & WR_READ_MASK) {
+				npsn = (wqe->dma.length - wqe->dma.resid) /
+					qp->mtu;
 				wqe->iova += npsn * qp->mtu;
+			}
 		}
 
 		wqe->state = wqe_state_posted;
@@ -435,7 +438,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
 	if (pkt->mask & RXE_RETH_MASK) {
 		reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
 		reth_set_va(pkt, wqe->iova);
-		reth_set_len(pkt, wqe->dma.length);
+		reth_set_len(pkt, wqe->dma.resid);
 	}
 
 	if (pkt->mask & RXE_IMMDT_MASK)
@@ -476,7 +479,7 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 	u32 *p;
 	int err;
 
-	err = rxe_prepare(rxe, pkt, skb, &crc);
+	err = rxe_prepare(pkt, skb, &crc);
 	if (err)
 		return err;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index aa5833318372..c962160292f4 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -637,7 +637,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 	if (ack->mask & RXE_ATMACK_MASK)
 		atmack_set_orig(ack, qp->resp.atomic_orig);
 
-	err = rxe_prepare(rxe, ack, skb, &crc);
+	err = rxe_prepare(ack, skb, &crc);
 	if (err) {
 		kfree_skb(skb);
 		return NULL;
@@ -682,6 +682,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 		rxe_advance_resp_resource(qp);
 
 		res->type		= RXE_READ_MASK;
+		res->replay		= 0;
 
 		res->read.va		= qp->resp.va;
 		res->read.va_org	= qp->resp.va;
@@ -752,7 +753,8 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 		state = RESPST_DONE;
 	} else {
 		qp->resp.res = NULL;
-		qp->resp.opcode = -1;
+		if (!res->replay)
+			qp->resp.opcode = -1;
 		if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
 			qp->resp.psn = res->cur_psn;
 		state = RESPST_CLEANUP;
@@ -814,6 +816,7 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 
 	/* next expected psn, read handles this separately */
 	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+	qp->resp.ack_psn = qp->resp.psn;
 
 	qp->resp.opcode = pkt->opcode;
 	qp->resp.status = IB_WC_SUCCESS;
@@ -1065,7 +1068,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
 					  struct rxe_pkt_info *pkt)
 {
 	enum resp_states rc;
-	u32 prev_psn = (qp->resp.psn - 1) & BTH_PSN_MASK;
+	u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;
 
 	if (pkt->mask & RXE_SEND_MASK ||
 	    pkt->mask & RXE_WRITE_MASK) {
@@ -1108,6 +1111,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
 			res->state = (pkt->psn == res->first_psn) ?
 					rdatm_res_state_new :
 					rdatm_res_state_replay;
+			res->replay = 1;
 
 			/* Reset the resource, except length. */
 			res->read.va_org = iova;
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index 0d6c04ba7fc3..c41a5fee81f7 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -31,6 +31,7 @@
  * SOFTWARE.
  */
 
+#include <linux/vmalloc.h>
 #include "rxe.h"
 #include "rxe_loc.h"
 #include "rxe_queue.h"
@@ -129,13 +130,18 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
 
 	err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context, q->buf,
 			   q->buf_size, &q->ip);
-	if (err)
+	if (err) {
+		vfree(q->buf);
+		kfree(q);
 		return err;
+	}
 
 	if (uresp) {
 		if (copy_to_user(&uresp->srq_num, &srq->srq_num,
-				 sizeof(uresp->srq_num)))
+				 sizeof(uresp->srq_num))) {
+			rxe_queue_cleanup(q);
 			return -EFAULT;
+		}
 	}
 
 	return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
index d5ed7571128f..73a19f808e1b 100644
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -105,7 +105,7 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
 	}
 
 	rxe_set_port_state(ndev);
-	pr_info("added %s to %s\n", rxe->ib_dev.name, intf);
+	dev_info(&rxe->ib_dev.dev, "added %s\n", intf);
 err:
 	if (ndev)
 		dev_put(ndev);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index f5b1e0ad6142..9c19f2027511 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1148,18 +1148,21 @@ static ssize_t parent_show(struct device *device,
 
 static DEVICE_ATTR_RO(parent);
 
-static struct device_attribute *rxe_dev_attributes[] = {
-	&dev_attr_parent,
+static struct attribute *rxe_dev_attributes[] = {
+	&dev_attr_parent.attr,
+	NULL
+};
+
+static const struct attribute_group rxe_attr_group = {
+	.attrs = rxe_dev_attributes,
 };
 
 int rxe_register_device(struct rxe_dev *rxe)
 {
 	int err;
-	int i;
 	struct ib_device *dev = &rxe->ib_dev;
 	struct crypto_shash *tfm;
 
-	strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
 	strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
 
 	dev->owner = THIS_MODULE;
@@ -1260,26 +1263,16 @@ int rxe_register_device(struct rxe_dev *rxe)
 	}
 	rxe->tfm = tfm;
 
+	rdma_set_device_sysfs_group(dev, &rxe_attr_group);
 	dev->driver_id = RDMA_DRIVER_RXE;
-	err = ib_register_device(dev, NULL);
+	err = ib_register_device(dev, "rxe%d", NULL);
 	if (err) {
 		pr_warn("%s failed with error %d\n", __func__, err);
 		goto err1;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
-		err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
-		if (err) {
-			pr_warn("%s failed with error %d for attr number %d\n",
-				__func__, err, i);
-			goto err2;
-		}
-	}
-
 	return 0;
 
-err2:
-	ib_unregister_device(dev);
 err1:
 	crypto_free_shash(rxe->tfm);
 
@@ -1288,12 +1281,8 @@ err1:
 
 int rxe_unregister_device(struct rxe_dev *rxe)
 {
-	int i;
 	struct ib_device *dev = &rxe->ib_dev;
 
-	for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
-		device_remove_file(&dev->dev, rxe_dev_attributes[i]);
-
 	ib_unregister_device(dev);
 
 	return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index af1470d29391..82e670d6eeea 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -158,6 +158,7 @@ struct rxe_comp_info {
 	int			opcode;
 	int			timeout;
 	int			timeout_retry;
+	int			started_retry;
 	u32			retry_cnt;
 	u32			rnr_retry;
 	struct rxe_task		task;
@@ -171,6 +172,7 @@ enum rdatm_res_state {
 
 struct resp_res {
 	int			type;
+	int			replay;
 	u32			first_psn;
 	u32			last_psn;
 	u32			cur_psn;
@@ -195,6 +197,7 @@ struct rxe_resp_info {
 	enum rxe_qp_state	state;
 	u32			msn;
 	u32			psn;
+	u32			ack_psn;
 	int			opcode;
 	int			drop_msg;
 	int			goto_error;
@@ -248,6 +251,7 @@ struct rxe_qp {
 
 	struct socket		*sk;
 	u32			dst_cookie;
+	u16			src_port;
 
 	struct rxe_av		pri_av;
 	struct rxe_av		alt_av;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 3d5424f335cb..0428e01e8f69 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1438,11 +1438,15 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
 		spin_unlock_irqrestore(&priv->lock, flags);
 		netif_tx_unlock_bh(dev);
 
-		if (skb->protocol == htons(ETH_P_IP))
+		if (skb->protocol == htons(ETH_P_IP)) {
+			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+		}
 #if IS_ENABLED(CONFIG_IPV6)
-		else if (skb->protocol == htons(ETH_P_IPV6))
+		else if (skb->protocol == htons(ETH_P_IPV6)) {
+			memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		}
 #endif
 		dev_kfree_skb_any(skb);
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 8baa75a705c5..8710214594d8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -243,7 +243,8 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
 		return 0;
 	}
 
-	if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
+	if (new_mtu < (ETH_MIN_MTU + IPOIB_ENCAP_LEN) ||
+	    new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
 		return -EINVAL;
 
 	priv->admin_mtu = new_mtu;
@@ -1880,6 +1881,8 @@ static int ipoib_parent_init(struct net_device *ndev)
 	       sizeof(union ib_gid));
 
 	SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent);
+	priv->dev->dev_port = priv->port - 1;
+	/* Let's set this one too for backwards compatibility. */
 	priv->dev->dev_id = priv->port - 1;
 
 	return 0;
@@ -2385,6 +2388,35 @@ int ipoib_add_pkey_attr(struct net_device *dev)
 	return device_create_file(&dev->dev, &dev_attr_pkey);
 }
 
+/*
+ * We erroneously exposed the iface's port number in the dev_id
+ * sysfs field long after dev_port was introduced for that purpose[1],
+ * and we need to stop everyone from relying on that.
+ * Let's overload the shower routine for the dev_id file here
+ * to gently bring the issue up.
+ *
+ * [1] https://www.spinics.net/lists/netdev/msg272123.html
+ */
+static ssize_t dev_id_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct net_device *ndev = to_net_dev(dev);
+
+	if (ndev->dev_id == ndev->dev_port)
+		netdev_info_once(ndev,
+			"\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n",
+			current->comm);
+
+	return sprintf(buf, "%#x\n", ndev->dev_id);
+}
+static DEVICE_ATTR_RO(dev_id);
+
+int ipoib_intercept_dev_id_attr(struct net_device *dev)
+{
+	device_remove_file(&dev->dev, &dev_attr_dev_id);
+	return device_create_file(&dev->dev, &dev_attr_dev_id);
+}
+
 static struct net_device *ipoib_add_port(const char *format,
 					 struct ib_device *hca, u8 port)
 {
@@ -2437,6 +2469,8 @@ static struct net_device *ipoib_add_port(const char *format,
 	 */
 	ndev->priv_destructor = ipoib_intf_free;
 
+	if (ipoib_intercept_dev_id_attr(ndev))
+		goto sysfs_failed;
 	if (ipoib_cm_add_mode_attr(ndev))
 		goto sysfs_failed;
 	if (ipoib_add_pkey_attr(ndev))
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 9f36ca786df8..1e88213459f2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -277,7 +277,7 @@ void ipoib_event(struct ib_event_handler *handler,
 		return;
 
 	ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event,
-		  record->device->name, record->element.port_num);
+		  dev_name(&record->device->dev), record->element.port_num);
 
 	if (record->event == IB_EVENT_SM_CHANGE ||
 	    record->event == IB_EVENT_CLIENT_REREGISTER) {
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 2f6388596f88..96af06cfe0af 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -589,13 +589,19 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
 	ib_conn->post_recv_buf_count--;
 }
 
-static inline void
+static inline int
 iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
 {
-	if (likely(rkey == desc->rsc.mr->rkey))
+	if (likely(rkey == desc->rsc.mr->rkey)) {
 		desc->rsc.mr_valid = 0;
-	else if (likely(rkey == desc->pi_ctx->sig_mr->rkey))
+	} else if (likely(desc->pi_ctx && rkey == desc->pi_ctx->sig_mr->rkey)) {
 		desc->pi_ctx->sig_mr_valid = 0;
+	} else {
+		iser_err("Bogus remote invalidation for rkey %#x\n", rkey);
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 static int
@@ -623,12 +629,14 @@ iser_check_remote_inv(struct iser_conn *iser_conn,
 
 			if (iser_task->dir[ISER_DIR_IN]) {
 				desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h;
-				iser_inv_desc(desc, rkey);
+				if (unlikely(iser_inv_desc(desc, rkey)))
+					return -EINVAL;
 			}
 
 			if (iser_task->dir[ISER_DIR_OUT]) {
 				desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h;
-				iser_inv_desc(desc, rkey);
+				if (unlikely(iser_inv_desc(desc, rkey)))
+					return -EINVAL;
 			}
 		} else {
 			iser_err("failed to get task for itt=%d\n", hdr->itt);
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index b686a4aaffe8..946b623ba5eb 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -55,7 +55,7 @@ static void iser_event_handler(struct ib_event_handler *handler,
 {
 	iser_err("async event %s (%d) on device %s port %d\n",
 		 ib_event_msg(event->event), event->event,
-		 event->device->name, event->element.port_num);
+		dev_name(&event->device->dev), event->element.port_num);
 }
 
 /**
@@ -85,7 +85,7 @@ static int iser_create_device_ib_res(struct iser_device *device)
 	max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
 
 	iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
-		  device->comps_used, ib_dev->name,
+		  device->comps_used, dev_name(&ib_dev->dev),
 		  ib_dev->num_comp_vectors, max_cqe);
 
 	device->pd = ib_alloc_pd(ib_dev,
@@ -468,7 +468,8 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
 			iser_conn->max_cmds =
 				ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr);
 			iser_dbg("device %s supports max_send_wr %d\n",
-				 device->ib_device->name, ib_dev->attrs.max_qp_wr);
+				 dev_name(&device->ib_device->dev),
+				 ib_dev->attrs.max_qp_wr);
 		}
 	}
 
@@ -764,7 +765,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
 		      IB_DEVICE_SIGNATURE_HANDOVER)) {
 			iser_warn("T10-PI requested but not supported on %s, "
 				  "continue without T10-PI\n",
-				  ib_conn->device->ib_device->name);
+				  dev_name(&ib_conn->device->ib_device->dev));
 			ib_conn->pi_support = false;
 		} else {
 			ib_conn->pi_support = true;
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index f39670c5c25c..e3dd13798d79 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -262,7 +262,7 @@ isert_alloc_comps(struct isert_device *device)
 
 	isert_info("Using %d CQs, %s supports %d vectors support "
 		   "pi_capable %d\n",
-		   device->comps_used, device->ib_device->name,
+		   device->comps_used, dev_name(&device->ib_device->dev),
 		   device->ib_device->num_comp_vectors,
 		   device->pi_capable);
 
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
index 267da8215e08..31cd361416ac 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -351,7 +351,8 @@ static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter,
 			if (unlikely(!dlid))
 				v_warn("Null dlid in MAC address\n");
 		} else if (def_port != OPA_VNIC_INVALID_PORT) {
-			dlid = info->vesw.u_ucast_dlid[def_port];
+			if (def_port < OPA_VESW_MAX_NUM_DEF_PORT)
+				dlid = info->vesw.u_ucast_dlid[def_port];
 		}
 	}
 
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
index 15711dcc6f58..d119d9afa845 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -888,7 +888,8 @@ static void opa_vnic_event(struct ib_event_handler *handler,
 		return;
 
 	c_dbg("OPA_VNIC received event %d on device %s port %d\n",
-	      record->event, record->device->name, record->element.port_num);
+	      record->event, dev_name(&record->device->dev),
+	      record->element.port_num);
 
 	if (record->event == IB_EVENT_PORT_ERR)
 		idr_for_each(&port->vport_idr, vema_disable_vport, NULL);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 0b34e909505f..eed0eb3bb04c 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1330,17 +1330,8 @@ static void srp_terminate_io(struct srp_rport *rport)
 {
 	struct srp_target_port *target = rport->lld_data;
 	struct srp_rdma_ch *ch;
-	struct Scsi_Host *shost = target->scsi_host;
-	struct scsi_device *sdev;
 	int i, j;
 
-	/*
-	 * Invoking srp_terminate_io() while srp_queuecommand() is running
-	 * is not safe. Hence the warning statement below.
-	 */
-	shost_for_each_device(sdev, shost)
-		WARN_ON_ONCE(sdev->request_queue->request_fn_active);
-
 	for (i = 0; i < target->ch_count; i++) {
 		ch = &target->ch[i];
 
@@ -3124,7 +3115,8 @@ static ssize_t show_local_ib_device(struct device *dev,
 {
 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
 
-	return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
+	return sprintf(buf, "%s\n",
+		       dev_name(&target->srp_host->srp_dev->dev->dev));
 }
 
 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
@@ -3987,7 +3979,7 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
 {
 	struct srp_host *host = container_of(dev, struct srp_host, dev);
 
-	return sprintf(buf, "%s\n", host->srp_dev->dev->name);
+	return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev));
 }
 
 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
@@ -4019,7 +4011,8 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
 
 	host->dev.class = &srp_class;
 	host->dev.parent = device->dev->dev.parent;
-	dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
+	dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev),
+		     port);
 
 	if (device_register(&host->dev))
 		goto free_host;
@@ -4095,7 +4088,7 @@ static void srp_add_one(struct ib_device *device)
 	srp_dev->mr_max_size	= srp_dev->mr_page_size *
 				   srp_dev->max_pages_per_mr;
 	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
-		 device->name, mr_page_shift, attr->max_mr_size,
+		 dev_name(&device->dev), mr_page_shift, attr->max_mr_size,
 		 attr->max_fast_reg_page_list_len,
 		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
 
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index f37cbad022a2..2357aa727dcf 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -148,7 +148,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
 		return;
 
 	pr_debug("ASYNC event= %d on device= %s\n", event->event,
-		 sdev->device->name);
+		 dev_name(&sdev->device->dev));
 
 	switch (event->event) {
 	case IB_EVENT_PORT_ERR:
@@ -1941,7 +1941,8 @@ static void __srpt_close_all_ch(struct srpt_port *sport)
 			if (srpt_disconnect_ch(ch) >= 0)
 				pr_info("Closing channel %s because target %s_%d has been disabled\n",
 					ch->sess_name,
-					sport->sdev->device->name, sport->port);
+					dev_name(&sport->sdev->device->dev),
+					sport->port);
 			srpt_close_ch(ch);
 		}
 	}
@@ -2127,7 +2128,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
 	if (!sport->enabled) {
 		rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
 		pr_info("rejected SRP_LOGIN_REQ because target port %s_%d has not yet been enabled\n",
-			sport->sdev->device->name, port_num);
+			dev_name(&sport->sdev->device->dev), port_num);
 		goto reject;
 	}
 
@@ -2267,7 +2268,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
 		rej->reason = cpu_to_be32(
 				SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
 		pr_info("rejected SRP_LOGIN_REQ because target %s_%d is not enabled\n",
-			sdev->device->name, port_num);
+			dev_name(&sdev->device->dev), port_num);
 		mutex_unlock(&sport->mutex);
 		goto reject;
 	}
@@ -2708,7 +2709,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
 		break;
 	}
 
-	if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT)))
+	if (WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))
 		return;
 
 	/* For read commands, transfer the data to the initiator. */
@@ -2842,7 +2843,7 @@ static int srpt_release_sport(struct srpt_port *sport)
 	while (wait_event_timeout(sport->ch_releaseQ,
 				  srpt_ch_list_empty(sport), 5 * HZ) <= 0) {
 		pr_info("%s_%d: waiting for session unregistration ...\n",
-			sport->sdev->device->name, sport->port);
+			dev_name(&sport->sdev->device->dev), sport->port);
 		rcu_read_lock();
 		list_for_each_entry(nexus, &sport->nexus_list, entry) {
 			list_for_each_entry(ch, &nexus->ch_list, list) {
@@ -2932,7 +2933,7 @@ static int srpt_alloc_srq(struct srpt_device *sdev)
 	}
 
 	pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", sdev->srq_size,
-		 sdev->device->attrs.max_srq_wr, device->name);
+		 sdev->device->attrs.max_srq_wr, dev_name(&device->dev));
 
 	sdev->ioctx_ring = (struct srpt_recv_ioctx **)
 		srpt_alloc_ioctx_ring(sdev, sdev->srq_size,
@@ -2965,8 +2966,8 @@ static int srpt_use_srq(struct srpt_device *sdev, bool use_srq)
 	} else if (use_srq && !sdev->srq) {
 		ret = srpt_alloc_srq(sdev);
 	}
-	pr_debug("%s(%s): use_srq = %d; ret = %d\n", __func__, device->name,
-		 sdev->use_srq, ret);
+	pr_debug("%s(%s): use_srq = %d; ret = %d\n", __func__,
+		 dev_name(&device->dev), sdev->use_srq, ret);
 	return ret;
 }
 
@@ -3052,7 +3053,7 @@ static void srpt_add_one(struct ib_device *device)
 
 		if (srpt_refresh_port(sport)) {
 			pr_err("MAD registration failed for %s-%d.\n",
-			       sdev->device->name, i);
+			       dev_name(&sdev->device->dev), i);
 			goto err_event;
 		}
 	}
@@ -3063,7 +3064,7 @@ static void srpt_add_one(struct ib_device *device)
 
 out:
 	ib_set_client_data(device, &srpt_client, sdev);
-	pr_debug("added %s.\n", device->name);
+	pr_debug("added %s.\n", dev_name(&device->dev));
 	return;
 
 err_event:
@@ -3078,7 +3079,7 @@ free_dev:
 	kfree(sdev);
 err:
 	sdev = NULL;
-	pr_info("%s(%s) failed.\n", __func__, device->name);
+	pr_info("%s(%s) failed.\n", __func__, dev_name(&device->dev));
 	goto out;
 }
 
@@ -3093,7 +3094,8 @@ static void srpt_remove_one(struct ib_device *device, void *client_data)
 	int i;
 
 	if (!sdev) {
-		pr_info("%s(%s): nothing to do.\n", __func__, device->name);
+		pr_info("%s(%s): nothing to do.\n", __func__,
+			dev_name(&device->dev));
 		return;
 	}
 
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index c60395b7470f..d9a25715650e 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -186,6 +186,19 @@ config INTEL_IOMMU
 	  and include PCI device scope covered by these DMA
 	  remapping devices.
 
+config INTEL_IOMMU_DEBUGFS
+	bool "Export Intel IOMMU internals in Debugfs"
+	depends on INTEL_IOMMU && IOMMU_DEBUGFS
+	help
+	  !!!WARNING!!!
+
+	  DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!!!
+
+	  Expose Intel IOMMU internals in Debugfs.
+
+	  This option is -NOT- intended for production environments, and should
+	  only be enabled for debugging Intel IOMMU.
+
 config INTEL_IOMMU_SVM
 	bool "Support for Shared Virtual Memory with Intel IOMMU"
 	depends on INTEL_IOMMU && X86
@@ -372,6 +385,14 @@ config S390_CCW_IOMMU
 	  Enables bits of IOMMU API required by VFIO. The iommu_ops
 	  is not implemented as it is not necessary for VFIO.
 
+config S390_AP_IOMMU
+	bool "S390 AP IOMMU Support"
+	depends on S390 && ZCRYPT
+	select IOMMU_API
+	help
+	  Enables bits of IOMMU API required by VFIO. The iommu_ops
+	  is not implemented as it is not necessary for VFIO.
+
 config MTK_IOMMU
 	bool "MTK IOMMU Support"
 	depends on ARM || ARM64
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index ab5eba6edf82..a158a68c8ea8 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
 obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
 obj-$(CONFIG_DMAR_TABLE) += dmar.o
 obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
+obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += intel-iommu-debugfs.o
 obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o
 obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
 obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index bee0dfb7b93b..1167ff0416cf 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3083,6 +3083,8 @@ static bool amd_iommu_capable(enum iommu_cap cap)
 		return (irq_remapping_enabled == 1);
 	case IOMMU_CAP_NOEXEC:
 		return false;
+	default:
+		break;
 	}
 
 	return false;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 3931c7de7c69..bb2cd29e1658 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1719,7 +1719,7 @@ static const struct attribute_group *amd_iommu_groups[] = {
 	NULL,
 };
 
-static int iommu_init_pci(struct amd_iommu *iommu)
+static int __init iommu_init_pci(struct amd_iommu *iommu)
 {
 	int cap_ptr = iommu->cap_ptr;
 	u32 range, misc, low, high;
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 5059d09f3202..6947ccf26512 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * IOMMU API for ARM architected SMMUv3 implementations.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
  * Copyright (C) 2015 ARM Limited
  *
  * Author: Will Deacon <will.deacon@arm.com>
@@ -567,7 +556,8 @@ struct arm_smmu_device {
 
 	int				gerr_irq;
 	int				combined_irq;
-	atomic_t			sync_nr;
+	u32				sync_nr;
+	u8				prev_cmd_opcode;
 
 	unsigned long			ias; /* IPA */
 	unsigned long			oas; /* PA */
@@ -611,6 +601,7 @@ struct arm_smmu_domain {
 	struct mutex			init_mutex; /* Protects smmu pointer */
 
 	struct io_pgtable_ops		*pgtbl_ops;
+	bool				non_strict;
 
 	enum arm_smmu_domain_stage	stage;
 	union {
@@ -708,7 +699,7 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
 }
 
 /*
- * Wait for the SMMU to consume items. If drain is true, wait until the queue
+ * Wait for the SMMU to consume items. If sync is true, wait until the queue
  * is empty. Otherwise, wait until there is at least one free slot.
  */
 static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
@@ -901,6 +892,8 @@ static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
 	struct arm_smmu_queue *q = &smmu->cmdq.q;
 	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
 
+	smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);
+
 	while (queue_insert_raw(q, cmd) == -ENOSPC) {
 		if (queue_poll_cons(q, false, wfe))
 			dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
@@ -948,15 +941,21 @@ static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
 	struct arm_smmu_cmdq_ent ent = {
 		.opcode = CMDQ_OP_CMD_SYNC,
 		.sync	= {
-			.msidata = atomic_inc_return_relaxed(&smmu->sync_nr),
 			.msiaddr = virt_to_phys(&smmu->sync_count),
 		},
 	};
 
-	arm_smmu_cmdq_build_cmd(cmd, &ent);
-
 	spin_lock_irqsave(&smmu->cmdq.lock, flags);
-	arm_smmu_cmdq_insert_cmd(smmu, cmd);
+
+	/* Piggy-back on the previous command if it's a SYNC */
+	if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) {
+		ent.sync.msidata = smmu->sync_nr;
+	} else {
+		ent.sync.msidata = ++smmu->sync_nr;
+		arm_smmu_cmdq_build_cmd(cmd, &ent);
+		arm_smmu_cmdq_insert_cmd(smmu, cmd);
+	}
+
 	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 
 	return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
@@ -1372,15 +1371,11 @@ static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
 }
 
 /* IO_PGTABLE API */
-static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
-{
-	arm_smmu_cmdq_issue_sync(smmu);
-}
-
 static void arm_smmu_tlb_sync(void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
-	__arm_smmu_tlb_sync(smmu_domain->smmu);
+
+	arm_smmu_cmdq_issue_sync(smmu_domain->smmu);
 }
 
 static void arm_smmu_tlb_inv_context(void *cookie)
@@ -1398,8 +1393,14 @@ static void arm_smmu_tlb_inv_context(void *cookie)
 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
 	}
 
+	/*
+	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
+	 * PTEs previously cleared by unmaps on the current CPU not yet visible
+	 * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()
+	 * to guarantee those are observed before the TLBI. Do be careful, 007.
+	 */
 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-	__arm_smmu_tlb_sync(smmu);
+	arm_smmu_cmdq_issue_sync(smmu);
 }
 
 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
@@ -1624,6 +1625,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 	if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
 		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
 
+	if (smmu_domain->non_strict)
+		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
+
 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
 	if (!pgtbl_ops)
 		return -ENOMEM;
@@ -1772,12 +1776,20 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
 	return ops->unmap(ops, iova, size);
 }
 
+static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+	if (smmu_domain->smmu)
+		arm_smmu_tlb_inv_context(smmu_domain);
+}
+
 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
 {
 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
 
 	if (smmu)
-		__arm_smmu_tlb_sync(smmu);
+		arm_smmu_cmdq_issue_sync(smmu);
 }
 
 static phys_addr_t
@@ -1917,15 +1929,27 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
 {
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
-	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
-		return -EINVAL;
-
-	switch (attr) {
-	case DOMAIN_ATTR_NESTING:
-		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
-		return 0;
+	switch (domain->type) {
+	case IOMMU_DOMAIN_UNMANAGED:
+		switch (attr) {
+		case DOMAIN_ATTR_NESTING:
+			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+			return 0;
+		default:
+			return -ENODEV;
+		}
+		break;
+	case IOMMU_DOMAIN_DMA:
+		switch (attr) {
+		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+			*(int *)data = smmu_domain->non_strict;
+			return 0;
+		default:
+			return -ENODEV;
+		}
+		break;
 	default:
-		return -ENODEV;
+		return -EINVAL;
 	}
 }
 
@@ -1935,26 +1959,37 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
 	int ret = 0;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
-	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
-		return -EINVAL;
-
 	mutex_lock(&smmu_domain->init_mutex);
 
-	switch (attr) {
-	case DOMAIN_ATTR_NESTING:
-		if (smmu_domain->smmu) {
-			ret = -EPERM;
-			goto out_unlock;
+	switch (domain->type) {
+	case IOMMU_DOMAIN_UNMANAGED:
+		switch (attr) {
+		case DOMAIN_ATTR_NESTING:
+			if (smmu_domain->smmu) {
+				ret = -EPERM;
+				goto out_unlock;
+			}
+
+			if (*(int *)data)
+				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+			else
+				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+			break;
+		default:
+			ret = -ENODEV;
+		}
+		break;
+	case IOMMU_DOMAIN_DMA:
+		switch(attr) {
+		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+			smmu_domain->non_strict = *(int *)data;
+			break;
+		default:
+			ret = -ENODEV;
 		}
-
-		if (*(int *)data)
-			smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
-		else
-			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
-
 		break;
 	default:
-		ret = -ENODEV;
+		ret = -EINVAL;
 	}
 
 out_unlock:
@@ -1999,7 +2034,7 @@ static struct iommu_ops arm_smmu_ops = {
 	.attach_dev		= arm_smmu_attach_dev,
 	.map			= arm_smmu_map,
 	.unmap			= arm_smmu_unmap,
-	.flush_iotlb_all	= arm_smmu_iotlb_sync,
+	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
 	.iotlb_sync		= arm_smmu_iotlb_sync,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
 	.add_device		= arm_smmu_add_device,
@@ -2180,7 +2215,6 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
 {
 	int ret;
 
-	atomic_set(&smmu->sync_nr, 0);
 	ret = arm_smmu_init_queues(smmu);
 	if (ret)
 		return ret;
@@ -2353,8 +2387,8 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
 	irq = smmu->combined_irq;
 	if (irq) {
 		/*
-		 * Cavium ThunderX2 implementation doesn't not support unique
-		 * irq lines. Use single irq line for all the SMMUv3 interrupts.
+		 * Cavium ThunderX2 implementation doesn't support unique irq
+		 * lines. Use a single irq line for all the SMMUv3 interrupts.
 		 */
 		ret = devm_request_threaded_irq(smmu->dev, irq,
 					arm_smmu_combined_irq_handler,
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index fd1b80ef9490..5a28ae892504 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -52,6 +52,7 @@
 #include <linux/spinlock.h>
 
 #include <linux/amba/bus.h>
+#include <linux/fsl/mc.h>
 
 #include "io-pgtable.h"
 #include "arm-smmu-regs.h"
@@ -246,6 +247,7 @@ struct arm_smmu_domain {
 	const struct iommu_gather_ops	*tlb_ops;
 	struct arm_smmu_cfg		cfg;
 	enum arm_smmu_domain_stage	stage;
+	bool				non_strict;
 	struct mutex			init_mutex; /* Protects smmu pointer */
 	spinlock_t			cb_lock; /* Serialises ATS1* ops and TLB syncs */
 	struct iommu_domain		domain;
@@ -447,7 +449,11 @@ static void arm_smmu_tlb_inv_context_s1(void *cookie)
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 	void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
 
-	writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
+	/*
+	 * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
+	 * cleared by the current CPU are visible to the SMMU before the TLBI.
+	 */
+	writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
 	arm_smmu_tlb_sync_context(cookie);
 }
 
@@ -457,7 +463,8 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie)
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	void __iomem *base = ARM_SMMU_GR0(smmu);
 
-	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+	/* NOTE: see above */
+	writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
 	arm_smmu_tlb_sync_global(smmu);
 }
 
@@ -469,6 +476,9 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
 	void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
 
+	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+		wmb();
+
 	if (stage1) {
 		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
 
@@ -510,6 +520,9 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
 	struct arm_smmu_domain *smmu_domain = cookie;
 	void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
 
+	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+		wmb();
+
 	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
 }
 
@@ -863,6 +876,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
 		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
 
+	if (smmu_domain->non_strict)
+		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
+
 	smmu_domain->smmu = smmu;
 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
 	if (!pgtbl_ops) {
@@ -1252,6 +1268,14 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 	return ops->unmap(ops, iova, size);
 }
 
+static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+	if (smmu_domain->tlb_ops)
+		smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
+}
+
 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
 {
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -1459,6 +1483,8 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
 
 	if (dev_is_pci(dev))
 		group = pci_device_group(dev);
+	else if (dev_is_fsl_mc(dev))
+		group = fsl_mc_device_group(dev);
 	else
 		group = generic_device_group(dev);
 
@@ -1470,15 +1496,27 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
 {
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
-	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
-		return -EINVAL;
-
-	switch (attr) {
-	case DOMAIN_ATTR_NESTING:
-		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
-		return 0;
+	switch(domain->type) {
+	case IOMMU_DOMAIN_UNMANAGED:
+		switch (attr) {
+		case DOMAIN_ATTR_NESTING:
+			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+			return 0;
+		default:
+			return -ENODEV;
+		}
+		break;
+	case IOMMU_DOMAIN_DMA:
+		switch (attr) {
+		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+			*(int *)data = smmu_domain->non_strict;
+			return 0;
+		default:
+			return -ENODEV;
+		}
+		break;
 	default:
-		return -ENODEV;
+		return -EINVAL;
 	}
 }
 
@@ -1488,28 +1526,38 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
 	int ret = 0;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
-	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
-		return -EINVAL;
-
 	mutex_lock(&smmu_domain->init_mutex);
 
-	switch (attr) {
-	case DOMAIN_ATTR_NESTING:
-		if (smmu_domain->smmu) {
-			ret = -EPERM;
-			goto out_unlock;
+	switch(domain->type) {
+	case IOMMU_DOMAIN_UNMANAGED:
+		switch (attr) {
+		case DOMAIN_ATTR_NESTING:
+			if (smmu_domain->smmu) {
+				ret = -EPERM;
+				goto out_unlock;
+			}
+
+			if (*(int *)data)
+				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+			else
+				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+			break;
+		default:
+			ret = -ENODEV;
+		}
+		break;
+	case IOMMU_DOMAIN_DMA:
+		switch (attr) {
+		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+			smmu_domain->non_strict = *(int *)data;
+			break;
+		default:
+			ret = -ENODEV;
 		}
-
-		if (*(int *)data)
-			smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
-		else
-			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
-
 		break;
 	default:
-		ret = -ENODEV;
+		ret = -EINVAL;
 	}
-
 out_unlock:
 	mutex_unlock(&smmu_domain->init_mutex);
 	return ret;
@@ -1562,7 +1610,7 @@ static struct iommu_ops arm_smmu_ops = {
 	.attach_dev		= arm_smmu_attach_dev,
 	.map			= arm_smmu_map,
 	.unmap			= arm_smmu_unmap,
-	.flush_iotlb_all	= arm_smmu_iotlb_sync,
+	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
 	.iotlb_sync		= arm_smmu_iotlb_sync,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
 	.add_device		= arm_smmu_add_device,
@@ -2036,6 +2084,10 @@ static void arm_smmu_bus_init(void)
 		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
 	}
 #endif
+#ifdef CONFIG_FSL_MC_BUS
+	if (!iommu_present(&fsl_mc_bus_type))
+		bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
+#endif
 }
 
 static int arm_smmu_device_probe(struct platform_device *pdev)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 511ff9a1d6d9..d1b04753b204 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -55,6 +55,9 @@ struct iommu_dma_cookie {
 	};
 	struct list_head		msi_page_list;
 	spinlock_t			msi_lock;
+
+	/* Domain for flush queue callback; NULL if flush queue not in use */
+	struct iommu_domain		*fq_domain;
 };
 
 static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
@@ -257,6 +260,20 @@ static int iova_reserve_iommu_regions(struct device *dev,
 	return ret;
 }
 
+static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
+{
+	struct iommu_dma_cookie *cookie;
+	struct iommu_domain *domain;
+
+	cookie = container_of(iovad, struct iommu_dma_cookie, iovad);
+	domain = cookie->fq_domain;
+	/*
+	 * The IOMMU driver supporting DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE
+	 * implies that ops->flush_iotlb_all must be non-NULL.
+	 */
+	domain->ops->flush_iotlb_all(domain);
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -275,6 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
 	unsigned long order, base_pfn, end_pfn;
+	int attr;
 
 	if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
 		return -EINVAL;
@@ -308,6 +326,13 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	}
 
 	init_iova_domain(iovad, 1UL << order, base_pfn);
+
+	if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
+			DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
+		cookie->fq_domain = domain;
+		init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, NULL);
+	}
+
 	if (!dev)
 		return 0;
 
@@ -393,6 +418,9 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 	/* The MSI case is only ever cleaning up its most recent allocation */
 	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
 		cookie->msi_iova -= size;
+	else if (cookie->fq_domain)	/* non-strict mode */
+		queue_iova(iovad, iova_pfn(iovad, iova),
+				size >> iova_shift(iovad), 0);
 	else
 		free_iova_fast(iovad, iova_pfn(iovad, iova),
 				size >> iova_shift(iovad));
@@ -408,7 +436,9 @@ static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
 	dma_addr -= iova_off;
 	size = iova_align(iovad, size + iova_off);
 
-	WARN_ON(iommu_unmap(domain, dma_addr, size) != size);
+	WARN_ON(iommu_unmap_fast(domain, dma_addr, size) != size);
+	if (!cookie->fq_domain)
+		iommu_tlb_sync(domain);
 	iommu_dma_free_iova(cookie, dma_addr, size);
 }
 
@@ -491,7 +521,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count,
 void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
 		dma_addr_t *handle)
 {
-	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size);
+	__iommu_dma_unmap(iommu_get_dma_domain(dev), *handle, size);
 	__iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
 	*handle = IOMMU_MAPPING_ERROR;
 }
@@ -518,7 +548,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
 		unsigned long attrs, int prot, dma_addr_t *handle,
 		void (*flush_page)(struct device *, const void *, phys_addr_t))
 {
-	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+	struct iommu_domain *domain = iommu_get_dma_domain(dev);
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
 	struct page **pages;
@@ -606,9 +636,8 @@ int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma)
 }
 
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
-		size_t size, int prot)
+		size_t size, int prot, struct iommu_domain *domain)
 {
-	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	size_t iova_off = 0;
 	dma_addr_t iova;
@@ -632,13 +661,14 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
 dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size, int prot)
 {
-	return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot);
+	return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot,
+			iommu_get_dma_domain(dev));
 }
 
 void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
 		enum dma_data_direction dir, unsigned long attrs)
 {
-	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
+	__iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size);
 }
 
 /*
@@ -726,7 +756,7 @@ static void __invalidate_sg(struct scatterlist *sg, int nents)
 int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 		int nents, int prot)
 {
-	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+	struct iommu_domain *domain = iommu_get_dma_domain(dev);
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
 	struct scatterlist *s, *prev = NULL;
@@ -811,20 +841,21 @@ void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 		sg = tmp;
 	}
 	end = sg_dma_address(sg) + sg_dma_len(sg);
-	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), start, end - start);
+	__iommu_dma_unmap(iommu_get_dma_domain(dev), start, end - start);
 }
 
 dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
 		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
 	return __iommu_dma_map(dev, phys, size,
-			dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO);
+			dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO,
+			iommu_get_dma_domain(dev));
 }
 
 void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
-	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
+	__iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size);
 }
 
 int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
@@ -850,7 +881,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
 	if (!msi_page)
 		return NULL;
 
-	iova = __iommu_dma_map(dev, msi_addr, size, prot);
+	iova = __iommu_dma_map(dev, msi_addr, size, prot, domain);
 	if (iommu_dma_mapping_error(dev, iova))
 		goto out_free_page;
 
diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
index 8540625796a1..1b955aea44dd 100644
--- a/drivers/iommu/fsl_pamu.c
+++ b/drivers/iommu/fsl_pamu.c
@@ -543,7 +543,7 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu)
 		return ~(u32)0;
 	}
 
-	for_each_node_by_type(node, "cpu") {
+	for_each_of_cpu_node(node) {
 		prop = of_get_property(node, "reg", &len);
 		for (i = 0; i < len / sizeof(u32); i++) {
 			if (be32_to_cpup(&prop[i]) == vcpu) {
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index f089136e9c3f..9b528cfcc6c3 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -814,6 +814,55 @@ static int configure_domain_dma_state(struct fsl_dma_domain *dma_domain, bool en
 	return 0;
 }
 
+static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count)
+{
+	struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&dma_domain->domain_lock, flags);
+	/* Ensure domain is inactive i.e. DMA should be disabled for the domain */
+	if (dma_domain->enabled) {
+		pr_debug("Can't set geometry attributes as domain is active\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return  -EBUSY;
+	}
+
+	/* Ensure that the geometry has been set for the domain */
+	if (!dma_domain->geom_size) {
+		pr_debug("Please configure geometry before setting the number of windows\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return -EINVAL;
+	}
+
+	/*
+	 * Ensure we have valid window count i.e. it should be less than
+	 * maximum permissible limit and should be a power of two.
+	 */
+	if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) {
+		pr_debug("Invalid window count\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return -EINVAL;
+	}
+
+	ret = pamu_set_domain_geometry(dma_domain, &domain->geometry,
+				       w_count > 1 ? w_count : 0);
+	if (!ret) {
+		kfree(dma_domain->win_arr);
+		dma_domain->win_arr = kcalloc(w_count,
+					      sizeof(*dma_domain->win_arr),
+					      GFP_ATOMIC);
+		if (!dma_domain->win_arr) {
+			spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+			return -ENOMEM;
+		}
+		dma_domain->win_cnt = w_count;
+	}
+	spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+	return ret;
+}
+
 static int fsl_pamu_set_domain_attr(struct iommu_domain *domain,
 				    enum iommu_attr attr_type, void *data)
 {
@@ -830,6 +879,9 @@ static int fsl_pamu_set_domain_attr(struct iommu_domain *domain,
 	case DOMAIN_ATTR_FSL_PAMU_ENABLE:
 		ret = configure_domain_dma_state(dma_domain, *(int *)data);
 		break;
+	case DOMAIN_ATTR_WINDOWS:
+		ret = fsl_pamu_set_windows(domain, *(u32 *)data);
+		break;
 	default:
 		pr_debug("Unsupported attribute type\n");
 		ret = -EINVAL;
@@ -856,6 +908,9 @@ static int fsl_pamu_get_domain_attr(struct iommu_domain *domain,
 	case DOMAIN_ATTR_FSL_PAMUV1:
 		*(int *)data = DOMAIN_ATTR_FSL_PAMUV1;
 		break;
+	case DOMAIN_ATTR_WINDOWS:
+		*(u32 *)data = dma_domain->win_cnt;
+		break;
 	default:
 		pr_debug("Unsupported attribute type\n");
 		ret = -EINVAL;
@@ -916,13 +971,13 @@ static struct iommu_group *get_shared_pci_device_group(struct pci_dev *pdev)
 static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
 {
 	struct pci_controller *pci_ctl;
-	bool pci_endpt_partioning;
+	bool pci_endpt_partitioning;
 	struct iommu_group *group = NULL;
 
 	pci_ctl = pci_bus_to_host(pdev->bus);
-	pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl);
+	pci_endpt_partitioning = check_pci_ctl_endpt_part(pci_ctl);
 	/* We can partition PCIe devices so assign device group to the device */
-	if (pci_endpt_partioning) {
+	if (pci_endpt_partitioning) {
 		group = pci_device_group(&pdev->dev);
 
 		/*
@@ -994,62 +1049,6 @@ static void fsl_pamu_remove_device(struct device *dev)
 	iommu_group_remove_device(dev);
 }
 
-static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count)
-{
-	struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&dma_domain->domain_lock, flags);
-	/* Ensure domain is inactive i.e. DMA should be disabled for the domain */
-	if (dma_domain->enabled) {
-		pr_debug("Can't set geometry attributes as domain is active\n");
-		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-		return  -EBUSY;
-	}
-
-	/* Ensure that the geometry has been set for the domain */
-	if (!dma_domain->geom_size) {
-		pr_debug("Please configure geometry before setting the number of windows\n");
-		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-		return -EINVAL;
-	}
-
-	/*
-	 * Ensure we have valid window count i.e. it should be less than
-	 * maximum permissible limit and should be a power of two.
-	 */
-	if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) {
-		pr_debug("Invalid window count\n");
-		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-		return -EINVAL;
-	}
-
-	ret = pamu_set_domain_geometry(dma_domain, &domain->geometry,
-				       w_count > 1 ? w_count : 0);
-	if (!ret) {
-		kfree(dma_domain->win_arr);
-		dma_domain->win_arr = kcalloc(w_count,
-					      sizeof(*dma_domain->win_arr),
-					      GFP_ATOMIC);
-		if (!dma_domain->win_arr) {
-			spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-			return -ENOMEM;
-		}
-		dma_domain->win_cnt = w_count;
-	}
-	spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-
-	return ret;
-}
-
-static u32 fsl_pamu_get_windows(struct iommu_domain *domain)
-{
-	struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
-
-	return dma_domain->win_cnt;
-}
-
 static const struct iommu_ops fsl_pamu_ops = {
 	.capable	= fsl_pamu_capable,
 	.domain_alloc	= fsl_pamu_domain_alloc,
@@ -1058,8 +1057,6 @@ static const struct iommu_ops fsl_pamu_ops = {
 	.detach_dev	= fsl_pamu_detach_device,
 	.domain_window_enable = fsl_pamu_window_enable,
 	.domain_window_disable = fsl_pamu_window_disable,
-	.domain_get_windows = fsl_pamu_get_windows,
-	.domain_set_windows = fsl_pamu_set_windows,
 	.iova_to_phys	= fsl_pamu_iova_to_phys,
 	.domain_set_attr = fsl_pamu_set_domain_attr,
 	.domain_get_attr = fsl_pamu_get_domain_attr,
diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c
new file mode 100644
index 000000000000..7fabf9b1c2dc
--- /dev/null
+++ b/drivers/iommu/intel-iommu-debugfs.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2018 Intel Corporation.
+ *
+ * Authors: Gayatri Kammela <gayatri.kammela@intel.com>
+ *	    Sohil Mehta <sohil.mehta@intel.com>
+ *	    Jacob Pan <jacob.jun.pan@linux.intel.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+#include <linux/pci.h>
+
+#include <asm/irq_remapping.h>
+
+struct iommu_regset {
+	int offset;
+	const char *regs;
+};
+
+#define IOMMU_REGSET_ENTRY(_reg_)					\
+	{ DMAR_##_reg_##_REG, __stringify(_reg_) }
+static const struct iommu_regset iommu_regs[] = {
+	IOMMU_REGSET_ENTRY(VER),
+	IOMMU_REGSET_ENTRY(CAP),
+	IOMMU_REGSET_ENTRY(ECAP),
+	IOMMU_REGSET_ENTRY(GCMD),
+	IOMMU_REGSET_ENTRY(GSTS),
+	IOMMU_REGSET_ENTRY(RTADDR),
+	IOMMU_REGSET_ENTRY(CCMD),
+	IOMMU_REGSET_ENTRY(FSTS),
+	IOMMU_REGSET_ENTRY(FECTL),
+	IOMMU_REGSET_ENTRY(FEDATA),
+	IOMMU_REGSET_ENTRY(FEADDR),
+	IOMMU_REGSET_ENTRY(FEUADDR),
+	IOMMU_REGSET_ENTRY(AFLOG),
+	IOMMU_REGSET_ENTRY(PMEN),
+	IOMMU_REGSET_ENTRY(PLMBASE),
+	IOMMU_REGSET_ENTRY(PLMLIMIT),
+	IOMMU_REGSET_ENTRY(PHMBASE),
+	IOMMU_REGSET_ENTRY(PHMLIMIT),
+	IOMMU_REGSET_ENTRY(IQH),
+	IOMMU_REGSET_ENTRY(IQT),
+	IOMMU_REGSET_ENTRY(IQA),
+	IOMMU_REGSET_ENTRY(ICS),
+	IOMMU_REGSET_ENTRY(IRTA),
+	IOMMU_REGSET_ENTRY(PQH),
+	IOMMU_REGSET_ENTRY(PQT),
+	IOMMU_REGSET_ENTRY(PQA),
+	IOMMU_REGSET_ENTRY(PRS),
+	IOMMU_REGSET_ENTRY(PECTL),
+	IOMMU_REGSET_ENTRY(PEDATA),
+	IOMMU_REGSET_ENTRY(PEADDR),
+	IOMMU_REGSET_ENTRY(PEUADDR),
+	IOMMU_REGSET_ENTRY(MTRRCAP),
+	IOMMU_REGSET_ENTRY(MTRRDEF),
+	IOMMU_REGSET_ENTRY(MTRR_FIX64K_00000),
+	IOMMU_REGSET_ENTRY(MTRR_FIX16K_80000),
+	IOMMU_REGSET_ENTRY(MTRR_FIX16K_A0000),
+	IOMMU_REGSET_ENTRY(MTRR_FIX4K_C0000),
+	IOMMU_REGSET_ENTRY(MTRR_FIX4K_C8000),
+	IOMMU_REGSET_ENTRY(MTRR_FIX4K_D0000),
+	IOMMU_REGSET_ENTRY(MTRR_FIX4K_D8000),
+	IOMMU_REGSET_ENTRY(MTRR_FIX4K_E0000),
+	IOMMU_REGSET_ENTRY(MTRR_FIX4K_E8000),
+	IOMMU_REGSET_ENTRY(MTRR_FIX4K_F0000),
+	IOMMU_REGSET_ENTRY(MTRR_FIX4K_F8000),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSBASE0),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSMASK0),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSBASE1),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSMASK1),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSBASE2),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSMASK2),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSBASE3),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSMASK3),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSBASE4),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSMASK4),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSBASE5),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSMASK5),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSBASE6),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSMASK6),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSBASE7),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSMASK7),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSBASE8),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSMASK8),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSBASE9),
+	IOMMU_REGSET_ENTRY(MTRR_PHYSMASK9),
+	IOMMU_REGSET_ENTRY(VCCAP),
+	IOMMU_REGSET_ENTRY(VCMD),
+	IOMMU_REGSET_ENTRY(VCRSP),
+};
+
+static int iommu_regset_show(struct seq_file *m, void *unused)
+{
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu;
+	unsigned long flag;
+	int i, ret = 0;
+	u64 value;
+
+	rcu_read_lock();
+	for_each_active_iommu(iommu, drhd) {
+		if (!drhd->reg_base_addr) {
+			seq_puts(m, "IOMMU: Invalid base address\n");
+			ret = -EINVAL;
+			goto out;
+		}
+
+		seq_printf(m, "IOMMU: %s Register Base Address: %llx\n",
+			   iommu->name, drhd->reg_base_addr);
+		seq_puts(m, "Name\t\t\tOffset\t\tContents\n");
+		/*
+		 * Publish the contents of the 64-bit hardware registers
+		 * by adding the offset to the pointer (virtual address).
+		 */
+		raw_spin_lock_irqsave(&iommu->register_lock, flag);
+		for (i = 0 ; i < ARRAY_SIZE(iommu_regs); i++) {
+			value = dmar_readq(iommu->reg + iommu_regs[i].offset);
+			seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n",
+				   iommu_regs[i].regs, iommu_regs[i].offset,
+				   value);
+		}
+		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
+		seq_putc(m, '\n');
+	}
+out:
+	rcu_read_unlock();
+
+	return ret;
+}
+DEFINE_SHOW_ATTRIBUTE(iommu_regset);
+
+static void ctx_tbl_entry_show(struct seq_file *m, struct intel_iommu *iommu,
+			       int bus)
+{
+	struct context_entry *context;
+	int devfn;
+
+	seq_printf(m, " Context Table Entries for Bus: %d\n", bus);
+	seq_puts(m, "  Entry\tB:D.F\tHigh\tLow\n");
+
+	for (devfn = 0; devfn < 256; devfn++) {
+		context = iommu_context_addr(iommu, bus, devfn, 0);
+		if (!context)
+			return;
+
+		if (!context_present(context))
+			continue;
+
+		seq_printf(m, "  %-5d\t%02x:%02x.%x\t%-6llx\t%llx\n", devfn,
+			   bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+			   context[0].hi, context[0].lo);
+	}
+}
+
+static void root_tbl_entry_show(struct seq_file *m, struct intel_iommu *iommu)
+{
+	unsigned long flags;
+	int bus;
+
+	spin_lock_irqsave(&iommu->lock, flags);
+	seq_printf(m, "IOMMU %s: Root Table Address:%llx\n", iommu->name,
+		   (u64)virt_to_phys(iommu->root_entry));
+	seq_puts(m, "Root Table Entries:\n");
+
+	for (bus = 0; bus < 256; bus++) {
+		if (!(iommu->root_entry[bus].lo & 1))
+			continue;
+
+		seq_printf(m, " Bus: %d H: %llx L: %llx\n", bus,
+			   iommu->root_entry[bus].hi,
+			   iommu->root_entry[bus].lo);
+
+		ctx_tbl_entry_show(m, iommu, bus);
+		seq_putc(m, '\n');
+	}
+	spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static int dmar_translation_struct_show(struct seq_file *m, void *unused)
+{
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu;
+
+	rcu_read_lock();
+	for_each_active_iommu(iommu, drhd) {
+		root_tbl_entry_show(m, iommu);
+		seq_putc(m, '\n');
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(dmar_translation_struct);
+
+#ifdef CONFIG_IRQ_REMAP
+static void ir_tbl_remap_entry_show(struct seq_file *m,
+				    struct intel_iommu *iommu)
+{
+	struct irte *ri_entry;
+	unsigned long flags;
+	int idx;
+
+	seq_puts(m, " Entry SrcID   DstID    Vct IRTE_high\t\tIRTE_low\n");
+
+	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
+	for (idx = 0; idx < INTR_REMAP_TABLE_ENTRIES; idx++) {
+		ri_entry = &iommu->ir_table->base[idx];
+		if (!ri_entry->present || ri_entry->p_pst)
+			continue;
+
+		seq_printf(m, " %-5d %02x:%02x.%01x %08x %02x  %016llx\t%016llx\n",
+			   idx, PCI_BUS_NUM(ri_entry->sid),
+			   PCI_SLOT(ri_entry->sid), PCI_FUNC(ri_entry->sid),
+			   ri_entry->dest_id, ri_entry->vector,
+			   ri_entry->high, ri_entry->low);
+	}
+	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+}
+
+static void ir_tbl_posted_entry_show(struct seq_file *m,
+				     struct intel_iommu *iommu)
+{
+	struct irte *pi_entry;
+	unsigned long flags;
+	int idx;
+
+	seq_puts(m, " Entry SrcID   PDA_high PDA_low  Vct IRTE_high\t\tIRTE_low\n");
+
+	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
+	for (idx = 0; idx < INTR_REMAP_TABLE_ENTRIES; idx++) {
+		pi_entry = &iommu->ir_table->base[idx];
+		if (!pi_entry->present || !pi_entry->p_pst)
+			continue;
+
+		seq_printf(m, " %-5d %02x:%02x.%01x %08x %08x %02x  %016llx\t%016llx\n",
+			   idx, PCI_BUS_NUM(pi_entry->sid),
+			   PCI_SLOT(pi_entry->sid), PCI_FUNC(pi_entry->sid),
+			   pi_entry->pda_h, pi_entry->pda_l << 6,
+			   pi_entry->vector, pi_entry->high,
+			   pi_entry->low);
+	}
+	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+}
+
+/*
+ * For active IOMMUs go through the Interrupt remapping
+ * table and print valid entries in a table format for
+ * Remapped and Posted Interrupts.
+ */
+static int ir_translation_struct_show(struct seq_file *m, void *unused)
+{
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu;
+	u64 irta;
+
+	rcu_read_lock();
+	for_each_active_iommu(iommu, drhd) {
+		if (!ecap_ir_support(iommu->ecap))
+			continue;
+
+		seq_printf(m, "Remapped Interrupt supported on IOMMU: %s\n",
+			   iommu->name);
+
+		if (iommu->ir_table) {
+			irta = virt_to_phys(iommu->ir_table->base);
+			seq_printf(m, " IR table address:%llx\n", irta);
+			ir_tbl_remap_entry_show(m, iommu);
+		} else {
+			seq_puts(m, "Interrupt Remapping is not enabled\n");
+		}
+		seq_putc(m, '\n');
+	}
+
+	seq_puts(m, "****\n\n");
+
+	for_each_active_iommu(iommu, drhd) {
+		if (!cap_pi_support(iommu->cap))
+			continue;
+
+		seq_printf(m, "Posted Interrupt supported on IOMMU: %s\n",
+			   iommu->name);
+
+		if (iommu->ir_table) {
+			irta = virt_to_phys(iommu->ir_table->base);
+			seq_printf(m, " IR table address:%llx\n", irta);
+			ir_tbl_posted_entry_show(m, iommu);
+		} else {
+			seq_puts(m, "Interrupt Remapping is not enabled\n");
+		}
+		seq_putc(m, '\n');
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(ir_translation_struct);
+#endif
+
+void __init intel_iommu_debugfs_init(void)
+{
+	struct dentry *intel_iommu_debug = debugfs_create_dir("intel",
+						iommu_debugfs_dir);
+
+	debugfs_create_file("iommu_regset", 0444, intel_iommu_debug, NULL,
+			    &iommu_regset_fops);
+	debugfs_create_file("dmar_translation_struct", 0444, intel_iommu_debug,
+			    NULL, &dmar_translation_struct_fops);
+#ifdef CONFIG_IRQ_REMAP
+	debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug,
+			    NULL, &ir_translation_struct_fops);
+#endif
+}
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 76f0a5d16ed3..f3ccf025108b 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -185,16 +185,6 @@ static int rwbf_quirk;
 static int force_on = 0;
 int intel_iommu_tboot_noforce;
 
-/*
- * 0: Present
- * 1-11: Reserved
- * 12-63: Context Ptr (12 - (haw-1))
- * 64-127: Reserved
- */
-struct root_entry {
-	u64	lo;
-	u64	hi;
-};
 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
 
 /*
@@ -220,21 +210,6 @@ static phys_addr_t root_entry_uctp(struct root_entry *re)
 
 	return re->hi & VTD_PAGE_MASK;
 }
-/*
- * low 64 bits:
- * 0: present
- * 1: fault processing disable
- * 2-3: translation type
- * 12-63: address space root
- * high 64 bits:
- * 0-2: address width
- * 3-6: aval
- * 8-23: domain id
- */
-struct context_entry {
-	u64 lo;
-	u64 hi;
-};
 
 static inline void context_clear_pasid_enable(struct context_entry *context)
 {
@@ -261,7 +236,7 @@ static inline bool __context_present(struct context_entry *context)
 	return (context->lo & 1);
 }
 
-static inline bool context_present(struct context_entry *context)
+bool context_present(struct context_entry *context)
 {
 	return context_pasid_enabled(context) ?
 	     __context_present(context) :
@@ -788,8 +763,8 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
 	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
 }
 
-static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
-						       u8 bus, u8 devfn, int alloc)
+struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+					 u8 devfn, int alloc)
 {
 	struct root_entry *root = &iommu->root_entry[bus];
 	struct context_entry *context;
@@ -4860,6 +4835,7 @@ int __init intel_iommu_init(void)
 	cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
 			  intel_iommu_cpu_dead);
 	intel_iommu_enabled = 1;
+	intel_iommu_debugfs_init();
 
 	return 0;
 
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 967450bd421a..c2d6c11431de 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -76,7 +76,7 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS];
  * in single-threaded environment with interrupt disabled, so no need to tabke
  * the dmar_global_lock.
  */
-static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
+DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
 static const struct irq_domain_ops intel_ir_domain_ops;
 
 static void iommu_disable_irq_remapping(struct intel_iommu *iommu);
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index b5948ba6b3b3..445c3bde0480 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -587,6 +587,7 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
 	}
 
 	io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
+	io_pgtable_tlb_sync(&data->iop);
 	return size;
 }
 
@@ -642,6 +643,13 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
 				io_pgtable_tlb_sync(iop);
 				ptep = iopte_deref(pte[i], lvl);
 				__arm_v7s_free_table(ptep, lvl + 1, data);
+			} else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
+				/*
+				 * Order the PTE update against queueing the IOVA, to
+				 * guarantee that a flush callback from a different CPU
+				 * has observed it before the TLBIALL can be issued.
+				 */
+				smp_wmb();
 			} else {
 				io_pgtable_tlb_add_flush(iop, iova, blk_size,
 							 blk_size, true);
@@ -712,7 +720,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 			    IO_PGTABLE_QUIRK_NO_PERMS |
 			    IO_PGTABLE_QUIRK_TLBI_ON_MAP |
 			    IO_PGTABLE_QUIRK_ARM_MTK_4GB |
-			    IO_PGTABLE_QUIRK_NO_DMA))
+			    IO_PGTABLE_QUIRK_NO_DMA |
+			    IO_PGTABLE_QUIRK_NON_STRICT))
 		return NULL;
 
 	/* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 88641b4560bc..237cacd4a62b 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -574,13 +574,13 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 			return 0;
 
 		tablep = iopte_deref(pte, data);
+	} else if (unmap_idx >= 0) {
+		io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
+		io_pgtable_tlb_sync(&data->iop);
+		return size;
 	}
 
-	if (unmap_idx < 0)
-		return __arm_lpae_unmap(data, iova, size, lvl, tablep);
-
-	io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
-	return size;
+	return __arm_lpae_unmap(data, iova, size, lvl, tablep);
 }
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
@@ -610,6 +610,13 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 			io_pgtable_tlb_sync(iop);
 			ptep = iopte_deref(pte, data);
 			__arm_lpae_free_pgtable(data, lvl + 1, ptep);
+		} else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
+			/*
+			 * Order the PTE update against queueing the IOVA, to
+			 * guarantee that a flush callback from a different CPU
+			 * has observed it before the TLBIALL can be issued.
+			 */
+			smp_wmb();
 		} else {
 			io_pgtable_tlb_add_flush(iop, iova, size, size, true);
 		}
@@ -772,7 +779,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 	u64 reg;
 	struct arm_lpae_io_pgtable *data;
 
-	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA))
+	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA |
+			    IO_PGTABLE_QUIRK_NON_STRICT))
 		return NULL;
 
 	data = arm_lpae_alloc_pgtable(cfg);
@@ -864,7 +872,8 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
 	struct arm_lpae_io_pgtable *data;
 
 	/* The NS quirk doesn't apply at stage 2 */
-	if (cfg->quirks & ~IO_PGTABLE_QUIRK_NO_DMA)
+	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NO_DMA |
+			    IO_PGTABLE_QUIRK_NON_STRICT))
 		return NULL;
 
 	data = arm_lpae_alloc_pgtable(cfg);
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index 2df79093cad9..47d5ae559329 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -71,12 +71,17 @@ struct io_pgtable_cfg {
 	 *	be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a
 	 *	software-emulated IOMMU), such that pagetable updates need not
 	 *	be treated as explicit DMA data.
+	 *
+	 * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
+	 *	on unmap, for DMA domains using the flush queue mechanism for
+	 *	delayed invalidation.
 	 */
 	#define IO_PGTABLE_QUIRK_ARM_NS		BIT(0)
 	#define IO_PGTABLE_QUIRK_NO_PERMS	BIT(1)
 	#define IO_PGTABLE_QUIRK_TLBI_ON_MAP	BIT(2)
 	#define IO_PGTABLE_QUIRK_ARM_MTK_4GB	BIT(3)
 	#define IO_PGTABLE_QUIRK_NO_DMA		BIT(4)
+	#define IO_PGTABLE_QUIRK_NON_STRICT	BIT(5)
 	unsigned long			quirks;
 	unsigned long			pgsize_bitmap;
 	unsigned int			ias;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 8c15c5980299..edbdf5d6962c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -32,6 +32,7 @@
 #include <linux/pci.h>
 #include <linux/bitops.h>
 #include <linux/property.h>
+#include <linux/fsl/mc.h>
 #include <trace/events/iommu.h>
 
 static struct kset *iommu_group_kset;
@@ -41,6 +42,7 @@ static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
 #else
 static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA;
 #endif
+static bool iommu_dma_strict __read_mostly = true;
 
 struct iommu_callback_data {
 	const struct iommu_ops *ops;
@@ -131,6 +133,12 @@ static int __init iommu_set_def_domain_type(char *str)
 }
 early_param("iommu.passthrough", iommu_set_def_domain_type);
 
+static int __init iommu_dma_setup(char *str)
+{
+	return kstrtobool(str, &iommu_dma_strict);
+}
+early_param("iommu.strict", iommu_dma_setup);
+
 static ssize_t iommu_group_attr_show(struct kobject *kobj,
 				     struct attribute *__attr, char *buf)
 {
@@ -1024,6 +1032,18 @@ struct iommu_group *pci_device_group(struct device *dev)
 	return iommu_group_alloc();
 }
 
+/* Get the IOMMU group for device on fsl-mc bus */
+struct iommu_group *fsl_mc_device_group(struct device *dev)
+{
+	struct device *cont_dev = fsl_mc_cont_dev(dev);
+	struct iommu_group *group;
+
+	group = iommu_group_get(cont_dev);
+	if (!group)
+		group = iommu_group_alloc();
+	return group;
+}
+
 /**
  * iommu_group_get_for_dev - Find or create the IOMMU group for a device
  * @dev: target device
@@ -1072,6 +1092,13 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
 		group->default_domain = dom;
 		if (!group->domain)
 			group->domain = dom;
+
+		if (dom && !iommu_dma_strict) {
+			int attr = 1;
+			iommu_domain_set_attr(dom,
+					      DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+					      &attr);
+		}
 	}
 
 	ret = iommu_group_add_device(group, dev);
@@ -1416,7 +1443,16 @@ struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
 
 /*
- * IOMMU groups are really the natrual working unit of the IOMMU, but
+ * For IOMMU_DOMAIN_DMA implementations which already provide their own
+ * guarantees that the group and its default domain are valid and correct.
+ */
+struct iommu_domain *iommu_get_dma_domain(struct device *dev)
+{
+	return dev->iommu_group->default_domain;
+}
+
+/*
+ * IOMMU groups are really the natural working unit of the IOMMU, but
  * the IOMMU API works on domains and devices.  Bridge that gap by
  * iterating over the devices in a group.  Ideally we'd have a single
  * device which represents the requestor ID of the group, but we also
@@ -1796,7 +1832,6 @@ int iommu_domain_get_attr(struct iommu_domain *domain,
 	struct iommu_domain_geometry *geometry;
 	bool *paging;
 	int ret = 0;
-	u32 *count;
 
 	switch (attr) {
 	case DOMAIN_ATTR_GEOMETRY:
@@ -1808,15 +1843,6 @@ int iommu_domain_get_attr(struct iommu_domain *domain,
 		paging  = data;
 		*paging = (domain->pgsize_bitmap != 0UL);
 		break;
-	case DOMAIN_ATTR_WINDOWS:
-		count = data;
-
-		if (domain->ops->domain_get_windows != NULL)
-			*count = domain->ops->domain_get_windows(domain);
-		else
-			ret = -ENODEV;
-
-		break;
 	default:
 		if (!domain->ops->domain_get_attr)
 			return -EINVAL;
@@ -1832,18 +1858,8 @@ int iommu_domain_set_attr(struct iommu_domain *domain,
 			  enum iommu_attr attr, void *data)
 {
 	int ret = 0;
-	u32 *count;
 
 	switch (attr) {
-	case DOMAIN_ATTR_WINDOWS:
-		count = data;
-
-		if (domain->ops->domain_set_windows != NULL)
-			ret = domain->ops->domain_set_windows(domain, *count);
-		else
-			ret = -ENODEV;
-
-		break;
 	default:
 		if (domain->ops->domain_set_attr == NULL)
 			return -EINVAL;
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 83fe2621effe..f8d3ba247523 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -56,6 +56,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 	iovad->granule = granule;
 	iovad->start_pfn = start_pfn;
 	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
+	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
 	iovad->flush_cb = NULL;
 	iovad->fq = NULL;
 	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
@@ -139,8 +140,10 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
 
 	cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
 	if (free->pfn_hi < iovad->dma_32bit_pfn &&
-	    free->pfn_lo >= cached_iova->pfn_lo)
+	    free->pfn_lo >= cached_iova->pfn_lo) {
 		iovad->cached32_node = rb_next(&free->node);
+		iovad->max32_alloc_size = iovad->dma_32bit_pfn;
+	}
 
 	cached_iova = rb_entry(iovad->cached_node, struct iova, node);
 	if (free->pfn_lo >= cached_iova->pfn_lo)
@@ -190,6 +193,10 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 
 	/* Walk the tree backwards */
 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+	if (limit_pfn <= iovad->dma_32bit_pfn &&
+			size >= iovad->max32_alloc_size)
+		goto iova32_full;
+
 	curr = __get_cached_rbnode(iovad, limit_pfn);
 	curr_iova = rb_entry(curr, struct iova, node);
 	do {
@@ -200,10 +207,8 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 		curr_iova = rb_entry(curr, struct iova, node);
 	} while (curr && new_pfn <= curr_iova->pfn_hi);
 
-	if (limit_pfn < size || new_pfn < iovad->start_pfn) {
-		spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-		return -ENOMEM;
-	}
+	if (limit_pfn < size || new_pfn < iovad->start_pfn)
+		goto iova32_full;
 
 	/* pfn_lo will point to size aligned address if size_aligned is set */
 	new->pfn_lo = new_pfn;
@@ -214,9 +219,12 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 	__cached_rbnode_insert_update(iovad, new);
 
 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-
-
 	return 0;
+
+iova32_full:
+	iovad->max32_alloc_size = size;
+	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+	return -ENOMEM;
 }
 
 static struct kmem_cache *iova_cache;
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 22b94f8a9a04..b98a03189580 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * IPMMU VMSA
  *
  * Copyright (C) 2014 Renesas Electronics Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
  */
 
 #include <linux/bitmap.h>
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index f7787e757244..c5dd63072529 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -24,6 +24,7 @@
 #include <linux/of_iommu.h>
 #include <linux/of_pci.h>
 #include <linux/slab.h>
+#include <linux/fsl/mc.h>
 
 #define NO_IOMMU	1
 
@@ -132,9 +133,8 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
 	struct of_phandle_args iommu_spec = { .args_count = 1 };
 	int err;
 
-	err = of_pci_map_rid(info->np, alias, "iommu-map",
-			     "iommu-map-mask", &iommu_spec.np,
-			     iommu_spec.args);
+	err = of_map_rid(info->np, alias, "iommu-map", "iommu-map-mask",
+			 &iommu_spec.np, iommu_spec.args);
 	if (err)
 		return err == -ENODEV ? NO_IOMMU : err;
 
@@ -143,6 +143,23 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
 	return err;
 }
 
+static int of_fsl_mc_iommu_init(struct fsl_mc_device *mc_dev,
+				struct device_node *master_np)
+{
+	struct of_phandle_args iommu_spec = { .args_count = 1 };
+	int err;
+
+	err = of_map_rid(master_np, mc_dev->icid, "iommu-map",
+			 "iommu-map-mask", &iommu_spec.np,
+			 iommu_spec.args);
+	if (err)
+		return err == -ENODEV ? NO_IOMMU : err;
+
+	err = of_iommu_xlate(&mc_dev->dev, &iommu_spec);
+	of_node_put(iommu_spec.np);
+	return err;
+}
+
 const struct iommu_ops *of_iommu_configure(struct device *dev,
 					   struct device_node *master_np)
 {
@@ -174,6 +191,8 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
 
 		err = pci_for_each_dma_alias(to_pci_dev(dev),
 					     of_pci_iommu_init, &info);
+	} else if (dev_is_fsl_mc(dev)) {
+		err = of_fsl_mc_iommu_init(to_fsl_mc_device(dev), master_np);
 	} else {
 		struct of_phandle_args iommu_spec;
 		int idx = 0;
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 383e7b70221d..96451b581452 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -310,6 +310,9 @@ config MVEBU_ODMI
 config MVEBU_PIC
 	bool
 
+config MVEBU_SEI
+        bool
+
 config LS_SCFG_MSI
 	def_bool y if SOC_LS1021A || ARCH_LAYERSCAPE
 	depends on PCI && PCI_MSI
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index fbd1ec8070ef..b822199445ff 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -76,6 +76,7 @@ obj-$(CONFIG_MVEBU_GICP)		+= irq-mvebu-gicp.o
 obj-$(CONFIG_MVEBU_ICU)			+= irq-mvebu-icu.o
 obj-$(CONFIG_MVEBU_ODMI)		+= irq-mvebu-odmi.o
 obj-$(CONFIG_MVEBU_PIC)			+= irq-mvebu-pic.o
+obj-$(CONFIG_MVEBU_SEI)			+= irq-mvebu-sei.o
 obj-$(CONFIG_LS_SCFG_MSI)		+= irq-ls-scfg-msi.o
 obj-$(CONFIG_EZNPS_GIC)			+= irq-eznps.o
 obj-$(CONFIG_ARCH_ASPEED)		+= irq-aspeed-vic.o irq-aspeed-i2c-ic.o
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index c2df341ff6fa..db20e992a40f 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -19,13 +19,16 @@
 #include <linux/acpi_iort.h>
 #include <linux/bitmap.h>
 #include <linux/cpu.h>
+#include <linux/crash_dump.h>
 #include <linux/delay.h>
 #include <linux/dma-iommu.h>
+#include <linux/efi.h>
 #include <linux/interrupt.h>
 #include <linux/irqdomain.h>
 #include <linux/list.h>
 #include <linux/list_sort.h>
 #include <linux/log2.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/msi.h>
 #include <linux/of.h>
@@ -52,6 +55,7 @@
 #define ITS_FLAGS_SAVE_SUSPEND_STATE		(1ULL << 3)
 
 #define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING	(1 << 0)
+#define RDIST_FLAGS_RD_TABLES_PREALLOCATED	(1 << 1)
 
 static u32 lpi_id_bits;
 
@@ -64,7 +68,7 @@ static u32 lpi_id_bits;
 #define LPI_PROPBASE_SZ		ALIGN(BIT(LPI_NRBITS), SZ_64K)
 #define LPI_PENDBASE_SZ		ALIGN(BIT(LPI_NRBITS) / 8, SZ_64K)
 
-#define LPI_PROP_DEFAULT_PRIO	0xa0
+#define LPI_PROP_DEFAULT_PRIO	GICD_INT_DEF_PRI
 
 /*
  * Collection structure - just an ID, and a redistributor address to
@@ -173,6 +177,7 @@ static DEFINE_RAW_SPINLOCK(vmovp_lock);
 static DEFINE_IDA(its_vpeid_ida);
 
 #define gic_data_rdist()		(raw_cpu_ptr(gic_rdists->rdist))
+#define gic_data_rdist_cpu(cpu)		(per_cpu_ptr(gic_rdists->rdist, cpu))
 #define gic_data_rdist_rd_base()	(gic_data_rdist()->rd_base)
 #define gic_data_rdist_vlpi_base()	(gic_data_rdist_rd_base() + SZ_128K)
 
@@ -1028,7 +1033,7 @@ static inline u32 its_get_event_id(struct irq_data *d)
 static void lpi_write_config(struct irq_data *d, u8 clr, u8 set)
 {
 	irq_hw_number_t hwirq;
-	struct page *prop_page;
+	void *va;
 	u8 *cfg;
 
 	if (irqd_is_forwarded_to_vcpu(d)) {
@@ -1036,7 +1041,7 @@ static void lpi_write_config(struct irq_data *d, u8 clr, u8 set)
 		u32 event = its_get_event_id(d);
 		struct its_vlpi_map *map;
 
-		prop_page = its_dev->event_map.vm->vprop_page;
+		va = page_address(its_dev->event_map.vm->vprop_page);
 		map = &its_dev->event_map.vlpi_maps[event];
 		hwirq = map->vintid;
 
@@ -1044,11 +1049,11 @@ static void lpi_write_config(struct irq_data *d, u8 clr, u8 set)
 		map->properties &= ~clr;
 		map->properties |= set | LPI_PROP_GROUP1;
 	} else {
-		prop_page = gic_rdists->prop_page;
+		va = gic_rdists->prop_table_va;
 		hwirq = d->hwirq;
 	}
 
-	cfg = page_address(prop_page) + hwirq - 8192;
+	cfg = va + hwirq - 8192;
 	*cfg &= ~clr;
 	*cfg |= set | LPI_PROP_GROUP1;
 
@@ -1597,6 +1602,15 @@ static void its_lpi_free(unsigned long *bitmap, u32 base, u32 nr_ids)
 	kfree(bitmap);
 }
 
+static void gic_reset_prop_table(void *va)
+{
+	/* Priority 0xa0, Group-1, disabled */
+	memset(va, LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1, LPI_PROPBASE_SZ);
+
+	/* Make sure the GIC will observe the written configuration */
+	gic_flush_dcache_to_poc(va, LPI_PROPBASE_SZ);
+}
+
 static struct page *its_allocate_prop_table(gfp_t gfp_flags)
 {
 	struct page *prop_page;
@@ -1605,13 +1619,7 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags)
 	if (!prop_page)
 		return NULL;
 
-	/* Priority 0xa0, Group-1, disabled */
-	memset(page_address(prop_page),
-	       LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1,
-	       LPI_PROPBASE_SZ);
-
-	/* Make sure the GIC will observe the written configuration */
-	gic_flush_dcache_to_poc(page_address(prop_page), LPI_PROPBASE_SZ);
+	gic_reset_prop_table(page_address(prop_page));
 
 	return prop_page;
 }
@@ -1622,20 +1630,74 @@ static void its_free_prop_table(struct page *prop_page)
 		   get_order(LPI_PROPBASE_SZ));
 }
 
-static int __init its_alloc_lpi_tables(void)
+static bool gic_check_reserved_range(phys_addr_t addr, unsigned long size)
 {
-	phys_addr_t paddr;
+	phys_addr_t start, end, addr_end;
+	u64 i;
 
-	lpi_id_bits = min_t(u32, GICD_TYPER_ID_BITS(gic_rdists->gicd_typer),
-				ITS_MAX_LPI_NRBITS);
-	gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT);
-	if (!gic_rdists->prop_page) {
-		pr_err("Failed to allocate PROPBASE\n");
-		return -ENOMEM;
+	/*
+	 * We don't bother checking for a kdump kernel as by
+	 * construction, the LPI tables are out of this kernel's
+	 * memory map.
+	 */
+	if (is_kdump_kernel())
+		return true;
+
+	addr_end = addr + size - 1;
+
+	for_each_reserved_mem_region(i, &start, &end) {
+		if (addr >= start && addr_end <= end)
+			return true;
+	}
+
+	/* Not found, not a good sign... */
+	pr_warn("GICv3: Expected reserved range [%pa:%pa], not found\n",
+		&addr, &addr_end);
+	add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
+	return false;
+}
+
+static int gic_reserve_range(phys_addr_t addr, unsigned long size)
+{
+	if (efi_enabled(EFI_CONFIG_TABLES))
+		return efi_mem_reserve_persistent(addr, size);
+
+	return 0;
+}
+
+static int __init its_setup_lpi_prop_table(void)
+{
+	if (gic_rdists->flags & RDIST_FLAGS_RD_TABLES_PREALLOCATED) {
+		u64 val;
+
+		val = gicr_read_propbaser(gic_data_rdist_rd_base() + GICR_PROPBASER);
+		lpi_id_bits = (val & GICR_PROPBASER_IDBITS_MASK) + 1;
+
+		gic_rdists->prop_table_pa = val & GENMASK_ULL(51, 12);
+		gic_rdists->prop_table_va = memremap(gic_rdists->prop_table_pa,
+						     LPI_PROPBASE_SZ,
+						     MEMREMAP_WB);
+		gic_reset_prop_table(gic_rdists->prop_table_va);
+	} else {
+		struct page *page;
+
+		lpi_id_bits = min_t(u32,
+				    GICD_TYPER_ID_BITS(gic_rdists->gicd_typer),
+				    ITS_MAX_LPI_NRBITS);
+		page = its_allocate_prop_table(GFP_NOWAIT);
+		if (!page) {
+			pr_err("Failed to allocate PROPBASE\n");
+			return -ENOMEM;
+		}
+
+		gic_rdists->prop_table_pa = page_to_phys(page);
+		gic_rdists->prop_table_va = page_address(page);
+		WARN_ON(gic_reserve_range(gic_rdists->prop_table_pa,
+					  LPI_PROPBASE_SZ));
 	}
 
-	paddr = page_to_phys(gic_rdists->prop_page);
-	pr_info("GIC: using LPI property table @%pa\n", &paddr);
+	pr_info("GICv3: using LPI property table @%pa\n",
+		&gic_rdists->prop_table_pa);
 
 	return its_lpi_init(lpi_id_bits);
 }
@@ -1924,12 +1986,9 @@ static int its_alloc_collections(struct its_node *its)
 static struct page *its_allocate_pending_table(gfp_t gfp_flags)
 {
 	struct page *pend_page;
-	/*
-	 * The pending pages have to be at least 64kB aligned,
-	 * hence the 'max(LPI_PENDBASE_SZ, SZ_64K)' below.
-	 */
+
 	pend_page = alloc_pages(gfp_flags | __GFP_ZERO,
-				get_order(max_t(u32, LPI_PENDBASE_SZ, SZ_64K)));
+				get_order(LPI_PENDBASE_SZ));
 	if (!pend_page)
 		return NULL;
 
@@ -1941,36 +2000,103 @@ static struct page *its_allocate_pending_table(gfp_t gfp_flags)
 
 static void its_free_pending_table(struct page *pt)
 {
-	free_pages((unsigned long)page_address(pt),
-		   get_order(max_t(u32, LPI_PENDBASE_SZ, SZ_64K)));
+	free_pages((unsigned long)page_address(pt), get_order(LPI_PENDBASE_SZ));
+}
+
+/*
+ * Booting with kdump and LPIs enabled is generally fine. Any other
+ * case is wrong in the absence of firmware/EFI support.
+ */
+static bool enabled_lpis_allowed(void)
+{
+	phys_addr_t addr;
+	u64 val;
+
+	/* Check whether the property table is in a reserved region */
+	val = gicr_read_propbaser(gic_data_rdist_rd_base() + GICR_PROPBASER);
+	addr = val & GENMASK_ULL(51, 12);
+
+	return gic_check_reserved_range(addr, LPI_PROPBASE_SZ);
+}
+
+static int __init allocate_lpi_tables(void)
+{
+	u64 val;
+	int err, cpu;
+
+	/*
+	 * If LPIs are enabled while we run this from the boot CPU,
+	 * flag the RD tables as pre-allocated if the stars do align.
+	 */
+	val = readl_relaxed(gic_data_rdist_rd_base() + GICR_CTLR);
+	if ((val & GICR_CTLR_ENABLE_LPIS) && enabled_lpis_allowed()) {
+		gic_rdists->flags |= (RDIST_FLAGS_RD_TABLES_PREALLOCATED |
+				      RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING);
+		pr_info("GICv3: Using preallocated redistributor tables\n");
+	}
+
+	err = its_setup_lpi_prop_table();
+	if (err)
+		return err;
+
+	/*
+	 * We allocate all the pending tables anyway, as we may have a
+	 * mix of RDs that have had LPIs enabled, and some that
+	 * don't. We'll free the unused ones as each CPU comes online.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct page *pend_page;
+
+		pend_page = its_allocate_pending_table(GFP_NOWAIT);
+		if (!pend_page) {
+			pr_err("Failed to allocate PENDBASE for CPU%d\n", cpu);
+			return -ENOMEM;
+		}
+
+		gic_data_rdist_cpu(cpu)->pend_page = pend_page;
+	}
+
+	return 0;
 }
 
 static void its_cpu_init_lpis(void)
 {
 	void __iomem *rbase = gic_data_rdist_rd_base();
 	struct page *pend_page;
+	phys_addr_t paddr;
 	u64 val, tmp;
 
-	/* If we didn't allocate the pending table yet, do it now */
-	pend_page = gic_data_rdist()->pend_page;
-	if (!pend_page) {
-		phys_addr_t paddr;
+	if (gic_data_rdist()->lpi_enabled)
+		return;
 
-		pend_page = its_allocate_pending_table(GFP_NOWAIT);
-		if (!pend_page) {
-			pr_err("Failed to allocate PENDBASE for CPU%d\n",
-			       smp_processor_id());
-			return;
-		}
+	val = readl_relaxed(rbase + GICR_CTLR);
+	if ((gic_rdists->flags & RDIST_FLAGS_RD_TABLES_PREALLOCATED) &&
+	    (val & GICR_CTLR_ENABLE_LPIS)) {
+		/*
+		 * Check that we get the same property table on all
+		 * RDs. If we don't, this is hopeless.
+		 */
+		paddr = gicr_read_propbaser(rbase + GICR_PROPBASER);
+		paddr &= GENMASK_ULL(51, 12);
+		if (WARN_ON(gic_rdists->prop_table_pa != paddr))
+			add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
+
+		paddr = gicr_read_pendbaser(rbase + GICR_PENDBASER);
+		paddr &= GENMASK_ULL(51, 16);
 
-		paddr = page_to_phys(pend_page);
-		pr_info("CPU%d: using LPI pending table @%pa\n",
-			smp_processor_id(), &paddr);
-		gic_data_rdist()->pend_page = pend_page;
+		WARN_ON(!gic_check_reserved_range(paddr, LPI_PENDBASE_SZ));
+		its_free_pending_table(gic_data_rdist()->pend_page);
+		gic_data_rdist()->pend_page = NULL;
+
+		goto out;
 	}
 
+	pend_page = gic_data_rdist()->pend_page;
+	paddr = page_to_phys(pend_page);
+	WARN_ON(gic_reserve_range(paddr, LPI_PENDBASE_SZ));
+
 	/* set PROPBASE */
-	val = (page_to_phys(gic_rdists->prop_page) |
+	val = (gic_rdists->prop_table_pa |
 	       GICR_PROPBASER_InnerShareable |
 	       GICR_PROPBASER_RaWaWb |
 	       ((LPI_NRBITS - 1) & GICR_PROPBASER_IDBITS_MASK));
@@ -2020,6 +2146,12 @@ static void its_cpu_init_lpis(void)
 
 	/* Make sure the GIC has seen the above */
 	dsb(sy);
+out:
+	gic_data_rdist()->lpi_enabled = true;
+	pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n",
+		smp_processor_id(),
+		gic_data_rdist()->pend_page ? "allocated" : "reserved",
+		&paddr);
 }
 
 static void its_cpu_init_collection(struct its_node *its)
@@ -3498,16 +3630,6 @@ static int redist_disable_lpis(void)
 	u64 timeout = USEC_PER_SEC;
 	u64 val;
 
-	/*
-	 * If coming via a CPU hotplug event, we don't need to disable
-	 * LPIs before trying to re-enable them. They are already
-	 * configured and all is well in the world. Detect this case
-	 * by checking the allocation of the pending table for the
-	 * current CPU.
-	 */
-	if (gic_data_rdist()->pend_page)
-		return 0;
-
 	if (!gic_rdists_supports_plpis()) {
 		pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
 		return -ENXIO;
@@ -3517,7 +3639,21 @@ static int redist_disable_lpis(void)
 	if (!(val & GICR_CTLR_ENABLE_LPIS))
 		return 0;
 
-	pr_warn("CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
+	/*
+	 * If coming via a CPU hotplug event, we don't need to disable
+	 * LPIs before trying to re-enable them. They are already
+	 * configured and all is well in the world.
+	 *
+	 * If running with preallocated tables, there is nothing to do.
+	 */
+	if (gic_data_rdist()->lpi_enabled ||
+	    (gic_rdists->flags & RDIST_FLAGS_RD_TABLES_PREALLOCATED))
+		return 0;
+
+	/*
+	 * From that point on, we only try to do some damage control.
+	 */
+	pr_warn("GICv3: CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
 		smp_processor_id());
 	add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
 
@@ -3773,7 +3909,8 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
 	}
 
 	gic_rdists = rdists;
-	err = its_alloc_lpi_tables();
+
+	err = allocate_lpi_tables();
 	if (err)
 		return err;
 
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index d5912f1ec884..8f87f40c9460 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -348,48 +348,45 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
 {
 	u32 irqnr;
 
-	do {
-		irqnr = gic_read_iar();
+	irqnr = gic_read_iar();
 
-		if (likely(irqnr > 15 && irqnr < 1020) || irqnr >= 8192) {
-			int err;
+	if (likely(irqnr > 15 && irqnr < 1020) || irqnr >= 8192) {
+		int err;
 
-			if (static_branch_likely(&supports_deactivate_key))
+		if (static_branch_likely(&supports_deactivate_key))
+			gic_write_eoir(irqnr);
+		else
+			isb();
+
+		err = handle_domain_irq(gic_data.domain, irqnr, regs);
+		if (err) {
+			WARN_ONCE(true, "Unexpected interrupt received!\n");
+			if (static_branch_likely(&supports_deactivate_key)) {
+				if (irqnr < 8192)
+					gic_write_dir(irqnr);
+			} else {
 				gic_write_eoir(irqnr);
-			else
-				isb();
-
-			err = handle_domain_irq(gic_data.domain, irqnr, regs);
-			if (err) {
-				WARN_ONCE(true, "Unexpected interrupt received!\n");
-				if (static_branch_likely(&supports_deactivate_key)) {
-					if (irqnr < 8192)
-						gic_write_dir(irqnr);
-				} else {
-					gic_write_eoir(irqnr);
-				}
 			}
-			continue;
 		}
-		if (irqnr < 16) {
-			gic_write_eoir(irqnr);
-			if (static_branch_likely(&supports_deactivate_key))
-				gic_write_dir(irqnr);
+		return;
+	}
+	if (irqnr < 16) {
+		gic_write_eoir(irqnr);
+		if (static_branch_likely(&supports_deactivate_key))
+			gic_write_dir(irqnr);
 #ifdef CONFIG_SMP
-			/*
-			 * Unlike GICv2, we don't need an smp_rmb() here.
-			 * The control dependency from gic_read_iar to
-			 * the ISB in gic_write_eoir is enough to ensure
-			 * that any shared data read by handle_IPI will
-			 * be read after the ACK.
-			 */
-			handle_IPI(irqnr, regs);
+		/*
+		 * Unlike GICv2, we don't need an smp_rmb() here.
+		 * The control dependency from gic_read_iar to
+		 * the ISB in gic_write_eoir is enough to ensure
+		 * that any shared data read by handle_IPI will
+		 * be read after the ACK.
+		 */
+		handle_IPI(irqnr, regs);
 #else
-			WARN_ONCE(true, "Unexpected SGI received!\n");
+		WARN_ONCE(true, "Unexpected SGI received!\n");
 #endif
-			continue;
-		}
-	} while (irqnr != ICC_IAR1_EL1_SPURIOUS);
+	}
 }
 
 static void __init gic_dist_init(void)
@@ -653,7 +650,9 @@ early_param("irqchip.gicv3_nolpi", gicv3_nolpi_cfg);
 
 static int gic_dist_supports_lpis(void)
 {
-	return !!(readl_relaxed(gic_data.dist_base + GICD_TYPER) & GICD_TYPER_LPIS) && !gicv3_nolpi;
+	return (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) &&
+		!!(readl_relaxed(gic_data.dist_base + GICD_TYPER) & GICD_TYPER_LPIS) &&
+		!gicv3_nolpi);
 }
 
 static void gic_cpu_init(void)
@@ -673,10 +672,6 @@ static void gic_cpu_init(void)
 
 	gic_cpu_config(rbase, gic_redist_wait_for_rwp);
 
-	/* Give LPIs a spin */
-	if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis())
-		its_cpu_init();
-
 	/* initialise system registers */
 	gic_cpu_sys_reg_init();
 }
@@ -689,6 +684,10 @@ static void gic_cpu_init(void)
 static int gic_starting_cpu(unsigned int cpu)
 {
 	gic_cpu_init();
+
+	if (gic_dist_supports_lpis())
+		its_cpu_init();
+
 	return 0;
 }
 
@@ -1127,14 +1126,16 @@ static int __init gic_init_bases(void __iomem *dist_base,
 
 	gic_update_vlpi_properties();
 
-	if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis())
-		its_init(handle, &gic_data.rdists, gic_data.domain);
-
 	gic_smp_init();
 	gic_dist_init();
 	gic_cpu_init();
 	gic_cpu_pm_init();
 
+	if (gic_dist_supports_lpis()) {
+		its_init(handle, &gic_data.rdists, gic_data.domain);
+		its_cpu_init();
+	}
+
 	return 0;
 
 out_free:
diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c
index 13063339b416..547045d89c4b 100644
--- a/drivers/irqchip/irq-mvebu-icu.c
+++ b/drivers/irqchip/irq-mvebu-icu.c
@@ -13,6 +13,7 @@
 #include <linux/irq.h>
 #include <linux/irqchip.h>
 #include <linux/irqdomain.h>
+#include <linux/jump_label.h>
 #include <linux/kernel.h>
 #include <linux/msi.h>
 #include <linux/of_irq.h>
@@ -26,6 +27,10 @@
 #define ICU_SETSPI_NSR_AH	0x14
 #define ICU_CLRSPI_NSR_AL	0x18
 #define ICU_CLRSPI_NSR_AH	0x1c
+#define ICU_SET_SEI_AL		0x50
+#define ICU_SET_SEI_AH		0x54
+#define ICU_CLR_SEI_AL		0x58
+#define ICU_CLR_SEI_AH		0x5C
 #define ICU_INT_CFG(x)          (0x100 + 4 * (x))
 #define   ICU_INT_ENABLE	BIT(24)
 #define   ICU_IS_EDGE		BIT(28)
@@ -36,12 +41,23 @@
 #define ICU_SATA0_ICU_ID	109
 #define ICU_SATA1_ICU_ID	107
 
+struct mvebu_icu_subset_data {
+	unsigned int icu_group;
+	unsigned int offset_set_ah;
+	unsigned int offset_set_al;
+	unsigned int offset_clr_ah;
+	unsigned int offset_clr_al;
+};
+
 struct mvebu_icu {
-	struct irq_chip irq_chip;
 	void __iomem *base;
-	struct irq_domain *domain;
 	struct device *dev;
+};
+
+struct mvebu_icu_msi_data {
+	struct mvebu_icu *icu;
 	atomic_t initialized;
+	const struct mvebu_icu_subset_data *subset_data;
 };
 
 struct mvebu_icu_irq_data {
@@ -50,28 +66,40 @@ struct mvebu_icu_irq_data {
 	unsigned int type;
 };
 
-static void mvebu_icu_init(struct mvebu_icu *icu, struct msi_msg *msg)
+DEFINE_STATIC_KEY_FALSE(legacy_bindings);
+
+static void mvebu_icu_init(struct mvebu_icu *icu,
+			   struct mvebu_icu_msi_data *msi_data,
+			   struct msi_msg *msg)
 {
-	if (atomic_cmpxchg(&icu->initialized, false, true))
+	const struct mvebu_icu_subset_data *subset = msi_data->subset_data;
+
+	if (atomic_cmpxchg(&msi_data->initialized, false, true))
 		return;
 
-	/* Set Clear/Set ICU SPI message address in AP */
-	writel_relaxed(msg[0].address_hi, icu->base + ICU_SETSPI_NSR_AH);
-	writel_relaxed(msg[0].address_lo, icu->base + ICU_SETSPI_NSR_AL);
-	writel_relaxed(msg[1].address_hi, icu->base + ICU_CLRSPI_NSR_AH);
-	writel_relaxed(msg[1].address_lo, icu->base + ICU_CLRSPI_NSR_AL);
+	/* Set 'SET' ICU SPI message address in AP */
+	writel_relaxed(msg[0].address_hi, icu->base + subset->offset_set_ah);
+	writel_relaxed(msg[0].address_lo, icu->base + subset->offset_set_al);
+
+	if (subset->icu_group != ICU_GRP_NSR)
+		return;
+
+	/* Set 'CLEAR' ICU SPI message address in AP (level-MSI only) */
+	writel_relaxed(msg[1].address_hi, icu->base + subset->offset_clr_ah);
+	writel_relaxed(msg[1].address_lo, icu->base + subset->offset_clr_al);
 }
 
 static void mvebu_icu_write_msg(struct msi_desc *desc, struct msi_msg *msg)
 {
 	struct irq_data *d = irq_get_irq_data(desc->irq);
+	struct mvebu_icu_msi_data *msi_data = platform_msi_get_host_data(d->domain);
 	struct mvebu_icu_irq_data *icu_irqd = d->chip_data;
 	struct mvebu_icu *icu = icu_irqd->icu;
 	unsigned int icu_int;
 
 	if (msg->address_lo || msg->address_hi) {
-		/* One off initialization */
-		mvebu_icu_init(icu, msg);
+		/* One off initialization per domain */
+		mvebu_icu_init(icu, msi_data, msg);
 		/* Configure the ICU with irq number & type */
 		icu_int = msg->data | ICU_INT_ENABLE;
 		if (icu_irqd->type & IRQ_TYPE_EDGE_RISING)
@@ -101,37 +129,66 @@ static void mvebu_icu_write_msg(struct msi_desc *desc, struct msi_msg *msg)
 	}
 }
 
+static struct irq_chip mvebu_icu_nsr_chip = {
+	.name			= "ICU-NSR",
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_set_type		= irq_chip_set_type_parent,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+};
+
+static struct irq_chip mvebu_icu_sei_chip = {
+	.name			= "ICU-SEI",
+	.irq_ack		= irq_chip_ack_parent,
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_set_type		= irq_chip_set_type_parent,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+};
+
 static int
 mvebu_icu_irq_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec,
 			       unsigned long *hwirq, unsigned int *type)
 {
-	struct mvebu_icu *icu = d->host_data;
-	unsigned int icu_group;
+	struct mvebu_icu_msi_data *msi_data = platform_msi_get_host_data(d);
+	struct mvebu_icu *icu = platform_msi_get_host_data(d);
+	unsigned int param_count = static_branch_unlikely(&legacy_bindings) ? 3 : 2;
 
 	/* Check the count of the parameters in dt */
-	if (WARN_ON(fwspec->param_count < 3)) {
+	if (WARN_ON(fwspec->param_count != param_count)) {
 		dev_err(icu->dev, "wrong ICU parameter count %d\n",
 			fwspec->param_count);
 		return -EINVAL;
 	}
 
-	/* Only ICU group type is handled */
-	icu_group = fwspec->param[0];
-	if (icu_group != ICU_GRP_NSR && icu_group != ICU_GRP_SR &&
-	    icu_group != ICU_GRP_SEI && icu_group != ICU_GRP_REI) {
-		dev_err(icu->dev, "wrong ICU group type %x\n", icu_group);
-		return -EINVAL;
+	if (static_branch_unlikely(&legacy_bindings)) {
+		*hwirq = fwspec->param[1];
+		*type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
+		if (fwspec->param[0] != ICU_GRP_NSR) {
+			dev_err(icu->dev, "wrong ICU group type %x\n",
+				fwspec->param[0]);
+			return -EINVAL;
+		}
+	} else {
+		*hwirq = fwspec->param[0];
+		*type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK;
+
+		/*
+		 * The ICU receives level interrupts. While the NSR are also
+		 * level interrupts, SEI are edge interrupts. Force the type
+		 * here in this case. Please note that this makes the interrupt
+		 * handling unreliable.
+		 */
+		if (msi_data->subset_data->icu_group == ICU_GRP_SEI)
+			*type = IRQ_TYPE_EDGE_RISING;
 	}
 
-	*hwirq = fwspec->param[1];
 	if (*hwirq >= ICU_MAX_IRQS) {
 		dev_err(icu->dev, "invalid interrupt number %ld\n", *hwirq);
 		return -EINVAL;
 	}
 
-	/* Mask the type to prevent wrong DT configuration */
-	*type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
-
 	return 0;
 }
 
@@ -142,8 +199,10 @@ mvebu_icu_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
 	int err;
 	unsigned long hwirq;
 	struct irq_fwspec *fwspec = args;
-	struct mvebu_icu *icu = platform_msi_get_host_data(domain);
+	struct mvebu_icu_msi_data *msi_data = platform_msi_get_host_data(domain);
+	struct mvebu_icu *icu = msi_data->icu;
 	struct mvebu_icu_irq_data *icu_irqd;
+	struct irq_chip *chip = &mvebu_icu_nsr_chip;
 
 	icu_irqd = kmalloc(sizeof(*icu_irqd), GFP_KERNEL);
 	if (!icu_irqd)
@@ -156,7 +215,10 @@ mvebu_icu_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
 		goto free_irqd;
 	}
 
-	icu_irqd->icu_group = fwspec->param[0];
+	if (static_branch_unlikely(&legacy_bindings))
+		icu_irqd->icu_group = fwspec->param[0];
+	else
+		icu_irqd->icu_group = msi_data->subset_data->icu_group;
 	icu_irqd->icu = icu;
 
 	err = platform_msi_domain_alloc(domain, virq, nr_irqs);
@@ -170,8 +232,11 @@ mvebu_icu_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
 	if (err)
 		goto free_msi;
 
+	if (icu_irqd->icu_group == ICU_GRP_SEI)
+		chip = &mvebu_icu_sei_chip;
+
 	err = irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
-					    &icu->irq_chip, icu_irqd);
+					    chip, icu_irqd);
 	if (err) {
 		dev_err(icu->dev, "failed to set the data to IRQ domain\n");
 		goto free_msi;
@@ -204,11 +269,84 @@ static const struct irq_domain_ops mvebu_icu_domain_ops = {
 	.free      = mvebu_icu_irq_domain_free,
 };
 
+static const struct mvebu_icu_subset_data mvebu_icu_nsr_subset_data = {
+	.icu_group = ICU_GRP_NSR,
+	.offset_set_ah = ICU_SETSPI_NSR_AH,
+	.offset_set_al = ICU_SETSPI_NSR_AL,
+	.offset_clr_ah = ICU_CLRSPI_NSR_AH,
+	.offset_clr_al = ICU_CLRSPI_NSR_AL,
+};
+
+static const struct mvebu_icu_subset_data mvebu_icu_sei_subset_data = {
+	.icu_group = ICU_GRP_SEI,
+	.offset_set_ah = ICU_SET_SEI_AH,
+	.offset_set_al = ICU_SET_SEI_AL,
+};
+
+static const struct of_device_id mvebu_icu_subset_of_match[] = {
+	{
+		.compatible = "marvell,cp110-icu-nsr",
+		.data = &mvebu_icu_nsr_subset_data,
+	},
+	{
+		.compatible = "marvell,cp110-icu-sei",
+		.data = &mvebu_icu_sei_subset_data,
+	},
+	{},
+};
+
+static int mvebu_icu_subset_probe(struct platform_device *pdev)
+{
+	struct mvebu_icu_msi_data *msi_data;
+	struct device_node *msi_parent_dn;
+	struct device *dev = &pdev->dev;
+	struct irq_domain *irq_domain;
+
+	msi_data = devm_kzalloc(dev, sizeof(*msi_data), GFP_KERNEL);
+	if (!msi_data)
+		return -ENOMEM;
+
+	if (static_branch_unlikely(&legacy_bindings)) {
+		msi_data->icu = dev_get_drvdata(dev);
+		msi_data->subset_data = &mvebu_icu_nsr_subset_data;
+	} else {
+		msi_data->icu = dev_get_drvdata(dev->parent);
+		msi_data->subset_data = of_device_get_match_data(dev);
+	}
+
+	dev->msi_domain = of_msi_get_domain(dev, dev->of_node,
+					    DOMAIN_BUS_PLATFORM_MSI);
+	if (!dev->msi_domain)
+		return -EPROBE_DEFER;
+
+	msi_parent_dn = irq_domain_get_of_node(dev->msi_domain);
+	if (!msi_parent_dn)
+		return -ENODEV;
+
+	irq_domain = platform_msi_create_device_tree_domain(dev, ICU_MAX_IRQS,
+							    mvebu_icu_write_msg,
+							    &mvebu_icu_domain_ops,
+							    msi_data);
+	if (!irq_domain) {
+		dev_err(dev, "Failed to create ICU MSI domain\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static struct platform_driver mvebu_icu_subset_driver = {
+	.probe  = mvebu_icu_subset_probe,
+	.driver = {
+		.name = "mvebu-icu-subset",
+		.of_match_table = mvebu_icu_subset_of_match,
+	},
+};
+builtin_platform_driver(mvebu_icu_subset_driver);
+
 static int mvebu_icu_probe(struct platform_device *pdev)
 {
 	struct mvebu_icu *icu;
-	struct device_node *node = pdev->dev.of_node;
-	struct device_node *gicp_dn;
 	struct resource *res;
 	int i;
 
@@ -226,53 +364,38 @@ static int mvebu_icu_probe(struct platform_device *pdev)
 		return PTR_ERR(icu->base);
 	}
 
-	icu->irq_chip.name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
-					    "ICU.%x",
-					    (unsigned int)res->start);
-	if (!icu->irq_chip.name)
-		return -ENOMEM;
-
-	icu->irq_chip.irq_mask = irq_chip_mask_parent;
-	icu->irq_chip.irq_unmask = irq_chip_unmask_parent;
-	icu->irq_chip.irq_eoi = irq_chip_eoi_parent;
-	icu->irq_chip.irq_set_type = irq_chip_set_type_parent;
-#ifdef CONFIG_SMP
-	icu->irq_chip.irq_set_affinity = irq_chip_set_affinity_parent;
-#endif
-
 	/*
-	 * We're probed after MSI domains have been resolved, so force
-	 * resolution here.
+	 * Legacy bindings: ICU is one node with one MSI parent: force manually
+	 *                  the probe of the NSR interrupts side.
+	 * New bindings: ICU node has children, one per interrupt controller
+	 *               having its own MSI parent: call platform_populate().
+	 * All ICU instances should use the same bindings.
 	 */
-	pdev->dev.msi_domain = of_msi_get_domain(&pdev->dev, node,
-						 DOMAIN_BUS_PLATFORM_MSI);
-	if (!pdev->dev.msi_domain)
-		return -EPROBE_DEFER;
-
-	gicp_dn = irq_domain_get_of_node(pdev->dev.msi_domain);
-	if (!gicp_dn)
-		return -ENODEV;
+	if (!of_get_child_count(pdev->dev.of_node))
+		static_branch_enable(&legacy_bindings);
 
 	/*
-	 * Clean all ICU interrupts with type SPI_NSR, required to
+	 * Clean all ICU interrupts of type NSR and SEI, required to
 	 * avoid unpredictable SPI assignments done by firmware.
 	 */
 	for (i = 0 ; i < ICU_MAX_IRQS ; i++) {
-		u32 icu_int = readl_relaxed(icu->base + ICU_INT_CFG(i));
-		if ((icu_int >> ICU_GROUP_SHIFT) == ICU_GRP_NSR)
+		u32 icu_int, icu_grp;
+
+		icu_int = readl_relaxed(icu->base + ICU_INT_CFG(i));
+		icu_grp = icu_int >> ICU_GROUP_SHIFT;
+
+		if (icu_grp == ICU_GRP_NSR ||
+		    (icu_grp == ICU_GRP_SEI &&
+		     !static_branch_unlikely(&legacy_bindings)))
 			writel_relaxed(0x0, icu->base + ICU_INT_CFG(i));
 	}
 
-	icu->domain =
-		platform_msi_create_device_domain(&pdev->dev, ICU_MAX_IRQS,
-						  mvebu_icu_write_msg,
-						  &mvebu_icu_domain_ops, icu);
-	if (!icu->domain) {
-		dev_err(&pdev->dev, "Failed to create ICU domain\n");
-		return -ENOMEM;
-	}
+	platform_set_drvdata(pdev, icu);
 
-	return 0;
+	if (static_branch_unlikely(&legacy_bindings))
+		return mvebu_icu_subset_probe(pdev);
+	else
+		return devm_of_platform_populate(&pdev->dev);
 }
 
 static const struct of_device_id mvebu_icu_of_match[] = {
diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c
new file mode 100644
index 000000000000..566d69a2edbc
--- /dev/null
+++ b/drivers/irqchip/irq-mvebu-sei.c
@@ -0,0 +1,507 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt) "mvebu-sei: " fmt
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+
+/* Cause register */
+#define GICP_SECR(idx)		(0x0  + ((idx) * 0x4))
+/* Mask register */
+#define GICP_SEMR(idx)		(0x20 + ((idx) * 0x4))
+#define GICP_SET_SEI_OFFSET	0x30
+
+#define SEI_IRQ_COUNT_PER_REG	32
+#define SEI_IRQ_REG_COUNT	2
+#define SEI_IRQ_COUNT		(SEI_IRQ_COUNT_PER_REG * SEI_IRQ_REG_COUNT)
+#define SEI_IRQ_REG_IDX(irq_id)	((irq_id) / SEI_IRQ_COUNT_PER_REG)
+#define SEI_IRQ_REG_BIT(irq_id)	((irq_id) % SEI_IRQ_COUNT_PER_REG)
+
+struct mvebu_sei_interrupt_range {
+	u32 first;
+	u32 size;
+};
+
+struct mvebu_sei_caps {
+	struct mvebu_sei_interrupt_range ap_range;
+	struct mvebu_sei_interrupt_range cp_range;
+};
+
+struct mvebu_sei {
+	struct device *dev;
+	void __iomem *base;
+	struct resource *res;
+	struct irq_domain *sei_domain;
+	struct irq_domain *ap_domain;
+	struct irq_domain *cp_domain;
+	const struct mvebu_sei_caps *caps;
+
+	/* Lock on MSI allocations/releases */
+	struct mutex cp_msi_lock;
+	DECLARE_BITMAP(cp_msi_bitmap, SEI_IRQ_COUNT);
+
+	/* Lock on IRQ masking register */
+	raw_spinlock_t mask_lock;
+};
+
+static void mvebu_sei_ack_irq(struct irq_data *d)
+{
+	struct mvebu_sei *sei = irq_data_get_irq_chip_data(d);
+	u32 reg_idx = SEI_IRQ_REG_IDX(d->hwirq);
+
+	writel_relaxed(BIT(SEI_IRQ_REG_BIT(d->hwirq)),
+		       sei->base + GICP_SECR(reg_idx));
+}
+
+static void mvebu_sei_mask_irq(struct irq_data *d)
+{
+	struct mvebu_sei *sei = irq_data_get_irq_chip_data(d);
+	u32 reg, reg_idx = SEI_IRQ_REG_IDX(d->hwirq);
+	unsigned long flags;
+
+	/* 1 disables the interrupt */
+	raw_spin_lock_irqsave(&sei->mask_lock, flags);
+	reg = readl_relaxed(sei->base + GICP_SEMR(reg_idx));
+	reg |= BIT(SEI_IRQ_REG_BIT(d->hwirq));
+	writel_relaxed(reg, sei->base + GICP_SEMR(reg_idx));
+	raw_spin_unlock_irqrestore(&sei->mask_lock, flags);
+}
+
+static void mvebu_sei_unmask_irq(struct irq_data *d)
+{
+	struct mvebu_sei *sei = irq_data_get_irq_chip_data(d);
+	u32 reg, reg_idx = SEI_IRQ_REG_IDX(d->hwirq);
+	unsigned long flags;
+
+	/* 0 enables the interrupt */
+	raw_spin_lock_irqsave(&sei->mask_lock, flags);
+	reg = readl_relaxed(sei->base + GICP_SEMR(reg_idx));
+	reg &= ~BIT(SEI_IRQ_REG_BIT(d->hwirq));
+	writel_relaxed(reg, sei->base + GICP_SEMR(reg_idx));
+	raw_spin_unlock_irqrestore(&sei->mask_lock, flags);
+}
+
+static int mvebu_sei_set_affinity(struct irq_data *d,
+				  const struct cpumask *mask_val,
+				  bool force)
+{
+	return -EINVAL;
+}
+
+static int mvebu_sei_set_irqchip_state(struct irq_data *d,
+				       enum irqchip_irq_state which,
+				       bool state)
+{
+	/* We can only clear the pending state by acking the interrupt */
+	if (which != IRQCHIP_STATE_PENDING || state)
+		return -EINVAL;
+
+	mvebu_sei_ack_irq(d);
+	return 0;
+}
+
+static struct irq_chip mvebu_sei_irq_chip = {
+	.name			= "SEI",
+	.irq_ack		= mvebu_sei_ack_irq,
+	.irq_mask		= mvebu_sei_mask_irq,
+	.irq_unmask		= mvebu_sei_unmask_irq,
+	.irq_set_affinity       = mvebu_sei_set_affinity,
+	.irq_set_irqchip_state	= mvebu_sei_set_irqchip_state,
+};
+
+static int mvebu_sei_ap_set_type(struct irq_data *data, unsigned int type)
+{
+	if ((type & IRQ_TYPE_SENSE_MASK) != IRQ_TYPE_LEVEL_HIGH)
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct irq_chip mvebu_sei_ap_irq_chip = {
+	.name			= "AP SEI",
+	.irq_ack		= irq_chip_ack_parent,
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_set_affinity       = irq_chip_set_affinity_parent,
+	.irq_set_type		= mvebu_sei_ap_set_type,
+};
+
+static void mvebu_sei_cp_compose_msi_msg(struct irq_data *data,
+					 struct msi_msg *msg)
+{
+	struct mvebu_sei *sei = data->chip_data;
+	phys_addr_t set = sei->res->start + GICP_SET_SEI_OFFSET;
+
+	msg->data = data->hwirq + sei->caps->cp_range.first;
+	msg->address_lo = lower_32_bits(set);
+	msg->address_hi = upper_32_bits(set);
+}
+
+static int mvebu_sei_cp_set_type(struct irq_data *data, unsigned int type)
+{
+	if ((type & IRQ_TYPE_SENSE_MASK) != IRQ_TYPE_EDGE_RISING)
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct irq_chip mvebu_sei_cp_irq_chip = {
+	.name			= "CP SEI",
+	.irq_ack		= irq_chip_ack_parent,
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_set_affinity       = irq_chip_set_affinity_parent,
+	.irq_set_type		= mvebu_sei_cp_set_type,
+	.irq_compose_msi_msg	= mvebu_sei_cp_compose_msi_msg,
+};
+
+static int mvebu_sei_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs, void *arg)
+{
+	struct mvebu_sei *sei = domain->host_data;
+	struct irq_fwspec *fwspec = arg;
+
+	/* Not much to do, just setup the irqdata */
+	irq_domain_set_hwirq_and_chip(domain, virq, fwspec->param[0],
+				      &mvebu_sei_irq_chip, sei);
+
+	return 0;
+}
+
+static void mvebu_sei_domain_free(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs)
+{
+	int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		struct irq_data *d = irq_domain_get_irq_data(domain, virq + i);
+		irq_set_handler(virq + i, NULL);
+		irq_domain_reset_irq_data(d);
+	}
+}
+
+static const struct irq_domain_ops mvebu_sei_domain_ops = {
+	.alloc	= mvebu_sei_domain_alloc,
+	.free	= mvebu_sei_domain_free,
+};
+
+static int mvebu_sei_ap_translate(struct irq_domain *domain,
+				  struct irq_fwspec *fwspec,
+				  unsigned long *hwirq,
+				  unsigned int *type)
+{
+	*hwirq = fwspec->param[0];
+	*type  = IRQ_TYPE_LEVEL_HIGH;
+
+	return 0;
+}
+
+static int mvebu_sei_ap_alloc(struct irq_domain *domain, unsigned int virq,
+			      unsigned int nr_irqs, void *arg)
+{
+	struct mvebu_sei *sei = domain->host_data;
+	struct irq_fwspec fwspec;
+	unsigned long hwirq;
+	unsigned int type;
+	int err;
+
+	mvebu_sei_ap_translate(domain, arg, &hwirq, &type);
+
+	fwspec.fwnode = domain->parent->fwnode;
+	fwspec.param_count = 1;
+	fwspec.param[0] = hwirq + sei->caps->ap_range.first;
+
+	err = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+	if (err)
+		return err;
+
+	irq_domain_set_info(domain, virq, hwirq,
+			    &mvebu_sei_ap_irq_chip, sei,
+			    handle_level_irq, NULL, NULL);
+	irq_set_probe(virq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops mvebu_sei_ap_domain_ops = {
+	.translate	= mvebu_sei_ap_translate,
+	.alloc		= mvebu_sei_ap_alloc,
+	.free		= irq_domain_free_irqs_parent,
+};
+
+static void mvebu_sei_cp_release_irq(struct mvebu_sei *sei, unsigned long hwirq)
+{
+	mutex_lock(&sei->cp_msi_lock);
+	clear_bit(hwirq, sei->cp_msi_bitmap);
+	mutex_unlock(&sei->cp_msi_lock);
+}
+
+static int mvebu_sei_cp_domain_alloc(struct irq_domain *domain,
+				     unsigned int virq, unsigned int nr_irqs,
+				     void *args)
+{
+	struct mvebu_sei *sei = domain->host_data;
+	struct irq_fwspec fwspec;
+	unsigned long hwirq;
+	int ret;
+
+	/* The software only supports single allocations for now */
+	if (nr_irqs != 1)
+		return -ENOTSUPP;
+
+	mutex_lock(&sei->cp_msi_lock);
+	hwirq = find_first_zero_bit(sei->cp_msi_bitmap,
+				    sei->caps->cp_range.size);
+	if (hwirq < sei->caps->cp_range.size)
+		set_bit(hwirq, sei->cp_msi_bitmap);
+	mutex_unlock(&sei->cp_msi_lock);
+
+	if (hwirq == sei->caps->cp_range.size)
+		return -ENOSPC;
+
+	fwspec.fwnode = domain->parent->fwnode;
+	fwspec.param_count = 1;
+	fwspec.param[0] = hwirq + sei->caps->cp_range.first;
+
+	ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+	if (ret)
+		goto free_irq;
+
+	irq_domain_set_info(domain, virq, hwirq,
+			    &mvebu_sei_cp_irq_chip, sei,
+			    handle_edge_irq, NULL, NULL);
+
+	return 0;
+
+free_irq:
+	mvebu_sei_cp_release_irq(sei, hwirq);
+	return ret;
+}
+
+static void mvebu_sei_cp_domain_free(struct irq_domain *domain,
+				     unsigned int virq, unsigned int nr_irqs)
+{
+	struct mvebu_sei *sei = domain->host_data;
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+
+	if (nr_irqs != 1 || d->hwirq >= sei->caps->cp_range.size) {
+		dev_err(sei->dev, "Invalid hwirq %lu\n", d->hwirq);
+		return;
+	}
+
+	mvebu_sei_cp_release_irq(sei, d->hwirq);
+	irq_domain_free_irqs_parent(domain, virq, 1);
+}
+
+static const struct irq_domain_ops mvebu_sei_cp_domain_ops = {
+	.alloc	= mvebu_sei_cp_domain_alloc,
+	.free	= mvebu_sei_cp_domain_free,
+};
+
+static struct irq_chip mvebu_sei_msi_irq_chip = {
+	.name		= "SEI pMSI",
+	.irq_ack	= irq_chip_ack_parent,
+	.irq_set_type	= irq_chip_set_type_parent,
+};
+
+static struct msi_domain_ops mvebu_sei_msi_ops = {
+};
+
+static struct msi_domain_info mvebu_sei_msi_domain_info = {
+	.flags	= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS,
+	.ops	= &mvebu_sei_msi_ops,
+	.chip	= &mvebu_sei_msi_irq_chip,
+};
+
+static void mvebu_sei_handle_cascade_irq(struct irq_desc *desc)
+{
+	struct mvebu_sei *sei = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	u32 idx;
+
+	chained_irq_enter(chip, desc);
+
+	for (idx = 0; idx < SEI_IRQ_REG_COUNT; idx++) {
+		unsigned long irqmap;
+		int bit;
+
+		irqmap = readl_relaxed(sei->base + GICP_SECR(idx));
+		for_each_set_bit(bit, &irqmap, SEI_IRQ_COUNT_PER_REG) {
+			unsigned long hwirq;
+			unsigned int virq;
+
+			hwirq = idx * SEI_IRQ_COUNT_PER_REG + bit;
+			virq = irq_find_mapping(sei->sei_domain, hwirq);
+			if (likely(virq)) {
+				generic_handle_irq(virq);
+				continue;
+			}
+
+			dev_warn(sei->dev,
+				 "Spurious IRQ detected (hwirq %lu)\n", hwirq);
+		}
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static void mvebu_sei_reset(struct mvebu_sei *sei)
+{
+	u32 reg_idx;
+
+	/* Clear IRQ cause registers, mask all interrupts */
+	for (reg_idx = 0; reg_idx < SEI_IRQ_REG_COUNT; reg_idx++) {
+		writel_relaxed(0xFFFFFFFF, sei->base + GICP_SECR(reg_idx));
+		writel_relaxed(0xFFFFFFFF, sei->base + GICP_SEMR(reg_idx));
+	}
+}
+
+static int mvebu_sei_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct irq_domain *plat_domain;
+	struct mvebu_sei *sei;
+	u32 parent_irq;
+	int ret;
+
+	sei = devm_kzalloc(&pdev->dev, sizeof(*sei), GFP_KERNEL);
+	if (!sei)
+		return -ENOMEM;
+
+	sei->dev = &pdev->dev;
+
+	mutex_init(&sei->cp_msi_lock);
+	raw_spin_lock_init(&sei->mask_lock);
+
+	sei->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	sei->base = devm_ioremap_resource(sei->dev, sei->res);
+	if (!sei->base) {
+		dev_err(sei->dev, "Failed to remap SEI resource\n");
+		return -ENODEV;
+	}
+
+	/* Retrieve the SEI capabilities with the interrupt ranges */
+	sei->caps = of_device_get_match_data(&pdev->dev);
+	if (!sei->caps) {
+		dev_err(sei->dev,
+			"Could not retrieve controller capabilities\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Reserve the single (top-level) parent SPI IRQ from which all the
+	 * interrupts handled by this driver will be signaled.
+	 */
+	parent_irq = irq_of_parse_and_map(node, 0);
+	if (parent_irq <= 0) {
+		dev_err(sei->dev, "Failed to retrieve top-level SPI IRQ\n");
+		return -ENODEV;
+	}
+
+	/* Create the root SEI domain */
+	sei->sei_domain = irq_domain_create_linear(of_node_to_fwnode(node),
+						   (sei->caps->ap_range.size +
+						    sei->caps->cp_range.size),
+						   &mvebu_sei_domain_ops,
+						   sei);
+	if (!sei->sei_domain) {
+		dev_err(sei->dev, "Failed to create SEI IRQ domain\n");
+		ret = -ENOMEM;
+		goto dispose_irq;
+	}
+
+	irq_domain_update_bus_token(sei->sei_domain, DOMAIN_BUS_NEXUS);
+
+	/* Create the 'wired' domain */
+	sei->ap_domain = irq_domain_create_hierarchy(sei->sei_domain, 0,
+						     sei->caps->ap_range.size,
+						     of_node_to_fwnode(node),
+						     &mvebu_sei_ap_domain_ops,
+						     sei);
+	if (!sei->ap_domain) {
+		dev_err(sei->dev, "Failed to create AP IRQ domain\n");
+		ret = -ENOMEM;
+		goto remove_sei_domain;
+	}
+
+	irq_domain_update_bus_token(sei->ap_domain, DOMAIN_BUS_WIRED);
+
+	/* Create the 'MSI' domain */
+	sei->cp_domain = irq_domain_create_hierarchy(sei->sei_domain, 0,
+						     sei->caps->cp_range.size,
+						     of_node_to_fwnode(node),
+						     &mvebu_sei_cp_domain_ops,
+						     sei);
+	if (!sei->cp_domain) {
+		pr_err("Failed to create CPs IRQ domain\n");
+		ret = -ENOMEM;
+		goto remove_ap_domain;
+	}
+
+	irq_domain_update_bus_token(sei->cp_domain, DOMAIN_BUS_GENERIC_MSI);
+
+	plat_domain = platform_msi_create_irq_domain(of_node_to_fwnode(node),
+						     &mvebu_sei_msi_domain_info,
+						     sei->cp_domain);
+	if (!plat_domain) {
+		pr_err("Failed to create CPs MSI domain\n");
+		ret = -ENOMEM;
+		goto remove_cp_domain;
+	}
+
+	mvebu_sei_reset(sei);
+
+	irq_set_chained_handler_and_data(parent_irq,
+					 mvebu_sei_handle_cascade_irq,
+					 sei);
+
+	return 0;
+
+remove_cp_domain:
+	irq_domain_remove(sei->cp_domain);
+remove_ap_domain:
+	irq_domain_remove(sei->ap_domain);
+remove_sei_domain:
+	irq_domain_remove(sei->sei_domain);
+dispose_irq:
+	irq_dispose_mapping(parent_irq);
+
+	return ret;
+}
+
+struct mvebu_sei_caps mvebu_sei_ap806_caps = {
+	.ap_range = {
+		.first = 0,
+		.size = 21,
+	},
+	.cp_range = {
+		.first = 21,
+		.size = 43,
+	},
+};
+
+static const struct of_device_id mvebu_sei_of_match[] = {
+	{
+		.compatible = "marvell,ap806-sei",
+		.data = &mvebu_sei_ap806_caps,
+	},
+	{},
+};
+
+static struct platform_driver mvebu_sei_driver = {
+	.probe  = mvebu_sei_probe,
+	.driver = {
+		.name = "mvebu-sei",
+		.of_match_table = mvebu_sei_of_match,
+	},
+};
+builtin_platform_driver(mvebu_sei_driver);
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 532e9d68c704..357e9daf94ae 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -15,6 +15,7 @@
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
+#include <asm/smp.h>
 
 /*
  * This driver implements a version of the RISC-V PLIC with the actual layout
@@ -176,7 +177,7 @@ static int plic_find_hart_id(struct device_node *node)
 {
 	for (; node; node = node->parent) {
 		if (of_device_is_compatible(node, "riscv"))
-			return riscv_of_processor_hart(node);
+			return riscv_of_processor_hartid(node);
 	}
 
 	return -1;
@@ -218,7 +219,7 @@ static int __init plic_init(struct device_node *node,
 		struct of_phandle_args parent;
 		struct plic_handler *handler;
 		irq_hw_number_t hwirq;
-		int cpu;
+		int cpu, hartid;
 
 		if (of_irq_parse_one(node, i, &parent)) {
 			pr_err("failed to parse parent for context %d.\n", i);
@@ -229,12 +230,13 @@ static int __init plic_init(struct device_node *node,
 		if (parent.args[0] == -1)
 			continue;
 
-		cpu = plic_find_hart_id(parent.np);
-		if (cpu < 0) {
+		hartid = plic_find_hart_id(parent.np);
+		if (hartid < 0) {
 			pr_warn("failed to parse hart ID for context %d.\n", i);
 			continue;
 		}
 
+		cpu = riscv_hartid_to_cpuid(hartid);
 		handler = per_cpu_ptr(&plic_handlers, cpu);
 		handler->present = true;
 		handler->ctxid = i;
diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
index b1b47a40a278..faa7d61b9d6c 100644
--- a/drivers/irqchip/qcom-pdc.c
+++ b/drivers/irqchip/qcom-pdc.c
@@ -124,6 +124,7 @@ static int qcom_pdc_gic_set_type(struct irq_data *d, unsigned int type)
 		break;
 	case IRQ_TYPE_EDGE_BOTH:
 		pdc_type = PDC_EDGE_DUAL;
+		type = IRQ_TYPE_EDGE_RISING;
 		break;
 	case IRQ_TYPE_LEVEL_HIGH:
 		pdc_type = PDC_LEVEL_HIGH;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index e1fa6baf4e8e..bb3096bf2cc6 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -559,7 +559,12 @@ static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result
 		}
 		memset(result + size, 0, JOURNAL_MAC_SIZE - size);
 	} else {
-		__u8 digest[size];
+		__u8 digest[HASH_MAX_DIGESTSIZE];
+
+		if (WARN_ON(size > sizeof(digest))) {
+			dm_integrity_io_error(ic, "digest_size", -EINVAL);
+			goto err;
+		}
 		r = crypto_shash_final(desc, digest);
 		if (unlikely(r)) {
 			dm_integrity_io_error(ic, "crypto_shash_final", r);
@@ -1324,7 +1329,7 @@ static void integrity_metadata(struct work_struct *w)
 		struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
 		char *checksums;
 		unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
-		char checksums_onstack[ic->tag_size + extra_space];
+		char checksums_onstack[HASH_MAX_DIGESTSIZE];
 		unsigned sectors_to_process = dio->range.n_sectors;
 		sector_t sector = dio->range.logical_sector;
 
@@ -1333,8 +1338,14 @@ static void integrity_metadata(struct work_struct *w)
 
 		checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
 				    GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
-		if (!checksums)
+		if (!checksums) {
 			checksums = checksums_onstack;
+			if (WARN_ON(extra_space &&
+				    digest_size > sizeof(checksums_onstack))) {
+				r = -EINVAL;
+				goto error;
+			}
+		}
 
 		__bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
 			unsigned pos;
@@ -1546,7 +1557,7 @@ retry_kmap:
 				} while (++s < ic->sectors_per_block);
 #ifdef INTERNAL_VERIFY
 				if (ic->internal_hash) {
-					char checksums_onstack[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)];
+					char checksums_onstack[max(HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
 
 					integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack);
 					if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) {
@@ -1596,7 +1607,7 @@ retry_kmap:
 				if (ic->internal_hash) {
 					unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
 					if (unlikely(digest_size > ic->tag_size)) {
-						char checksums_onstack[digest_size];
+						char checksums_onstack[HASH_MAX_DIGESTSIZE];
 						integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack);
 						memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size);
 					} else
@@ -2023,7 +2034,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
 				    unlikely(from_replay) &&
 #endif
 				    ic->internal_hash) {
-					char test_tag[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)];
+					char test_tag[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
 
 					integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block),
 								  (char *)access_journal_data(ic, i, l), test_tag);
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 684af08d0747..0ce04e5b4afb 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -212,12 +212,15 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
 	struct dm_verity_fec_io *fio = fec_io(io);
 	u64 block, ileaved;
 	u8 *bbuf, *rs_block;
-	u8 want_digest[v->digest_size];
+	u8 want_digest[HASH_MAX_DIGESTSIZE];
 	unsigned n, k;
 
 	if (neras)
 		*neras = 0;
 
+	if (WARN_ON(v->digest_size > sizeof(want_digest)))
+		return -EINVAL;
+
 	/*
 	 * read each of the rsn data blocks that are part of the RS block, and
 	 * interleave contents to available bufs
diff --git a/drivers/media/usb/em28xx/em28xx-audio.c b/drivers/media/usb/em28xx/em28xx-audio.c
index 8e799ae1df69..67481fc82445 100644
--- a/drivers/media/usb/em28xx/em28xx-audio.c
+++ b/drivers/media/usb/em28xx/em28xx-audio.c
@@ -116,6 +116,7 @@ static void em28xx_audio_isocirq(struct urb *urb)
 		stride = runtime->frame_bits >> 3;
 
 		for (i = 0; i < urb->number_of_packets; i++) {
+			unsigned long flags;
 			int length =
 			    urb->iso_frame_desc[i].actual_length / stride;
 			cp = (unsigned char *)urb->transfer_buffer +
@@ -137,7 +138,7 @@ static void em28xx_audio_isocirq(struct urb *urb)
 				       length * stride);
 			}
 
-			snd_pcm_stream_lock(substream);
+			snd_pcm_stream_lock_irqsave(substream, flags);
 
 			dev->adev.hwptr_done_capture += length;
 			if (dev->adev.hwptr_done_capture >=
@@ -153,7 +154,7 @@ static void em28xx_audio_isocirq(struct urb *urb)
 				period_elapsed = 1;
 			}
 
-			snd_pcm_stream_unlock(substream);
+			snd_pcm_stream_unlock_irqrestore(substream, flags);
 		}
 		if (period_elapsed)
 			snd_pcm_period_elapsed(substream);
diff --git a/drivers/media/usb/em28xx/em28xx-core.c b/drivers/media/usb/em28xx/em28xx-core.c
index 5657f8710ca6..2b8c84a5c9a8 100644
--- a/drivers/media/usb/em28xx/em28xx-core.c
+++ b/drivers/media/usb/em28xx/em28xx-core.c
@@ -777,6 +777,7 @@ EXPORT_SYMBOL_GPL(em28xx_set_mode);
 static void em28xx_irq_callback(struct urb *urb)
 {
 	struct em28xx *dev = urb->context;
+	unsigned long flags;
 	int i;
 
 	switch (urb->status) {
@@ -793,9 +794,9 @@ static void em28xx_irq_callback(struct urb *urb)
 	}
 
 	/* Copy data from URB */
-	spin_lock(&dev->slock);
+	spin_lock_irqsave(&dev->slock, flags);
 	dev->usb_ctl.urb_data_copy(dev, urb);
-	spin_unlock(&dev->slock);
+	spin_unlock_irqrestore(&dev->slock, flags);
 
 	/* Reset urb buffers */
 	for (i = 0; i < urb->number_of_packets; i++) {
diff --git a/drivers/media/usb/tm6000/tm6000-video.c b/drivers/media/usb/tm6000/tm6000-video.c
index 96055de6e8ce..7d268f2404e1 100644
--- a/drivers/media/usb/tm6000/tm6000-video.c
+++ b/drivers/media/usb/tm6000/tm6000-video.c
@@ -419,6 +419,7 @@ static void tm6000_irq_callback(struct urb *urb)
 {
 	struct tm6000_dmaqueue  *dma_q = urb->context;
 	struct tm6000_core *dev = container_of(dma_q, struct tm6000_core, vidq);
+	unsigned long flags;
 	int i;
 
 	switch (urb->status) {
@@ -436,9 +437,9 @@ static void tm6000_irq_callback(struct urb *urb)
 		break;
 	}
 
-	spin_lock(&dev->slock);
+	spin_lock_irqsave(&dev->slock, flags);
 	tm6000_isoc_copy(urb);
-	spin_unlock(&dev->slock);
+	spin_unlock_irqrestore(&dev->slock, flags);
 
 	/* Reset urb buffers */
 	for (i = 0; i < urb->number_of_packets; i++) {
diff --git a/drivers/misc/ad525x_dpot-i2c.c b/drivers/misc/ad525x_dpot-i2c.c
index 4f832002d116..1827c69959fb 100644
--- a/drivers/misc/ad525x_dpot-i2c.c
+++ b/drivers/misc/ad525x_dpot-i2c.c
@@ -114,6 +114,6 @@ static struct i2c_driver ad_dpot_i2c_driver = {
 
 module_i2c_driver(ad_dpot_i2c_driver);
 
-MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_AUTHOR("Michael Hennerich <michael.hennerich@analog.com>");
 MODULE_DESCRIPTION("digital potentiometer I2C bus driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ad525x_dpot-spi.c b/drivers/misc/ad525x_dpot-spi.c
index 39a7f517ee7e..0383ec153725 100644
--- a/drivers/misc/ad525x_dpot-spi.c
+++ b/drivers/misc/ad525x_dpot-spi.c
@@ -140,7 +140,7 @@ static struct spi_driver ad_dpot_spi_driver = {
 
 module_spi_driver(ad_dpot_spi_driver);
 
-MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_AUTHOR("Michael Hennerich <michael.hennerich@analog.com>");
 MODULE_DESCRIPTION("digital potentiometer SPI bus driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("spi:ad_dpot");
diff --git a/drivers/misc/ad525x_dpot.c b/drivers/misc/ad525x_dpot.c
index bc591b7168db..a0afadefcc49 100644
--- a/drivers/misc/ad525x_dpot.c
+++ b/drivers/misc/ad525x_dpot.c
@@ -1,7 +1,7 @@
 /*
  * ad525x_dpot: Driver for the Analog Devices digital potentiometers
  * Copyright (c) 2009-2010 Analog Devices, Inc.
- * Author: Michael Hennerich <hennerich@blackfin.uclinux.org>
+ * Author: Michael Hennerich <michael.hennerich@analog.com>
  *
  * DEVID		#Wipers		#Positions	Resistor Options (kOhm)
  * AD5258		1		64		1, 10, 50, 100
@@ -64,7 +64,7 @@
  * Author: Chris Verges <chrisv@cyberswitching.com>
  *
  * derived from ad5252.c
- * Copyright (c) 2006-2011 Michael Hennerich <hennerich@blackfin.uclinux.org>
+ * Copyright (c) 2006-2011 Michael Hennerich <michael.hennerich@analog.com>
  *
  * Licensed under the GPL-2 or later.
  */
@@ -760,6 +760,6 @@ EXPORT_SYMBOL(ad_dpot_remove);
 
 
 MODULE_AUTHOR("Chris Verges <chrisv@cyberswitching.com>, "
-	      "Michael Hennerich <hennerich@blackfin.uclinux.org>");
+	      "Michael Hennerich <michael.hennerich@analog.com>");
 MODULE_DESCRIPTION("Digital potentiometer driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/misc/apds990x.c b/drivers/misc/apds990x.c
index ed9412d750b7..24876c615c3c 100644
--- a/drivers/misc/apds990x.c
+++ b/drivers/misc/apds990x.c
@@ -188,7 +188,6 @@ struct apds990x_chip {
 #define APDS_LUX_DEFAULT_RATE		200
 
 static const u8 again[]	= {1, 8, 16, 120}; /* ALS gain steps */
-static const u8 ir_currents[]	= {100, 50, 25, 12}; /* IRled currents in mA */
 
 /* Following two tables must match i.e 10Hz rate means 1 as persistence value */
 static const u16 arates_hz[] = {10, 5, 2, 1};
diff --git a/drivers/misc/bh1770glc.c b/drivers/misc/bh1770glc.c
index 9c62bf064f77..17e81ce9925b 100644
--- a/drivers/misc/bh1770glc.c
+++ b/drivers/misc/bh1770glc.c
@@ -180,9 +180,6 @@ static const char reg_vleds[] = "Vleds";
 static const s16 prox_rates_hz[] = {100, 50, 33, 25, 14, 10, 5, 2};
 static const s16 prox_rates_ms[] = {10, 20, 30, 40, 70, 100, 200, 500};
 
-/* Supported IR-led currents in mA */
-static const u8 prox_curr_ma[] = {5, 10, 20, 50, 100, 150, 200};
-
 /*
  * Supported stand alone rates in ms from chip data sheet
  * {100, 200, 500, 1000, 2000};
diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c
index 43917898fb9a..4d6836f19489 100644
--- a/drivers/misc/cxl/flash.c
+++ b/drivers/misc/cxl/flash.c
@@ -92,8 +92,8 @@ static int update_property(struct device_node *dn, const char *name,
 
 	val = (u32 *)new_prop->value;
 	rc = cxl_update_properties(dn, new_prop);
-	pr_devel("%s: update property (%s, length: %i, value: %#x)\n",
-		  dn->name, name, vd, be32_to_cpu(*val));
+	pr_devel("%pOFn: update property (%s, length: %i, value: %#x)\n",
+		  dn, name, vd, be32_to_cpu(*val));
 
 	if (rc) {
 		kfree(new_prop->name);
diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index 3bc0c15d4d85..5d28d9e454f5 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -1018,8 +1018,6 @@ err1:
 
 void cxl_guest_remove_afu(struct cxl_afu *afu)
 {
-	pr_devel("in %s - AFU(%d)\n", __func__, afu->slice);
-
 	if (!afu)
 		return;
 
diff --git a/drivers/misc/echo/echo.c b/drivers/misc/echo/echo.c
index 8a5adc0d2e88..3ebe5d75ad6a 100644
--- a/drivers/misc/echo/echo.c
+++ b/drivers/misc/echo/echo.c
@@ -381,7 +381,7 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
 	 */
 	ec->factor = 0;
 	ec->shift = 0;
-	if ((ec->nonupdate_dwell == 0)) {
+	if (!ec->nonupdate_dwell) {
 		int p, logp, shift;
 
 		/* Determine:
diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig
index 68a1ac929917..fe7a1d27a017 100644
--- a/drivers/misc/eeprom/Kconfig
+++ b/drivers/misc/eeprom/Kconfig
@@ -111,4 +111,15 @@ config EEPROM_IDT_89HPESX
 	  This driver can also be built as a module. If so, the module
 	  will be called idt_89hpesx.
 
+config EEPROM_EE1004
+	tristate "SPD EEPROMs on DDR4 memory modules"
+	depends on I2C && SYSFS
+	help
+	  Enable this driver to get read support to SPD EEPROMs following
+	  the JEDEC EE1004 standard. These are typically found on DDR4
+	  SDRAM memory modules.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called ee1004.
+
 endmenu
diff --git a/drivers/misc/eeprom/Makefile b/drivers/misc/eeprom/Makefile
index 2aab60ef3e3e..a9b4b6579b75 100644
--- a/drivers/misc/eeprom/Makefile
+++ b/drivers/misc/eeprom/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_EEPROM_93CX6)	+= eeprom_93cx6.o
 obj-$(CONFIG_EEPROM_93XX46)	+= eeprom_93xx46.o
 obj-$(CONFIG_EEPROM_DIGSY_MTC_CFG) += digsy_mtc_eeprom.o
 obj-$(CONFIG_EEPROM_IDT_89HPESX) += idt_89hpesx.o
+obj-$(CONFIG_EEPROM_EE1004)	+= ee1004.o
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index 840afb398f9e..99de6939cd5a 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -366,7 +366,7 @@ static int at25_probe(struct spi_device *spi)
 	at25->nvmem_config.word_size = 1;
 	at25->nvmem_config.size = chip.byte_len;
 
-	at25->nvmem = nvmem_register(&at25->nvmem_config);
+	at25->nvmem = devm_nvmem_register(&spi->dev, &at25->nvmem_config);
 	if (IS_ERR(at25->nvmem))
 		return PTR_ERR(at25->nvmem);
 
@@ -379,16 +379,6 @@ static int at25_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int at25_remove(struct spi_device *spi)
-{
-	struct at25_data	*at25;
-
-	at25 = spi_get_drvdata(spi);
-	nvmem_unregister(at25->nvmem);
-
-	return 0;
-}
-
 /*-------------------------------------------------------------------------*/
 
 static const struct of_device_id at25_of_match[] = {
@@ -403,7 +393,6 @@ static struct spi_driver at25_driver = {
 		.of_match_table = at25_of_match,
 	},
 	.probe		= at25_probe,
-	.remove		= at25_remove,
 };
 
 module_spi_driver(at25_driver);
diff --git a/drivers/misc/eeprom/ee1004.c b/drivers/misc/eeprom/ee1004.c
new file mode 100644
index 000000000000..276c1690ea1b
--- /dev/null
+++ b/drivers/misc/eeprom/ee1004.c
@@ -0,0 +1,281 @@
+/*
+ * ee1004 - driver for DDR4 SPD EEPROMs
+ *
+ * Copyright (C) 2017 Jean Delvare
+ *
+ * Based on the at24 driver:
+ * Copyright (C) 2005-2007 David Brownell
+ * Copyright (C) 2008 Wolfram Sang, Pengutronix
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+
+/*
+ * DDR4 memory modules use special EEPROMs following the Jedec EE1004
+ * specification. These are 512-byte EEPROMs using a single I2C address
+ * in the 0x50-0x57 range for data. One of two 256-byte page is selected
+ * by writing a command to I2C address 0x36 or 0x37 on the same I2C bus.
+ *
+ * Therefore we need to request these 2 additional addresses, and serialize
+ * access to all such EEPROMs with a single mutex.
+ *
+ * We assume it is safe to read up to 32 bytes at once from these EEPROMs.
+ * We use SMBus access even if I2C is available, these EEPROMs are small
+ * enough, and reading from them infrequent enough, that we favor simplicity
+ * over performance.
+ */
+
+#define EE1004_ADDR_SET_PAGE		0x36
+#define EE1004_EEPROM_SIZE		512
+#define EE1004_PAGE_SIZE		256
+#define EE1004_PAGE_SHIFT		8
+
+/*
+ * Mutex protects ee1004_set_page and ee1004_dev_count, and must be held
+ * from page selection to end of read.
+ */
+static DEFINE_MUTEX(ee1004_bus_lock);
+static struct i2c_client *ee1004_set_page[2];
+static unsigned int ee1004_dev_count;
+static int ee1004_current_page;
+
+static const struct i2c_device_id ee1004_ids[] = {
+	{ "ee1004", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, ee1004_ids);
+
+/*-------------------------------------------------------------------------*/
+
+static ssize_t ee1004_eeprom_read(struct i2c_client *client, char *buf,
+				  unsigned int offset, size_t count)
+{
+	int status;
+
+	if (count > I2C_SMBUS_BLOCK_MAX)
+		count = I2C_SMBUS_BLOCK_MAX;
+	/* Can't cross page boundaries */
+	if (unlikely(offset + count > EE1004_PAGE_SIZE))
+		count = EE1004_PAGE_SIZE - offset;
+
+	status = i2c_smbus_read_i2c_block_data_or_emulated(client, offset,
+							   count, buf);
+	dev_dbg(&client->dev, "read %zu@%d --> %d\n", count, offset, status);
+
+	return status;
+}
+
+static ssize_t ee1004_read(struct file *filp, struct kobject *kobj,
+			   struct bin_attribute *bin_attr,
+			   char *buf, loff_t off, size_t count)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct i2c_client *client = to_i2c_client(dev);
+	size_t requested = count;
+	int page;
+
+	if (unlikely(!count))
+		return count;
+
+	page = off >> EE1004_PAGE_SHIFT;
+	if (unlikely(page > 1))
+		return 0;
+	off &= (1 << EE1004_PAGE_SHIFT) - 1;
+
+	/*
+	 * Read data from chip, protecting against concurrent access to
+	 * other EE1004 SPD EEPROMs on the same adapter.
+	 */
+	mutex_lock(&ee1004_bus_lock);
+
+	while (count) {
+		int status;
+
+		/* Select page */
+		if (page != ee1004_current_page) {
+			/* Data is ignored */
+			status = i2c_smbus_write_byte(ee1004_set_page[page],
+						      0x00);
+			if (status < 0) {
+				dev_err(dev, "Failed to select page %d (%d)\n",
+					page, status);
+				mutex_unlock(&ee1004_bus_lock);
+				return status;
+			}
+			dev_dbg(dev, "Selected page %d\n", page);
+			ee1004_current_page = page;
+		}
+
+		status = ee1004_eeprom_read(client, buf, off, count);
+		if (status < 0) {
+			mutex_unlock(&ee1004_bus_lock);
+			return status;
+		}
+		buf += status;
+		off += status;
+		count -= status;
+
+		if (off == EE1004_PAGE_SIZE) {
+			page++;
+			off = 0;
+		}
+	}
+
+	mutex_unlock(&ee1004_bus_lock);
+
+	return requested;
+}
+
+static const struct bin_attribute eeprom_attr = {
+	.attr = {
+		.name = "eeprom",
+		.mode = 0444,
+	},
+	.size = EE1004_EEPROM_SIZE,
+	.read = ee1004_read,
+};
+
+static int ee1004_probe(struct i2c_client *client,
+			const struct i2c_device_id *id)
+{
+	int err, cnr = 0;
+	const char *slow = NULL;
+
+	/* Make sure we can operate on this adapter */
+	if (!i2c_check_functionality(client->adapter,
+				     I2C_FUNC_SMBUS_READ_BYTE |
+				     I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
+		if (i2c_check_functionality(client->adapter,
+				     I2C_FUNC_SMBUS_READ_BYTE |
+				     I2C_FUNC_SMBUS_READ_WORD_DATA))
+			slow = "word";
+		else if (i2c_check_functionality(client->adapter,
+				     I2C_FUNC_SMBUS_READ_BYTE |
+				     I2C_FUNC_SMBUS_READ_BYTE_DATA))
+			slow = "byte";
+		else
+			return -EPFNOSUPPORT;
+	}
+
+	/* Use 2 dummy devices for page select command */
+	mutex_lock(&ee1004_bus_lock);
+	if (++ee1004_dev_count == 1) {
+		for (cnr = 0; cnr < 2; cnr++) {
+			ee1004_set_page[cnr] = i2c_new_dummy(client->adapter,
+						EE1004_ADDR_SET_PAGE + cnr);
+			if (!ee1004_set_page[cnr]) {
+				dev_err(&client->dev,
+					"address 0x%02x unavailable\n",
+					EE1004_ADDR_SET_PAGE + cnr);
+				err = -EADDRINUSE;
+				goto err_clients;
+			}
+		}
+	} else if (i2c_adapter_id(client->adapter) !=
+		   i2c_adapter_id(ee1004_set_page[0]->adapter)) {
+		dev_err(&client->dev,
+			"Driver only supports devices on a single I2C bus\n");
+		err = -EOPNOTSUPP;
+		goto err_clients;
+	}
+
+	/* Remember current page to avoid unneeded page select */
+	err = i2c_smbus_read_byte(ee1004_set_page[0]);
+	if (err == -ENXIO) {
+		/* Nack means page 1 is selected */
+		ee1004_current_page = 1;
+	} else if (err < 0) {
+		/* Anything else is a real error, bail out */
+		goto err_clients;
+	} else {
+		/* Ack means page 0 is selected, returned value meaningless */
+		ee1004_current_page = 0;
+	}
+	dev_dbg(&client->dev, "Currently selected page: %d\n",
+		ee1004_current_page);
+	mutex_unlock(&ee1004_bus_lock);
+
+	/* Create the sysfs eeprom file */
+	err = sysfs_create_bin_file(&client->dev.kobj, &eeprom_attr);
+	if (err)
+		goto err_clients_lock;
+
+	dev_info(&client->dev,
+		 "%u byte EE1004-compliant SPD EEPROM, read-only\n",
+		 EE1004_EEPROM_SIZE);
+	if (slow)
+		dev_notice(&client->dev,
+			   "Falling back to %s reads, performance will suffer\n",
+			   slow);
+
+	return 0;
+
+ err_clients_lock:
+	mutex_lock(&ee1004_bus_lock);
+ err_clients:
+	if (--ee1004_dev_count == 0) {
+		for (cnr--; cnr >= 0; cnr--) {
+			i2c_unregister_device(ee1004_set_page[cnr]);
+			ee1004_set_page[cnr] = NULL;
+		}
+	}
+	mutex_unlock(&ee1004_bus_lock);
+
+	return err;
+}
+
+static int ee1004_remove(struct i2c_client *client)
+{
+	int i;
+
+	sysfs_remove_bin_file(&client->dev.kobj, &eeprom_attr);
+
+	/* Remove page select clients if this is the last device */
+	mutex_lock(&ee1004_bus_lock);
+	if (--ee1004_dev_count == 0) {
+		for (i = 0; i < 2; i++) {
+			i2c_unregister_device(ee1004_set_page[i]);
+			ee1004_set_page[i] = NULL;
+		}
+	}
+	mutex_unlock(&ee1004_bus_lock);
+
+	return 0;
+}
+
+/*-------------------------------------------------------------------------*/
+
+static struct i2c_driver ee1004_driver = {
+	.driver = {
+		.name = "ee1004",
+	},
+	.probe = ee1004_probe,
+	.remove = ee1004_remove,
+	.id_table = ee1004_ids,
+};
+
+static int __init ee1004_init(void)
+{
+	return i2c_add_driver(&ee1004_driver);
+}
+module_init(ee1004_init);
+
+static void __exit ee1004_exit(void)
+{
+	i2c_del_driver(&ee1004_driver);
+}
+module_exit(ee1004_exit);
+
+MODULE_DESCRIPTION("Driver for EE1004-compliant DDR4 SPD EEPROMs");
+MODULE_AUTHOR("Jean Delvare");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c
index 38766968bfa2..c6dd9ad9bf7b 100644
--- a/drivers/misc/eeprom/eeprom_93xx46.c
+++ b/drivers/misc/eeprom/eeprom_93xx46.c
@@ -439,7 +439,7 @@ static int eeprom_93xx46_probe(struct spi_device *spi)
 		return -ENODEV;
 	}
 
-	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
+	edev = devm_kzalloc(&spi->dev, sizeof(*edev), GFP_KERNEL);
 	if (!edev)
 		return -ENOMEM;
 
@@ -449,8 +449,7 @@ static int eeprom_93xx46_probe(struct spi_device *spi)
 		edev->addrlen = 6;
 	else {
 		dev_err(&spi->dev, "unspecified address type\n");
-		err = -EINVAL;
-		goto fail;
+		return -EINVAL;
 	}
 
 	mutex_init(&edev->lock);
@@ -473,11 +472,9 @@ static int eeprom_93xx46_probe(struct spi_device *spi)
 	edev->nvmem_config.word_size = 1;
 	edev->nvmem_config.size = edev->size;
 
-	edev->nvmem = nvmem_register(&edev->nvmem_config);
-	if (IS_ERR(edev->nvmem)) {
-		err = PTR_ERR(edev->nvmem);
-		goto fail;
-	}
+	edev->nvmem = devm_nvmem_register(&spi->dev, &edev->nvmem_config);
+	if (IS_ERR(edev->nvmem))
+		return PTR_ERR(edev->nvmem);
 
 	dev_info(&spi->dev, "%d-bit eeprom %s\n",
 		(pd->flags & EE_ADDR8) ? 8 : 16,
@@ -490,21 +487,15 @@ static int eeprom_93xx46_probe(struct spi_device *spi)
 
 	spi_set_drvdata(spi, edev);
 	return 0;
-fail:
-	kfree(edev);
-	return err;
 }
 
 static int eeprom_93xx46_remove(struct spi_device *spi)
 {
 	struct eeprom_93xx46_dev *edev = spi_get_drvdata(spi);
 
-	nvmem_unregister(edev->nvmem);
-
 	if (!(edev->pdata->flags & EE_READONLY))
 		device_remove_file(&spi->dev, &dev_attr_erase);
 
-	kfree(edev);
 	return 0;
 }
 
diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c
index c7cd3675bcd1..d137d0fab9bf 100644
--- a/drivers/misc/genwqe/card_base.c
+++ b/drivers/misc/genwqe/card_base.c
@@ -24,7 +24,6 @@
  * controlled from here.
  */
 
-#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/err.h>
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c
index 656449cb4476..9a65bd9d6152 100644
--- a/drivers/misc/genwqe/card_ddcb.c
+++ b/drivers/misc/genwqe/card_ddcb.c
@@ -27,7 +27,6 @@
  */
 
 #include <linux/types.h>
-#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
 #include <linux/pci.h>
diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c
index 8679e0bd8ec2..3fcb9a2fe1c9 100644
--- a/drivers/misc/genwqe/card_utils.c
+++ b/drivers/misc/genwqe/card_utils.c
@@ -23,14 +23,12 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/dma-mapping.h>
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
 #include <linux/page-flags.h>
 #include <linux/scatterlist.h>
 #include <linux/hugetlb.h>
 #include <linux/iommu.h>
-#include <linux/delay.h>
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
 #include <linux/ctype.h>
@@ -298,7 +296,7 @@ static int genwqe_sgl_size(int num_pages)
 int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
 			  void __user *user_addr, size_t user_size, int write)
 {
-	int rc;
+	int ret = -ENOMEM;
 	struct pci_dev *pci_dev = cd->pci_dev;
 
 	sgl->fpage_offs = offset_in_page((unsigned long)user_addr);
@@ -318,7 +316,7 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
 	if (get_order(sgl->sgl_size) > MAX_ORDER) {
 		dev_err(&pci_dev->dev,
 			"[%s] err: too much memory requested!\n", __func__);
-		return -ENOMEM;
+		return ret;
 	}
 
 	sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size,
@@ -326,7 +324,7 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
 	if (sgl->sgl == NULL) {
 		dev_err(&pci_dev->dev,
 			"[%s] err: no memory available!\n", __func__);
-		return -ENOMEM;
+		return ret;
 	}
 
 	/* Only use buffering on incomplete pages */
@@ -339,7 +337,7 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
 		/* Sync with user memory */
 		if (copy_from_user(sgl->fpage + sgl->fpage_offs,
 				   user_addr, sgl->fpage_size)) {
-			rc = -EFAULT;
+			ret = -EFAULT;
 			goto err_out;
 		}
 	}
@@ -352,7 +350,7 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
 		/* Sync with user memory */
 		if (copy_from_user(sgl->lpage, user_addr + user_size -
 				   sgl->lpage_size, sgl->lpage_size)) {
-			rc = -EFAULT;
+			ret = -EFAULT;
 			goto err_out2;
 		}
 	}
@@ -374,7 +372,8 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
 	sgl->sgl = NULL;
 	sgl->sgl_dma_addr = 0;
 	sgl->sgl_size = 0;
-	return -ENOMEM;
+
+	return ret;
 }
 
 int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index 6193270e7b3d..de20bdaa148d 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c
@@ -985,6 +985,12 @@ static void kgdbts_run_tests(void)
 	int nmi_sleep = 0;
 	int i;
 
+	verbose = 0;
+	if (strstr(config, "V1"))
+		verbose = 1;
+	if (strstr(config, "V2"))
+		verbose = 2;
+
 	ptr = strchr(config, 'F');
 	if (ptr)
 		fork_test = simple_strtol(ptr + 1, NULL, 10);
@@ -1068,13 +1074,6 @@ static int kgdbts_option_setup(char *opt)
 		return -ENOSPC;
 	}
 	strcpy(config, opt);
-
-	verbose = 0;
-	if (strstr(config, "V1"))
-		verbose = 1;
-	if (strstr(config, "V2"))
-		verbose = 2;
-
 	return 0;
 }
 
@@ -1086,9 +1085,6 @@ static int configure_kgdbts(void)
 
 	if (!strlen(config) || isspace(config[0]))
 		goto noconfig;
-	err = kgdbts_option_setup(config);
-	if (err)
-		goto noconfig;
 
 	final_ack = 0;
 	run_plant_and_detach_test(1);
diff --git a/drivers/misc/lkdtm/usercopy.c b/drivers/misc/lkdtm/usercopy.c
index 389475b25bb7..d5a0e7f1813b 100644
--- a/drivers/misc/lkdtm/usercopy.c
+++ b/drivers/misc/lkdtm/usercopy.c
@@ -18,7 +18,7 @@
  * hardened usercopy checks by added "unconst" to all the const copies,
  * and making sure "cache_size" isn't optimized into a const.
  */
-static volatile size_t unconst = 0;
+static volatile size_t unconst;
 static volatile size_t cache_size = 1024;
 static struct kmem_cache *whitelist_cache;
 
diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
index a6f41f96f2a1..80215c312f0e 100644
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -17,7 +17,6 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/device.h>
 #include <linux/slab.h>
 #include <linux/uuid.h>
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 4d77a6ae183a..87281b3695e6 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -599,10 +599,10 @@ static __poll_t mei_poll(struct file *file, poll_table *wait)
 			mei_cl_read_start(cl, mei_cl_mtu(cl), file);
 	}
 
-	if (req_events & (POLLOUT | POLLWRNORM)) {
+	if (req_events & (EPOLLOUT | EPOLLWRNORM)) {
 		poll_wait(file, &cl->tx_wait, wait);
 		if (cl->tx_cb_queued < dev->tx_queue_limit)
-			mask |= POLLOUT | POLLWRNORM;
+			mask |= EPOLLOUT | EPOLLWRNORM;
 	}
 
 out:
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
index 6369aeaa7056..18b8ed57c4ac 100644
--- a/drivers/misc/mic/scif/scif_dma.c
+++ b/drivers/misc/mic/scif/scif_dma.c
@@ -1035,8 +1035,6 @@ scif_rma_list_dma_copy_unaligned(struct scif_copy_work *work,
 			}
 			dma_async_issue_pending(chan);
 		}
-		if (ret < 0)
-			goto err;
 		offset += loop_len;
 		temp += loop_len;
 		temp_phys += loop_len;
@@ -1553,9 +1551,8 @@ static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd,
 	int src_cache_off, dst_cache_off;
 	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
 	u8 *temp = NULL;
-	bool src_local = true, dst_local = false;
+	bool src_local = true;
 	struct scif_dma_comp_cb *comp_cb;
-	dma_addr_t src_dma_addr, dst_dma_addr;
 	int err;
 
 	if (is_dma_copy_aligned(chan->device, 1, 1, 1))
@@ -1569,12 +1566,8 @@ static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd,
 
 	if (work->loopback)
 		return scif_rma_list_cpu_copy(work);
-	src_dma_addr = __scif_off_to_dma_addr(work->src_window, src_offset);
-	dst_dma_addr = __scif_off_to_dma_addr(work->dst_window, dst_offset);
 	src_local = work->src_window->type == SCIF_WINDOW_SELF;
-	dst_local = work->dst_window->type == SCIF_WINDOW_SELF;
 
-	dst_local = dst_local;
 	/* Allocate dma_completion cb */
 	comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL);
 	if (!comp_cb)
diff --git a/drivers/misc/mic/scif/scif_fence.c b/drivers/misc/mic/scif/scif_fence.c
index cac3bcc308a7..7bb929f05d85 100644
--- a/drivers/misc/mic/scif/scif_fence.c
+++ b/drivers/misc/mic/scif/scif_fence.c
@@ -272,7 +272,7 @@ static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val)
 dma_fail:
 	if (!x100)
 		dma_pool_free(ep->remote_dev->signal_pool, status,
-			      status->src_dma_addr);
+			      src - offsetof(struct scif_status, val));
 alloc_fail:
 	return err;
 }
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
index 030769018461..4b23d586fc3f 100644
--- a/drivers/misc/sgi-gru/grukservices.c
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -634,7 +634,7 @@ static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
 			break;
 		case CBSS_PAGE_OVERFLOW:
 			STAT(mesq_noop_page_overflow);
-			/* fallthru */
+			/* fall through */
 		default:
 			BUG();
 		}
@@ -792,7 +792,7 @@ static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
 		break;
 	case CBSS_PAGE_OVERFLOW:
 		STAT(mesq_page_overflow);
-		/* fallthru */
+		/* fall through */
 	default:
 		BUG();
 	}
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 05a890ce2ab8..8e6607fc8a67 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -28,7 +28,7 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
 {
 	enum xp_retval ret;
 
-	DBUG_ON(!spin_is_locked(&ch->lock));
+	lockdep_assert_held(&ch->lock);
 
 	if (!(ch->flags & XPC_C_OPENREQUEST) ||
 	    !(ch->flags & XPC_C_ROPENREQUEST)) {
@@ -82,7 +82,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 	struct xpc_partition *part = &xpc_partitions[ch->partid];
 	u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED);
 
-	DBUG_ON(!spin_is_locked(&ch->lock));
+	lockdep_assert_held(&ch->lock);
 
 	if (!(ch->flags & XPC_C_DISCONNECTING))
 		return;
@@ -755,7 +755,7 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch,
 {
 	u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED);
 
-	DBUG_ON(!spin_is_locked(&ch->lock));
+	lockdep_assert_held(&ch->lock);
 
 	if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED))
 		return;
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index 0c3ef6f1df54..3eba1c420cc0 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -98,8 +98,7 @@ xpc_get_rsvd_page_pa(int nasid)
 			len = L1_CACHE_ALIGN(len);
 
 		if (len > buf_len) {
-			if (buf_base != NULL)
-				kfree(buf_base);
+			kfree(buf_base);
 			buf_len = L1_CACHE_ALIGN(len);
 			buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL,
 							    &buf_base);
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index 5a12d2a54049..0ae69b9390ce 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -1671,7 +1671,7 @@ xpc_teardown_msg_structures_sn2(struct xpc_channel *ch)
 {
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
 
-	DBUG_ON(!spin_is_locked(&ch->lock));
+	lockdep_assert_held(&ch->lock);
 
 	ch_sn2->remote_msgqueue_pa = 0;
 
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 340b44d9e8cf..0441abe87880 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -1183,7 +1183,7 @@ xpc_teardown_msg_structures_uv(struct xpc_channel *ch)
 {
 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
 
-	DBUG_ON(!spin_is_locked(&ch->lock));
+	lockdep_assert_held(&ch->lock);
 
 	kfree(ch_uv->cached_notify_gru_mq_desc);
 	ch_uv->cached_notify_gru_mq_desc = NULL;
diff --git a/drivers/misc/sram.c b/drivers/misc/sram.c
index 74b183baf044..80d8cbe8c01a 100644
--- a/drivers/misc/sram.c
+++ b/drivers/misc/sram.c
@@ -323,10 +323,8 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
 		cur_start = block->start + block->size;
 	}
 
- err_chunks:
-	if (child)
-		of_node_put(child);
-
+err_chunks:
+	of_node_put(child);
 	kfree(rblocks);
 
 	return ret;
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 2543ef1ece17..9b0b3fa4f836 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -25,6 +25,9 @@
 #include <linux/workqueue.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
 #include <linux/vmw_vmci_defs.h>
 #include <linux/vmw_vmci_api.h>
 #include <asm/hypervisor.h>
@@ -37,20 +40,20 @@ MODULE_ALIAS("vmware_vmmemctl");
 MODULE_LICENSE("GPL");
 
 /*
- * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
- * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use
- * __GFP_NOWARN, to suppress page allocation failure warnings.
+ * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't allow wait
+ * (__GFP_RECLAIM) for huge page allocations. Use __GFP_NOWARN, to suppress page
+ * allocation failure warnings. Disallow access to emergency low-memory pools.
  */
-#define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)
+#define VMW_HUGE_PAGE_ALLOC_FLAGS	(__GFP_HIGHMEM|__GFP_NOWARN|	\
+					 __GFP_NOMEMALLOC)
 
 /*
- * Use GFP_HIGHUSER when executing in a separate kernel thread
- * context and allocation can sleep.  This is less stressful to
- * the guest memory system, since it allows the thread to block
- * while memory is reclaimed, and won't take pages from emergency
- * low-memory pools.
+ * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We allow lightweight
+ * reclamation (__GFP_NORETRY). Use __GFP_NOWARN, to suppress page allocation
+ * failure warnings. Disallow access to emergency low-memory pools.
  */
-#define VMW_PAGE_ALLOC_CANSLEEP		(GFP_HIGHUSER)
+#define VMW_PAGE_ALLOC_FLAGS		(__GFP_HIGHMEM|__GFP_NOWARN|	\
+					 __GFP_NOMEMALLOC|__GFP_NORETRY)
 
 /* Maximum number of refused pages we accumulate during inflation cycle */
 #define VMW_BALLOON_MAX_REFUSED		16
@@ -77,225 +80,420 @@ enum vmwballoon_capabilities {
 					| VMW_BALLOON_BATCHED_2M_CMDS \
 					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)
 
-#define VMW_BALLOON_2M_SHIFT		(9)
-#define VMW_BALLOON_NUM_PAGE_SIZES	(2)
+#define VMW_BALLOON_2M_ORDER		(PMD_SHIFT - PAGE_SHIFT)
 
-/*
- * Backdoor commands availability:
- *
- * START, GET_TARGET and GUEST_ID are always available,
- *
- * VMW_BALLOON_BASIC_CMDS:
- *	LOCK and UNLOCK commands,
- * VMW_BALLOON_BATCHED_CMDS:
- *	BATCHED_LOCK and BATCHED_UNLOCK commands.
- * VMW BALLOON_BATCHED_2M_CMDS:
- *	BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
- * VMW VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
- *	VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
- */
-#define VMW_BALLOON_CMD_START			0
-#define VMW_BALLOON_CMD_GET_TARGET		1
-#define VMW_BALLOON_CMD_LOCK			2
-#define VMW_BALLOON_CMD_UNLOCK			3
-#define VMW_BALLOON_CMD_GUEST_ID		4
-#define VMW_BALLOON_CMD_BATCHED_LOCK		6
-#define VMW_BALLOON_CMD_BATCHED_UNLOCK		7
-#define VMW_BALLOON_CMD_BATCHED_2M_LOCK		8
-#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK	9
-#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET	10
-
-
-/* error codes */
-#define VMW_BALLOON_SUCCESS		        0
-#define VMW_BALLOON_FAILURE		        -1
-#define VMW_BALLOON_ERROR_CMD_INVALID	        1
-#define VMW_BALLOON_ERROR_PPN_INVALID	        2
-#define VMW_BALLOON_ERROR_PPN_LOCKED	        3
-#define VMW_BALLOON_ERROR_PPN_UNLOCKED	        4
-#define VMW_BALLOON_ERROR_PPN_PINNED	        5
-#define VMW_BALLOON_ERROR_PPN_NOTNEEDED	        6
-#define VMW_BALLOON_ERROR_RESET		        7
-#define VMW_BALLOON_ERROR_BUSY		        8
+enum vmballoon_page_size_type {
+	VMW_BALLOON_4K_PAGE,
+	VMW_BALLOON_2M_PAGE,
+	VMW_BALLOON_LAST_SIZE = VMW_BALLOON_2M_PAGE
+};
 
-#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)
+#define VMW_BALLOON_NUM_PAGE_SIZES	(VMW_BALLOON_LAST_SIZE + 1)
 
-/* Batch page description */
+static const char * const vmballoon_page_size_names[] = {
+	[VMW_BALLOON_4K_PAGE]			= "4k",
+	[VMW_BALLOON_2M_PAGE]			= "2M"
+};
 
-/*
- * Layout of a page in the batch page:
+enum vmballoon_op {
+	VMW_BALLOON_INFLATE,
+	VMW_BALLOON_DEFLATE
+};
+
+enum vmballoon_op_stat_type {
+	VMW_BALLOON_OP_STAT,
+	VMW_BALLOON_OP_FAIL_STAT
+};
+
+#define VMW_BALLOON_OP_STAT_TYPES	(VMW_BALLOON_OP_FAIL_STAT + 1)
+
+/**
+ * enum vmballoon_cmd_type - backdoor commands.
  *
- * +-------------+----------+--------+
- * |             |          |        |
- * | Page number | Reserved | Status |
- * |             |          |        |
- * +-------------+----------+--------+
- * 64  PAGE_SHIFT          6         0
+ * Availability of the commands is as followed:
  *
- * The reserved field should be set to 0.
+ * %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and
+ * %VMW_BALLOON_CMD_GUEST_ID are always available.
+ *
+ * If the host reports %VMW_BALLOON_BASIC_CMDS are supported then
+ * %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK commands are available.
+ *
+ * If the host reports %VMW_BALLOON_BATCHED_CMDS are supported then
+ * %VMW_BALLOON_CMD_BATCHED_LOCK and VMW_BALLOON_CMD_BATCHED_UNLOCK commands
+ * are available.
+ *
+ * If the host reports %VMW_BALLOON_BATCHED_2M_CMDS are supported then
+ * %VMW_BALLOON_CMD_BATCHED_2M_LOCK and %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK
+ * are supported.
+ *
+ * If the host reports  VMW_BALLOON_SIGNALLED_WAKEUP_CMD is supported then
+ * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command is supported.
+ *
+ * @VMW_BALLOON_CMD_START: Communicating supported version with the hypervisor.
+ * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size.
+ * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page.
+ * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about
+ *			    to be deflated from the balloon.
+ * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that
+ *			      runs in the VM.
+ * @VMW_BALLOON_CMD_BATCHED_LOCK: Inform the hypervisor about a batch of
+ *				  ballooned pages (up to 512).
+ * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Inform the hypervisor about a batch of
+ *				  pages that are about to be deflated from the
+ *				  balloon (up to 512).
+ * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK
+ *				     for 2MB pages.
+ * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to
+ *				       @VMW_BALLOON_CMD_BATCHED_UNLOCK for 2MB
+ *				       pages.
+ * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set doorbell notification
+ *				       that would be invoked when the balloon
+ *				       size changes.
+ * @VMW_BALLOON_CMD_LAST: Value of the last command.
  */
-#define VMW_BALLOON_BATCH_MAX_PAGES	(PAGE_SIZE / sizeof(u64))
-#define VMW_BALLOON_BATCH_STATUS_MASK	((1UL << 5) - 1)
-#define VMW_BALLOON_BATCH_PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))
-
-struct vmballoon_batch_page {
-	u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
+enum vmballoon_cmd_type {
+	VMW_BALLOON_CMD_START,
+	VMW_BALLOON_CMD_GET_TARGET,
+	VMW_BALLOON_CMD_LOCK,
+	VMW_BALLOON_CMD_UNLOCK,
+	VMW_BALLOON_CMD_GUEST_ID,
+	/* No command 5 */
+	VMW_BALLOON_CMD_BATCHED_LOCK = 6,
+	VMW_BALLOON_CMD_BATCHED_UNLOCK,
+	VMW_BALLOON_CMD_BATCHED_2M_LOCK,
+	VMW_BALLOON_CMD_BATCHED_2M_UNLOCK,
+	VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
+	VMW_BALLOON_CMD_LAST = VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
 };
 
-static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
-{
-	return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
-}
+#define VMW_BALLOON_CMD_NUM	(VMW_BALLOON_CMD_LAST + 1)
+
+enum vmballoon_error_codes {
+	VMW_BALLOON_SUCCESS,
+	VMW_BALLOON_ERROR_CMD_INVALID,
+	VMW_BALLOON_ERROR_PPN_INVALID,
+	VMW_BALLOON_ERROR_PPN_LOCKED,
+	VMW_BALLOON_ERROR_PPN_UNLOCKED,
+	VMW_BALLOON_ERROR_PPN_PINNED,
+	VMW_BALLOON_ERROR_PPN_NOTNEEDED,
+	VMW_BALLOON_ERROR_RESET,
+	VMW_BALLOON_ERROR_BUSY
+};
 
-static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
-				int idx)
-{
-	return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
-}
+#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)
 
-static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
-				u64 pa)
-{
-	batch->pages[idx] = pa;
-}
+#define VMW_BALLOON_CMD_WITH_TARGET_MASK			\
+	((1UL << VMW_BALLOON_CMD_GET_TARGET)		|	\
+	 (1UL << VMW_BALLOON_CMD_LOCK)			|	\
+	 (1UL << VMW_BALLOON_CMD_UNLOCK)		|	\
+	 (1UL << VMW_BALLOON_CMD_BATCHED_LOCK)		|	\
+	 (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK)	|	\
+	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK)	|	\
+	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK))
+
+static const char * const vmballoon_cmd_names[] = {
+	[VMW_BALLOON_CMD_START]			= "start",
+	[VMW_BALLOON_CMD_GET_TARGET]		= "target",
+	[VMW_BALLOON_CMD_LOCK]			= "lock",
+	[VMW_BALLOON_CMD_UNLOCK]		= "unlock",
+	[VMW_BALLOON_CMD_GUEST_ID]		= "guestType",
+	[VMW_BALLOON_CMD_BATCHED_LOCK]		= "batchLock",
+	[VMW_BALLOON_CMD_BATCHED_UNLOCK]	= "batchUnlock",
+	[VMW_BALLOON_CMD_BATCHED_2M_LOCK]	= "2m-lock",
+	[VMW_BALLOON_CMD_BATCHED_2M_UNLOCK]	= "2m-unlock",
+	[VMW_BALLOON_CMD_VMCI_DOORBELL_SET]	= "doorbellSet"
+};
 
+enum vmballoon_stat_page {
+	VMW_BALLOON_PAGE_STAT_ALLOC,
+	VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
+	VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
+	VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
+	VMW_BALLOON_PAGE_STAT_FREE,
+	VMW_BALLOON_PAGE_STAT_LAST = VMW_BALLOON_PAGE_STAT_FREE
+};
 
-#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result)		\
-({								\
-	unsigned long __status, __dummy1, __dummy2, __dummy3;	\
-	__asm__ __volatile__ ("inl %%dx" :			\
-		"=a"(__status),					\
-		"=c"(__dummy1),					\
-		"=d"(__dummy2),					\
-		"=b"(result),					\
-		"=S" (__dummy3) :				\
-		"0"(VMW_BALLOON_HV_MAGIC),			\
-		"1"(VMW_BALLOON_CMD_##cmd),			\
-		"2"(VMW_BALLOON_HV_PORT),			\
-		"3"(arg1),					\
-		"4" (arg2) :					\
-		"memory");					\
-	if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START)	\
-		result = __dummy1;				\
-	result &= -1UL;						\
-	__status & -1UL;					\
-})
+#define VMW_BALLOON_PAGE_STAT_NUM	(VMW_BALLOON_PAGE_STAT_LAST + 1)
 
-#ifdef CONFIG_DEBUG_FS
-struct vmballoon_stats {
-	unsigned int timer;
-	unsigned int doorbell;
-
-	/* allocation statistics */
-	unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
-	unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
-	unsigned int sleep_alloc;
-	unsigned int sleep_alloc_fail;
-	unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
-	unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
-	unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];
-
-	/* monitor operations */
-	unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
-	unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
-	unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
-	unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
-	unsigned int target;
-	unsigned int target_fail;
-	unsigned int start;
-	unsigned int start_fail;
-	unsigned int guest_type;
-	unsigned int guest_type_fail;
-	unsigned int doorbell_set;
-	unsigned int doorbell_unset;
+enum vmballoon_stat_general {
+	VMW_BALLOON_STAT_TIMER,
+	VMW_BALLOON_STAT_DOORBELL,
+	VMW_BALLOON_STAT_RESET,
+	VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_RESET
 };
 
-#define STATS_INC(stat) (stat)++
-#else
-#define STATS_INC(stat)
-#endif
+#define VMW_BALLOON_STAT_NUM		(VMW_BALLOON_STAT_LAST + 1)
 
-struct vmballoon;
 
-struct vmballoon_ops {
-	void (*add_page)(struct vmballoon *b, int idx, struct page *p);
-	int (*lock)(struct vmballoon *b, unsigned int num_pages,
-			bool is_2m_pages, unsigned int *target);
-	int (*unlock)(struct vmballoon *b, unsigned int num_pages,
-			bool is_2m_pages, unsigned int *target);
+static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);
+static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled);
+
+struct vmballoon_ctl {
+	struct list_head pages;
+	struct list_head refused_pages;
+	unsigned int n_refused_pages;
+	unsigned int n_pages;
+	enum vmballoon_page_size_type page_size;
+	enum vmballoon_op op;
 };
 
 struct vmballoon_page_size {
 	/* list of reserved physical pages */
 	struct list_head pages;
-
-	/* transient list of non-balloonable pages */
-	struct list_head refused_pages;
-	unsigned int n_refused_pages;
 };
 
+/**
+ * struct vmballoon_batch_entry - a batch entry for lock or unlock.
+ *
+ * @status: the status of the operation, which is written by the hypervisor.
+ * @reserved: reserved for future use. Must be set to zero.
+ * @pfn: the physical frame number of the page to be locked or unlocked.
+ */
+struct vmballoon_batch_entry {
+	u64 status : 5;
+	u64 reserved : PAGE_SHIFT - 5;
+	u64 pfn : 52;
+} __packed;
+
 struct vmballoon {
 	struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];
 
-	/* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
-	unsigned supported_page_sizes;
+	/**
+	 * @max_page_size: maximum supported page size for ballooning.
+	 *
+	 * Protected by @conf_sem
+	 */
+	enum vmballoon_page_size_type max_page_size;
+
+	/**
+	 * @size: balloon actual size in basic page size (frames).
+	 *
+	 * While we currently do not support size which is bigger than 32-bit,
+	 * in preparation for future support, use 64-bits.
+	 */
+	atomic64_t size;
 
-	/* balloon size in pages */
-	unsigned int size;
-	unsigned int target;
+	/**
+	 * @target: balloon target size in basic page size (frames).
+	 *
+	 * We do not protect the target under the assumption that setting the
+	 * value is always done through a single write. If this assumption ever
+	 * breaks, we would have to use X_ONCE for accesses, and suffer the less
+	 * optimized code. Although we may read stale target value if multiple
+	 * accesses happen at once, the performance impact should be minor.
+	 */
+	unsigned long target;
 
-	/* reset flag */
+	/**
+	 * @reset_required: reset flag
+	 *
+	 * Setting this flag may introduce races, but the code is expected to
+	 * handle them gracefully. In the worst case, another operation will
+	 * fail as reset did not take place. Clearing the flag is done while
+	 * holding @conf_sem for write.
+	 */
 	bool reset_required;
 
+	/**
+	 * @capabilities: hypervisor balloon capabilities.
+	 *
+	 * Protected by @conf_sem.
+	 */
 	unsigned long capabilities;
 
-	struct vmballoon_batch_page *batch_page;
+	/**
+	 * @batch_page: pointer to communication batch page.
+	 *
+	 * When batching is used, batch_page points to a page, which holds up to
+	 * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking.
+	 */
+	struct vmballoon_batch_entry *batch_page;
+
+	/**
+	 * @batch_max_pages: maximum pages that can be locked/unlocked.
+	 *
+	 * Indicates the number of pages that the hypervisor can lock or unlock
+	 * at once, according to whether batching is enabled. If batching is
+	 * disabled, only a single page can be locked/unlock on each operation.
+	 *
+	 * Protected by @conf_sem.
+	 */
 	unsigned int batch_max_pages;
-	struct page *page;
 
-	const struct vmballoon_ops *ops;
+	/**
+	 * @page: page to be locked/unlocked by the hypervisor
+	 *
+	 * @page is only used when batching is disabled and a single page is
+	 * reclaimed on each iteration.
+	 *
+	 * Protected by @comm_lock.
+	 */
+	struct page *page;
 
-#ifdef CONFIG_DEBUG_FS
 	/* statistics */
-	struct vmballoon_stats stats;
+	struct vmballoon_stats *stats;
 
+#ifdef CONFIG_DEBUG_FS
 	/* debugfs file exporting statistics */
 	struct dentry *dbg_entry;
 #endif
 
-	struct sysinfo sysinfo;
-
 	struct delayed_work dwork;
 
+	/**
+	 * @vmci_doorbell.
+	 *
+	 * Protected by @conf_sem.
+	 */
 	struct vmci_handle vmci_doorbell;
+
+	/**
+	 * @conf_sem: semaphore to protect the configuration and the statistics.
+	 */
+	struct rw_semaphore conf_sem;
+
+	/**
+	 * @comm_lock: lock to protect the communication with the host.
+	 *
+	 * Lock ordering: @conf_sem -> @comm_lock .
+	 */
+	spinlock_t comm_lock;
 };
 
 static struct vmballoon balloon;
 
+struct vmballoon_stats {
+	/* timer / doorbell operations */
+	atomic64_t general_stat[VMW_BALLOON_STAT_NUM];
+
+	/* allocation statistics for huge and small pages */
+	atomic64_t
+	       page_stat[VMW_BALLOON_PAGE_STAT_NUM][VMW_BALLOON_NUM_PAGE_SIZES];
+
+	/* Monitor operations: total operations, and failures */
+	atomic64_t ops[VMW_BALLOON_CMD_NUM][VMW_BALLOON_OP_STAT_TYPES];
+};
+
+static inline bool is_vmballoon_stats_on(void)
+{
+	return IS_ENABLED(CONFIG_DEBUG_FS) &&
+		static_branch_unlikely(&balloon_stat_enabled);
+}
+
+static inline void vmballoon_stats_op_inc(struct vmballoon *b, unsigned int op,
+					  enum vmballoon_op_stat_type type)
+{
+	if (is_vmballoon_stats_on())
+		atomic64_inc(&b->stats->ops[op][type]);
+}
+
+static inline void vmballoon_stats_gen_inc(struct vmballoon *b,
+					   enum vmballoon_stat_general stat)
+{
+	if (is_vmballoon_stats_on())
+		atomic64_inc(&b->stats->general_stat[stat]);
+}
+
+static inline void vmballoon_stats_gen_add(struct vmballoon *b,
+					   enum vmballoon_stat_general stat,
+					   unsigned int val)
+{
+	if (is_vmballoon_stats_on())
+		atomic64_add(val, &b->stats->general_stat[stat]);
+}
+
+static inline void vmballoon_stats_page_inc(struct vmballoon *b,
+					    enum vmballoon_stat_page stat,
+					    enum vmballoon_page_size_type size)
+{
+	if (is_vmballoon_stats_on())
+		atomic64_inc(&b->stats->page_stat[stat][size]);
+}
+
+static inline void vmballoon_stats_page_add(struct vmballoon *b,
+					    enum vmballoon_stat_page stat,
+					    enum vmballoon_page_size_type size,
+					    unsigned int val)
+{
+	if (is_vmballoon_stats_on())
+		atomic64_add(val, &b->stats->page_stat[stat][size]);
+}
+
+static inline unsigned long
+__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
+		unsigned long arg2, unsigned long *result)
+{
+	unsigned long status, dummy1, dummy2, dummy3, local_result;
+
+	vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_STAT);
+
+	asm volatile ("inl %%dx" :
+		"=a"(status),
+		"=c"(dummy1),
+		"=d"(dummy2),
+		"=b"(local_result),
+		"=S"(dummy3) :
+		"0"(VMW_BALLOON_HV_MAGIC),
+		"1"(cmd),
+		"2"(VMW_BALLOON_HV_PORT),
+		"3"(arg1),
+		"4"(arg2) :
+		"memory");
+
+	/* update the result if needed */
+	if (result)
+		*result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 :
+							   local_result;
+
+	/* update target when applicable */
+	if (status == VMW_BALLOON_SUCCESS &&
+	    ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK))
+		WRITE_ONCE(b->target, local_result);
+
+	if (status != VMW_BALLOON_SUCCESS &&
+	    status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) {
+		vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_FAIL_STAT);
+		pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n",
+			 __func__, vmballoon_cmd_names[cmd], arg1, arg2,
+			 status);
+	}
+
+	/* mark reset required accordingly */
+	if (status == VMW_BALLOON_ERROR_RESET)
+		b->reset_required = true;
+
+	return status;
+}
+
+static __always_inline unsigned long
+vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
+	      unsigned long arg2)
+{
+	unsigned long dummy;
+
+	return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy);
+}
+
 /*
  * Send "start" command to the host, communicating supported version
  * of the protocol.
  */
-static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
+static int vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
 {
-	unsigned long status, capabilities, dummy = 0;
-	bool success;
-
-	STATS_INC(b->stats.start);
+	unsigned long status, capabilities;
 
-	status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);
+	status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0,
+				 &capabilities);
 
 	switch (status) {
 	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
 		b->capabilities = capabilities;
-		success = true;
 		break;
 	case VMW_BALLOON_SUCCESS:
 		b->capabilities = VMW_BALLOON_BASIC_CMDS;
-		success = true;
 		break;
 	default:
-		success = false;
+		return -EIO;
 	}
 
 	/*
@@ -303,626 +501,693 @@ static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
 	 * reason disabled, do not use 2MB pages, since otherwise the legacy
 	 * mechanism is used with 2MB pages, causing a failure.
 	 */
+	b->max_page_size = VMW_BALLOON_4K_PAGE;
 	if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
 	    (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
-		b->supported_page_sizes = 2;
-	else
-		b->supported_page_sizes = 1;
-
-	if (!success) {
-		pr_debug("%s - failed, hv returns %ld\n", __func__, status);
-		STATS_INC(b->stats.start_fail);
-	}
-	return success;
-}
+		b->max_page_size = VMW_BALLOON_2M_PAGE;
 
-static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
-{
-	switch (status) {
-	case VMW_BALLOON_SUCCESS:
-		return true;
 
-	case VMW_BALLOON_ERROR_RESET:
-		b->reset_required = true;
-		/* fall through */
-
-	default:
-		return false;
-	}
+	return 0;
 }
 
-/*
+/**
+ * vmballoon_send_guest_id - communicate guest type to the host.
+ *
+ * @b: pointer to the balloon.
+ *
  * Communicate guest type to the host so that it can adjust ballooning
  * algorithm to the one most appropriate for the guest. This command
  * is normally issued after sending "start" command and is part of
  * standard reset sequence.
+ *
+ * Return: zero on success or appropriate error code.
  */
-static bool vmballoon_send_guest_id(struct vmballoon *b)
+static int vmballoon_send_guest_id(struct vmballoon *b)
 {
-	unsigned long status, dummy = 0;
-
-	status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
-				dummy);
-
-	STATS_INC(b->stats.guest_type);
+	unsigned long status;
 
-	if (vmballoon_check_status(b, status))
-		return true;
+	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID,
+			       VMW_BALLOON_GUEST_ID, 0);
 
-	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
-	STATS_INC(b->stats.guest_type_fail);
-	return false;
+	return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
 }
 
-static u16 vmballoon_page_size(bool is_2m_page)
+/**
+ * vmballoon_page_order() - return the order of the page
+ * @page_size: the size of the page.
+ *
+ * Return: the allocation order.
+ */
+static inline
+unsigned int vmballoon_page_order(enum vmballoon_page_size_type page_size)
 {
-	if (is_2m_page)
-		return 1 << VMW_BALLOON_2M_SHIFT;
+	return page_size == VMW_BALLOON_2M_PAGE ? VMW_BALLOON_2M_ORDER : 0;
+}
 
-	return 1;
+/**
+ * vmballoon_page_in_frames() - returns the number of frames in a page.
+ * @page_size: the size of the page.
+ *
+ * Return: the number of 4k frames.
+ */
+static inline unsigned int
+vmballoon_page_in_frames(enum vmballoon_page_size_type page_size)
+{
+	return 1 << vmballoon_page_order(page_size);
 }
 
-/*
- * Retrieve desired balloon size from the host.
+/**
+ * vmballoon_send_get_target() - Retrieve desired balloon size from the host.
+ *
+ * @b: pointer to the balloon.
+ *
+ * Return: zero on success, EINVAL if limit does not fit in 32-bit, as required
+ * by the host-guest protocol and EIO if an error occurred in communicating with
+ * the host.
  */
-static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
+static int vmballoon_send_get_target(struct vmballoon *b)
 {
 	unsigned long status;
-	unsigned long target;
 	unsigned long limit;
-	unsigned long dummy = 0;
-	u32 limit32;
 
-	/*
-	 * si_meminfo() is cheap. Moreover, we want to provide dynamic
-	 * max balloon size later. So let us call si_meminfo() every
-	 * iteration.
-	 */
-	si_meminfo(&b->sysinfo);
-	limit = b->sysinfo.totalram;
+	limit = totalram_pages;
 
 	/* Ensure limit fits in 32-bits */
-	limit32 = (u32)limit;
-	if (limit != limit32)
-		return false;
-
-	/* update stats */
-	STATS_INC(b->stats.target);
+	if (limit != (u32)limit)
+		return -EINVAL;
 
-	status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
-	if (vmballoon_check_status(b, status)) {
-		*new_target = target;
-		return true;
-	}
+	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);
 
-	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
-	STATS_INC(b->stats.target_fail);
-	return false;
+	return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
 }
 
-/*
- * Notify the host about allocated page so that host can use it without
- * fear that guest will need it. Host may reject some pages, we need to
- * check the return value and maybe submit a different page.
+/**
+ * vmballoon_alloc_page_list - allocates a list of pages.
+ *
+ * @b: pointer to the balloon.
+ * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
+ * @req_n_pages: the number of requested pages.
+ *
+ * Tries to allocate @req_n_pages. Add them to the list of balloon pages in
+ * @ctl.pages and updates @ctl.n_pages to reflect the number of pages.
+ *
+ * Return: zero on success or error code otherwise.
  */
-static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
-				unsigned int *hv_status, unsigned int *target)
+static int vmballoon_alloc_page_list(struct vmballoon *b,
+				     struct vmballoon_ctl *ctl,
+				     unsigned int req_n_pages)
 {
-	unsigned long status, dummy = 0;
-	u32 pfn32;
-
-	pfn32 = (u32)pfn;
-	if (pfn32 != pfn)
-		return -EINVAL;
-
-	STATS_INC(b->stats.lock[false]);
+	struct page *page;
+	unsigned int i;
 
-	*hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
-	if (vmballoon_check_status(b, status))
-		return 0;
+	for (i = 0; i < req_n_pages; i++) {
+		if (ctl->page_size == VMW_BALLOON_2M_PAGE)
+			page = alloc_pages(VMW_HUGE_PAGE_ALLOC_FLAGS,
+					   VMW_BALLOON_2M_ORDER);
+		else
+			page = alloc_page(VMW_PAGE_ALLOC_FLAGS);
 
-	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
-	STATS_INC(b->stats.lock_fail[false]);
-	return -EIO;
-}
+		/* Update statistics */
+		vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC,
+					 ctl->page_size);
 
-static int vmballoon_send_batched_lock(struct vmballoon *b,
-		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
-{
-	unsigned long status;
-	unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
+		if (page) {
+			/* Success. Add the page to the list and continue. */
+			list_add(&page->lru, &ctl->pages);
+			continue;
+		}
 
-	STATS_INC(b->stats.lock[is_2m_pages]);
+		/* Allocation failed. Update statistics and stop. */
+		vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
+					 ctl->page_size);
+		break;
+	}
 
-	if (is_2m_pages)
-		status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages,
-				*target);
-	else
-		status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages,
-				*target);
+	ctl->n_pages = i;
 
-	if (vmballoon_check_status(b, status))
-		return 0;
-
-	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
-	STATS_INC(b->stats.lock_fail[is_2m_pages]);
-	return 1;
+	return req_n_pages == ctl->n_pages ? 0 : -ENOMEM;
 }
 
-/*
- * Notify the host that guest intends to release given page back into
- * the pool of available (to the guest) pages.
+/**
+ * vmballoon_handle_one_result - Handle lock/unlock result for a single page.
+ *
+ * @b: pointer for %struct vmballoon.
+ * @page: pointer for the page whose result should be handled.
+ * @page_size: size of the page.
+ * @status: status of the operation as provided by the hypervisor.
  */
-static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
-							unsigned int *target)
+static int vmballoon_handle_one_result(struct vmballoon *b, struct page *page,
+				       enum vmballoon_page_size_type page_size,
+				       unsigned long status)
 {
-	unsigned long status, dummy = 0;
-	u32 pfn32;
-
-	pfn32 = (u32)pfn;
-	if (pfn32 != pfn)
-		return false;
+	/* On success do nothing. The page is already on the balloon list. */
+	if (likely(status == VMW_BALLOON_SUCCESS))
+		return 0;
 
-	STATS_INC(b->stats.unlock[false]);
+	pr_debug("%s: failed comm pfn %lx status %lu page_size %s\n", __func__,
+		 page_to_pfn(page), status,
+		 vmballoon_page_size_names[page_size]);
 
-	status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
-	if (vmballoon_check_status(b, status))
-		return true;
+	/* Error occurred */
+	vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
+				 page_size);
 
-	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
-	STATS_INC(b->stats.unlock_fail[false]);
-	return false;
+	return -EIO;
 }
 
-static bool vmballoon_send_batched_unlock(struct vmballoon *b,
-		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
+/**
+ * vmballoon_status_page - returns the status of (un)lock operation
+ *
+ * @b: pointer to the balloon.
+ * @idx: index for the page for which the operation is performed.
+ * @p: pointer to where the page struct is returned.
+ *
+ * Following a lock or unlock operation, returns the status of the operation for
+ * an individual page. Provides the page that the operation was performed on on
+ * the @page argument.
+ *
+ * Returns: The status of a lock or unlock operation for an individual page.
+ */
+static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
+					   struct page **p)
 {
-	unsigned long status;
-	unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
-
-	STATS_INC(b->stats.unlock[is_2m_pages]);
-
-	if (is_2m_pages)
-		status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages,
-				*target);
-	else
-		status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages,
-				*target);
+	if (static_branch_likely(&vmw_balloon_batching)) {
+		/* batching mode */
+		*p = pfn_to_page(b->batch_page[idx].pfn);
+		return b->batch_page[idx].status;
+	}
 
-	if (vmballoon_check_status(b, status))
-		return true;
+	/* non-batching mode */
+	*p = b->page;
 
-	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
-	STATS_INC(b->stats.unlock_fail[is_2m_pages]);
-	return false;
+	/*
+	 * If a failure occurs, the indication will be provided in the status
+	 * of the entire operation, which is considered before the individual
+	 * page status. So for non-batching mode, the indication is always of
+	 * success.
+	 */
+	return VMW_BALLOON_SUCCESS;
 }
 
-static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
+/**
+ * vmballoon_lock_op - notifies the host about inflated/deflated pages.
+ * @b: pointer to the balloon.
+ * @num_pages: number of inflated/deflated pages.
+ * @page_size: size of the page.
+ * @op: the type of operation (lock or unlock).
+ *
+ * Notify the host about page(s) that were ballooned (or removed from the
+ * balloon) so that host can use it without fear that guest will need it (or
+ * stop using them since the VM does). Host may reject some pages, we need to
+ * check the return value and maybe submit a different page. The pages that are
+ * inflated/deflated are pointed by @b->page.
+ *
+ * Return: result as provided by the hypervisor.
+ */
+static unsigned long vmballoon_lock_op(struct vmballoon *b,
+				       unsigned int num_pages,
+				       enum vmballoon_page_size_type page_size,
+				       enum vmballoon_op op)
 {
-	if (is_2m_page)
-		return alloc_pages(flags, VMW_BALLOON_2M_SHIFT);
+	unsigned long cmd, pfn;
 
-	return alloc_page(flags);
-}
+	lockdep_assert_held(&b->comm_lock);
 
-static void vmballoon_free_page(struct page *page, bool is_2m_page)
-{
-	if (is_2m_page)
-		__free_pages(page, VMW_BALLOON_2M_SHIFT);
-	else
-		__free_page(page);
+	if (static_branch_likely(&vmw_balloon_batching)) {
+		if (op == VMW_BALLOON_INFLATE)
+			cmd = page_size == VMW_BALLOON_2M_PAGE ?
+				VMW_BALLOON_CMD_BATCHED_2M_LOCK :
+				VMW_BALLOON_CMD_BATCHED_LOCK;
+		else
+			cmd = page_size == VMW_BALLOON_2M_PAGE ?
+				VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
+				VMW_BALLOON_CMD_BATCHED_UNLOCK;
+
+		pfn = PHYS_PFN(virt_to_phys(b->batch_page));
+	} else {
+		cmd = op == VMW_BALLOON_INFLATE ? VMW_BALLOON_CMD_LOCK :
+						  VMW_BALLOON_CMD_UNLOCK;
+		pfn = page_to_pfn(b->page);
+
+		/* In non-batching mode, PFNs must fit in 32-bit */
+		if (unlikely(pfn != (u32)pfn))
+			return VMW_BALLOON_ERROR_PPN_INVALID;
+	}
+
+	return vmballoon_cmd(b, cmd, pfn, num_pages);
 }
 
-/*
- * Quickly release all pages allocated for the balloon. This function is
- * called when host decides to "reset" balloon for one reason or another.
- * Unlike normal "deflate" we do not (shall not) notify host of the pages
- * being released.
+/**
+ * vmballoon_add_page - adds a page towards lock/unlock operation.
+ *
+ * @b: pointer to the balloon.
+ * @idx: index of the page to be ballooned in this batch.
+ * @p: pointer to the page that is about to be ballooned.
+ *
+ * Adds the page to be ballooned. Must be called while holding @comm_lock.
  */
-static void vmballoon_pop(struct vmballoon *b)
+static void vmballoon_add_page(struct vmballoon *b, unsigned int idx,
+			       struct page *p)
 {
-	struct page *page, *next;
-	unsigned is_2m_pages;
-
-	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
-			is_2m_pages++) {
-		struct vmballoon_page_size *page_size =
-				&b->page_sizes[is_2m_pages];
-		u16 size_per_page = vmballoon_page_size(is_2m_pages);
-
-		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
-			list_del(&page->lru);
-			vmballoon_free_page(page, is_2m_pages);
-			STATS_INC(b->stats.free[is_2m_pages]);
-			b->size -= size_per_page;
-			cond_resched();
-		}
-	}
+	lockdep_assert_held(&b->comm_lock);
 
-	/* Clearing the batch_page unconditionally has no adverse effect */
-	free_page((unsigned long)b->batch_page);
-	b->batch_page = NULL;
+	if (static_branch_likely(&vmw_balloon_batching))
+		b->batch_page[idx] = (struct vmballoon_batch_entry)
+					{ .pfn = page_to_pfn(p) };
+	else
+		b->page = p;
 }
 
-/*
- * Notify the host of a ballooned page. If host rejects the page put it on the
- * refuse list, those refused page are then released at the end of the
- * inflation cycle.
+/**
+ * vmballoon_lock - lock or unlock a batch of pages.
+ *
+ * @b: pointer to the balloon.
+ * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
+ *
+ * Notifies the host of about ballooned pages (after inflation or deflation,
+ * according to @ctl). If the host rejects the page put it on the
+ * @ctl refuse list. These refused page are then released when moving to the
+ * next size of pages.
+ *
+ * Note that we neither free any @page here nor put them back on the ballooned
+ * pages list. Instead we queue it for later processing. We do that for several
+ * reasons. First, we do not want to free the page under the lock. Second, it
+ * allows us to unify the handling of lock and unlock. In the inflate case, the
+ * caller will check if there are too many refused pages and release them.
+ * Although it is not identical to the past behavior, it should not affect
+ * performance.
  */
-static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
-				bool is_2m_pages, unsigned int *target)
+static int vmballoon_lock(struct vmballoon *b, struct vmballoon_ctl *ctl)
 {
-	int locked, hv_status;
-	struct page *page = b->page;
-	struct vmballoon_page_size *page_size = &b->page_sizes[false];
-
-	/* is_2m_pages can never happen as 2m pages support implies batching */
-
-	locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
-								target);
-	if (locked) {
-		STATS_INC(b->stats.refused_alloc[false]);
-
-		if (locked == -EIO &&
-		    (hv_status == VMW_BALLOON_ERROR_RESET ||
-		     hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED)) {
-			vmballoon_free_page(page, false);
-			return -EIO;
-		}
-
-		/*
-		 * Place page on the list of non-balloonable pages
-		 * and retry allocation, unless we already accumulated
-		 * too many of them, in which case take a breather.
-		 */
-		if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
-			page_size->n_refused_pages++;
-			list_add(&page->lru, &page_size->refused_pages);
-		} else {
-			vmballoon_free_page(page, false);
-		}
-		return locked;
-	}
-
-	/* track allocated page */
-	list_add(&page->lru, &page_size->pages);
+	unsigned long batch_status;
+	struct page *page;
+	unsigned int i, num_pages;
 
-	/* update balloon size */
-	b->size++;
+	num_pages = ctl->n_pages;
+	if (num_pages == 0)
+		return 0;
 
-	return 0;
-}
+	/* communication with the host is done under the communication lock */
+	spin_lock(&b->comm_lock);
 
-static int vmballoon_lock_batched_page(struct vmballoon *b,
-		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
-{
-	int locked, i;
-	u16 size_per_page = vmballoon_page_size(is_2m_pages);
+	i = 0;
+	list_for_each_entry(page, &ctl->pages, lru)
+		vmballoon_add_page(b, i++, page);
 
-	locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages,
-			target);
-	if (locked > 0) {
-		for (i = 0; i < num_pages; i++) {
-			u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
-			struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
+	batch_status = vmballoon_lock_op(b, ctl->n_pages, ctl->page_size,
+					 ctl->op);
 
-			vmballoon_free_page(p, is_2m_pages);
-		}
+	/*
+	 * Iterate over the pages in the provided list. Since we are changing
+	 * @ctl->n_pages we are saving the original value in @num_pages and
+	 * use this value to bound the loop.
+	 */
+	for (i = 0; i < num_pages; i++) {
+		unsigned long status;
 
-		return -EIO;
-	}
+		status = vmballoon_status_page(b, i, &page);
 
-	for (i = 0; i < num_pages; i++) {
-		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
-		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
-		struct vmballoon_page_size *page_size =
-				&b->page_sizes[is_2m_pages];
+		/*
+		 * Failure of the whole batch overrides a single operation
+		 * results.
+		 */
+		if (batch_status != VMW_BALLOON_SUCCESS)
+			status = batch_status;
 
-		locked = vmballoon_batch_get_status(b->batch_page, i);
+		/* Continue if no error happened */
+		if (!vmballoon_handle_one_result(b, page, ctl->page_size,
+						 status))
+			continue;
 
-		switch (locked) {
-		case VMW_BALLOON_SUCCESS:
-			list_add(&p->lru, &page_size->pages);
-			b->size += size_per_page;
-			break;
-		case VMW_BALLOON_ERROR_PPN_PINNED:
-		case VMW_BALLOON_ERROR_PPN_INVALID:
-			if (page_size->n_refused_pages
-					< VMW_BALLOON_MAX_REFUSED) {
-				list_add(&p->lru, &page_size->refused_pages);
-				page_size->n_refused_pages++;
-				break;
-			}
-			/* Fallthrough */
-		case VMW_BALLOON_ERROR_RESET:
-		case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
-			vmballoon_free_page(p, is_2m_pages);
-			break;
-		default:
-			/* This should never happen */
-			WARN_ON_ONCE(true);
-		}
+		/*
+		 * Error happened. Move the pages to the refused list and update
+		 * the pages number.
+		 */
+		list_move(&page->lru, &ctl->refused_pages);
+		ctl->n_pages--;
+		ctl->n_refused_pages++;
 	}
 
-	return 0;
+	spin_unlock(&b->comm_lock);
+
+	return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
 }
 
-/*
- * Release the page allocated for the balloon. Note that we first notify
- * the host so it can make sure the page will be available for the guest
- * to use, if needed.
+/**
+ * vmballoon_release_page_list() - Releases a page list
+ *
+ * @page_list: list of pages to release.
+ * @n_pages: pointer to the number of pages.
+ * @page_size: whether the pages in the list are 2MB (or else 4KB).
+ *
+ * Releases the list of pages and zeros the number of pages.
  */
-static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
-		bool is_2m_pages, unsigned int *target)
+static void vmballoon_release_page_list(struct list_head *page_list,
+				       int *n_pages,
+				       enum vmballoon_page_size_type page_size)
 {
-	struct page *page = b->page;
-	struct vmballoon_page_size *page_size = &b->page_sizes[false];
-
-	/* is_2m_pages can never happen as 2m pages support implies batching */
+	struct page *page, *tmp;
 
-	if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
-		list_add(&page->lru, &page_size->pages);
-		return -EIO;
+	list_for_each_entry_safe(page, tmp, page_list, lru) {
+		list_del(&page->lru);
+		__free_pages(page, vmballoon_page_order(page_size));
 	}
 
-	/* deallocate page */
-	vmballoon_free_page(page, false);
-	STATS_INC(b->stats.free[false]);
+	*n_pages = 0;
+}
 
-	/* update balloon size */
-	b->size--;
 
-	return 0;
+/*
+ * Release pages that were allocated while attempting to inflate the
+ * balloon but were refused by the host for one reason or another.
+ */
+static void vmballoon_release_refused_pages(struct vmballoon *b,
+					    struct vmballoon_ctl *ctl)
+{
+	vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
+				 ctl->page_size);
+
+	vmballoon_release_page_list(&ctl->refused_pages, &ctl->n_refused_pages,
+				    ctl->page_size);
 }
 
-static int vmballoon_unlock_batched_page(struct vmballoon *b,
-				unsigned int num_pages, bool is_2m_pages,
-				unsigned int *target)
+/**
+ * vmballoon_change - retrieve the required balloon change
+ *
+ * @b: pointer for the balloon.
+ *
+ * Return: the required change for the balloon size. A positive number
+ * indicates inflation, a negative number indicates a deflation.
+ */
+static int64_t vmballoon_change(struct vmballoon *b)
 {
-	int locked, i, ret = 0;
-	bool hv_success;
-	u16 size_per_page = vmballoon_page_size(is_2m_pages);
+	int64_t size, target;
 
-	hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages,
-			target);
-	if (!hv_success)
-		ret = -EIO;
+	size = atomic64_read(&b->size);
+	target = READ_ONCE(b->target);
 
-	for (i = 0; i < num_pages; i++) {
-		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
-		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
-		struct vmballoon_page_size *page_size =
-				&b->page_sizes[is_2m_pages];
+	/*
+	 * We must cast first because of int sizes
+	 * Otherwise we might get huge positives instead of negatives
+	 */
 
-		locked = vmballoon_batch_get_status(b->batch_page, i);
-		if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
-			/*
-			 * That page wasn't successfully unlocked by the
-			 * hypervisor, re-add it to the list of pages owned by
-			 * the balloon driver.
-			 */
-			list_add(&p->lru, &page_size->pages);
-		} else {
-			/* deallocate page */
-			vmballoon_free_page(p, is_2m_pages);
-			STATS_INC(b->stats.free[is_2m_pages]);
-
-			/* update balloon size */
-			b->size -= size_per_page;
-		}
-	}
+	if (b->reset_required)
+		return 0;
+
+	/* consider a 2MB slack on deflate, unless the balloon is emptied */
+	if (target < size && target != 0 &&
+	    size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE))
+		return 0;
 
-	return ret;
+	return target - size;
 }
 
-/*
- * Release pages that were allocated while attempting to inflate the
- * balloon but were refused by the host for one reason or another.
+/**
+ * vmballoon_enqueue_page_list() - Enqueues list of pages after inflation.
+ *
+ * @b: pointer to balloon.
+ * @pages: list of pages to enqueue.
+ * @n_pages: pointer to number of pages in list. The value is zeroed.
+ * @page_size: whether the pages are 2MB or 4KB pages.
+ *
+ * Enqueues the provides list of pages in the ballooned page list, clears the
+ * list and zeroes the number of pages that was provided.
  */
-static void vmballoon_release_refused_pages(struct vmballoon *b,
-		bool is_2m_pages)
+static void vmballoon_enqueue_page_list(struct vmballoon *b,
+					struct list_head *pages,
+					unsigned int *n_pages,
+					enum vmballoon_page_size_type page_size)
 {
-	struct page *page, *next;
-	struct vmballoon_page_size *page_size =
-			&b->page_sizes[is_2m_pages];
+	struct vmballoon_page_size *page_size_info = &b->page_sizes[page_size];
 
-	list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
-		list_del(&page->lru);
-		vmballoon_free_page(page, is_2m_pages);
-		STATS_INC(b->stats.refused_free[is_2m_pages]);
-	}
-
-	page_size->n_refused_pages = 0;
+	list_splice_init(pages, &page_size_info->pages);
+	*n_pages = 0;
 }
 
-static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
+/**
+ * vmballoon_dequeue_page_list() - Dequeues page lists for deflation.
+ *
+ * @b: pointer to balloon.
+ * @pages: list of pages to enqueue.
+ * @n_pages: pointer to number of pages in list. The value is zeroed.
+ * @page_size: whether the pages are 2MB or 4KB pages.
+ * @n_req_pages: the number of requested pages.
+ *
+ * Dequeues the number of requested pages from the balloon for deflation. The
+ * number of dequeued pages may be lower, if not enough pages in the requested
+ * size are available.
+ */
+static void vmballoon_dequeue_page_list(struct vmballoon *b,
+					struct list_head *pages,
+					unsigned int *n_pages,
+					enum vmballoon_page_size_type page_size,
+					unsigned int n_req_pages)
 {
-	b->page = p;
-}
+	struct vmballoon_page_size *page_size_info = &b->page_sizes[page_size];
+	struct page *page, *tmp;
+	unsigned int i = 0;
 
-static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
-				struct page *p)
-{
-	vmballoon_batch_set_pa(b->batch_page, idx,
-			(u64)page_to_pfn(p) << PAGE_SHIFT);
+	list_for_each_entry_safe(page, tmp, &page_size_info->pages, lru) {
+		list_move(&page->lru, pages);
+		if (++i == n_req_pages)
+			break;
+	}
+	*n_pages = i;
 }
 
-/*
- * Inflate the balloon towards its target size. Note that we try to limit
- * the rate of allocation to make sure we are not choking the rest of the
- * system.
+/**
+ * vmballoon_inflate() - Inflate the balloon towards its target size.
+ *
+ * @b: pointer to the balloon.
  */
 static void vmballoon_inflate(struct vmballoon *b)
 {
-	unsigned int num_pages = 0;
-	int error = 0;
-	gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
-	bool is_2m_pages;
+	int64_t to_inflate_frames;
+	struct vmballoon_ctl ctl = {
+		.pages = LIST_HEAD_INIT(ctl.pages),
+		.refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
+		.page_size = b->max_page_size,
+		.op = VMW_BALLOON_INFLATE
+	};
 
-	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
+	while ((to_inflate_frames = vmballoon_change(b)) > 0) {
+		unsigned int to_inflate_pages, page_in_frames;
+		int alloc_error, lock_error = 0;
 
-	/*
-	 * First try NOSLEEP page allocations to inflate balloon.
-	 *
-	 * If we do not throttle nosleep allocations, we can drain all
-	 * free pages in the guest quickly (if the balloon target is high).
-	 * As a side-effect, draining free pages helps to inform (force)
-	 * the guest to start swapping if balloon target is not met yet,
-	 * which is a desired behavior. However, balloon driver can consume
-	 * all available CPU cycles if too many pages are allocated in a
-	 * second. Therefore, we throttle nosleep allocations even when
-	 * the guest is not under memory pressure. OTOH, if we have already
-	 * predicted that the guest is under memory pressure, then we
-	 * slowdown page allocations considerably.
-	 */
+		VM_BUG_ON(!list_empty(&ctl.pages));
+		VM_BUG_ON(ctl.n_pages != 0);
 
-	/*
-	 * Start with no sleep allocation rate which may be higher
-	 * than sleeping allocation rate.
-	 */
-	is_2m_pages = b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;
+		page_in_frames = vmballoon_page_in_frames(ctl.page_size);
 
-	pr_debug("%s - goal: %d",  __func__, b->target - b->size);
+		to_inflate_pages = min_t(unsigned long, b->batch_max_pages,
+					 DIV_ROUND_UP_ULL(to_inflate_frames,
+							  page_in_frames));
 
-	while (!b->reset_required &&
-		b->size + num_pages * vmballoon_page_size(is_2m_pages)
-		< b->target) {
-		struct page *page;
+		/* Start by allocating */
+		alloc_error = vmballoon_alloc_page_list(b, &ctl,
+							to_inflate_pages);
 
-		if (flags == VMW_PAGE_ALLOC_NOSLEEP)
-			STATS_INC(b->stats.alloc[is_2m_pages]);
-		else
-			STATS_INC(b->stats.sleep_alloc);
-
-		page = vmballoon_alloc_page(flags, is_2m_pages);
-		if (!page) {
-			STATS_INC(b->stats.alloc_fail[is_2m_pages]);
-
-			if (is_2m_pages) {
-				b->ops->lock(b, num_pages, true, &b->target);
-
-				/*
-				 * ignore errors from locking as we now switch
-				 * to 4k pages and we might get different
-				 * errors.
-				 */
-
-				num_pages = 0;
-				is_2m_pages = false;
-				continue;
-			}
-
-			if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
-				/*
-				 * CANSLEEP page allocation failed, so guest
-				 * is under severe memory pressure. We just log
-				 * the event, but do not stop the inflation
-				 * due to its negative impact on performance.
-				 */
-				STATS_INC(b->stats.sleep_alloc_fail);
+		/* Actually lock the pages by telling the hypervisor */
+		lock_error = vmballoon_lock(b, &ctl);
+
+		/*
+		 * If an error indicates that something serious went wrong,
+		 * stop the inflation.
+		 */
+		if (lock_error)
+			break;
+
+		/* Update the balloon size */
+		atomic64_add(ctl.n_pages * page_in_frames, &b->size);
+
+		vmballoon_enqueue_page_list(b, &ctl.pages, &ctl.n_pages,
+					    ctl.page_size);
+
+		/*
+		 * If allocation failed or the number of refused pages exceeds
+		 * the maximum allowed, move to the next page size.
+		 */
+		if (alloc_error ||
+		    ctl.n_refused_pages >= VMW_BALLOON_MAX_REFUSED) {
+			if (ctl.page_size == VMW_BALLOON_4K_PAGE)
 				break;
-			}
 
 			/*
-			 * NOSLEEP page allocation failed, so the guest is
-			 * under memory pressure. Slowing down page alloctions
-			 * seems to be reasonable, but doing so might actually
-			 * cause the hypervisor to throttle us down, resulting
-			 * in degraded performance. We will count on the
-			 * scheduler and standard memory management mechanisms
-			 * for now.
+			 * Ignore errors from locking as we now switch to 4k
+			 * pages and we might get different errors.
 			 */
-			flags = VMW_PAGE_ALLOC_CANSLEEP;
-			continue;
-		}
-
-		b->ops->add_page(b, num_pages++, page);
-		if (num_pages == b->batch_max_pages) {
-			error = b->ops->lock(b, num_pages, is_2m_pages,
-					&b->target);
-			num_pages = 0;
-			if (error)
-				break;
+			vmballoon_release_refused_pages(b, &ctl);
+			ctl.page_size--;
 		}
 
 		cond_resched();
 	}
 
-	if (num_pages > 0)
-		b->ops->lock(b, num_pages, is_2m_pages, &b->target);
-
-	vmballoon_release_refused_pages(b, true);
-	vmballoon_release_refused_pages(b, false);
+	/*
+	 * Release pages that were allocated while attempting to inflate the
+	 * balloon but were refused by the host for one reason or another,
+	 * and update the statistics.
+	 */
+	if (ctl.n_refused_pages != 0)
+		vmballoon_release_refused_pages(b, &ctl);
 }
 
-/*
+/**
+ * vmballoon_deflate() - Decrease the size of the balloon.
+ *
+ * @b: pointer to the balloon
+ * @n_frames: the number of frames to deflate. If zero, automatically
+ * calculated according to the target size.
+ * @coordinated: whether to coordinate with the host
+ *
  * Decrease the size of the balloon allowing guest to use more memory.
+ *
+ * Return: The number of deflated frames (i.e., basic page size units)
  */
-static void vmballoon_deflate(struct vmballoon *b)
+static unsigned long vmballoon_deflate(struct vmballoon *b, uint64_t n_frames,
+				       bool coordinated)
 {
-	unsigned is_2m_pages;
-
-	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
+	unsigned long deflated_frames = 0;
+	unsigned long tried_frames = 0;
+	struct vmballoon_ctl ctl = {
+		.pages = LIST_HEAD_INIT(ctl.pages),
+		.refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
+		.page_size = VMW_BALLOON_4K_PAGE,
+		.op = VMW_BALLOON_DEFLATE
+	};
 
 	/* free pages to reach target */
-	for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
-			is_2m_pages++) {
-		struct page *page, *next;
-		unsigned int num_pages = 0;
-		struct vmballoon_page_size *page_size =
-				&b->page_sizes[is_2m_pages];
-
-		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
-			if (b->reset_required ||
-				(b->target > 0 &&
-					b->size - num_pages
-					* vmballoon_page_size(is_2m_pages)
-				< b->target + vmballoon_page_size(true)))
-				break;
+	while (true) {
+		unsigned int to_deflate_pages, n_unlocked_frames;
+		unsigned int page_in_frames;
+		int64_t to_deflate_frames;
+		bool deflated_all;
+
+		page_in_frames = vmballoon_page_in_frames(ctl.page_size);
+
+		VM_BUG_ON(!list_empty(&ctl.pages));
+		VM_BUG_ON(ctl.n_pages);
+		VM_BUG_ON(!list_empty(&ctl.refused_pages));
+		VM_BUG_ON(ctl.n_refused_pages);
+
+		/*
+		 * If we were requested a specific number of frames, we try to
+		 * deflate this number of frames. Otherwise, deflation is
+		 * performed according to the target and balloon size.
+		 */
+		to_deflate_frames = n_frames ? n_frames - tried_frames :
+					       -vmballoon_change(b);
+
+		/* break if no work to do */
+		if (to_deflate_frames <= 0)
+			break;
+
+		/*
+		 * Calculate the number of frames based on current page size,
+		 * but limit the deflated frames to a single chunk
+		 */
+		to_deflate_pages = min_t(unsigned long, b->batch_max_pages,
+					 DIV_ROUND_UP_ULL(to_deflate_frames,
+							  page_in_frames));
+
+		/* First take the pages from the balloon pages. */
+		vmballoon_dequeue_page_list(b, &ctl.pages, &ctl.n_pages,
+					    ctl.page_size, to_deflate_pages);
+
+		/*
+		 * Before pages are moving to the refused list, count their
+		 * frames as frames that we tried to deflate.
+		 */
+		tried_frames += ctl.n_pages * page_in_frames;
+
+		/*
+		 * Unlock the pages by communicating with the hypervisor if the
+		 * communication is coordinated (i.e., not pop). We ignore the
+		 * return code. Instead we check if all the pages we manage to
+		 * unlock all the pages. If we failed, we will move to the next
+		 * page size, and would eventually try again later.
+		 */
+		if (coordinated)
+			vmballoon_lock(b, &ctl);
+
+		/*
+		 * Check if we deflated enough. We will move to the next page
+		 * size if we did not manage to do so. This calculation takes
+		 * place now, as once the pages are released, the number of
+		 * pages is zeroed.
+		 */
+		deflated_all = (ctl.n_pages == to_deflate_pages);
+
+		/* Update local and global counters */
+		n_unlocked_frames = ctl.n_pages * page_in_frames;
+		atomic64_sub(n_unlocked_frames, &b->size);
+		deflated_frames += n_unlocked_frames;
 
-			list_del(&page->lru);
-			b->ops->add_page(b, num_pages++, page);
+		vmballoon_stats_page_add(b, VMW_BALLOON_PAGE_STAT_FREE,
+					 ctl.page_size, ctl.n_pages);
 
-			if (num_pages == b->batch_max_pages) {
-				int error;
+		/* free the ballooned pages */
+		vmballoon_release_page_list(&ctl.pages, &ctl.n_pages,
+					    ctl.page_size);
 
-				error = b->ops->unlock(b, num_pages,
-						is_2m_pages, &b->target);
-				num_pages = 0;
-				if (error)
-					return;
-			}
+		/* Return the refused pages to the ballooned list. */
+		vmballoon_enqueue_page_list(b, &ctl.refused_pages,
+					    &ctl.n_refused_pages,
+					    ctl.page_size);
 
-			cond_resched();
+		/* If we failed to unlock all the pages, move to next size. */
+		if (!deflated_all) {
+			if (ctl.page_size == b->max_page_size)
+				break;
+			ctl.page_size++;
 		}
 
-		if (num_pages > 0)
-			b->ops->unlock(b, num_pages, is_2m_pages, &b->target);
+		cond_resched();
 	}
-}
 
-static const struct vmballoon_ops vmballoon_basic_ops = {
-	.add_page = vmballoon_add_page,
-	.lock = vmballoon_lock_page,
-	.unlock = vmballoon_unlock_page
-};
+	return deflated_frames;
+}
 
-static const struct vmballoon_ops vmballoon_batched_ops = {
-	.add_page = vmballoon_add_batched_page,
-	.lock = vmballoon_lock_batched_page,
-	.unlock = vmballoon_unlock_batched_page
-};
+/**
+ * vmballoon_deinit_batching - disables batching mode.
+ *
+ * @b: pointer to &struct vmballoon.
+ *
+ * Disables batching, by deallocating the page for communication with the
+ * hypervisor and disabling the static key to indicate that batching is off.
+ */
+static void vmballoon_deinit_batching(struct vmballoon *b)
+{
+	free_page((unsigned long)b->batch_page);
+	b->batch_page = NULL;
+	static_branch_disable(&vmw_balloon_batching);
+	b->batch_max_pages = 1;
+}
 
-static bool vmballoon_init_batching(struct vmballoon *b)
+/**
+ * vmballoon_init_batching - enable batching mode.
+ *
+ * @b: pointer to &struct vmballoon.
+ *
+ * Enables batching, by allocating a page for communication with the hypervisor
+ * and enabling the static_key to use batching.
+ *
+ * Return: zero on success or an appropriate error-code.
+ */
+static int vmballoon_init_batching(struct vmballoon *b)
 {
 	struct page *page;
 
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page)
-		return false;
+		return -ENOMEM;
 
 	b->batch_page = page_address(page);
-	return true;
+	b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry);
+
+	static_branch_enable(&vmw_balloon_batching);
+
+	return 0;
 }
 
 /*
@@ -932,7 +1197,7 @@ static void vmballoon_doorbell(void *client_data)
 {
 	struct vmballoon *b = client_data;
 
-	STATS_INC(b->stats.doorbell);
+	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_DOORBELL);
 
 	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
 }
@@ -942,11 +1207,8 @@ static void vmballoon_doorbell(void *client_data)
  */
 static void vmballoon_vmci_cleanup(struct vmballoon *b)
 {
-	int error;
-
-	VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID,
-			VMCI_INVALID_ID, error);
-	STATS_INC(b->stats.doorbell_unset);
+	vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
+		      VMCI_INVALID_ID, VMCI_INVALID_ID);
 
 	if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
 		vmci_doorbell_destroy(b->vmci_doorbell);
@@ -954,12 +1216,19 @@ static void vmballoon_vmci_cleanup(struct vmballoon *b)
 	}
 }
 
-/*
- * Initialize vmci doorbell, to get notified as soon as balloon changes
+/**
+ * vmballoon_vmci_init - Initialize vmci doorbell.
+ *
+ * @b: pointer to the balloon.
+ *
+ * Return: zero on success or when wakeup command not supported. Error-code
+ * otherwise.
+ *
+ * Initialize vmci doorbell, to get notified as soon as balloon changes.
  */
 static int vmballoon_vmci_init(struct vmballoon *b)
 {
-	unsigned long error, dummy;
+	unsigned long error;
 
 	if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
 		return 0;
@@ -971,10 +1240,9 @@ static int vmballoon_vmci_init(struct vmballoon *b)
 	if (error != VMCI_SUCCESS)
 		goto fail;
 
-	error = VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, b->vmci_doorbell.context,
-				   b->vmci_doorbell.resource, dummy);
-
-	STATS_INC(b->stats.doorbell_set);
+	error =	__vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
+				b->vmci_doorbell.context,
+				b->vmci_doorbell.resource, NULL);
 
 	if (error != VMW_BALLOON_SUCCESS)
 		goto fail;
@@ -985,6 +1253,23 @@ fail:
 	return -EIO;
 }
 
+/**
+ * vmballoon_pop - Quickly release all pages allocate for the balloon.
+ *
+ * @b: pointer to the balloon.
+ *
+ * This function is called when host decides to "reset" balloon for one reason
+ * or another. Unlike normal "deflate" we do not (shall not) notify host of the
+ * pages being released.
+ */
+static void vmballoon_pop(struct vmballoon *b)
+{
+	unsigned long size;
+
+	while ((size = atomic64_read(&b->size)))
+		vmballoon_deflate(b, size, false);
+}
+
 /*
  * Perform standard reset sequence by popping the balloon (in case it
  * is not  empty) and then restarting protocol. This operation normally
@@ -994,18 +1279,18 @@ static void vmballoon_reset(struct vmballoon *b)
 {
 	int error;
 
+	down_write(&b->conf_sem);
+
 	vmballoon_vmci_cleanup(b);
 
 	/* free all pages, skipping monitor unlock */
 	vmballoon_pop(b);
 
-	if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
+	if (vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
 		return;
 
 	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
-		b->ops = &vmballoon_batched_ops;
-		b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
-		if (!vmballoon_init_batching(b)) {
+		if (vmballoon_init_batching(b)) {
 			/*
 			 * We failed to initialize batching, inform the monitor
 			 * about it by sending a null capability.
@@ -1016,52 +1301,70 @@ static void vmballoon_reset(struct vmballoon *b)
 			return;
 		}
 	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
-		b->ops = &vmballoon_basic_ops;
-		b->batch_max_pages = 1;
+		vmballoon_deinit_batching(b);
 	}
 
+	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_RESET);
 	b->reset_required = false;
 
 	error = vmballoon_vmci_init(b);
 	if (error)
 		pr_err("failed to initialize vmci doorbell\n");
 
-	if (!vmballoon_send_guest_id(b))
+	if (vmballoon_send_guest_id(b))
 		pr_err("failed to send guest ID to the host\n");
+
+	up_write(&b->conf_sem);
 }
 
-/*
- * Balloon work function: reset protocol, if needed, get the new size and
- * adjust balloon as needed. Repeat in 1 sec.
+/**
+ * vmballoon_work - periodic balloon worker for reset, inflation and deflation.
+ *
+ * @work: pointer to the &work_struct which is provided by the workqueue.
+ *
+ * Resets the protocol if needed, gets the new size and adjusts balloon as
+ * needed. Repeat in 1 sec.
  */
 static void vmballoon_work(struct work_struct *work)
 {
 	struct delayed_work *dwork = to_delayed_work(work);
 	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
-	unsigned int target;
-
-	STATS_INC(b->stats.timer);
+	int64_t change = 0;
 
 	if (b->reset_required)
 		vmballoon_reset(b);
 
-	if (!b->reset_required && vmballoon_send_get_target(b, &target)) {
-		/* update target, adjust size */
-		b->target = target;
+	down_read(&b->conf_sem);
+
+	/*
+	 * Update the stats while holding the semaphore to ensure that
+	 * @stats_enabled is consistent with whether the stats are actually
+	 * enabled
+	 */
+	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_TIMER);
+
+	if (!vmballoon_send_get_target(b))
+		change = vmballoon_change(b);
+
+	if (change != 0) {
+		pr_debug("%s - size: %llu, target %lu\n", __func__,
+			 atomic64_read(&b->size), READ_ONCE(b->target));
 
-		if (b->size < target)
+		if (change > 0)
 			vmballoon_inflate(b);
-		else if (target == 0 ||
-				b->size > target + vmballoon_page_size(true))
-			vmballoon_deflate(b);
+		else  /* (change < 0) */
+			vmballoon_deflate(b, 0, true);
 	}
 
+	up_read(&b->conf_sem);
+
 	/*
 	 * We are using a freezable workqueue so that balloon operations are
 	 * stopped while the system transitions to/from sleep/hibernation.
 	 */
 	queue_delayed_work(system_freezable_wq,
 			   dwork, round_jiffies_relative(HZ));
+
 }
 
 /*
@@ -1069,64 +1372,100 @@ static void vmballoon_work(struct work_struct *work)
  */
 #ifdef CONFIG_DEBUG_FS
 
+static const char * const vmballoon_stat_page_names[] = {
+	[VMW_BALLOON_PAGE_STAT_ALLOC]		= "alloc",
+	[VMW_BALLOON_PAGE_STAT_ALLOC_FAIL]	= "allocFail",
+	[VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC]	= "errAlloc",
+	[VMW_BALLOON_PAGE_STAT_REFUSED_FREE]	= "errFree",
+	[VMW_BALLOON_PAGE_STAT_FREE]		= "free"
+};
+
+static const char * const vmballoon_stat_names[] = {
+	[VMW_BALLOON_STAT_TIMER]		= "timer",
+	[VMW_BALLOON_STAT_DOORBELL]		= "doorbell",
+	[VMW_BALLOON_STAT_RESET]		= "reset",
+};
+
+static int vmballoon_enable_stats(struct vmballoon *b)
+{
+	int r = 0;
+
+	down_write(&b->conf_sem);
+
+	/* did we somehow race with another reader which enabled stats? */
+	if (b->stats)
+		goto out;
+
+	b->stats = kzalloc(sizeof(*b->stats), GFP_KERNEL);
+
+	if (!b->stats) {
+		/* allocation failed */
+		r = -ENOMEM;
+		goto out;
+	}
+	static_key_enable(&balloon_stat_enabled.key);
+out:
+	up_write(&b->conf_sem);
+	return r;
+}
+
+/**
+ * vmballoon_debug_show - shows statistics of balloon operations.
+ * @f: pointer to the &struct seq_file.
+ * @offset: ignored.
+ *
+ * Provides the statistics that can be accessed in vmmemctl in the debugfs.
+ * To avoid the overhead - mainly that of memory - of collecting the statistics,
+ * we only collect statistics after the first time the counters are read.
+ *
+ * Return: zero on success or an error code.
+ */
 static int vmballoon_debug_show(struct seq_file *f, void *offset)
 {
 	struct vmballoon *b = f->private;
-	struct vmballoon_stats *stats = &b->stats;
+	int i, j;
+
+	/* enables stats if they are disabled */
+	if (!b->stats) {
+		int r = vmballoon_enable_stats(b);
+
+		if (r)
+			return r;
+	}
 
 	/* format capabilities info */
-	seq_printf(f,
-		   "balloon capabilities:   %#4x\n"
-		   "used capabilities:      %#4lx\n"
-		   "is resetting:           %c\n",
-		   VMW_BALLOON_CAPABILITIES, b->capabilities,
-		   b->reset_required ? 'y' : 'n');
+	seq_printf(f, "%-22s: %#16x\n", "balloon capabilities",
+		   VMW_BALLOON_CAPABILITIES);
+	seq_printf(f, "%-22s: %#16lx\n", "used capabilities", b->capabilities);
+	seq_printf(f, "%-22s: %16s\n", "is resetting",
+		   b->reset_required ? "y" : "n");
 
 	/* format size info */
-	seq_printf(f,
-		   "target:             %8d pages\n"
-		   "current:            %8d pages\n",
-		   b->target, b->size);
-
-	seq_printf(f,
-		   "\n"
-		   "timer:              %8u\n"
-		   "doorbell:           %8u\n"
-		   "start:              %8u (%4u failed)\n"
-		   "guestType:          %8u (%4u failed)\n"
-		   "2m-lock:            %8u (%4u failed)\n"
-		   "lock:               %8u (%4u failed)\n"
-		   "2m-unlock:          %8u (%4u failed)\n"
-		   "unlock:             %8u (%4u failed)\n"
-		   "target:             %8u (%4u failed)\n"
-		   "prim2mAlloc:        %8u (%4u failed)\n"
-		   "primNoSleepAlloc:   %8u (%4u failed)\n"
-		   "primCanSleepAlloc:  %8u (%4u failed)\n"
-		   "prim2mFree:         %8u\n"
-		   "primFree:           %8u\n"
-		   "err2mAlloc:         %8u\n"
-		   "errAlloc:           %8u\n"
-		   "err2mFree:          %8u\n"
-		   "errFree:            %8u\n"
-		   "doorbellSet:        %8u\n"
-		   "doorbellUnset:      %8u\n",
-		   stats->timer,
-		   stats->doorbell,
-		   stats->start, stats->start_fail,
-		   stats->guest_type, stats->guest_type_fail,
-		   stats->lock[true],  stats->lock_fail[true],
-		   stats->lock[false],  stats->lock_fail[false],
-		   stats->unlock[true], stats->unlock_fail[true],
-		   stats->unlock[false], stats->unlock_fail[false],
-		   stats->target, stats->target_fail,
-		   stats->alloc[true], stats->alloc_fail[true],
-		   stats->alloc[false], stats->alloc_fail[false],
-		   stats->sleep_alloc, stats->sleep_alloc_fail,
-		   stats->free[true],
-		   stats->free[false],
-		   stats->refused_alloc[true], stats->refused_alloc[false],
-		   stats->refused_free[true], stats->refused_free[false],
-		   stats->doorbell_set, stats->doorbell_unset);
+	seq_printf(f, "%-22s: %16lu\n", "target", READ_ONCE(b->target));
+	seq_printf(f, "%-22s: %16llu\n", "current", atomic64_read(&b->size));
+
+	for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) {
+		if (vmballoon_cmd_names[i] == NULL)
+			continue;
+
+		seq_printf(f, "%-22s: %16llu (%llu failed)\n",
+			   vmballoon_cmd_names[i],
+			   atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_STAT]),
+			   atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_FAIL_STAT]));
+	}
+
+	for (i = 0; i < VMW_BALLOON_STAT_NUM; i++)
+		seq_printf(f, "%-22s: %16llu\n",
+			   vmballoon_stat_names[i],
+			   atomic64_read(&b->stats->general_stat[i]));
+
+	for (i = 0; i < VMW_BALLOON_PAGE_STAT_NUM; i++) {
+		for (j = 0; j < VMW_BALLOON_NUM_PAGE_SIZES; j++)
+			seq_printf(f, "%-18s(%s): %16llu\n",
+				   vmballoon_stat_page_names[i],
+				   vmballoon_page_size_names[j],
+				   atomic64_read(&b->stats->page_stat[i][j]));
+	}
 
 	return 0;
 }
@@ -1161,7 +1500,10 @@ static int __init vmballoon_debugfs_init(struct vmballoon *b)
 
 static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
 {
+	static_key_disable(&balloon_stat_enabled.key);
 	debugfs_remove(b->dbg_entry);
+	kfree(b->stats);
+	b->stats = NULL;
 }
 
 #else
@@ -1179,8 +1521,9 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b)
 
 static int __init vmballoon_init(void)
 {
+	enum vmballoon_page_size_type page_size;
 	int error;
-	unsigned is_2m_pages;
+
 	/*
 	 * Check if we are running on VMware's hypervisor and bail out
 	 * if we are not.
@@ -1188,11 +1531,10 @@ static int __init vmballoon_init(void)
 	if (x86_hyper_type != X86_HYPER_VMWARE)
 		return -ENODEV;
 
-	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
-			is_2m_pages++) {
-		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
-		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
-	}
+	for (page_size = VMW_BALLOON_4K_PAGE;
+	     page_size <= VMW_BALLOON_LAST_SIZE; page_size++)
+		INIT_LIST_HEAD(&balloon.page_sizes[page_size].pages);
+
 
 	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
 
@@ -1200,6 +1542,8 @@ static int __init vmballoon_init(void)
 	if (error)
 		return error;
 
+	spin_lock_init(&balloon.comm_lock);
+	init_rwsem(&balloon.conf_sem);
 	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
 	balloon.batch_page = NULL;
 	balloon.page = NULL;
diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c
index d7eaf1eb11e7..003bfba40758 100644
--- a/drivers/misc/vmw_vmci/vmci_driver.c
+++ b/drivers/misc/vmw_vmci/vmci_driver.c
@@ -113,5 +113,5 @@ module_exit(vmci_drv_exit);
 
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface.");
-MODULE_VERSION("1.1.5.0-k");
+MODULE_VERSION("1.1.6.0-k");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c
index 83e0c95d20a4..edfffc9699ba 100644
--- a/drivers/misc/vmw_vmci/vmci_host.c
+++ b/drivers/misc/vmw_vmci/vmci_host.c
@@ -15,7 +15,6 @@
 
 #include <linux/vmw_vmci_defs.h>
 #include <linux/vmw_vmci_api.h>
-#include <linux/moduleparam.h>
 #include <linux/miscdevice.h>
 #include <linux/interrupt.h>
 #include <linux/highmem.h>
@@ -448,15 +447,12 @@ static int vmci_host_do_alloc_queuepair(struct vmci_host_dev *vmci_host_dev,
 	struct vmci_handle handle;
 	int vmci_status;
 	int __user *retptr;
-	u32 cid;
 
 	if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
 		vmci_ioctl_err("only valid for contexts\n");
 		return -EINVAL;
 	}
 
-	cid = vmci_ctx_get_id(vmci_host_dev->context);
-
 	if (vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) {
 		struct vmci_qp_alloc_info_vmvm alloc_info;
 		struct vmci_qp_alloc_info_vmvm __user *info = uptr;
diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c
index 1ab6e8737a5f..da1ee2e1ba99 100644
--- a/drivers/misc/vmw_vmci/vmci_resource.c
+++ b/drivers/misc/vmw_vmci/vmci_resource.c
@@ -57,7 +57,8 @@ static struct vmci_resource *vmci_resource_lookup(struct vmci_handle handle,
 
 		if (r->type == type &&
 		    rid == handle.resource &&
-		    (cid == handle.context || cid == VMCI_INVALID_ID)) {
+		    (cid == handle.context || cid == VMCI_INVALID_ID ||
+		     handle.context == VMCI_INVALID_ID)) {
 			resource = r;
 			break;
 		}
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 35564a8a48f9..a6cbaca37e94 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -341,7 +341,7 @@ static struct device_node *bcmgenet_mii_of_find_mdio(struct bcmgenet_priv *priv)
 	if (!compat)
 		return NULL;
 
-	priv->mdio_dn = of_find_compatible_node(dn, NULL, compat);
+	priv->mdio_dn = of_get_compatible_child(dn, compat);
 	kfree(compat);
 	if (!priv->mdio_dn) {
 		dev_err(kdev, "unable to find MDIO bus node\n");
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 267322693ed5..9a6065a3fa46 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -520,10 +520,20 @@ setup_sge_txq_uld(struct adapter *adap, unsigned int uld_type,
 	txq_info = kzalloc(sizeof(*txq_info), GFP_KERNEL);
 	if (!txq_info)
 		return -ENOMEM;
+	if (uld_type == CXGB4_ULD_CRYPTO) {
+		i = min_t(int, adap->vres.ncrypto_fc,
+			  num_online_cpus());
+		txq_info->ntxq = rounddown(i, adap->params.nports);
+		if (txq_info->ntxq <= 0) {
+			dev_warn(adap->pdev_dev, "Crypto Tx Queues can't be zero\n");
+			kfree(txq_info);
+			return -EINVAL;
+		}
 
-	i = min_t(int, uld_info->ntxq, num_online_cpus());
-	txq_info->ntxq = roundup(i, adap->params.nports);
-
+	} else {
+		i = min_t(int, uld_info->ntxq, num_online_cpus());
+		txq_info->ntxq = roundup(i, adap->params.nports);
+	}
 	txq_info->uldtxq = kcalloc(txq_info->ntxq, sizeof(struct sge_uld_txq),
 				   GFP_KERNEL);
 	if (!txq_info->uldtxq) {
@@ -546,11 +556,14 @@ static void uld_queue_init(struct adapter *adap, unsigned int uld_type,
 			   struct cxgb4_lld_info *lli)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+	int tx_uld_type = TX_ULD(uld_type);
+	struct sge_uld_txq_info *txq_info = adap->sge.uld_txq_info[tx_uld_type];
 
 	lli->rxq_ids = rxq_info->rspq_id;
 	lli->nrxq = rxq_info->nrxq;
 	lli->ciq_ids = rxq_info->rspq_id + rxq_info->nrxq;
 	lli->nciq = rxq_info->nciq;
+	lli->ntxq = txq_info->ntxq;
 }
 
 int t4_uld_mem_alloc(struct adapter *adap)
@@ -634,7 +647,6 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
 	lld->ports = adap->port;
 	lld->vr = &adap->vres;
 	lld->mtus = adap->params.mtus;
-	lld->ntxq = adap->sge.ofldqsets;
 	lld->nchan = adap->params.nports;
 	lld->nports = adap->params.nports;
 	lld->wr_cred = adap->params.ofldq_wr_cred;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index f9a61f90cfbc..0f660af01a4b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -714,8 +714,9 @@ static int get_ephy_nodes(struct stmmac_priv *priv)
 		return -ENODEV;
 	}
 
-	mdio_internal = of_find_compatible_node(mdio_mux, NULL,
+	mdio_internal = of_get_compatible_child(mdio_mux,
 						"allwinner,sun8i-h3-mdio-internal");
+	of_node_put(mdio_mux);
 	if (!mdio_internal) {
 		dev_err(priv->device, "Cannot get internal_mdio node\n");
 		return -ENODEV;
@@ -729,13 +730,20 @@ static int get_ephy_nodes(struct stmmac_priv *priv)
 		gmac->rst_ephy = of_reset_control_get_exclusive(iphynode, NULL);
 		if (IS_ERR(gmac->rst_ephy)) {
 			ret = PTR_ERR(gmac->rst_ephy);
-			if (ret == -EPROBE_DEFER)
+			if (ret == -EPROBE_DEFER) {
+				of_node_put(iphynode);
+				of_node_put(mdio_internal);
 				return ret;
+			}
 			continue;
 		}
 		dev_info(priv->device, "Found internal PHY node\n");
+		of_node_put(iphynode);
+		of_node_put(mdio_internal);
 		return 0;
 	}
+
+	of_node_put(mdio_internal);
 	return -ENODEV;
 }
 
diff --git a/drivers/net/ppp/ppp_mppe.c b/drivers/net/ppp/ppp_mppe.c
index a205750b431b..7ccdc62c6052 100644
--- a/drivers/net/ppp/ppp_mppe.c
+++ b/drivers/net/ppp/ppp_mppe.c
@@ -95,7 +95,7 @@ static inline void sha_pad_init(struct sha_pad *shapad)
  * State for an MPPE (de)compressor.
  */
 struct ppp_mppe_state {
-	struct crypto_skcipher *arc4;
+	struct crypto_sync_skcipher *arc4;
 	struct shash_desc *sha1;
 	unsigned char *sha1_digest;
 	unsigned char master_key[MPPE_MAX_KEY_LEN];
@@ -155,15 +155,15 @@ static void get_new_key_from_sha(struct ppp_mppe_state * state)
 static void mppe_rekey(struct ppp_mppe_state * state, int initial_key)
 {
 	struct scatterlist sg_in[1], sg_out[1];
-	SKCIPHER_REQUEST_ON_STACK(req, state->arc4);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, state->arc4);
 
-	skcipher_request_set_tfm(req, state->arc4);
+	skcipher_request_set_sync_tfm(req, state->arc4);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 
 	get_new_key_from_sha(state);
 	if (!initial_key) {
-		crypto_skcipher_setkey(state->arc4, state->sha1_digest,
-				       state->keylen);
+		crypto_sync_skcipher_setkey(state->arc4, state->sha1_digest,
+					    state->keylen);
 		sg_init_table(sg_in, 1);
 		sg_init_table(sg_out, 1);
 		setup_sg(sg_in, state->sha1_digest, state->keylen);
@@ -181,7 +181,8 @@ static void mppe_rekey(struct ppp_mppe_state * state, int initial_key)
 		state->session_key[1] = 0x26;
 		state->session_key[2] = 0x9e;
 	}
-	crypto_skcipher_setkey(state->arc4, state->session_key, state->keylen);
+	crypto_sync_skcipher_setkey(state->arc4, state->session_key,
+				    state->keylen);
 	skcipher_request_zero(req);
 }
 
@@ -203,7 +204,7 @@ static void *mppe_alloc(unsigned char *options, int optlen)
 		goto out;
 
 
-	state->arc4 = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+	state->arc4 = crypto_alloc_sync_skcipher("ecb(arc4)", 0, 0);
 	if (IS_ERR(state->arc4)) {
 		state->arc4 = NULL;
 		goto out_free;
@@ -250,7 +251,7 @@ out_free:
 		crypto_free_shash(state->sha1->tfm);
 		kzfree(state->sha1);
 	}
-	crypto_free_skcipher(state->arc4);
+	crypto_free_sync_skcipher(state->arc4);
 	kfree(state);
 out:
 	return NULL;
@@ -266,7 +267,7 @@ static void mppe_free(void *arg)
 		kfree(state->sha1_digest);
 		crypto_free_shash(state->sha1->tfm);
 		kzfree(state->sha1);
-		crypto_free_skcipher(state->arc4);
+		crypto_free_sync_skcipher(state->arc4);
 		kfree(state);
 	}
 }
@@ -366,7 +367,7 @@ mppe_compress(void *arg, unsigned char *ibuf, unsigned char *obuf,
 	      int isize, int osize)
 {
 	struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg;
-	SKCIPHER_REQUEST_ON_STACK(req, state->arc4);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, state->arc4);
 	int proto;
 	int err;
 	struct scatterlist sg_in[1], sg_out[1];
@@ -426,7 +427,7 @@ mppe_compress(void *arg, unsigned char *ibuf, unsigned char *obuf,
 	setup_sg(sg_in, ibuf, isize);
 	setup_sg(sg_out, obuf, osize);
 
-	skcipher_request_set_tfm(req, state->arc4);
+	skcipher_request_set_sync_tfm(req, state->arc4);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg_in, sg_out, isize, NULL);
 	err = crypto_skcipher_encrypt(req);
@@ -480,7 +481,7 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf,
 		int osize)
 {
 	struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg;
-	SKCIPHER_REQUEST_ON_STACK(req, state->arc4);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, state->arc4);
 	unsigned ccount;
 	int flushed = MPPE_BITS(ibuf) & MPPE_BIT_FLUSHED;
 	struct scatterlist sg_in[1], sg_out[1];
@@ -615,7 +616,7 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf,
 	setup_sg(sg_in, ibuf, 1);
 	setup_sg(sg_out, obuf, 1);
 
-	skcipher_request_set_tfm(req, state->arc4);
+	skcipher_request_set_sync_tfm(req, state->arc4);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg_in, sg_out, 1, NULL);
 	if (crypto_skcipher_decrypt(req)) {
diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c
index 91162f8e0366..9a22056e8d9e 100644
--- a/drivers/nfc/nfcmrvl/uart.c
+++ b/drivers/nfc/nfcmrvl/uart.c
@@ -73,10 +73,9 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node,
 	struct device_node *matched_node;
 	int ret;
 
-	matched_node = of_find_compatible_node(node, NULL, "marvell,nfc-uart");
+	matched_node = of_get_compatible_child(node, "marvell,nfc-uart");
 	if (!matched_node) {
-		matched_node = of_find_compatible_node(node, NULL,
-						       "mrvl,nfc-uart");
+		matched_node = of_get_compatible_child(node, "mrvl,nfc-uart");
 		if (!matched_node)
 			return -ENODEV;
 	}
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index aa1657831b70..9b18ce90f907 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * nvmem framework core.
  *
  * Copyright (C) 2015 Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
  * Copyright (C) 2013 Maxime Ripard <maxime.ripard@free-electrons.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/device.h>
@@ -19,6 +11,7 @@
 #include <linux/fs.h>
 #include <linux/idr.h>
 #include <linux/init.h>
+#include <linux/kref.h>
 #include <linux/module.h>
 #include <linux/nvmem-consumer.h>
 #include <linux/nvmem-provider.h>
@@ -26,18 +19,18 @@
 #include <linux/slab.h>
 
 struct nvmem_device {
-	const char		*name;
 	struct module		*owner;
 	struct device		dev;
 	int			stride;
 	int			word_size;
 	int			id;
-	int			users;
+	struct kref		refcnt;
 	size_t			size;
 	bool			read_only;
 	int			flags;
 	struct bin_attribute	eeprom;
 	struct device		*base_dev;
+	struct list_head	cells;
 	nvmem_reg_read_t	reg_read;
 	nvmem_reg_write_t	reg_write;
 	void *priv;
@@ -58,8 +51,13 @@ struct nvmem_cell {
 static DEFINE_MUTEX(nvmem_mutex);
 static DEFINE_IDA(nvmem_ida);
 
-static LIST_HEAD(nvmem_cells);
-static DEFINE_MUTEX(nvmem_cells_mutex);
+static DEFINE_MUTEX(nvmem_cell_mutex);
+static LIST_HEAD(nvmem_cell_tables);
+
+static DEFINE_MUTEX(nvmem_lookup_mutex);
+static LIST_HEAD(nvmem_lookup_list);
+
+static BLOCKING_NOTIFIER_HEAD(nvmem_notifier);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key eeprom_lock_key;
@@ -156,7 +154,7 @@ static ssize_t bin_attr_nvmem_write(struct file *filp, struct kobject *kobj,
 static struct bin_attribute bin_attr_rw_nvmem = {
 	.attr	= {
 		.name	= "nvmem",
-		.mode	= S_IWUSR | S_IRUGO,
+		.mode	= 0644,
 	},
 	.read	= bin_attr_nvmem_read,
 	.write	= bin_attr_nvmem_write,
@@ -180,7 +178,7 @@ static const struct attribute_group *nvmem_rw_dev_groups[] = {
 static struct bin_attribute bin_attr_ro_nvmem = {
 	.attr	= {
 		.name	= "nvmem",
-		.mode	= S_IRUGO,
+		.mode	= 0444,
 	},
 	.read	= bin_attr_nvmem_read,
 };
@@ -203,7 +201,7 @@ static const struct attribute_group *nvmem_ro_dev_groups[] = {
 static struct bin_attribute bin_attr_rw_root_nvmem = {
 	.attr	= {
 		.name	= "nvmem",
-		.mode	= S_IWUSR | S_IRUSR,
+		.mode	= 0600,
 	},
 	.read	= bin_attr_nvmem_read,
 	.write	= bin_attr_nvmem_write,
@@ -227,7 +225,7 @@ static const struct attribute_group *nvmem_rw_root_dev_groups[] = {
 static struct bin_attribute bin_attr_ro_root_nvmem = {
 	.attr	= {
 		.name	= "nvmem",
-		.mode	= S_IRUSR,
+		.mode	= 0400,
 	},
 	.read	= bin_attr_nvmem_read,
 };
@@ -282,48 +280,42 @@ static struct nvmem_device *of_nvmem_find(struct device_node *nvmem_np)
 	return to_nvmem_device(d);
 }
 
-static struct nvmem_cell *nvmem_find_cell(const char *cell_id)
+static struct nvmem_device *nvmem_find(const char *name)
 {
-	struct nvmem_cell *p;
-
-	mutex_lock(&nvmem_cells_mutex);
+	struct device *d;
 
-	list_for_each_entry(p, &nvmem_cells, node)
-		if (!strcmp(p->name, cell_id)) {
-			mutex_unlock(&nvmem_cells_mutex);
-			return p;
-		}
+	d = bus_find_device_by_name(&nvmem_bus_type, NULL, name);
 
-	mutex_unlock(&nvmem_cells_mutex);
+	if (!d)
+		return NULL;
 
-	return NULL;
+	return to_nvmem_device(d);
 }
 
 static void nvmem_cell_drop(struct nvmem_cell *cell)
 {
-	mutex_lock(&nvmem_cells_mutex);
+	blocking_notifier_call_chain(&nvmem_notifier, NVMEM_CELL_REMOVE, cell);
+	mutex_lock(&nvmem_mutex);
 	list_del(&cell->node);
-	mutex_unlock(&nvmem_cells_mutex);
+	mutex_unlock(&nvmem_mutex);
+	kfree(cell->name);
 	kfree(cell);
 }
 
 static void nvmem_device_remove_all_cells(const struct nvmem_device *nvmem)
 {
-	struct nvmem_cell *cell;
-	struct list_head *p, *n;
+	struct nvmem_cell *cell, *p;
 
-	list_for_each_safe(p, n, &nvmem_cells) {
-		cell = list_entry(p, struct nvmem_cell, node);
-		if (cell->nvmem == nvmem)
-			nvmem_cell_drop(cell);
-	}
+	list_for_each_entry_safe(cell, p, &nvmem->cells, node)
+		nvmem_cell_drop(cell);
 }
 
 static void nvmem_cell_add(struct nvmem_cell *cell)
 {
-	mutex_lock(&nvmem_cells_mutex);
-	list_add_tail(&cell->node, &nvmem_cells);
-	mutex_unlock(&nvmem_cells_mutex);
+	mutex_lock(&nvmem_mutex);
+	list_add_tail(&cell->node, &cell->nvmem->cells);
+	mutex_unlock(&nvmem_mutex);
+	blocking_notifier_call_chain(&nvmem_notifier, NVMEM_CELL_ADD, cell);
 }
 
 static int nvmem_cell_info_to_nvmem_cell(struct nvmem_device *nvmem,
@@ -361,7 +353,7 @@ static int nvmem_cell_info_to_nvmem_cell(struct nvmem_device *nvmem,
  *
  * Return: 0 or negative error code on failure.
  */
-int nvmem_add_cells(struct nvmem_device *nvmem,
+static int nvmem_add_cells(struct nvmem_device *nvmem,
 		    const struct nvmem_cell_info *info,
 		    int ncells)
 {
@@ -400,7 +392,6 @@ err:
 
 	return rval;
 }
-EXPORT_SYMBOL_GPL(nvmem_add_cells);
 
 /*
  * nvmem_setup_compat() - Create an additional binary entry in
@@ -440,6 +431,136 @@ static int nvmem_setup_compat(struct nvmem_device *nvmem,
 }
 
 /**
+ * nvmem_register_notifier() - Register a notifier block for nvmem events.
+ *
+ * @nb: notifier block to be called on nvmem events.
+ *
+ * Return: 0 on success, negative error number on failure.
+ */
+int nvmem_register_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&nvmem_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(nvmem_register_notifier);
+
+/**
+ * nvmem_unregister_notifier() - Unregister a notifier block for nvmem events.
+ *
+ * @nb: notifier block to be unregistered.
+ *
+ * Return: 0 on success, negative error number on failure.
+ */
+int nvmem_unregister_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&nvmem_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(nvmem_unregister_notifier);
+
+static int nvmem_add_cells_from_table(struct nvmem_device *nvmem)
+{
+	const struct nvmem_cell_info *info;
+	struct nvmem_cell_table *table;
+	struct nvmem_cell *cell;
+	int rval = 0, i;
+
+	mutex_lock(&nvmem_cell_mutex);
+	list_for_each_entry(table, &nvmem_cell_tables, node) {
+		if (strcmp(nvmem_dev_name(nvmem), table->nvmem_name) == 0) {
+			for (i = 0; i < table->ncells; i++) {
+				info = &table->cells[i];
+
+				cell = kzalloc(sizeof(*cell), GFP_KERNEL);
+				if (!cell) {
+					rval = -ENOMEM;
+					goto out;
+				}
+
+				rval = nvmem_cell_info_to_nvmem_cell(nvmem,
+								     info,
+								     cell);
+				if (rval) {
+					kfree(cell);
+					goto out;
+				}
+
+				nvmem_cell_add(cell);
+			}
+		}
+	}
+
+out:
+	mutex_unlock(&nvmem_cell_mutex);
+	return rval;
+}
+
+static struct nvmem_cell *
+nvmem_find_cell_by_name(struct nvmem_device *nvmem, const char *cell_id)
+{
+	struct nvmem_cell *cell = NULL;
+
+	mutex_lock(&nvmem_mutex);
+	list_for_each_entry(cell, &nvmem->cells, node) {
+		if (strcmp(cell_id, cell->name) == 0)
+			break;
+	}
+	mutex_unlock(&nvmem_mutex);
+
+	return cell;
+}
+
+static int nvmem_add_cells_from_of(struct nvmem_device *nvmem)
+{
+	struct device_node *parent, *child;
+	struct device *dev = &nvmem->dev;
+	struct nvmem_cell *cell;
+	const __be32 *addr;
+	int len;
+
+	parent = dev->of_node;
+
+	for_each_child_of_node(parent, child) {
+		addr = of_get_property(child, "reg", &len);
+		if (!addr || (len < 2 * sizeof(u32))) {
+			dev_err(dev, "nvmem: invalid reg on %pOF\n", child);
+			return -EINVAL;
+		}
+
+		cell = kzalloc(sizeof(*cell), GFP_KERNEL);
+		if (!cell)
+			return -ENOMEM;
+
+		cell->nvmem = nvmem;
+		cell->offset = be32_to_cpup(addr++);
+		cell->bytes = be32_to_cpup(addr);
+		cell->name = kasprintf(GFP_KERNEL, "%pOFn", child);
+
+		addr = of_get_property(child, "bits", &len);
+		if (addr && len == (2 * sizeof(u32))) {
+			cell->bit_offset = be32_to_cpup(addr++);
+			cell->nbits = be32_to_cpup(addr);
+		}
+
+		if (cell->nbits)
+			cell->bytes = DIV_ROUND_UP(
+					cell->nbits + cell->bit_offset,
+					BITS_PER_BYTE);
+
+		if (!IS_ALIGNED(cell->offset, nvmem->stride)) {
+			dev_err(dev, "cell %s unaligned to nvmem stride %d\n",
+				cell->name, nvmem->stride);
+			/* Cells already added will be freed later. */
+			kfree(cell->name);
+			kfree(cell);
+			return -EINVAL;
+		}
+
+		nvmem_cell_add(cell);
+	}
+
+	return 0;
+}
+
+/**
  * nvmem_register() - Register a nvmem device for given nvmem_config.
  * Also creates an binary entry in /sys/bus/nvmem/devices/dev-name/nvmem
  *
@@ -467,6 +588,9 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
 		return ERR_PTR(rval);
 	}
 
+	kref_init(&nvmem->refcnt);
+	INIT_LIST_HEAD(&nvmem->cells);
+
 	nvmem->id = rval;
 	nvmem->owner = config->owner;
 	if (!nvmem->owner && config->dev->driver)
@@ -516,11 +640,31 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
 			goto err_device_del;
 	}
 
-	if (config->cells)
-		nvmem_add_cells(nvmem, config->cells, config->ncells);
+	if (config->cells) {
+		rval = nvmem_add_cells(nvmem, config->cells, config->ncells);
+		if (rval)
+			goto err_teardown_compat;
+	}
+
+	rval = nvmem_add_cells_from_table(nvmem);
+	if (rval)
+		goto err_remove_cells;
+
+	rval = nvmem_add_cells_from_of(nvmem);
+	if (rval)
+		goto err_remove_cells;
+
+	rval = blocking_notifier_call_chain(&nvmem_notifier, NVMEM_ADD, nvmem);
+	if (rval)
+		goto err_remove_cells;
 
 	return nvmem;
 
+err_remove_cells:
+	nvmem_device_remove_all_cells(nvmem);
+err_teardown_compat:
+	if (config->compat)
+		device_remove_bin_file(nvmem->base_dev, &nvmem->eeprom);
 err_device_del:
 	device_del(&nvmem->dev);
 err_put_device:
@@ -530,21 +674,13 @@ err_put_device:
 }
 EXPORT_SYMBOL_GPL(nvmem_register);
 
-/**
- * nvmem_unregister() - Unregister previously registered nvmem device
- *
- * @nvmem: Pointer to previously registered nvmem device.
- *
- * Return: Will be an negative on error or a zero on success.
- */
-int nvmem_unregister(struct nvmem_device *nvmem)
+static void nvmem_device_release(struct kref *kref)
 {
-	mutex_lock(&nvmem_mutex);
-	if (nvmem->users) {
-		mutex_unlock(&nvmem_mutex);
-		return -EBUSY;
-	}
-	mutex_unlock(&nvmem_mutex);
+	struct nvmem_device *nvmem;
+
+	nvmem = container_of(kref, struct nvmem_device, refcnt);
+
+	blocking_notifier_call_chain(&nvmem_notifier, NVMEM_REMOVE, nvmem);
 
 	if (nvmem->flags & FLAG_COMPAT)
 		device_remove_bin_file(nvmem->base_dev, &nvmem->eeprom);
@@ -552,14 +688,22 @@ int nvmem_unregister(struct nvmem_device *nvmem)
 	nvmem_device_remove_all_cells(nvmem);
 	device_del(&nvmem->dev);
 	put_device(&nvmem->dev);
+}
 
-	return 0;
+/**
+ * nvmem_unregister() - Unregister previously registered nvmem device
+ *
+ * @nvmem: Pointer to previously registered nvmem device.
+ */
+void nvmem_unregister(struct nvmem_device *nvmem)
+{
+	kref_put(&nvmem->refcnt, nvmem_device_release);
 }
 EXPORT_SYMBOL_GPL(nvmem_unregister);
 
 static void devm_nvmem_release(struct device *dev, void *res)
 {
-	WARN_ON(nvmem_unregister(*(struct nvmem_device **)res));
+	nvmem_unregister(*(struct nvmem_device **)res);
 }
 
 /**
@@ -617,71 +761,34 @@ int devm_nvmem_unregister(struct device *dev, struct nvmem_device *nvmem)
 }
 EXPORT_SYMBOL(devm_nvmem_unregister);
 
-
 static struct nvmem_device *__nvmem_device_get(struct device_node *np,
-					       struct nvmem_cell **cellp,
-					       const char *cell_id)
+					       const char *nvmem_name)
 {
 	struct nvmem_device *nvmem = NULL;
 
 	mutex_lock(&nvmem_mutex);
-
-	if (np) {
-		nvmem = of_nvmem_find(np);
-		if (!nvmem) {
-			mutex_unlock(&nvmem_mutex);
-			return ERR_PTR(-EPROBE_DEFER);
-		}
-	} else {
-		struct nvmem_cell *cell = nvmem_find_cell(cell_id);
-
-		if (cell) {
-			nvmem = cell->nvmem;
-			*cellp = cell;
-		}
-
-		if (!nvmem) {
-			mutex_unlock(&nvmem_mutex);
-			return ERR_PTR(-ENOENT);
-		}
-	}
-
-	nvmem->users++;
+	nvmem = np ? of_nvmem_find(np) : nvmem_find(nvmem_name);
 	mutex_unlock(&nvmem_mutex);
+	if (!nvmem)
+		return ERR_PTR(-EPROBE_DEFER);
 
 	if (!try_module_get(nvmem->owner)) {
 		dev_err(&nvmem->dev,
 			"could not increase module refcount for cell %s\n",
-			nvmem->name);
-
-		mutex_lock(&nvmem_mutex);
-		nvmem->users--;
-		mutex_unlock(&nvmem_mutex);
+			nvmem_dev_name(nvmem));
 
 		return ERR_PTR(-EINVAL);
 	}
 
+	kref_get(&nvmem->refcnt);
+
 	return nvmem;
 }
 
 static void __nvmem_device_put(struct nvmem_device *nvmem)
 {
 	module_put(nvmem->owner);
-	mutex_lock(&nvmem_mutex);
-	nvmem->users--;
-	mutex_unlock(&nvmem_mutex);
-}
-
-static struct nvmem_device *nvmem_find(const char *name)
-{
-	struct device *d;
-
-	d = bus_find_device_by_name(&nvmem_bus_type, NULL, name);
-
-	if (!d)
-		return NULL;
-
-	return to_nvmem_device(d);
+	kref_put(&nvmem->refcnt, nvmem_device_release);
 }
 
 #if IS_ENABLED(CONFIG_OF)
@@ -706,7 +813,7 @@ struct nvmem_device *of_nvmem_device_get(struct device_node *np, const char *id)
 	if (!nvmem_np)
 		return ERR_PTR(-EINVAL);
 
-	return __nvmem_device_get(nvmem_np, NULL, NULL);
+	return __nvmem_device_get(nvmem_np, NULL);
 }
 EXPORT_SYMBOL_GPL(of_nvmem_device_get);
 #endif
@@ -810,44 +917,86 @@ struct nvmem_device *devm_nvmem_device_get(struct device *dev, const char *id)
 }
 EXPORT_SYMBOL_GPL(devm_nvmem_device_get);
 
-static struct nvmem_cell *nvmem_cell_get_from_list(const char *cell_id)
+static struct nvmem_cell *
+nvmem_cell_get_from_lookup(struct device *dev, const char *con_id)
 {
-	struct nvmem_cell *cell = NULL;
+	struct nvmem_cell *cell = ERR_PTR(-ENOENT);
+	struct nvmem_cell_lookup *lookup;
 	struct nvmem_device *nvmem;
+	const char *dev_id;
 
-	nvmem = __nvmem_device_get(NULL, &cell, cell_id);
-	if (IS_ERR(nvmem))
-		return ERR_CAST(nvmem);
+	if (!dev)
+		return ERR_PTR(-EINVAL);
 
+	dev_id = dev_name(dev);
+
+	mutex_lock(&nvmem_lookup_mutex);
+
+	list_for_each_entry(lookup, &nvmem_lookup_list, node) {
+		if ((strcmp(lookup->dev_id, dev_id) == 0) &&
+		    (strcmp(lookup->con_id, con_id) == 0)) {
+			/* This is the right entry. */
+			nvmem = __nvmem_device_get(NULL, lookup->nvmem_name);
+			if (IS_ERR(nvmem)) {
+				/* Provider may not be registered yet. */
+				cell = ERR_CAST(nvmem);
+				goto out;
+			}
+
+			cell = nvmem_find_cell_by_name(nvmem,
+						       lookup->cell_name);
+			if (!cell) {
+				__nvmem_device_put(nvmem);
+				cell = ERR_PTR(-ENOENT);
+				goto out;
+			}
+		}
+	}
+
+out:
+	mutex_unlock(&nvmem_lookup_mutex);
 	return cell;
 }
 
 #if IS_ENABLED(CONFIG_OF)
+static struct nvmem_cell *
+nvmem_find_cell_by_index(struct nvmem_device *nvmem, int index)
+{
+	struct nvmem_cell *cell = NULL;
+	int i = 0;
+
+	mutex_lock(&nvmem_mutex);
+	list_for_each_entry(cell, &nvmem->cells, node) {
+		if (index == i++)
+			break;
+	}
+	mutex_unlock(&nvmem_mutex);
+
+	return cell;
+}
+
 /**
  * of_nvmem_cell_get() - Get a nvmem cell from given device node and cell id
  *
  * @np: Device tree node that uses the nvmem cell.
- * @name: nvmem cell name from nvmem-cell-names property, or NULL
- *	  for the cell at index 0 (the lone cell with no accompanying
- *	  nvmem-cell-names property).
+ * @id: nvmem cell name from nvmem-cell-names property, or NULL
+ *      for the cell at index 0 (the lone cell with no accompanying
+ *      nvmem-cell-names property).
  *
  * Return: Will be an ERR_PTR() on error or a valid pointer
  * to a struct nvmem_cell.  The nvmem_cell will be freed by the
  * nvmem_cell_put().
  */
-struct nvmem_cell *of_nvmem_cell_get(struct device_node *np,
-					    const char *name)
+struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, const char *id)
 {
 	struct device_node *cell_np, *nvmem_np;
-	struct nvmem_cell *cell;
 	struct nvmem_device *nvmem;
-	const __be32 *addr;
-	int rval, len;
+	struct nvmem_cell *cell;
 	int index = 0;
 
 	/* if cell name exists, find index to the name */
-	if (name)
-		index = of_property_match_string(np, "nvmem-cell-names", name);
+	if (id)
+		index = of_property_match_string(np, "nvmem-cell-names", id);
 
 	cell_np = of_parse_phandle(np, "nvmem-cells", index);
 	if (!cell_np)
@@ -857,59 +1006,18 @@ struct nvmem_cell *of_nvmem_cell_get(struct device_node *np,
 	if (!nvmem_np)
 		return ERR_PTR(-EINVAL);
 
-	nvmem = __nvmem_device_get(nvmem_np, NULL, NULL);
+	nvmem = __nvmem_device_get(nvmem_np, NULL);
 	of_node_put(nvmem_np);
 	if (IS_ERR(nvmem))
 		return ERR_CAST(nvmem);
 
-	addr = of_get_property(cell_np, "reg", &len);
-	if (!addr || (len < 2 * sizeof(u32))) {
-		dev_err(&nvmem->dev, "nvmem: invalid reg on %pOF\n",
-			cell_np);
-		rval  = -EINVAL;
-		goto err_mem;
-	}
-
-	cell = kzalloc(sizeof(*cell), GFP_KERNEL);
+	cell = nvmem_find_cell_by_index(nvmem, index);
 	if (!cell) {
-		rval = -ENOMEM;
-		goto err_mem;
-	}
-
-	cell->nvmem = nvmem;
-	cell->offset = be32_to_cpup(addr++);
-	cell->bytes = be32_to_cpup(addr);
-	cell->name = cell_np->name;
-
-	addr = of_get_property(cell_np, "bits", &len);
-	if (addr && len == (2 * sizeof(u32))) {
-		cell->bit_offset = be32_to_cpup(addr++);
-		cell->nbits = be32_to_cpup(addr);
+		__nvmem_device_put(nvmem);
+		return ERR_PTR(-ENOENT);
 	}
 
-	if (cell->nbits)
-		cell->bytes = DIV_ROUND_UP(cell->nbits + cell->bit_offset,
-					   BITS_PER_BYTE);
-
-	if (!IS_ALIGNED(cell->offset, nvmem->stride)) {
-			dev_err(&nvmem->dev,
-				"cell %s unaligned to nvmem stride %d\n",
-				cell->name, nvmem->stride);
-		rval  = -EINVAL;
-		goto err_sanity;
-	}
-
-	nvmem_cell_add(cell);
-
 	return cell;
-
-err_sanity:
-	kfree(cell);
-
-err_mem:
-	__nvmem_device_put(nvmem);
-
-	return ERR_PTR(rval);
 }
 EXPORT_SYMBOL_GPL(of_nvmem_cell_get);
 #endif
@@ -918,27 +1026,29 @@ EXPORT_SYMBOL_GPL(of_nvmem_cell_get);
  * nvmem_cell_get() - Get nvmem cell of device form a given cell name
  *
  * @dev: Device that requests the nvmem cell.
- * @cell_id: nvmem cell name to get.
+ * @id: nvmem cell name to get (this corresponds with the name from the
+ *      nvmem-cell-names property for DT systems and with the con_id from
+ *      the lookup entry for non-DT systems).
  *
  * Return: Will be an ERR_PTR() on error or a valid pointer
  * to a struct nvmem_cell.  The nvmem_cell will be freed by the
  * nvmem_cell_put().
  */
-struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *cell_id)
+struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *id)
 {
 	struct nvmem_cell *cell;
 
 	if (dev->of_node) { /* try dt first */
-		cell = of_nvmem_cell_get(dev->of_node, cell_id);
+		cell = of_nvmem_cell_get(dev->of_node, id);
 		if (!IS_ERR(cell) || PTR_ERR(cell) == -EPROBE_DEFER)
 			return cell;
 	}
 
-	/* NULL cell_id only allowed for device tree; invalid otherwise */
-	if (!cell_id)
+	/* NULL cell id only allowed for device tree; invalid otherwise */
+	if (!id)
 		return ERR_PTR(-EINVAL);
 
-	return nvmem_cell_get_from_list(cell_id);
+	return nvmem_cell_get_from_lookup(dev, id);
 }
 EXPORT_SYMBOL_GPL(nvmem_cell_get);
 
@@ -1015,7 +1125,6 @@ void nvmem_cell_put(struct nvmem_cell *cell)
 	struct nvmem_device *nvmem = cell->nvmem;
 
 	__nvmem_device_put(nvmem);
-	nvmem_cell_drop(cell);
 }
 EXPORT_SYMBOL_GPL(nvmem_cell_put);
 
@@ -1267,7 +1376,7 @@ EXPORT_SYMBOL_GPL(nvmem_device_cell_read);
  * @buf: buffer to be written to cell.
  *
  * Return: length of bytes written or negative error code on failure.
- * */
+ */
 int nvmem_device_cell_write(struct nvmem_device *nvmem,
 			    struct nvmem_cell_info *info, void *buf)
 {
@@ -1323,7 +1432,7 @@ EXPORT_SYMBOL_GPL(nvmem_device_read);
  * @buf: buffer to be written.
  *
  * Return: length of bytes written or negative error code on failure.
- * */
+ */
 int nvmem_device_write(struct nvmem_device *nvmem,
 		       unsigned int offset,
 		       size_t bytes, void *buf)
@@ -1343,6 +1452,80 @@ int nvmem_device_write(struct nvmem_device *nvmem,
 }
 EXPORT_SYMBOL_GPL(nvmem_device_write);
 
+/**
+ * nvmem_add_cell_table() - register a table of cell info entries
+ *
+ * @table: table of cell info entries
+ */
+void nvmem_add_cell_table(struct nvmem_cell_table *table)
+{
+	mutex_lock(&nvmem_cell_mutex);
+	list_add_tail(&table->node, &nvmem_cell_tables);
+	mutex_unlock(&nvmem_cell_mutex);
+}
+EXPORT_SYMBOL_GPL(nvmem_add_cell_table);
+
+/**
+ * nvmem_del_cell_table() - remove a previously registered cell info table
+ *
+ * @table: table of cell info entries
+ */
+void nvmem_del_cell_table(struct nvmem_cell_table *table)
+{
+	mutex_lock(&nvmem_cell_mutex);
+	list_del(&table->node);
+	mutex_unlock(&nvmem_cell_mutex);
+}
+EXPORT_SYMBOL_GPL(nvmem_del_cell_table);
+
+/**
+ * nvmem_add_cell_lookups() - register a list of cell lookup entries
+ *
+ * @entries: array of cell lookup entries
+ * @nentries: number of cell lookup entries in the array
+ */
+void nvmem_add_cell_lookups(struct nvmem_cell_lookup *entries, size_t nentries)
+{
+	int i;
+
+	mutex_lock(&nvmem_lookup_mutex);
+	for (i = 0; i < nentries; i++)
+		list_add_tail(&entries[i].node, &nvmem_lookup_list);
+	mutex_unlock(&nvmem_lookup_mutex);
+}
+EXPORT_SYMBOL_GPL(nvmem_add_cell_lookups);
+
+/**
+ * nvmem_del_cell_lookups() - remove a list of previously added cell lookup
+ *                            entries
+ *
+ * @entries: array of cell lookup entries
+ * @nentries: number of cell lookup entries in the array
+ */
+void nvmem_del_cell_lookups(struct nvmem_cell_lookup *entries, size_t nentries)
+{
+	int i;
+
+	mutex_lock(&nvmem_lookup_mutex);
+	for (i = 0; i < nentries; i++)
+		list_del(&entries[i].node);
+	mutex_unlock(&nvmem_lookup_mutex);
+}
+EXPORT_SYMBOL_GPL(nvmem_del_cell_lookups);
+
+/**
+ * nvmem_dev_name() - Get the name of a given nvmem device.
+ *
+ * @nvmem: nvmem device.
+ *
+ * Return: name of the nvmem device.
+ */
+const char *nvmem_dev_name(struct nvmem_device *nvmem)
+{
+	return dev_name(&nvmem->dev);
+}
+EXPORT_SYMBOL_GPL(nvmem_dev_name);
+
 static int __init nvmem_init(void)
 {
 	return bus_register(&nvmem_bus_type);
diff --git a/drivers/nvmem/lpc18xx_eeprom.c b/drivers/nvmem/lpc18xx_eeprom.c
index a9534a6e8636..66cff1e2147a 100644
--- a/drivers/nvmem/lpc18xx_eeprom.c
+++ b/drivers/nvmem/lpc18xx_eeprom.c
@@ -236,7 +236,7 @@ static int lpc18xx_eeprom_probe(struct platform_device *pdev)
 	lpc18xx_nvmem_config.dev = dev;
 	lpc18xx_nvmem_config.priv = eeprom;
 
-	eeprom->nvmem = nvmem_register(&lpc18xx_nvmem_config);
+	eeprom->nvmem = devm_nvmem_register(dev, &lpc18xx_nvmem_config);
 	if (IS_ERR(eeprom->nvmem)) {
 		ret = PTR_ERR(eeprom->nvmem);
 		goto err_clk;
@@ -255,11 +255,6 @@ err_clk:
 static int lpc18xx_eeprom_remove(struct platform_device *pdev)
 {
 	struct lpc18xx_eeprom_dev *eeprom = platform_get_drvdata(pdev);
-	int ret;
-
-	ret = nvmem_unregister(eeprom->nvmem);
-	if (ret < 0)
-		return ret;
 
 	clk_disable_unprepare(eeprom->clk);
 
diff --git a/drivers/nvmem/mxs-ocotp.c b/drivers/nvmem/mxs-ocotp.c
index 7018e2ef5714..53122f59c4b2 100644
--- a/drivers/nvmem/mxs-ocotp.c
+++ b/drivers/nvmem/mxs-ocotp.c
@@ -177,7 +177,7 @@ static int mxs_ocotp_probe(struct platform_device *pdev)
 	ocotp_config.size = data->size;
 	ocotp_config.priv = otp;
 	ocotp_config.dev = dev;
-	otp->nvmem = nvmem_register(&ocotp_config);
+	otp->nvmem = devm_nvmem_register(dev, &ocotp_config);
 	if (IS_ERR(otp->nvmem)) {
 		ret = PTR_ERR(otp->nvmem);
 		goto err_clk;
@@ -199,7 +199,7 @@ static int mxs_ocotp_remove(struct platform_device *pdev)
 
 	clk_unprepare(otp->clk);
 
-	return nvmem_unregister(otp->nvmem);
+	return 0;
 }
 
 static struct platform_driver mxs_ocotp_driver = {
diff --git a/drivers/nvmem/sunxi_sid.c b/drivers/nvmem/sunxi_sid.c
index d020f89248fd..570a2e354f30 100644
--- a/drivers/nvmem/sunxi_sid.c
+++ b/drivers/nvmem/sunxi_sid.c
@@ -154,7 +154,7 @@ static int sunxi_sid_probe(struct platform_device *pdev)
 	struct resource *res;
 	struct nvmem_device *nvmem;
 	struct sunxi_sid *sid;
-	int ret, i, size;
+	int i, size;
 	char *randomness;
 	const struct sunxi_sid_cfg *cfg;
 
@@ -181,15 +181,13 @@ static int sunxi_sid_probe(struct platform_device *pdev)
 	else
 		econfig.reg_read = sunxi_sid_read;
 	econfig.priv = sid;
-	nvmem = nvmem_register(&econfig);
+	nvmem = devm_nvmem_register(dev, &econfig);
 	if (IS_ERR(nvmem))
 		return PTR_ERR(nvmem);
 
 	randomness = kzalloc(size, GFP_KERNEL);
-	if (!randomness) {
-		ret = -EINVAL;
-		goto err_unreg_nvmem;
-	}
+	if (!randomness)
+		return -ENOMEM;
 
 	for (i = 0; i < size; i++)
 		econfig.reg_read(sid, i, &randomness[i], 1);
@@ -200,17 +198,6 @@ static int sunxi_sid_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, nvmem);
 
 	return 0;
-
-err_unreg_nvmem:
-	nvmem_unregister(nvmem);
-	return ret;
-}
-
-static int sunxi_sid_remove(struct platform_device *pdev)
-{
-	struct nvmem_device *nvmem = platform_get_drvdata(pdev);
-
-	return nvmem_unregister(nvmem);
 }
 
 static const struct sunxi_sid_cfg sun4i_a10_cfg = {
@@ -243,7 +230,6 @@ MODULE_DEVICE_TABLE(of, sunxi_sid_of_match);
 
 static struct platform_driver sunxi_sid_driver = {
 	.probe = sunxi_sid_probe,
-	.remove = sunxi_sid_remove,
 	.driver = {
 		.name = "eeprom-sunxi-sid",
 		.of_match_table = sunxi_sid_of_match,
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 74eaedd5b860..13ebb16be64e 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -67,6 +67,7 @@ bool of_node_name_eq(const struct device_node *np, const char *name)
 
 	return (strlen(name) == len) && (strncmp(node_name, name, len) == 0);
 }
+EXPORT_SYMBOL(of_node_name_eq);
 
 bool of_node_name_prefix(const struct device_node *np, const char *prefix)
 {
@@ -75,6 +76,7 @@ bool of_node_name_prefix(const struct device_node *np, const char *prefix)
 
 	return strncmp(kbasename(np->full_name), prefix, strlen(prefix)) == 0;
 }
+EXPORT_SYMBOL(of_node_name_prefix);
 
 int of_n_addr_cells(struct device_node *np)
 {
@@ -330,6 +332,8 @@ static bool __of_find_n_match_cpu_property(struct device_node *cpun,
 
 	ac = of_n_addr_cells(cpun);
 	cell = of_get_property(cpun, prop_name, &prop_len);
+	if (!cell && !ac && arch_match_cpu_phys_id(cpu, 0))
+		return true;
 	if (!cell || !ac)
 		return false;
 	prop_len /= sizeof(*cell) * ac;
@@ -390,7 +394,7 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
 {
 	struct device_node *cpun;
 
-	for_each_node_by_type(cpun, "cpu") {
+	for_each_of_cpu_node(cpun) {
 		if (arch_find_n_match_cpu_physical_id(cpun, cpu, thread))
 			return cpun;
 	}
@@ -745,6 +749,45 @@ struct device_node *of_get_next_available_child(const struct device_node *node,
 EXPORT_SYMBOL(of_get_next_available_child);
 
 /**
+ *	of_get_next_cpu_node - Iterate on cpu nodes
+ *	@prev:	previous child of the /cpus node, or NULL to get first
+ *
+ *	Returns a cpu node pointer with refcount incremented, use of_node_put()
+ *	on it when done. Returns NULL when prev is the last child. Decrements
+ *	the refcount of prev.
+ */
+struct device_node *of_get_next_cpu_node(struct device_node *prev)
+{
+	struct device_node *next = NULL;
+	unsigned long flags;
+	struct device_node *node;
+
+	if (!prev)
+		node = of_find_node_by_path("/cpus");
+
+	raw_spin_lock_irqsave(&devtree_lock, flags);
+	if (prev)
+		next = prev->sibling;
+	else if (node) {
+		next = node->child;
+		of_node_put(node);
+	}
+	for (; next; next = next->sibling) {
+		if (!(of_node_name_eq(next, "cpu") ||
+		      (next->type && !of_node_cmp(next->type, "cpu"))))
+			continue;
+		if (!__of_device_is_available(next))
+			continue;
+		if (of_node_get(next))
+			break;
+	}
+	of_node_put(prev);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+	return next;
+}
+EXPORT_SYMBOL(of_get_next_cpu_node);
+
+/**
  * of_get_compatible_child - Find compatible child node
  * @parent:	parent node
  * @compatible:	compatible string
@@ -2013,7 +2056,7 @@ struct device_node *of_find_next_cache_node(const struct device_node *np)
 	/* OF on pmac has nodes instead of properties named "l2-cache"
 	 * beneath CPU nodes.
 	 */
-	if (!strcmp(np->type, "cpu"))
+	if (IS_ENABLED(CONFIG_PPC_PMAC) && !strcmp(np->type, "cpu"))
 		for_each_child_of_node(np, child)
 			if (!strcmp(child->type, "cache"))
 				return child;
@@ -2045,3 +2088,105 @@ int of_find_last_cache_level(unsigned int cpu)
 
 	return cache_level;
 }
+
+/**
+ * of_map_rid - Translate a requester ID through a downstream mapping.
+ * @np: root complex device node.
+ * @rid: device requester ID to map.
+ * @map_name: property name of the map to use.
+ * @map_mask_name: optional property name of the mask to use.
+ * @target: optional pointer to a target device node.
+ * @id_out: optional pointer to receive the translated ID.
+ *
+ * Given a device requester ID, look up the appropriate implementation-defined
+ * platform ID and/or the target device which receives transactions on that
+ * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or
+ * @id_out may be NULL if only the other is required. If @target points to
+ * a non-NULL device node pointer, only entries targeting that node will be
+ * matched; if it points to a NULL value, it will receive the device node of
+ * the first matching target phandle, with a reference held.
+ *
+ * Return: 0 on success or a standard error code on failure.
+ */
+int of_map_rid(struct device_node *np, u32 rid,
+	       const char *map_name, const char *map_mask_name,
+	       struct device_node **target, u32 *id_out)
+{
+	u32 map_mask, masked_rid;
+	int map_len;
+	const __be32 *map = NULL;
+
+	if (!np || !map_name || (!target && !id_out))
+		return -EINVAL;
+
+	map = of_get_property(np, map_name, &map_len);
+	if (!map) {
+		if (target)
+			return -ENODEV;
+		/* Otherwise, no map implies no translation */
+		*id_out = rid;
+		return 0;
+	}
+
+	if (!map_len || map_len % (4 * sizeof(*map))) {
+		pr_err("%pOF: Error: Bad %s length: %d\n", np,
+			map_name, map_len);
+		return -EINVAL;
+	}
+
+	/* The default is to select all bits. */
+	map_mask = 0xffffffff;
+
+	/*
+	 * Can be overridden by "{iommu,msi}-map-mask" property.
+	 * If of_property_read_u32() fails, the default is used.
+	 */
+	if (map_mask_name)
+		of_property_read_u32(np, map_mask_name, &map_mask);
+
+	masked_rid = map_mask & rid;
+	for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) {
+		struct device_node *phandle_node;
+		u32 rid_base = be32_to_cpup(map + 0);
+		u32 phandle = be32_to_cpup(map + 1);
+		u32 out_base = be32_to_cpup(map + 2);
+		u32 rid_len = be32_to_cpup(map + 3);
+
+		if (rid_base & ~map_mask) {
+			pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n",
+				np, map_name, map_name,
+				map_mask, rid_base);
+			return -EFAULT;
+		}
+
+		if (masked_rid < rid_base || masked_rid >= rid_base + rid_len)
+			continue;
+
+		phandle_node = of_find_node_by_phandle(phandle);
+		if (!phandle_node)
+			return -ENODEV;
+
+		if (target) {
+			if (*target)
+				of_node_put(phandle_node);
+			else
+				*target = phandle_node;
+
+			if (*target != phandle_node)
+				continue;
+		}
+
+		if (id_out)
+			*id_out = masked_rid - rid_base + out_base;
+
+		pr_debug("%pOF: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n",
+			np, map_name, map_mask, rid_base, out_base,
+			rid_len, rid, masked_rid - rid_base + out_base);
+		return 0;
+	}
+
+	pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n",
+		np, map_name, rid, target && *target ? *target : NULL);
+	return -EFAULT;
+}
+EXPORT_SYMBOL_GPL(of_map_rid);
diff --git a/drivers/of/device.c b/drivers/of/device.c
index c7fa5a9697c9..0f27fad9fe94 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -207,7 +207,8 @@ static ssize_t of_device_get_modalias(struct device *dev, char *str, ssize_t len
 		return -ENODEV;
 
 	/* Name & Type */
-	csize = snprintf(str, len, "of:N%sT%s", dev->of_node->name,
+	/* %p eats all alphanum characters, so %c must be used here */
+	csize = snprintf(str, len, "of:N%pOFn%c%s", dev->of_node, 'T',
 			 dev->of_node->type);
 	tsize = csize;
 	len -= csize;
@@ -286,7 +287,7 @@ void of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
 	if ((!dev) || (!dev->of_node))
 		return;
 
-	add_uevent_var(env, "OF_NAME=%s", dev->of_node->name);
+	add_uevent_var(env, "OF_NAME=%pOFn", dev->of_node);
 	add_uevent_var(env, "OF_FULLNAME=%pOF", dev->of_node);
 	if (dev->of_node->type && strcmp("<NULL>", dev->of_node->type) != 0)
 		add_uevent_var(env, "OF_TYPE=%s", dev->of_node->type);
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 02ad93a304a4..e1f6f392a4c0 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -22,7 +22,6 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/of_pci.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 
@@ -588,8 +587,8 @@ static u32 __of_msi_map_rid(struct device *dev, struct device_node **np,
 	 * "msi-map" property.
 	 */
 	for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent)
-		if (!of_pci_map_rid(parent_dev->of_node, rid_in, "msi-map",
-				    "msi-map-mask", np, &rid_out))
+		if (!of_map_rid(parent_dev->of_node, rid_in, "msi-map",
+				"msi-map-mask", np, &rid_out))
 			break;
 	return rid_out;
 }
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index e92391d6d1bd..5ad1342f5682 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -97,8 +97,8 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio,
 		return rc;
 	}
 
-	dev_dbg(&mdio->dev, "registered phy %s at address %i\n",
-		child->name, addr);
+	dev_dbg(&mdio->dev, "registered phy %pOFn at address %i\n",
+		child, addr);
 	return 0;
 }
 
@@ -127,8 +127,8 @@ static int of_mdiobus_register_device(struct mii_bus *mdio,
 		return rc;
 	}
 
-	dev_dbg(&mdio->dev, "registered mdio device %s at address %i\n",
-		child->name, addr);
+	dev_dbg(&mdio->dev, "registered mdio device %pOFn at address %i\n",
+		child, addr);
 	return 0;
 }
 
@@ -263,8 +263,8 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 				continue;
 
 			/* be noisy to encourage people to set reg property */
-			dev_info(&mdio->dev, "scan phy %s at address %i\n",
-				 child->name, addr);
+			dev_info(&mdio->dev, "scan phy %pOFn at address %i\n",
+				 child, addr);
 
 			if (of_mdiobus_child_is_phy(child)) {
 				rc = of_mdiobus_register_phy(mdio, child, addr);
diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c
index 27d9b4bba535..35c64a4295e0 100644
--- a/drivers/of/of_numa.c
+++ b/drivers/of/of_numa.c
@@ -24,18 +24,9 @@ static void __init of_numa_parse_cpu_nodes(void)
 {
 	u32 nid;
 	int r;
-	struct device_node *cpus;
-	struct device_node *np = NULL;
-
-	cpus = of_find_node_by_path("/cpus");
-	if (!cpus)
-		return;
-
-	for_each_child_of_node(cpus, np) {
-		/* Skip things that are not CPUs */
-		if (of_node_cmp(np->type, "cpu") != 0)
-			continue;
+	struct device_node *np;
 
+	for_each_of_cpu_node(np) {
 		r = of_property_read_u32(np, "numa-node-id", &nid);
 		if (r)
 			continue;
@@ -46,8 +37,6 @@ static void __init of_numa_parse_cpu_nodes(void)
 		else
 			node_set(nid, numa_nodes_parsed);
 	}
-
-	of_node_put(cpus);
 }
 
 static int __init of_numa_parse_memory_nodes(void)
@@ -163,8 +152,8 @@ int of_node_to_nid(struct device_node *device)
 		np = of_get_next_parent(np);
 	}
 	if (np && r)
-		pr_warn("Invalid \"numa-node-id\" property in node %s\n",
-			np->name);
+		pr_warn("Invalid \"numa-node-id\" property in node %pOFn\n",
+			np);
 	of_node_put(np);
 
 	/*
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index 216175d11d3d..5d1567025358 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -27,6 +27,14 @@ struct alias_prop {
 	char stem[0];
 };
 
+#if defined(CONFIG_SPARC)
+#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 2
+#else
+#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 1
+#endif
+
+#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT 1
+
 extern struct mutex of_mutex;
 extern struct list_head aliases_lookup;
 extern struct kset *of_kset;
diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
index eda57ef12fd0..42b1f73ac5f6 100644
--- a/drivers/of/overlay.c
+++ b/drivers/of/overlay.c
@@ -425,8 +425,8 @@ static int build_changeset_next_level(struct overlay_changeset *ovcs,
 	for_each_child_of_node(overlay_node, child) {
 		ret = add_changeset_node(ovcs, target_node, child);
 		if (ret) {
-			pr_debug("Failed to apply node @%pOF/%s, err=%d\n",
-				 target_node, child->name, ret);
+			pr_debug("Failed to apply node @%pOF/%pOFn, err=%d\n",
+				 target_node, child, ret);
 			of_node_put(child);
 			return ret;
 		}
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 6c59673933e9..04ad312fd85b 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -91,8 +91,8 @@ static void of_device_make_bus_id(struct device *dev)
 		 */
 		reg = of_get_property(node, "reg", NULL);
 		if (reg && (addr = of_translate_address(node, reg)) != OF_BAD_ADDR) {
-			dev_set_name(dev, dev_name(dev) ? "%llx.%s:%s" : "%llx.%s",
-				     (unsigned long long)addr, node->name,
+			dev_set_name(dev, dev_name(dev) ? "%llx.%pOFn:%s" : "%llx.%pOFn",
+				     (unsigned long long)addr, node,
 				     dev_name(dev));
 			return;
 		}
@@ -142,8 +142,8 @@ struct platform_device *of_device_alloc(struct device_node *np,
 			WARN_ON(rc);
 		}
 		if (of_irq_to_resource_table(np, res, num_irq) != num_irq)
-			pr_debug("not all legacy IRQ resources mapped for %s\n",
-				 np->name);
+			pr_debug("not all legacy IRQ resources mapped for %pOFn\n",
+				 np);
 	}
 
 	dev->dev.of_node = of_node_get(np);
diff --git a/drivers/of/unittest-data/overlay_15.dts b/drivers/of/unittest-data/overlay_15.dts
index b98f2514df4b..5728490474f6 100644
--- a/drivers/of/unittest-data/overlay_15.dts
+++ b/drivers/of/unittest-data/overlay_15.dts
@@ -20,8 +20,8 @@
 			#size-cells = <0>;
 			reg = <0>;
 
-			test-mux-dev {
-				reg = <32>;
+			test-mux-dev@20 {
+				reg = <0x20>;
 				compatible = "unittest-i2c-dev";
 				status = "okay";
 			};
diff --git a/drivers/of/unittest-data/tests-overlay.dtsi b/drivers/of/unittest-data/tests-overlay.dtsi
index 25cf397b8f6b..4ea024d908ee 100644
--- a/drivers/of/unittest-data/tests-overlay.dtsi
+++ b/drivers/of/unittest-data/tests-overlay.dtsi
@@ -103,8 +103,8 @@
 							#size-cells = <0>;
 							reg = <0>;
 
-							test-mux-dev {
-								reg = <32>;
+							test-mux-dev@20 {
+								reg = <0x20>;
 								compatible = "unittest-i2c-dev";
 								status = "okay";
 							};
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 41b49716ac75..a3a6866765f2 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -212,8 +212,8 @@ static int __init of_unittest_check_node_linkage(struct device_node *np)
 
 	for_each_child_of_node(np, child) {
 		if (child->parent != np) {
-			pr_err("Child node %s links to wrong parent %s\n",
-				 child->name, np->name);
+			pr_err("Child node %pOFn links to wrong parent %pOFn\n",
+				 child, np);
 			rc = -EINVAL;
 			goto put_child;
 		}
@@ -299,6 +299,10 @@ static void __init of_unittest_printf(void)
 
 	of_unittest_printf_one(np, "%pOF",  full_name);
 	of_unittest_printf_one(np, "%pOFf", full_name);
+	of_unittest_printf_one(np, "%pOFn", "dev");
+	of_unittest_printf_one(np, "%2pOFn", "dev");
+	of_unittest_printf_one(np, "%5pOFn", "  dev");
+	of_unittest_printf_one(np, "%pOFnc", "dev:test-sub-device");
 	of_unittest_printf_one(np, "%pOFp", phandle_str);
 	of_unittest_printf_one(np, "%pOFP", "dev@100");
 	of_unittest_printf_one(np, "ABC %pOFP ABC", "ABC dev@100 ABC");
@@ -1046,16 +1050,16 @@ static void __init of_unittest_platform_populate(void)
 	for_each_child_of_node(np, child) {
 		for_each_child_of_node(child, grandchild)
 			unittest(of_find_device_by_node(grandchild),
-				 "Could not create device for node '%s'\n",
-				 grandchild->name);
+				 "Could not create device for node '%pOFn'\n",
+				 grandchild);
 	}
 
 	of_platform_depopulate(&test_bus->dev);
 	for_each_child_of_node(np, child) {
 		for_each_child_of_node(child, grandchild)
 			unittest(!of_find_device_by_node(grandchild),
-				 "device didn't get destroyed '%s'\n",
-				 grandchild->name);
+				 "device didn't get destroyed '%pOFn'\n",
+				 grandchild);
 	}
 
 	platform_device_unregister(test_bus);
@@ -2357,11 +2361,14 @@ static __init void of_unittest_overlay_high_level(void)
 		}
 	}
 
-	for (np = overlay_base_root->child; np; np = np->sibling) {
-		if (of_get_child_by_name(of_root, np->name)) {
-			unittest(0, "illegal node name in overlay_base %s",
-				np->name);
-			return;
+	for_each_child_of_node(overlay_base_root, np) {
+		struct device_node *base_child;
+		for_each_child_of_node(of_root, base_child) {
+			if (!strcmp(np->full_name, base_child->full_name)) {
+				unittest(0, "illegal node name in overlay_base %pOFn",
+					 np);
+				return;
+			}
 		}
 	}
 
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index 1836b8ddf292..4c4217d0c3f1 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -355,107 +355,6 @@ failed:
 EXPORT_SYMBOL_GPL(devm_of_pci_get_host_bridge_resources);
 #endif /* CONFIG_OF_ADDRESS */
 
-/**
- * of_pci_map_rid - Translate a requester ID through a downstream mapping.
- * @np: root complex device node.
- * @rid: PCI requester ID to map.
- * @map_name: property name of the map to use.
- * @map_mask_name: optional property name of the mask to use.
- * @target: optional pointer to a target device node.
- * @id_out: optional pointer to receive the translated ID.
- *
- * Given a PCI requester ID, look up the appropriate implementation-defined
- * platform ID and/or the target device which receives transactions on that
- * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or
- * @id_out may be NULL if only the other is required. If @target points to
- * a non-NULL device node pointer, only entries targeting that node will be
- * matched; if it points to a NULL value, it will receive the device node of
- * the first matching target phandle, with a reference held.
- *
- * Return: 0 on success or a standard error code on failure.
- */
-int of_pci_map_rid(struct device_node *np, u32 rid,
-		   const char *map_name, const char *map_mask_name,
-		   struct device_node **target, u32 *id_out)
-{
-	u32 map_mask, masked_rid;
-	int map_len;
-	const __be32 *map = NULL;
-
-	if (!np || !map_name || (!target && !id_out))
-		return -EINVAL;
-
-	map = of_get_property(np, map_name, &map_len);
-	if (!map) {
-		if (target)
-			return -ENODEV;
-		/* Otherwise, no map implies no translation */
-		*id_out = rid;
-		return 0;
-	}
-
-	if (!map_len || map_len % (4 * sizeof(*map))) {
-		pr_err("%pOF: Error: Bad %s length: %d\n", np,
-			map_name, map_len);
-		return -EINVAL;
-	}
-
-	/* The default is to select all bits. */
-	map_mask = 0xffffffff;
-
-	/*
-	 * Can be overridden by "{iommu,msi}-map-mask" property.
-	 * If of_property_read_u32() fails, the default is used.
-	 */
-	if (map_mask_name)
-		of_property_read_u32(np, map_mask_name, &map_mask);
-
-	masked_rid = map_mask & rid;
-	for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) {
-		struct device_node *phandle_node;
-		u32 rid_base = be32_to_cpup(map + 0);
-		u32 phandle = be32_to_cpup(map + 1);
-		u32 out_base = be32_to_cpup(map + 2);
-		u32 rid_len = be32_to_cpup(map + 3);
-
-		if (rid_base & ~map_mask) {
-			pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n",
-				np, map_name, map_name,
-				map_mask, rid_base);
-			return -EFAULT;
-		}
-
-		if (masked_rid < rid_base || masked_rid >= rid_base + rid_len)
-			continue;
-
-		phandle_node = of_find_node_by_phandle(phandle);
-		if (!phandle_node)
-			return -ENODEV;
-
-		if (target) {
-			if (*target)
-				of_node_put(phandle_node);
-			else
-				*target = phandle_node;
-
-			if (*target != phandle_node)
-				continue;
-		}
-
-		if (id_out)
-			*id_out = masked_rid - rid_base + out_base;
-
-		pr_debug("%pOF: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n",
-			np, map_name, map_mask, rid_base, out_base,
-			rid_len, rid, masked_rid - rid_base + out_base);
-		return 0;
-	}
-
-	pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n",
-		np, map_name, rid, target && *target ? *target : NULL);
-	return -EFAULT;
-}
-
 #if IS_ENABLED(CONFIG_OF_IRQ)
 /**
  * of_irq_parse_pci - Resolve the interrupt for a PCI device
diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index c89d3effd99d..60f949e2a684 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -43,6 +43,7 @@ config PHY_XGENE
 source "drivers/phy/allwinner/Kconfig"
 source "drivers/phy/amlogic/Kconfig"
 source "drivers/phy/broadcom/Kconfig"
+source "drivers/phy/cadence/Kconfig"
 source "drivers/phy/hisilicon/Kconfig"
 source "drivers/phy/lantiq/Kconfig"
 source "drivers/phy/marvell/Kconfig"
@@ -54,6 +55,7 @@ source "drivers/phy/ralink/Kconfig"
 source "drivers/phy/renesas/Kconfig"
 source "drivers/phy/rockchip/Kconfig"
 source "drivers/phy/samsung/Kconfig"
+source "drivers/phy/socionext/Kconfig"
 source "drivers/phy/st/Kconfig"
 source "drivers/phy/tegra/Kconfig"
 source "drivers/phy/ti/Kconfig"
diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile
index ce8339ff0022..0301e25d07c1 100644
--- a/drivers/phy/Makefile
+++ b/drivers/phy/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_ARCH_RENESAS)		+= renesas/
 obj-$(CONFIG_ARCH_ROCKCHIP)		+= rockchip/
 obj-$(CONFIG_ARCH_TEGRA)		+= tegra/
 obj-y					+= broadcom/	\
+					   cadence/	\
 					   hisilicon/	\
 					   marvell/	\
 					   motorola/	\
@@ -22,5 +23,6 @@ obj-y					+= broadcom/	\
 					   qualcomm/	\
 					   ralink/	\
 					   samsung/	\
+					   socionext/	\
 					   st/		\
 					   ti/
diff --git a/drivers/phy/broadcom/Kconfig b/drivers/phy/broadcom/Kconfig
index 8786a9674471..aa917a61071d 100644
--- a/drivers/phy/broadcom/Kconfig
+++ b/drivers/phy/broadcom/Kconfig
@@ -60,7 +60,8 @@ config PHY_NS2_USB_DRD
 
 config PHY_BRCM_SATA
 	tristate "Broadcom SATA PHY driver"
-	depends on ARCH_BRCMSTB || ARCH_BCM_IPROC || BMIPS_GENERIC || COMPILE_TEST
+	depends on ARCH_BRCMSTB || ARCH_BCM_IPROC || BMIPS_GENERIC || \
+		   ARCH_BCM_63XX || COMPILE_TEST
 	depends on OF
 	select GENERIC_PHY
 	default ARCH_BCM_IPROC
diff --git a/drivers/phy/broadcom/phy-bcm-cygnus-pcie.c b/drivers/phy/broadcom/phy-bcm-cygnus-pcie.c
index 0f4ac5d63cff..b074682d9dd8 100644
--- a/drivers/phy/broadcom/phy-bcm-cygnus-pcie.c
+++ b/drivers/phy/broadcom/phy-bcm-cygnus-pcie.c
@@ -153,8 +153,8 @@ static int cygnus_pcie_phy_probe(struct platform_device *pdev)
 		struct cygnus_pcie_phy *p;
 
 		if (of_property_read_u32(child, "reg", &id)) {
-			dev_err(dev, "missing reg property for %s\n",
-				child->name);
+			dev_err(dev, "missing reg property for %pOFn\n",
+				child);
 			ret = -EINVAL;
 			goto put_child;
 		}
diff --git a/drivers/phy/broadcom/phy-brcm-sata.c b/drivers/phy/broadcom/phy-brcm-sata.c
index 8708ea3b4d6d..0f4a06ff7fd3 100644
--- a/drivers/phy/broadcom/phy-brcm-sata.c
+++ b/drivers/phy/broadcom/phy-brcm-sata.c
@@ -47,6 +47,7 @@ enum brcm_sata_phy_version {
 	BRCM_SATA_PHY_IPROC_NS2,
 	BRCM_SATA_PHY_IPROC_NSP,
 	BRCM_SATA_PHY_IPROC_SR,
+	BRCM_SATA_PHY_DSL_28NM,
 };
 
 enum brcm_sata_phy_rxaeq_mode {
@@ -96,7 +97,10 @@ enum sata_phy_regs {
 	PLLCONTROL_0_FREQ_DET_RESTART		= BIT(13),
 	PLLCONTROL_0_FREQ_MONITOR		= BIT(12),
 	PLLCONTROL_0_SEQ_START			= BIT(15),
+	PLL_CAP_CHARGE_TIME			= 0x83,
+	PLL_VCO_CAL_THRESH			= 0x84,
 	PLL_CAP_CONTROL				= 0x85,
+	PLL_FREQ_DET_TIME			= 0x86,
 	PLL_ACTRL2				= 0x8b,
 	PLL_ACTRL2_SELDIV_MASK			= 0x1f,
 	PLL_ACTRL2_SELDIV_SHIFT			= 9,
@@ -106,6 +110,9 @@ enum sata_phy_regs {
 	PLL1_ACTRL2				= 0x82,
 	PLL1_ACTRL3				= 0x83,
 	PLL1_ACTRL4				= 0x84,
+	PLL1_ACTRL5				= 0x85,
+	PLL1_ACTRL6				= 0x86,
+	PLL1_ACTRL7				= 0x87,
 
 	TX_REG_BANK				= 0x070,
 	TX_ACTRL0				= 0x80,
@@ -119,6 +126,8 @@ enum sata_phy_regs {
 	AEQ_FRC_EQ_FORCE			= BIT(0),
 	AEQ_FRC_EQ_FORCE_VAL			= BIT(1),
 	AEQRX_REG_BANK_1			= 0xe0,
+	AEQRX_SLCAL0_CTRL0			= 0x82,
+	AEQRX_SLCAL1_CTRL0			= 0x86,
 
 	OOB_REG_BANK				= 0x150,
 	OOB1_REG_BANK				= 0x160,
@@ -168,6 +177,7 @@ static inline void __iomem *brcm_sata_pcb_base(struct brcm_sata_port *port)
 	switch (priv->version) {
 	case BRCM_SATA_PHY_STB_28NM:
 	case BRCM_SATA_PHY_IPROC_NS2:
+	case BRCM_SATA_PHY_DSL_28NM:
 		size = SATA_PCB_REG_28NM_SPACE_SIZE;
 		break;
 	case BRCM_SATA_PHY_STB_40NM:
@@ -482,6 +492,61 @@ static int brcm_sr_sata_init(struct brcm_sata_port *port)
 	return 0;
 }
 
+static int brcm_dsl_sata_init(struct brcm_sata_port *port)
+{
+	void __iomem *base = brcm_sata_pcb_base(port);
+	struct device *dev = port->phy_priv->dev;
+	unsigned int try;
+	u32 tmp;
+
+	brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL7, 0, 0x873);
+
+	brcm_sata_phy_wr(base, PLL1_REG_BANK, PLL1_ACTRL6, 0, 0xc000);
+
+	brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0,
+			 0, 0x3089);
+	usleep_range(1000, 2000);
+
+	brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_REG_BANK_0_PLLCONTROL_0,
+			 0, 0x3088);
+	usleep_range(1000, 2000);
+
+	brcm_sata_phy_wr(base, AEQRX_REG_BANK_1, AEQRX_SLCAL0_CTRL0,
+			 0, 0x3000);
+
+	brcm_sata_phy_wr(base, AEQRX_REG_BANK_1, AEQRX_SLCAL1_CTRL0,
+			 0, 0x3000);
+	usleep_range(1000, 2000);
+
+	brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_CAP_CHARGE_TIME, 0, 0x32);
+
+	brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_VCO_CAL_THRESH, 0, 0xa);
+
+	brcm_sata_phy_wr(base, PLL_REG_BANK_0, PLL_FREQ_DET_TIME, 0, 0x64);
+	usleep_range(1000, 2000);
+
+	/* Acquire PLL lock */
+	try = 50;
+	while (try) {
+		tmp = brcm_sata_phy_rd(base, BLOCK0_REG_BANK,
+				       BLOCK0_XGXSSTATUS);
+		if (tmp & BLOCK0_XGXSSTATUS_PLL_LOCK)
+			break;
+		msleep(20);
+		try--;
+	};
+
+	if (!try) {
+		/* PLL did not lock; give up */
+		dev_err(dev, "port%d PLL did not lock\n", port->portnum);
+		return -ETIMEDOUT;
+	}
+
+	dev_dbg(dev, "port%d initialized\n", port->portnum);
+
+	return 0;
+}
+
 static int brcm_sata_phy_init(struct phy *phy)
 {
 	int rc;
@@ -501,6 +566,9 @@ static int brcm_sata_phy_init(struct phy *phy)
 	case BRCM_SATA_PHY_IPROC_SR:
 		rc = brcm_sr_sata_init(port);
 		break;
+	case BRCM_SATA_PHY_DSL_28NM:
+		rc = brcm_dsl_sata_init(port);
+		break;
 	default:
 		rc = -ENODEV;
 	}
@@ -552,6 +620,8 @@ static const struct of_device_id brcm_sata_phy_of_match[] = {
 	  .data = (void *)BRCM_SATA_PHY_IPROC_NSP },
 	{ .compatible	= "brcm,iproc-sr-sata-phy",
 	  .data = (void *)BRCM_SATA_PHY_IPROC_SR },
+	{ .compatible	= "brcm,bcm63138-sata-phy",
+	  .data = (void *)BRCM_SATA_PHY_DSL_28NM },
 	{},
 };
 MODULE_DEVICE_TABLE(of, brcm_sata_phy_of_match);
@@ -600,8 +670,8 @@ static int brcm_sata_phy_probe(struct platform_device *pdev)
 		struct brcm_sata_port *port;
 
 		if (of_property_read_u32(child, "reg", &id)) {
-			dev_err(dev, "missing reg property in node %s\n",
-					child->name);
+			dev_err(dev, "missing reg property in node %pOFn\n",
+					child);
 			ret = -EINVAL;
 			goto put_child;
 		}
diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c
index d1dab36fa5b7..f59b1dc30399 100644
--- a/drivers/phy/broadcom/phy-brcm-usb.c
+++ b/drivers/phy/broadcom/phy-brcm-usb.c
@@ -372,10 +372,8 @@ static int brcm_usb_phy_probe(struct platform_device *pdev)
 	clk_disable(priv->usb_30_clk);
 
 	phy_provider = devm_of_phy_provider_register(dev, brcm_usb_phy_xlate);
-	if (IS_ERR(phy_provider))
-		return PTR_ERR(phy_provider);
 
-	return 0;
+	return PTR_ERR_OR_ZERO(phy_provider);
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/phy/cadence/Kconfig b/drivers/phy/cadence/Kconfig
new file mode 100644
index 000000000000..57fff7de4031
--- /dev/null
+++ b/drivers/phy/cadence/Kconfig
@@ -0,0 +1,10 @@
+#
+# Phy driver for Cadence MHDP DisplayPort controller
+#
+config PHY_CADENCE_DP
+	tristate "Cadence MHDP DisplayPort PHY driver"
+	depends on OF
+	depends on HAS_IOMEM
+	select GENERIC_PHY
+	help
+	  Support for Cadence MHDP DisplayPort PHY.
diff --git a/drivers/phy/cadence/Makefile b/drivers/phy/cadence/Makefile
new file mode 100644
index 000000000000..e5b0a11cf28a
--- /dev/null
+++ b/drivers/phy/cadence/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_PHY_CADENCE_DP)	+= phy-cadence-dp.o
diff --git a/drivers/phy/cadence/phy-cadence-dp.c b/drivers/phy/cadence/phy-cadence-dp.c
new file mode 100644
index 000000000000..bc10cb264b7a
--- /dev/null
+++ b/drivers/phy/cadence/phy-cadence-dp.c
@@ -0,0 +1,541 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Cadence MHDP DisplayPort SD0801 PHY driver.
+ *
+ * Copyright 2018 Cadence Design Systems, Inc.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+
+#define DEFAULT_NUM_LANES	2
+#define MAX_NUM_LANES		4
+#define DEFAULT_MAX_BIT_RATE	8100 /* in Mbps */
+
+#define POLL_TIMEOUT_US		2000
+#define LANE_MASK		0x7
+
+/*
+ * register offsets from DPTX PHY register block base (i.e MHDP
+ * register base + 0x30a00)
+ */
+#define PHY_AUX_CONFIG			0x00
+#define PHY_AUX_CTRL			0x04
+#define PHY_RESET			0x20
+#define PHY_PMA_XCVR_PLLCLK_EN		0x24
+#define PHY_PMA_XCVR_PLLCLK_EN_ACK	0x28
+#define PHY_PMA_XCVR_POWER_STATE_REQ	0x2c
+#define PHY_POWER_STATE_LN_0	0x0000
+#define PHY_POWER_STATE_LN_1	0x0008
+#define PHY_POWER_STATE_LN_2	0x0010
+#define PHY_POWER_STATE_LN_3	0x0018
+#define PHY_PMA_XCVR_POWER_STATE_ACK	0x30
+#define PHY_PMA_CMN_READY		0x34
+#define PHY_PMA_XCVR_TX_VMARGIN		0x38
+#define PHY_PMA_XCVR_TX_DEEMPH		0x3c
+
+/*
+ * register offsets from SD0801 PHY register block base (i.e MHDP
+ * register base + 0x500000)
+ */
+#define CMN_SSM_BANDGAP_TMR		0x00084
+#define CMN_SSM_BIAS_TMR		0x00088
+#define CMN_PLLSM0_PLLPRE_TMR		0x000a8
+#define CMN_PLLSM0_PLLLOCK_TMR		0x000b0
+#define CMN_PLLSM1_PLLPRE_TMR		0x000c8
+#define CMN_PLLSM1_PLLLOCK_TMR		0x000d0
+#define CMN_BGCAL_INIT_TMR		0x00190
+#define CMN_BGCAL_ITER_TMR		0x00194
+#define CMN_IBCAL_INIT_TMR		0x001d0
+#define CMN_PLL0_VCOCAL_INIT_TMR	0x00210
+#define CMN_PLL0_VCOCAL_ITER_TMR	0x00214
+#define CMN_PLL0_VCOCAL_REFTIM_START	0x00218
+#define CMN_PLL0_VCOCAL_PLLCNT_START	0x00220
+#define CMN_PLL0_INTDIV_M0		0x00240
+#define CMN_PLL0_FRACDIVL_M0		0x00244
+#define CMN_PLL0_FRACDIVH_M0		0x00248
+#define CMN_PLL0_HIGH_THR_M0		0x0024c
+#define CMN_PLL0_DSM_DIAG_M0		0x00250
+#define CMN_PLL0_LOCK_PLLCNT_START	0x00278
+#define CMN_PLL1_VCOCAL_INIT_TMR	0x00310
+#define CMN_PLL1_VCOCAL_ITER_TMR	0x00314
+#define CMN_PLL1_DSM_DIAG_M0		0x00350
+#define CMN_TXPUCAL_INIT_TMR		0x00410
+#define CMN_TXPUCAL_ITER_TMR		0x00414
+#define CMN_TXPDCAL_INIT_TMR		0x00430
+#define CMN_TXPDCAL_ITER_TMR		0x00434
+#define CMN_RXCAL_INIT_TMR		0x00450
+#define CMN_RXCAL_ITER_TMR		0x00454
+#define CMN_SD_CAL_INIT_TMR		0x00490
+#define CMN_SD_CAL_ITER_TMR		0x00494
+#define CMN_SD_CAL_REFTIM_START		0x00498
+#define CMN_SD_CAL_PLLCNT_START		0x004a0
+#define CMN_PDIAG_PLL0_CTRL_M0		0x00680
+#define CMN_PDIAG_PLL0_CLK_SEL_M0	0x00684
+#define CMN_PDIAG_PLL0_CP_PADJ_M0	0x00690
+#define CMN_PDIAG_PLL0_CP_IADJ_M0	0x00694
+#define CMN_PDIAG_PLL0_FILT_PADJ_M0	0x00698
+#define CMN_PDIAG_PLL0_CP_PADJ_M1	0x006d0
+#define CMN_PDIAG_PLL0_CP_IADJ_M1	0x006d4
+#define CMN_PDIAG_PLL1_CLK_SEL_M0	0x00704
+#define XCVR_DIAG_PLLDRC_CTRL		0x10394
+#define XCVR_DIAG_HSCLK_SEL		0x10398
+#define XCVR_DIAG_HSCLK_DIV		0x1039c
+#define TX_PSC_A0			0x10400
+#define TX_PSC_A1			0x10404
+#define TX_PSC_A2			0x10408
+#define TX_PSC_A3			0x1040c
+#define RX_PSC_A0			0x20000
+#define RX_PSC_A1			0x20004
+#define RX_PSC_A2			0x20008
+#define RX_PSC_A3			0x2000c
+#define PHY_PLL_CFG			0x30038
+
+struct cdns_dp_phy {
+	void __iomem *base;	/* DPTX registers base */
+	void __iomem *sd_base; /* SD0801 registers base */
+	u32 num_lanes; /* Number of lanes to use */
+	u32 max_bit_rate; /* Maximum link bit rate to use (in Mbps) */
+	struct device *dev;
+};
+
+static int cdns_dp_phy_init(struct phy *phy);
+static void cdns_dp_phy_run(struct cdns_dp_phy *cdns_phy);
+static void cdns_dp_phy_wait_pma_cmn_ready(struct cdns_dp_phy *cdns_phy);
+static void cdns_dp_phy_pma_cfg(struct cdns_dp_phy *cdns_phy);
+static void cdns_dp_phy_pma_cmn_cfg_25mhz(struct cdns_dp_phy *cdns_phy);
+static void cdns_dp_phy_pma_lane_cfg(struct cdns_dp_phy *cdns_phy,
+					 unsigned int lane);
+static void cdns_dp_phy_pma_cmn_vco_cfg_25mhz(struct cdns_dp_phy *cdns_phy);
+static void cdns_dp_phy_pma_cmn_rate(struct cdns_dp_phy *cdns_phy);
+static void cdns_dp_phy_write_field(struct cdns_dp_phy *cdns_phy,
+					unsigned int offset,
+					unsigned char start_bit,
+					unsigned char num_bits,
+					unsigned int val);
+
+static const struct phy_ops cdns_dp_phy_ops = {
+	.init		= cdns_dp_phy_init,
+	.owner		= THIS_MODULE,
+};
+
+static int cdns_dp_phy_init(struct phy *phy)
+{
+	unsigned char lane_bits;
+
+	struct cdns_dp_phy *cdns_phy = phy_get_drvdata(phy);
+
+	writel(0x0003, cdns_phy->base + PHY_AUX_CTRL); /* enable AUX */
+
+	/* PHY PMA registers configuration function */
+	cdns_dp_phy_pma_cfg(cdns_phy);
+
+	/*
+	 * Set lines power state to A0
+	 * Set lines pll clk enable to 0
+	 */
+
+	cdns_dp_phy_write_field(cdns_phy, PHY_PMA_XCVR_POWER_STATE_REQ,
+				PHY_POWER_STATE_LN_0, 6, 0x0000);
+
+	if (cdns_phy->num_lanes >= 2) {
+		cdns_dp_phy_write_field(cdns_phy,
+					PHY_PMA_XCVR_POWER_STATE_REQ,
+					PHY_POWER_STATE_LN_1, 6, 0x0000);
+
+		if (cdns_phy->num_lanes == 4) {
+			cdns_dp_phy_write_field(cdns_phy,
+						PHY_PMA_XCVR_POWER_STATE_REQ,
+						PHY_POWER_STATE_LN_2, 6, 0);
+			cdns_dp_phy_write_field(cdns_phy,
+						PHY_PMA_XCVR_POWER_STATE_REQ,
+						PHY_POWER_STATE_LN_3, 6, 0);
+		}
+	}
+
+	cdns_dp_phy_write_field(cdns_phy, PHY_PMA_XCVR_PLLCLK_EN,
+				0, 1, 0x0000);
+
+	if (cdns_phy->num_lanes >= 2) {
+		cdns_dp_phy_write_field(cdns_phy, PHY_PMA_XCVR_PLLCLK_EN,
+					1, 1, 0x0000);
+		if (cdns_phy->num_lanes == 4) {
+			cdns_dp_phy_write_field(cdns_phy,
+						PHY_PMA_XCVR_PLLCLK_EN,
+						2, 1, 0x0000);
+			cdns_dp_phy_write_field(cdns_phy,
+						PHY_PMA_XCVR_PLLCLK_EN,
+						3, 1, 0x0000);
+		}
+	}
+
+	/*
+	 * release phy_l0*_reset_n and pma_tx_elec_idle_ln_* based on
+	 * used lanes
+	 */
+	lane_bits = (1 << cdns_phy->num_lanes) - 1;
+	writel(((0xF & ~lane_bits) << 4) | (0xF & lane_bits),
+		   cdns_phy->base + PHY_RESET);
+
+	/* release pma_xcvr_pllclk_en_ln_*, only for the master lane */
+	writel(0x0001, cdns_phy->base + PHY_PMA_XCVR_PLLCLK_EN);
+
+	/* PHY PMA registers configuration functions */
+	cdns_dp_phy_pma_cmn_vco_cfg_25mhz(cdns_phy);
+	cdns_dp_phy_pma_cmn_rate(cdns_phy);
+
+	/* take out of reset */
+	cdns_dp_phy_write_field(cdns_phy, PHY_RESET, 8, 1, 1);
+	cdns_dp_phy_wait_pma_cmn_ready(cdns_phy);
+	cdns_dp_phy_run(cdns_phy);
+
+	return 0;
+}
+
+static void cdns_dp_phy_wait_pma_cmn_ready(struct cdns_dp_phy *cdns_phy)
+{
+	unsigned int reg;
+	int ret;
+
+	ret = readl_poll_timeout(cdns_phy->base + PHY_PMA_CMN_READY, reg,
+				 reg & 1, 0, 500);
+	if (ret == -ETIMEDOUT)
+		dev_err(cdns_phy->dev,
+			"timeout waiting for PMA common ready\n");
+}
+
+static void cdns_dp_phy_pma_cfg(struct cdns_dp_phy *cdns_phy)
+{
+	unsigned int i;
+
+	/* PMA common configuration */
+	cdns_dp_phy_pma_cmn_cfg_25mhz(cdns_phy);
+
+	/* PMA lane configuration to deal with multi-link operation */
+	for (i = 0; i < cdns_phy->num_lanes; i++)
+		cdns_dp_phy_pma_lane_cfg(cdns_phy, i);
+}
+
+static void cdns_dp_phy_pma_cmn_cfg_25mhz(struct cdns_dp_phy *cdns_phy)
+{
+	/* refclock registers - assumes 25 MHz refclock */
+	writel(0x0019, cdns_phy->sd_base + CMN_SSM_BIAS_TMR);
+	writel(0x0032, cdns_phy->sd_base + CMN_PLLSM0_PLLPRE_TMR);
+	writel(0x00D1, cdns_phy->sd_base + CMN_PLLSM0_PLLLOCK_TMR);
+	writel(0x0032, cdns_phy->sd_base + CMN_PLLSM1_PLLPRE_TMR);
+	writel(0x00D1, cdns_phy->sd_base + CMN_PLLSM1_PLLLOCK_TMR);
+	writel(0x007D, cdns_phy->sd_base + CMN_BGCAL_INIT_TMR);
+	writel(0x007D, cdns_phy->sd_base + CMN_BGCAL_ITER_TMR);
+	writel(0x0019, cdns_phy->sd_base + CMN_IBCAL_INIT_TMR);
+	writel(0x001E, cdns_phy->sd_base + CMN_TXPUCAL_INIT_TMR);
+	writel(0x0006, cdns_phy->sd_base + CMN_TXPUCAL_ITER_TMR);
+	writel(0x001E, cdns_phy->sd_base + CMN_TXPDCAL_INIT_TMR);
+	writel(0x0006, cdns_phy->sd_base + CMN_TXPDCAL_ITER_TMR);
+	writel(0x02EE, cdns_phy->sd_base + CMN_RXCAL_INIT_TMR);
+	writel(0x0006, cdns_phy->sd_base + CMN_RXCAL_ITER_TMR);
+	writel(0x0002, cdns_phy->sd_base + CMN_SD_CAL_INIT_TMR);
+	writel(0x0002, cdns_phy->sd_base + CMN_SD_CAL_ITER_TMR);
+	writel(0x000E, cdns_phy->sd_base + CMN_SD_CAL_REFTIM_START);
+	writel(0x012B, cdns_phy->sd_base + CMN_SD_CAL_PLLCNT_START);
+	/* PLL registers */
+	writel(0x0409, cdns_phy->sd_base + CMN_PDIAG_PLL0_CP_PADJ_M0);
+	writel(0x1001, cdns_phy->sd_base + CMN_PDIAG_PLL0_CP_IADJ_M0);
+	writel(0x0F08, cdns_phy->sd_base + CMN_PDIAG_PLL0_FILT_PADJ_M0);
+	writel(0x0004, cdns_phy->sd_base + CMN_PLL0_DSM_DIAG_M0);
+	writel(0x00FA, cdns_phy->sd_base + CMN_PLL0_VCOCAL_INIT_TMR);
+	writel(0x0004, cdns_phy->sd_base + CMN_PLL0_VCOCAL_ITER_TMR);
+	writel(0x00FA, cdns_phy->sd_base + CMN_PLL1_VCOCAL_INIT_TMR);
+	writel(0x0004, cdns_phy->sd_base + CMN_PLL1_VCOCAL_ITER_TMR);
+	writel(0x0318, cdns_phy->sd_base + CMN_PLL0_VCOCAL_REFTIM_START);
+}
+
+static void cdns_dp_phy_pma_cmn_vco_cfg_25mhz(struct cdns_dp_phy *cdns_phy)
+{
+	/* Assumes 25 MHz refclock */
+	switch (cdns_phy->max_bit_rate) {
+		/* Setting VCO for 10.8GHz */
+	case 2700:
+	case 5400:
+		writel(0x01B0, cdns_phy->sd_base + CMN_PLL0_INTDIV_M0);
+		writel(0x0000, cdns_phy->sd_base + CMN_PLL0_FRACDIVL_M0);
+		writel(0x0002, cdns_phy->sd_base + CMN_PLL0_FRACDIVH_M0);
+		writel(0x0120, cdns_phy->sd_base + CMN_PLL0_HIGH_THR_M0);
+		break;
+		/* Setting VCO for 9.72GHz */
+	case 2430:
+	case 3240:
+		writel(0x0184, cdns_phy->sd_base + CMN_PLL0_INTDIV_M0);
+		writel(0xCCCD, cdns_phy->sd_base + CMN_PLL0_FRACDIVL_M0);
+		writel(0x0002, cdns_phy->sd_base + CMN_PLL0_FRACDIVH_M0);
+		writel(0x0104, cdns_phy->sd_base + CMN_PLL0_HIGH_THR_M0);
+		break;
+		/* Setting VCO for 8.64GHz */
+	case 2160:
+	case 4320:
+		writel(0x0159, cdns_phy->sd_base + CMN_PLL0_INTDIV_M0);
+		writel(0x999A, cdns_phy->sd_base + CMN_PLL0_FRACDIVL_M0);
+		writel(0x0002, cdns_phy->sd_base + CMN_PLL0_FRACDIVH_M0);
+		writel(0x00E7, cdns_phy->sd_base + CMN_PLL0_HIGH_THR_M0);
+		break;
+		/* Setting VCO for 8.1GHz */
+	case 8100:
+		writel(0x0144, cdns_phy->sd_base + CMN_PLL0_INTDIV_M0);
+		writel(0x0000, cdns_phy->sd_base + CMN_PLL0_FRACDIVL_M0);
+		writel(0x0002, cdns_phy->sd_base + CMN_PLL0_FRACDIVH_M0);
+		writel(0x00D8, cdns_phy->sd_base + CMN_PLL0_HIGH_THR_M0);
+		break;
+	}
+
+	writel(0x0002, cdns_phy->sd_base + CMN_PDIAG_PLL0_CTRL_M0);
+	writel(0x0318, cdns_phy->sd_base + CMN_PLL0_VCOCAL_PLLCNT_START);
+}
+
+static void cdns_dp_phy_pma_cmn_rate(struct cdns_dp_phy *cdns_phy)
+{
+	unsigned int clk_sel_val = 0;
+	unsigned int hsclk_div_val = 0;
+	unsigned int i;
+
+	/* 16'h0000 for single DP link configuration */
+	writel(0x0000, cdns_phy->sd_base + PHY_PLL_CFG);
+
+	switch (cdns_phy->max_bit_rate) {
+	case 1620:
+		clk_sel_val = 0x0f01;
+		hsclk_div_val = 2;
+		break;
+	case 2160:
+	case 2430:
+	case 2700:
+		clk_sel_val = 0x0701;
+		 hsclk_div_val = 1;
+		break;
+	case 3240:
+		clk_sel_val = 0x0b00;
+		hsclk_div_val = 2;
+		break;
+	case 4320:
+	case 5400:
+		clk_sel_val = 0x0301;
+		hsclk_div_val = 0;
+		break;
+	case 8100:
+		clk_sel_val = 0x0200;
+		hsclk_div_val = 0;
+		break;
+	}
+
+	writel(clk_sel_val, cdns_phy->sd_base + CMN_PDIAG_PLL0_CLK_SEL_M0);
+
+	/* PMA lane configuration to deal with multi-link operation */
+	for (i = 0; i < cdns_phy->num_lanes; i++) {
+		writel(hsclk_div_val,
+		       cdns_phy->sd_base + (XCVR_DIAG_HSCLK_DIV | (i<<11)));
+	}
+}
+
+static void cdns_dp_phy_pma_lane_cfg(struct cdns_dp_phy *cdns_phy,
+				     unsigned int lane)
+{
+	unsigned int lane_bits = (lane & LANE_MASK) << 11;
+
+	/* Writing Tx/Rx Power State Controllers registers */
+	writel(0x00FB, cdns_phy->sd_base + (TX_PSC_A0 | lane_bits));
+	writel(0x04AA, cdns_phy->sd_base + (TX_PSC_A2 | lane_bits));
+	writel(0x04AA, cdns_phy->sd_base + (TX_PSC_A3 | lane_bits));
+	writel(0x0000, cdns_phy->sd_base + (RX_PSC_A0 | lane_bits));
+	writel(0x0000, cdns_phy->sd_base + (RX_PSC_A2 | lane_bits));
+	writel(0x0000, cdns_phy->sd_base + (RX_PSC_A3 | lane_bits));
+
+	writel(0x0001, cdns_phy->sd_base + (XCVR_DIAG_PLLDRC_CTRL | lane_bits));
+	writel(0x0000, cdns_phy->sd_base + (XCVR_DIAG_HSCLK_SEL | lane_bits));
+}
+
+static void cdns_dp_phy_run(struct cdns_dp_phy *cdns_phy)
+{
+	unsigned int read_val;
+	u32 write_val1 = 0;
+	u32 write_val2 = 0;
+	u32 mask = 0;
+	int ret;
+
+	/*
+	 * waiting for ACK of pma_xcvr_pllclk_en_ln_*, only for the
+	 * master lane
+	 */
+	ret = readl_poll_timeout(cdns_phy->base + PHY_PMA_XCVR_PLLCLK_EN_ACK,
+				 read_val, read_val & 1, 0, POLL_TIMEOUT_US);
+	if (ret == -ETIMEDOUT)
+		dev_err(cdns_phy->dev,
+			"timeout waiting for link PLL clock enable ack\n");
+
+	ndelay(100);
+
+	switch (cdns_phy->num_lanes) {
+
+	case 1:	/* lane 0 */
+		write_val1 = 0x00000004;
+		write_val2 = 0x00000001;
+		mask = 0x0000003f;
+		break;
+	case 2: /* lane 0-1 */
+		write_val1 = 0x00000404;
+		write_val2 = 0x00000101;
+		mask = 0x00003f3f;
+		break;
+	case 4: /* lane 0-3 */
+		write_val1 = 0x04040404;
+		write_val2 = 0x01010101;
+		mask = 0x3f3f3f3f;
+		break;
+	}
+
+	writel(write_val1, cdns_phy->base + PHY_PMA_XCVR_POWER_STATE_REQ);
+
+	ret = readl_poll_timeout(cdns_phy->base + PHY_PMA_XCVR_POWER_STATE_ACK,
+				 read_val, (read_val & mask) == write_val1, 0,
+				 POLL_TIMEOUT_US);
+	if (ret == -ETIMEDOUT)
+		dev_err(cdns_phy->dev,
+			"timeout waiting for link power state ack\n");
+
+	writel(0, cdns_phy->base + PHY_PMA_XCVR_POWER_STATE_REQ);
+	ndelay(100);
+
+	writel(write_val2, cdns_phy->base + PHY_PMA_XCVR_POWER_STATE_REQ);
+
+	ret = readl_poll_timeout(cdns_phy->base + PHY_PMA_XCVR_POWER_STATE_ACK,
+				 read_val, (read_val & mask) == write_val2, 0,
+				 POLL_TIMEOUT_US);
+	if (ret == -ETIMEDOUT)
+		dev_err(cdns_phy->dev,
+			"timeout waiting for link power state ack\n");
+
+	writel(0, cdns_phy->base + PHY_PMA_XCVR_POWER_STATE_REQ);
+	ndelay(100);
+}
+
+static void cdns_dp_phy_write_field(struct cdns_dp_phy *cdns_phy,
+				    unsigned int offset,
+				    unsigned char start_bit,
+				    unsigned char num_bits,
+				    unsigned int val)
+{
+	unsigned int read_val;
+
+	read_val = readl(cdns_phy->base + offset);
+	writel(((val << start_bit) | (read_val & ~(((1 << num_bits) - 1) <<
+		start_bit))), cdns_phy->base + offset);
+}
+
+static int cdns_dp_phy_probe(struct platform_device *pdev)
+{
+	struct resource *regs;
+	struct cdns_dp_phy *cdns_phy;
+	struct device *dev = &pdev->dev;
+	struct phy_provider *phy_provider;
+	struct phy *phy;
+	int err;
+
+	cdns_phy = devm_kzalloc(dev, sizeof(*cdns_phy), GFP_KERNEL);
+	if (!cdns_phy)
+		return -ENOMEM;
+
+	cdns_phy->dev = &pdev->dev;
+
+	phy = devm_phy_create(dev, NULL, &cdns_dp_phy_ops);
+	if (IS_ERR(phy)) {
+		dev_err(dev, "failed to create DisplayPort PHY\n");
+		return PTR_ERR(phy);
+	}
+
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	cdns_phy->base = devm_ioremap_resource(&pdev->dev, regs);
+	if (IS_ERR(cdns_phy->base))
+		return PTR_ERR(cdns_phy->base);
+
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	cdns_phy->sd_base = devm_ioremap_resource(&pdev->dev, regs);
+	if (IS_ERR(cdns_phy->sd_base))
+		return PTR_ERR(cdns_phy->sd_base);
+
+	err = device_property_read_u32(dev, "num_lanes",
+				       &(cdns_phy->num_lanes));
+	if (err)
+		cdns_phy->num_lanes = DEFAULT_NUM_LANES;
+
+	switch (cdns_phy->num_lanes) {
+	case 1:
+	case 2:
+	case 4:
+		/* valid number of lanes */
+		break;
+	default:
+		dev_err(dev, "unsupported number of lanes: %d\n",
+			cdns_phy->num_lanes);
+		return -EINVAL;
+	}
+
+	err = device_property_read_u32(dev, "max_bit_rate",
+		   &(cdns_phy->max_bit_rate));
+	if (err)
+		cdns_phy->max_bit_rate = DEFAULT_MAX_BIT_RATE;
+
+	switch (cdns_phy->max_bit_rate) {
+	case 2160:
+	case 2430:
+	case 2700:
+	case 3240:
+	case 4320:
+	case 5400:
+	case 8100:
+		/* valid bit rate */
+		break;
+	default:
+		dev_err(dev, "unsupported max bit rate: %dMbps\n",
+			cdns_phy->max_bit_rate);
+		return -EINVAL;
+	}
+
+	phy_set_drvdata(phy, cdns_phy);
+
+	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+
+	dev_info(dev, "%d lanes, max bit rate %d.%03d Gbps\n",
+		 cdns_phy->num_lanes,
+		 cdns_phy->max_bit_rate / 1000,
+		 cdns_phy->max_bit_rate % 1000);
+
+	return PTR_ERR_OR_ZERO(phy_provider);
+}
+
+static const struct of_device_id cdns_dp_phy_of_match[] = {
+	{
+		.compatible = "cdns,dp-phy"
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(of, cdns_dp_phy_of_match);
+
+static struct platform_driver cdns_dp_phy_driver = {
+	.probe	= cdns_dp_phy_probe,
+	.driver = {
+		.name	= "cdns-dp-phy",
+		.of_match_table	= cdns_dp_phy_of_match,
+	}
+};
+module_platform_driver(cdns_dp_phy_driver);
+
+MODULE_AUTHOR("Cadence Design Systems, Inc.");
+MODULE_DESCRIPTION("Cadence MHDP PHY driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c b/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c
index 986224fca9e9..f9e0dd19ff26 100644
--- a/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c
+++ b/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c
@@ -156,7 +156,6 @@ static int ltq_rcu_usb2_of_parse(struct ltq_rcu_usb2_priv *priv,
 {
 	struct device *dev = priv->dev;
 	const __be32 *offset;
-	int ret;
 
 	priv->reg_bits = of_device_get_match_data(dev);
 
@@ -196,10 +195,8 @@ static int ltq_rcu_usb2_of_parse(struct ltq_rcu_usb2_priv *priv,
 	}
 
 	priv->phy_reset = devm_reset_control_get_optional(dev, "phy");
-	if (IS_ERR(priv->phy_reset))
-		return PTR_ERR(priv->phy_reset);
 
-	return 0;
+	return PTR_ERR_OR_ZERO(priv->phy_reset);
 }
 
 static int ltq_rcu_usb2_phy_probe(struct platform_device *pdev)
diff --git a/drivers/phy/marvell/Kconfig b/drivers/phy/marvell/Kconfig
index 68e321225400..6fb4b56e4c14 100644
--- a/drivers/phy/marvell/Kconfig
+++ b/drivers/phy/marvell/Kconfig
@@ -59,3 +59,14 @@ config PHY_PXA_28NM_USB2
 	  The PHY driver will be used by Marvell udc/ehci/otg driver.
 
 	  To compile this driver as a module, choose M here.
+
+config PHY_PXA_USB
+	tristate "Marvell PXA USB PHY Driver"
+	depends on ARCH_PXA || ARCH_MMP
+	select GENERIC_PHY
+	help
+	  Enable this to support Marvell PXA USB PHY driver for Marvell
+	  SoC. This driver will do the PHY initialization and shutdown.
+	  The PHY driver will be used by Marvell udc/ehci/otg driver.
+
+	  To compile this driver as a module, choose M here.
diff --git a/drivers/phy/marvell/Makefile b/drivers/phy/marvell/Makefile
index 5c3ec5d10e0d..3975b144f8ec 100644
--- a/drivers/phy/marvell/Makefile
+++ b/drivers/phy/marvell/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_PHY_MVEBU_CP110_COMPHY)	+= phy-mvebu-cp110-comphy.o
 obj-$(CONFIG_PHY_MVEBU_SATA)		+= phy-mvebu-sata.o
 obj-$(CONFIG_PHY_PXA_28NM_HSIC)		+= phy-pxa-28nm-hsic.o
 obj-$(CONFIG_PHY_PXA_28NM_USB2)		+= phy-pxa-28nm-usb2.o
+obj-$(CONFIG_PHY_PXA_USB)		+= phy-pxa-usb.o
diff --git a/drivers/phy/marvell/phy-berlin-sata.c b/drivers/phy/marvell/phy-berlin-sata.c
index c1bb6725e48f..a91fc67fc4e0 100644
--- a/drivers/phy/marvell/phy-berlin-sata.c
+++ b/drivers/phy/marvell/phy-berlin-sata.c
@@ -231,14 +231,14 @@ static int phy_berlin_sata_probe(struct platform_device *pdev)
 		struct phy_berlin_desc *phy_desc;
 
 		if (of_property_read_u32(child, "reg", &phy_id)) {
-			dev_err(dev, "missing reg property in node %s\n",
-				child->name);
+			dev_err(dev, "missing reg property in node %pOFn\n",
+				child);
 			ret = -EINVAL;
 			goto put_child;
 		}
 
 		if (phy_id >= ARRAY_SIZE(phy_berlin_power_down_bits)) {
-			dev_err(dev, "invalid reg in node %s\n", child->name);
+			dev_err(dev, "invalid reg in node %pOFn\n", child);
 			ret = -EINVAL;
 			goto put_child;
 		}
diff --git a/drivers/phy/marvell/phy-pxa-usb.c b/drivers/phy/marvell/phy-pxa-usb.c
new file mode 100644
index 000000000000..87ff7550b912
--- /dev/null
+++ b/drivers/phy/marvell/phy-pxa-usb.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2011 Marvell International Ltd. All rights reserved.
+ * Copyright (C) 2018 Lubomir Rintel <lkundrak@v3.sk>
+ */
+
+#include <dt-bindings/phy/phy.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+
+/* phy regs */
+#define UTMI_REVISION		0x0
+#define UTMI_CTRL		0x4
+#define UTMI_PLL		0x8
+#define UTMI_TX			0xc
+#define UTMI_RX			0x10
+#define UTMI_IVREF		0x14
+#define UTMI_T0			0x18
+#define UTMI_T1			0x1c
+#define UTMI_T2			0x20
+#define UTMI_T3			0x24
+#define UTMI_T4			0x28
+#define UTMI_T5			0x2c
+#define UTMI_RESERVE		0x30
+#define UTMI_USB_INT		0x34
+#define UTMI_DBG_CTL		0x38
+#define UTMI_OTG_ADDON		0x3c
+
+/* For UTMICTRL Register */
+#define UTMI_CTRL_USB_CLK_EN                    (1 << 31)
+/* pxa168 */
+#define UTMI_CTRL_SUSPEND_SET1                  (1 << 30)
+#define UTMI_CTRL_SUSPEND_SET2                  (1 << 29)
+#define UTMI_CTRL_RXBUF_PDWN                    (1 << 24)
+#define UTMI_CTRL_TXBUF_PDWN                    (1 << 11)
+
+#define UTMI_CTRL_INPKT_DELAY_SHIFT             30
+#define UTMI_CTRL_INPKT_DELAY_SOF_SHIFT		28
+#define UTMI_CTRL_PU_REF_SHIFT			20
+#define UTMI_CTRL_ARC_PULLDN_SHIFT              12
+#define UTMI_CTRL_PLL_PWR_UP_SHIFT              1
+#define UTMI_CTRL_PWR_UP_SHIFT                  0
+
+/* For UTMI_PLL Register */
+#define UTMI_PLL_PLLCALI12_SHIFT		29
+#define UTMI_PLL_PLLCALI12_MASK			(0x3 << 29)
+
+#define UTMI_PLL_PLLVDD18_SHIFT			27
+#define UTMI_PLL_PLLVDD18_MASK			(0x3 << 27)
+
+#define UTMI_PLL_PLLVDD12_SHIFT			25
+#define UTMI_PLL_PLLVDD12_MASK			(0x3 << 25)
+
+#define UTMI_PLL_CLK_BLK_EN_SHIFT               24
+#define CLK_BLK_EN                              (0x1 << 24)
+#define PLL_READY                               (0x1 << 23)
+#define KVCO_EXT                                (0x1 << 22)
+#define VCOCAL_START                            (0x1 << 21)
+
+#define UTMI_PLL_KVCO_SHIFT			15
+#define UTMI_PLL_KVCO_MASK                      (0x7 << 15)
+
+#define UTMI_PLL_ICP_SHIFT			12
+#define UTMI_PLL_ICP_MASK                       (0x7 << 12)
+
+#define UTMI_PLL_FBDIV_SHIFT                    4
+#define UTMI_PLL_FBDIV_MASK                     (0xFF << 4)
+
+#define UTMI_PLL_REFDIV_SHIFT                   0
+#define UTMI_PLL_REFDIV_MASK                    (0xF << 0)
+
+/* For UTMI_TX Register */
+#define UTMI_TX_REG_EXT_FS_RCAL_SHIFT		27
+#define UTMI_TX_REG_EXT_FS_RCAL_MASK		(0xf << 27)
+
+#define UTMI_TX_REG_EXT_FS_RCAL_EN_SHIFT	26
+#define UTMI_TX_REG_EXT_FS_RCAL_EN_MASK		(0x1 << 26)
+
+#define UTMI_TX_TXVDD12_SHIFT                   22
+#define UTMI_TX_TXVDD12_MASK                    (0x3 << 22)
+
+#define UTMI_TX_CK60_PHSEL_SHIFT                17
+#define UTMI_TX_CK60_PHSEL_MASK                 (0xf << 17)
+
+#define UTMI_TX_IMPCAL_VTH_SHIFT                14
+#define UTMI_TX_IMPCAL_VTH_MASK                 (0x7 << 14)
+
+#define REG_RCAL_START                          (0x1 << 12)
+
+#define UTMI_TX_LOW_VDD_EN_SHIFT                11
+
+#define UTMI_TX_AMP_SHIFT			0
+#define UTMI_TX_AMP_MASK			(0x7 << 0)
+
+/* For UTMI_RX Register */
+#define UTMI_REG_SQ_LENGTH_SHIFT                15
+#define UTMI_REG_SQ_LENGTH_MASK                 (0x3 << 15)
+
+#define UTMI_RX_SQ_THRESH_SHIFT                 4
+#define UTMI_RX_SQ_THRESH_MASK                  (0xf << 4)
+
+#define UTMI_OTG_ADDON_OTG_ON			(1 << 0)
+
+enum pxa_usb_phy_version {
+	PXA_USB_PHY_MMP2,
+	PXA_USB_PHY_PXA910,
+	PXA_USB_PHY_PXA168,
+};
+
+struct pxa_usb_phy {
+	struct phy *phy;
+	void __iomem *base;
+	enum pxa_usb_phy_version version;
+};
+
+/*****************************************************************************
+ * The registers read/write routines
+ *****************************************************************************/
+
+static unsigned int u2o_get(void __iomem *base, unsigned int offset)
+{
+	return readl_relaxed(base + offset);
+}
+
+static void u2o_set(void __iomem *base, unsigned int offset,
+		unsigned int value)
+{
+	u32 reg;
+
+	reg = readl_relaxed(base + offset);
+	reg |= value;
+	writel_relaxed(reg, base + offset);
+	readl_relaxed(base + offset);
+}
+
+static void u2o_clear(void __iomem *base, unsigned int offset,
+		unsigned int value)
+{
+	u32 reg;
+
+	reg = readl_relaxed(base + offset);
+	reg &= ~value;
+	writel_relaxed(reg, base + offset);
+	readl_relaxed(base + offset);
+}
+
+static void u2o_write(void __iomem *base, unsigned int offset,
+		unsigned int value)
+{
+	writel_relaxed(value, base + offset);
+	readl_relaxed(base + offset);
+}
+
+static int pxa_usb_phy_init(struct phy *phy)
+{
+	struct pxa_usb_phy *pxa_usb_phy = phy_get_drvdata(phy);
+	void __iomem *base = pxa_usb_phy->base;
+	int loops;
+
+	dev_info(&phy->dev, "initializing Marvell PXA USB PHY");
+
+	/* Initialize the USB PHY power */
+	if (pxa_usb_phy->version == PXA_USB_PHY_PXA910) {
+		u2o_set(base, UTMI_CTRL, (1<<UTMI_CTRL_INPKT_DELAY_SOF_SHIFT)
+			| (1<<UTMI_CTRL_PU_REF_SHIFT));
+	}
+
+	u2o_set(base, UTMI_CTRL, 1<<UTMI_CTRL_PLL_PWR_UP_SHIFT);
+	u2o_set(base, UTMI_CTRL, 1<<UTMI_CTRL_PWR_UP_SHIFT);
+
+	/* UTMI_PLL settings */
+	u2o_clear(base, UTMI_PLL, UTMI_PLL_PLLVDD18_MASK
+		| UTMI_PLL_PLLVDD12_MASK | UTMI_PLL_PLLCALI12_MASK
+		| UTMI_PLL_FBDIV_MASK | UTMI_PLL_REFDIV_MASK
+		| UTMI_PLL_ICP_MASK | UTMI_PLL_KVCO_MASK);
+
+	u2o_set(base, UTMI_PLL, 0xee<<UTMI_PLL_FBDIV_SHIFT
+		| 0xb<<UTMI_PLL_REFDIV_SHIFT | 3<<UTMI_PLL_PLLVDD18_SHIFT
+		| 3<<UTMI_PLL_PLLVDD12_SHIFT | 3<<UTMI_PLL_PLLCALI12_SHIFT
+		| 1<<UTMI_PLL_ICP_SHIFT | 3<<UTMI_PLL_KVCO_SHIFT);
+
+	/* UTMI_TX */
+	u2o_clear(base, UTMI_TX, UTMI_TX_REG_EXT_FS_RCAL_EN_MASK
+		| UTMI_TX_TXVDD12_MASK | UTMI_TX_CK60_PHSEL_MASK
+		| UTMI_TX_IMPCAL_VTH_MASK | UTMI_TX_REG_EXT_FS_RCAL_MASK
+		| UTMI_TX_AMP_MASK);
+	u2o_set(base, UTMI_TX, 3<<UTMI_TX_TXVDD12_SHIFT
+		| 4<<UTMI_TX_CK60_PHSEL_SHIFT | 4<<UTMI_TX_IMPCAL_VTH_SHIFT
+		| 8<<UTMI_TX_REG_EXT_FS_RCAL_SHIFT | 3<<UTMI_TX_AMP_SHIFT);
+
+	/* UTMI_RX */
+	u2o_clear(base, UTMI_RX, UTMI_RX_SQ_THRESH_MASK
+		| UTMI_REG_SQ_LENGTH_MASK);
+	u2o_set(base, UTMI_RX, 7<<UTMI_RX_SQ_THRESH_SHIFT
+		| 2<<UTMI_REG_SQ_LENGTH_SHIFT);
+
+	/* UTMI_IVREF */
+	if (pxa_usb_phy->version == PXA_USB_PHY_PXA168) {
+		/*
+		 * fixing Microsoft Altair board interface with NEC hub issue -
+		 * Set UTMI_IVREF from 0x4a3 to 0x4bf
+		 */
+		u2o_write(base, UTMI_IVREF, 0x4bf);
+	}
+
+	/* toggle VCOCAL_START bit of UTMI_PLL */
+	udelay(200);
+	u2o_set(base, UTMI_PLL, VCOCAL_START);
+	udelay(40);
+	u2o_clear(base, UTMI_PLL, VCOCAL_START);
+
+	/* toggle REG_RCAL_START bit of UTMI_TX */
+	udelay(400);
+	u2o_set(base, UTMI_TX, REG_RCAL_START);
+	udelay(40);
+	u2o_clear(base, UTMI_TX, REG_RCAL_START);
+	udelay(400);
+
+	/* Make sure PHY PLL is ready */
+	loops = 0;
+	while ((u2o_get(base, UTMI_PLL) & PLL_READY) == 0) {
+		mdelay(1);
+		loops++;
+		if (loops > 100) {
+			dev_warn(&phy->dev, "calibrate timeout, UTMI_PLL %x\n",
+						u2o_get(base, UTMI_PLL));
+			break;
+		}
+	}
+
+	if (pxa_usb_phy->version == PXA_USB_PHY_PXA168) {
+		u2o_set(base, UTMI_RESERVE, 1 << 5);
+		/* Turn on UTMI PHY OTG extension */
+		u2o_write(base, UTMI_OTG_ADDON, 1);
+	}
+
+	return 0;
+
+}
+
+static int pxa_usb_phy_exit(struct phy *phy)
+{
+	struct pxa_usb_phy *pxa_usb_phy = phy_get_drvdata(phy);
+	void __iomem *base = pxa_usb_phy->base;
+
+	dev_info(&phy->dev, "deinitializing Marvell PXA USB PHY");
+
+	if (pxa_usb_phy->version == PXA_USB_PHY_PXA168)
+		u2o_clear(base, UTMI_OTG_ADDON, UTMI_OTG_ADDON_OTG_ON);
+
+	u2o_clear(base, UTMI_CTRL, UTMI_CTRL_RXBUF_PDWN);
+	u2o_clear(base, UTMI_CTRL, UTMI_CTRL_TXBUF_PDWN);
+	u2o_clear(base, UTMI_CTRL, UTMI_CTRL_USB_CLK_EN);
+	u2o_clear(base, UTMI_CTRL, 1<<UTMI_CTRL_PWR_UP_SHIFT);
+	u2o_clear(base, UTMI_CTRL, 1<<UTMI_CTRL_PLL_PWR_UP_SHIFT);
+
+	return 0;
+}
+
+static const struct phy_ops pxa_usb_phy_ops = {
+	.init	= pxa_usb_phy_init,
+	.exit	= pxa_usb_phy_exit,
+	.owner	= THIS_MODULE,
+};
+
+static const struct of_device_id pxa_usb_phy_of_match[] = {
+	{
+		.compatible = "marvell,mmp2-usb-phy",
+		.data = (void *)PXA_USB_PHY_MMP2,
+	}, {
+		.compatible = "marvell,pxa910-usb-phy",
+		.data = (void *)PXA_USB_PHY_PXA910,
+	}, {
+		.compatible = "marvell,pxa168-usb-phy",
+		.data = (void *)PXA_USB_PHY_PXA168,
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, pxa_usb_phy_of_match);
+
+static int pxa_usb_phy_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *resource;
+	struct pxa_usb_phy *pxa_usb_phy;
+	struct phy_provider *provider;
+	const struct of_device_id *of_id;
+
+	pxa_usb_phy = devm_kzalloc(dev, sizeof(struct pxa_usb_phy), GFP_KERNEL);
+	if (!pxa_usb_phy)
+		return -ENOMEM;
+
+	of_id = of_match_node(pxa_usb_phy_of_match, dev->of_node);
+	if (of_id)
+		pxa_usb_phy->version = (enum pxa_usb_phy_version)of_id->data;
+	else
+		pxa_usb_phy->version = PXA_USB_PHY_MMP2;
+
+	resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pxa_usb_phy->base = devm_ioremap_resource(dev, resource);
+	if (IS_ERR(pxa_usb_phy->base)) {
+		dev_err(dev, "failed to remap PHY regs\n");
+		return PTR_ERR(pxa_usb_phy->base);
+	}
+
+	pxa_usb_phy->phy = devm_phy_create(dev, NULL, &pxa_usb_phy_ops);
+	if (IS_ERR(pxa_usb_phy->phy)) {
+		dev_err(dev, "failed to create PHY\n");
+		return PTR_ERR(pxa_usb_phy->phy);
+	}
+
+	phy_set_drvdata(pxa_usb_phy->phy, pxa_usb_phy);
+	provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+	if (IS_ERR(provider)) {
+		dev_err(dev, "failed to register PHY provider\n");
+		return PTR_ERR(provider);
+	}
+
+	if (!dev->of_node) {
+		phy_create_lookup(pxa_usb_phy->phy, "usb", "mv-udc");
+		phy_create_lookup(pxa_usb_phy->phy, "usb", "pxa-u2oehci");
+		phy_create_lookup(pxa_usb_phy->phy, "usb", "mv-otg");
+	}
+
+	dev_info(dev, "Marvell PXA USB PHY");
+	return 0;
+}
+
+static struct platform_driver pxa_usb_phy_driver = {
+	.probe		= pxa_usb_phy_probe,
+	.driver		= {
+		.name	= "pxa-usb-phy",
+		.of_match_table = pxa_usb_phy_of_match,
+	},
+};
+module_platform_driver(pxa_usb_phy_driver);
+
+MODULE_AUTHOR("Lubomir Rintel <lkundrak@v3.sk>");
+MODULE_DESCRIPTION("Marvell PXA USB PHY Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig
index 632a0e73ee10..32f7d34eb784 100644
--- a/drivers/phy/qualcomm/Kconfig
+++ b/drivers/phy/qualcomm/Kconfig
@@ -50,6 +50,23 @@ config PHY_QCOM_UFS
 	help
 	  Support for UFS PHY on QCOM chipsets.
 
+if PHY_QCOM_UFS
+
+config PHY_QCOM_UFS_14NM
+	tristate
+	default PHY_QCOM_UFS
+	help
+	  Support for 14nm UFS QMP phy present on QCOM chipsets.
+
+config PHY_QCOM_UFS_20NM
+	tristate
+	default PHY_QCOM_UFS
+	depends on BROKEN
+	help
+	  Support for 20nm UFS QMP phy present on QCOM chipsets.
+
+endif
+
 config PHY_QCOM_USB_HS
 	tristate "Qualcomm USB HS PHY module"
 	depends on USB_ULPI_BUS
diff --git a/drivers/phy/qualcomm/Makefile b/drivers/phy/qualcomm/Makefile
index deb831f453ae..c56efd3af205 100644
--- a/drivers/phy/qualcomm/Makefile
+++ b/drivers/phy/qualcomm/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_PHY_QCOM_IPQ806X_SATA)	+= phy-qcom-ipq806x-sata.o
 obj-$(CONFIG_PHY_QCOM_QMP)		+= phy-qcom-qmp.o
 obj-$(CONFIG_PHY_QCOM_QUSB2)		+= phy-qcom-qusb2.o
 obj-$(CONFIG_PHY_QCOM_UFS)		+= phy-qcom-ufs.o
-obj-$(CONFIG_PHY_QCOM_UFS)		+= phy-qcom-ufs-qmp-14nm.o
-obj-$(CONFIG_PHY_QCOM_UFS)		+= phy-qcom-ufs-qmp-20nm.o
+obj-$(CONFIG_PHY_QCOM_UFS_14NM)		+= phy-qcom-ufs-qmp-14nm.o
+obj-$(CONFIG_PHY_QCOM_UFS_20NM)		+= phy-qcom-ufs-qmp-20nm.o
 obj-$(CONFIG_PHY_QCOM_USB_HS) 		+= phy-qcom-usb-hs.o
 obj-$(CONFIG_PHY_QCOM_USB_HSIC) 	+= phy-qcom-usb-hsic.o
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c b/drivers/phy/qualcomm/phy-qcom-qmp.c
index 4c470104a0d6..a83332411026 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp.c
@@ -156,6 +156,11 @@ static const unsigned int qmp_v3_usb3phy_regs_layout[] = {
 	[QPHY_PCS_LFPS_RXTERM_IRQ_STATUS] = 0x170,
 };
 
+static const unsigned int sdm845_ufsphy_regs_layout[] = {
+	[QPHY_START_CTRL]		= 0x00,
+	[QPHY_PCS_READY_STATUS]		= 0x160,
+};
+
 static const struct qmp_phy_init_tbl msm8996_pcie_serdes_tbl[] = {
 	QMP_PHY_INIT_CFG(QSERDES_COM_BIAS_EN_CLKBUFLR_EN, 0x1c),
 	QMP_PHY_INIT_CFG(QSERDES_COM_CLK_ENABLE1, 0x10),
@@ -601,6 +606,83 @@ static const struct qmp_phy_init_tbl qmp_v3_usb3_uniphy_pcs_tbl[] = {
 	QMP_PHY_INIT_CFG(QPHY_V3_PCS_REFGEN_REQ_CONFIG2, 0x60),
 };
 
+static const struct qmp_phy_init_tbl sdm845_ufsphy_serdes_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_SYS_CLK_CTRL, 0x02),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_BG_TIMER, 0x0a),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_IVCO, 0x07),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_CMN_CONFIG, 0x06),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_SYSCLK_EN_SEL, 0xd5),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_RESETSM_CNTRL, 0x20),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_CLK_SELECT, 0x30),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_HSCLK_SEL, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP_EN, 0x01),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE_CTRL, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_CORE_CLK_EN, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE_MAP, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_SVS_MODE_CLK_SEL, 0x05),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE_INITVAL1, 0xff),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE_INITVAL2, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_DEC_START_MODE0, 0x82),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_CP_CTRL_MODE0, 0x06),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_RCTRL_MODE0, 0x16),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_CCTRL_MODE0, 0x36),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_INTEGLOOP_GAIN0_MODE0, 0x3f),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_INTEGLOOP_GAIN1_MODE0, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE1_MODE0, 0xda),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE2_MODE0, 0x01),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP1_MODE0, 0xff),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP2_MODE0, 0x0c),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_DEC_START_MODE1, 0x98),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_CP_CTRL_MODE1, 0x06),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_RCTRL_MODE1, 0x16),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_CCTRL_MODE1, 0x36),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_INTEGLOOP_GAIN0_MODE1, 0x3f),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_INTEGLOOP_GAIN1_MODE1, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE1_MODE1, 0xc1),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE2_MODE1, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP1_MODE1, 0x32),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP2_MODE1, 0x0f),
+
+	/* Rate B */
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE_MAP, 0x44),
+};
+
+static const struct qmp_phy_init_tbl sdm845_ufsphy_tx_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V3_TX_LANE_MODE_1, 0x06),
+	QMP_PHY_INIT_CFG(QSERDES_V3_TX_RES_CODE_LANE_OFFSET_TX, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V3_TX_RES_CODE_LANE_OFFSET_RX, 0x07),
+};
+
+static const struct qmp_phy_init_tbl sdm845_ufsphy_rx_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_LVL, 0x24),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_CNTRL, 0x0f),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_DEGLITCH_CNTRL, 0x1e),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_INTERFACE_MODE, 0x40),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_FO_GAIN, 0x0b),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_TERM_BW, 0x5b),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL2, 0x06),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL3, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL4, 0x1b),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SVS_SO_GAIN_HALF, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SVS_SO_GAIN_QUARTER, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SVS_SO_GAIN, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x4b),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_PI_CONTROLS, 0x81),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_COUNT_LOW, 0x80),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_MODE_00, 0x59),
+};
+
+static const struct qmp_phy_init_tbl sdm845_ufsphy_pcs_tbl[] = {
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_RX_SIGDET_CTRL2, 0x6e),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TX_LARGE_AMP_DRV_LVL, 0x0a),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TX_SMALL_AMP_DRV_LVL, 0x02),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_RX_SYM_RESYNC_CTRL, 0x03),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TX_MID_TERM_CTRL1, 0x43),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_RX_SIGDET_CTRL1, 0x0f),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_RX_MIN_HIBERN8_TIME, 0x9a),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_MULTI_LANE_CTRL1, 0x02),
+};
 
 /* struct qmp_phy_cfg - per-PHY initialization config */
 struct qmp_phy_cfg {
@@ -649,9 +731,14 @@ struct qmp_phy_cfg {
 
 	/* true, if PHY has a separate DP_COM control block */
 	bool has_phy_dp_com_ctrl;
+	/* true, if PHY has secondary tx/rx lanes to be configured */
+	bool is_dual_lane_phy;
 	/* Register offset of secondary tx/rx lanes for USB DP combo PHY */
 	unsigned int tx_b_lane_offset;
 	unsigned int rx_b_lane_offset;
+
+	/* true, if PCS block has no separate SW_RESET register */
+	bool no_pcs_sw_reset;
 };
 
 /**
@@ -748,6 +835,10 @@ static const char * const qmp_v3_phy_clk_l[] = {
 	"aux", "cfg_ahb", "ref", "com_aux",
 };
 
+static const char * const sdm845_ufs_phy_clk_l[] = {
+	"ref", "ref_aux",
+};
+
 /* list of resets */
 static const char * const msm8996_pciephy_reset_l[] = {
 	"phy", "common", "cfg",
@@ -758,7 +849,7 @@ static const char * const msm8996_usb3phy_reset_l[] = {
 };
 
 /* list of regulators */
-static const char * const msm8996_phy_vreg_l[] = {
+static const char * const qmp_phy_vreg_l[] = {
 	"vdda-phy", "vdda-pll",
 };
 
@@ -778,8 +869,8 @@ static const struct qmp_phy_cfg msm8996_pciephy_cfg = {
 	.num_clks		= ARRAY_SIZE(msm8996_phy_clk_l),
 	.reset_list		= msm8996_pciephy_reset_l,
 	.num_resets		= ARRAY_SIZE(msm8996_pciephy_reset_l),
-	.vreg_list		= msm8996_phy_vreg_l,
-	.num_vregs		= ARRAY_SIZE(msm8996_phy_vreg_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= pciephy_regs_layout,
 
 	.start_ctrl		= PCS_START | PLL_READY_GATE_EN,
@@ -809,8 +900,8 @@ static const struct qmp_phy_cfg msm8996_usb3phy_cfg = {
 	.num_clks		= ARRAY_SIZE(msm8996_phy_clk_l),
 	.reset_list		= msm8996_usb3phy_reset_l,
 	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
-	.vreg_list		= msm8996_phy_vreg_l,
-	.num_vregs		= ARRAY_SIZE(msm8996_phy_vreg_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= usb3phy_regs_layout,
 
 	.start_ctrl		= SERDES_START | PCS_START,
@@ -870,8 +961,8 @@ static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = {
 	.num_clks		= ARRAY_SIZE(qmp_v3_phy_clk_l),
 	.reset_list		= msm8996_usb3phy_reset_l,
 	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
-	.vreg_list		= msm8996_phy_vreg_l,
-	.num_vregs		= ARRAY_SIZE(msm8996_phy_vreg_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= qmp_v3_usb3phy_regs_layout,
 
 	.start_ctrl		= SERDES_START | PCS_START,
@@ -883,6 +974,7 @@ static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = {
 	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 
 	.has_phy_dp_com_ctrl	= true,
+	.is_dual_lane_phy	= true,
 	.tx_b_lane_offset	= 0x400,
 	.rx_b_lane_offset	= 0x400,
 };
@@ -903,8 +995,8 @@ static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = {
 	.num_clks		= ARRAY_SIZE(qmp_v3_phy_clk_l),
 	.reset_list		= msm8996_usb3phy_reset_l,
 	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
-	.vreg_list		= msm8996_phy_vreg_l,
-	.num_vregs		= ARRAY_SIZE(msm8996_phy_vreg_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= qmp_v3_usb3phy_regs_layout,
 
 	.start_ctrl		= SERDES_START | PCS_START,
@@ -916,6 +1008,35 @@ static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = {
 	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };
 
+static const struct qmp_phy_cfg sdm845_ufsphy_cfg = {
+	.type			= PHY_TYPE_UFS,
+	.nlanes			= 2,
+
+	.serdes_tbl		= sdm845_ufsphy_serdes_tbl,
+	.serdes_tbl_num		= ARRAY_SIZE(sdm845_ufsphy_serdes_tbl),
+	.tx_tbl			= sdm845_ufsphy_tx_tbl,
+	.tx_tbl_num		= ARRAY_SIZE(sdm845_ufsphy_tx_tbl),
+	.rx_tbl			= sdm845_ufsphy_rx_tbl,
+	.rx_tbl_num		= ARRAY_SIZE(sdm845_ufsphy_rx_tbl),
+	.pcs_tbl		= sdm845_ufsphy_pcs_tbl,
+	.pcs_tbl_num		= ARRAY_SIZE(sdm845_ufsphy_pcs_tbl),
+	.clk_list		= sdm845_ufs_phy_clk_l,
+	.num_clks		= ARRAY_SIZE(sdm845_ufs_phy_clk_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
+	.regs			= sdm845_ufsphy_regs_layout,
+
+	.start_ctrl		= SERDES_START,
+	.pwrdn_ctrl		= SW_PWRDN,
+	.mask_pcs_ready		= PCS_READY,
+
+	.is_dual_lane_phy	= true,
+	.tx_b_lane_offset	= 0x400,
+	.rx_b_lane_offset	= 0x400,
+
+	.no_pcs_sw_reset	= true,
+};
+
 static void qcom_qmp_phy_configure(void __iomem *base,
 				   const unsigned int *regs,
 				   const struct qmp_phy_init_tbl tbl[],
@@ -935,10 +1056,12 @@ static void qcom_qmp_phy_configure(void __iomem *base,
 	}
 }
 
-static int qcom_qmp_phy_com_init(struct qcom_qmp *qmp)
+static int qcom_qmp_phy_com_init(struct qmp_phy *qphy)
 {
+	struct qcom_qmp *qmp = qphy->qmp;
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	void __iomem *serdes = qmp->serdes;
+	void __iomem *pcs = qphy->pcs;
 	void __iomem *dp_com = qmp->dp_com;
 	int ret, i;
 
@@ -979,10 +1102,6 @@ static int qcom_qmp_phy_com_init(struct qcom_qmp *qmp)
 		goto err_rst;
 	}
 
-	if (cfg->has_phy_com_ctrl)
-		qphy_setbits(serdes, cfg->regs[QPHY_COM_POWER_DOWN_CONTROL],
-			     SW_PWRDN);
-
 	if (cfg->has_phy_dp_com_ctrl) {
 		qphy_setbits(dp_com, QPHY_V3_DP_COM_POWER_DOWN_CTRL,
 			     SW_PWRDN);
@@ -1000,6 +1119,12 @@ static int qcom_qmp_phy_com_init(struct qcom_qmp *qmp)
 			     SW_USB3PHY_RESET_MUX | SW_USB3PHY_RESET);
 	}
 
+	if (cfg->has_phy_com_ctrl)
+		qphy_setbits(serdes, cfg->regs[QPHY_COM_POWER_DOWN_CONTROL],
+			     SW_PWRDN);
+	else
+		qphy_setbits(pcs, QPHY_POWER_DOWN_CONTROL, cfg->pwrdn_ctrl);
+
 	/* Serdes configuration */
 	qcom_qmp_phy_configure(serdes, cfg->regs, cfg->serdes_tbl,
 			       cfg->serdes_tbl_num);
@@ -1090,7 +1215,7 @@ static int qcom_qmp_phy_init(struct phy *phy)
 
 	dev_vdbg(qmp->dev, "Initializing QMP phy\n");
 
-	ret = qcom_qmp_phy_com_init(qmp);
+	ret = qcom_qmp_phy_com_init(qphy);
 	if (ret)
 		return ret;
 
@@ -1112,22 +1237,31 @@ static int qcom_qmp_phy_init(struct phy *phy)
 	/* Tx, Rx, and PCS configurations */
 	qcom_qmp_phy_configure(tx, cfg->regs, cfg->tx_tbl, cfg->tx_tbl_num);
 	/* Configuration for other LANE for USB-DP combo PHY */
-	if (cfg->has_phy_dp_com_ctrl)
+	if (cfg->is_dual_lane_phy)
 		qcom_qmp_phy_configure(tx + cfg->tx_b_lane_offset, cfg->regs,
 				       cfg->tx_tbl, cfg->tx_tbl_num);
 
 	qcom_qmp_phy_configure(rx, cfg->regs, cfg->rx_tbl, cfg->rx_tbl_num);
-	if (cfg->has_phy_dp_com_ctrl)
+	if (cfg->is_dual_lane_phy)
 		qcom_qmp_phy_configure(rx + cfg->rx_b_lane_offset, cfg->regs,
 				       cfg->rx_tbl, cfg->rx_tbl_num);
 
 	qcom_qmp_phy_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
 	/*
+	 * UFS PHY requires the deassert of software reset before serdes start.
+	 * For UFS PHYs that do not have software reset control bits, defer
+	 * starting serdes until the power on callback.
+	 */
+	if ((cfg->type == PHY_TYPE_UFS) && cfg->no_pcs_sw_reset)
+		goto out;
+
+	/*
 	 * Pull out PHY from POWER DOWN state.
 	 * This is active low enable signal to power-down PHY.
 	 */
-	qphy_setbits(pcs, QPHY_POWER_DOWN_CONTROL, cfg->pwrdn_ctrl);
+	if(cfg->type == PHY_TYPE_PCIE)
+		qphy_setbits(pcs, QPHY_POWER_DOWN_CONTROL, cfg->pwrdn_ctrl);
 
 	if (cfg->has_pwrdn_delay)
 		usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max);
@@ -1151,6 +1285,7 @@ static int qcom_qmp_phy_init(struct phy *phy)
 	}
 	qmp->phy_initialized = true;
 
+out:
 	return ret;
 
 err_pcs_ready:
@@ -1173,7 +1308,8 @@ static int qcom_qmp_phy_exit(struct phy *phy)
 	clk_disable_unprepare(qphy->pipe_clk);
 
 	/* PHY reset */
-	qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
+	if (!cfg->no_pcs_sw_reset)
+		qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
 	/* stop SerDes and Phy-Coding-Sublayer */
 	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
@@ -1191,6 +1327,44 @@ static int qcom_qmp_phy_exit(struct phy *phy)
 	return 0;
 }
 
+static int qcom_qmp_phy_poweron(struct phy *phy)
+{
+	struct qmp_phy *qphy = phy_get_drvdata(phy);
+	struct qcom_qmp *qmp = qphy->qmp;
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *pcs = qphy->pcs;
+	void __iomem *status;
+	unsigned int mask, val;
+	int ret = 0;
+
+	if (cfg->type != PHY_TYPE_UFS)
+		return 0;
+
+	/*
+	 * For UFS PHY that has not software reset control, serdes start
+	 * should only happen when UFS driver explicitly calls phy_power_on
+	 * after it deasserts software reset.
+	 */
+	if (cfg->no_pcs_sw_reset && !qmp->phy_initialized &&
+	    (qmp->init_count != 0)) {
+		/* start SerDes and Phy-Coding-Sublayer */
+		qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
+
+		status = pcs + cfg->regs[QPHY_PCS_READY_STATUS];
+		mask = cfg->mask_pcs_ready;
+
+		ret = readl_poll_timeout(status, val, !(val & mask), 1,
+					 PHY_INIT_COMPLETE_TIMEOUT);
+		if (ret) {
+			dev_err(qmp->dev, "phy initialization timed-out\n");
+			return ret;
+		}
+		qmp->phy_initialized = true;
+	}
+
+	return ret;
+}
+
 static int qcom_qmp_phy_set_mode(struct phy *phy, enum phy_mode mode)
 {
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
@@ -1400,7 +1574,7 @@ static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np)
 
 	ret = of_property_read_string(np, "clock-output-names", &init.name);
 	if (ret) {
-		dev_err(qmp->dev, "%s: No clock-output-names\n", np->name);
+		dev_err(qmp->dev, "%pOFn: No clock-output-names\n", np);
 		return ret;
 	}
 
@@ -1420,6 +1594,7 @@ static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np)
 static const struct phy_ops qcom_qmp_phy_gen_ops = {
 	.init		= qcom_qmp_phy_init,
 	.exit		= qcom_qmp_phy_exit,
+	.power_on	= qcom_qmp_phy_poweron,
 	.set_mode	= qcom_qmp_phy_set_mode,
 	.owner		= THIS_MODULE,
 };
@@ -1522,6 +1697,9 @@ static const struct of_device_id qcom_qmp_phy_of_match_table[] = {
 	}, {
 		.compatible = "qcom,sdm845-qmp-usb3-uni-phy",
 		.data = &qmp_v3_usb3_uniphy_cfg,
+	}, {
+		.compatible = "qcom,sdm845-qmp-ufs-phy",
+		.data = &sdm845_ufsphy_cfg,
 	},
 	{ },
 };
@@ -1586,7 +1764,9 @@ static int qcom_qmp_phy_probe(struct platform_device *pdev)
 
 	ret = qcom_qmp_phy_vreg_init(dev);
 	if (ret) {
-		dev_err(dev, "failed to get regulator supplies\n");
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "failed to get regulator supplies: %d\n",
+				ret);
 		return ret;
 	}
 
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h
index 5d78d43ba9fc..d201cc307151 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp.h
+++ b/drivers/phy/qualcomm/phy-qcom-qmp.h
@@ -184,6 +184,8 @@
 #define QSERDES_V3_COM_VCO_TUNE2_MODE0			0x0f8
 #define QSERDES_V3_COM_VCO_TUNE1_MODE1			0x0fc
 #define QSERDES_V3_COM_VCO_TUNE2_MODE1			0x100
+#define QSERDES_V3_COM_VCO_TUNE_INITVAL1		0x104
+#define QSERDES_V3_COM_VCO_TUNE_INITVAL2		0x108
 #define QSERDES_V3_COM_VCO_TUNE_TIMER1			0x11c
 #define QSERDES_V3_COM_VCO_TUNE_TIMER2			0x120
 #define QSERDES_V3_COM_CLK_SELECT			0x138
@@ -211,8 +213,13 @@
 /* Only for QMP V3 PHY - RX registers */
 #define QSERDES_V3_RX_UCDR_SO_GAIN_HALF			0x00c
 #define QSERDES_V3_RX_UCDR_SO_GAIN			0x014
+#define QSERDES_V3_RX_UCDR_SVS_SO_GAIN_HALF		0x024
+#define QSERDES_V3_RX_UCDR_SVS_SO_GAIN_QUARTER		0x028
+#define QSERDES_V3_RX_UCDR_SVS_SO_GAIN			0x02c
 #define QSERDES_V3_RX_UCDR_FASTLOCK_FO_GAIN		0x030
 #define QSERDES_V3_RX_UCDR_SO_SATURATION_AND_ENABLE	0x034
+#define QSERDES_V3_RX_UCDR_FASTLOCK_COUNT_LOW		0x03c
+#define QSERDES_V3_RX_UCDR_PI_CONTROLS			0x044
 #define QSERDES_V3_RX_RX_TERM_BW			0x07c
 #define QSERDES_V3_RX_VGA_CAL_CNTRL1			0x0bc
 #define QSERDES_V3_RX_VGA_CAL_CNTRL2			0x0c0
@@ -239,6 +246,8 @@
 #define QPHY_V3_PCS_TXMGN_V3				0x018
 #define QPHY_V3_PCS_TXMGN_V4				0x01c
 #define QPHY_V3_PCS_TXMGN_LS				0x020
+#define QPHY_V3_PCS_TX_LARGE_AMP_DRV_LVL		0x02c
+#define QPHY_V3_PCS_TX_SMALL_AMP_DRV_LVL		0x034
 #define QPHY_V3_PCS_TXDEEMPH_M6DB_V0			0x024
 #define QPHY_V3_PCS_TXDEEMPH_M3P5DB_V0			0x028
 #define QPHY_V3_PCS_TXDEEMPH_M6DB_V1			0x02c
@@ -275,6 +284,12 @@
 #define QPHY_V3_PCS_FLL_CNT_VAL_L			0x0cc
 #define QPHY_V3_PCS_FLL_CNT_VAL_H_TOL			0x0d0
 #define QPHY_V3_PCS_FLL_MAN_CODE			0x0d4
+#define QPHY_V3_PCS_RX_SYM_RESYNC_CTRL			0x134
+#define QPHY_V3_PCS_RX_MIN_HIBERN8_TIME			0x138
+#define QPHY_V3_PCS_RX_SIGDET_CTRL1			0x13c
+#define QPHY_V3_PCS_RX_SIGDET_CTRL2			0x140
+#define QPHY_V3_PCS_TX_MID_TERM_CTRL1			0x1bc
+#define QPHY_V3_PCS_MULTI_LANE_CTRL1			0x1c4
 #define QPHY_V3_PCS_RX_SIGDET_LVL			0x1d8
 #define QPHY_V3_PCS_REFGEN_REQ_CONFIG1			0x20c
 #define QPHY_V3_PCS_REFGEN_REQ_CONFIG2			0x210
diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c
index e70e425f26f5..9ce531194f8a 100644
--- a/drivers/phy/qualcomm/phy-qcom-qusb2.c
+++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c
@@ -800,7 +800,9 @@ static int qusb2_phy_probe(struct platform_device *pdev)
 
 	ret = devm_regulator_bulk_get(dev, num, qphy->vregs);
 	if (ret) {
-		dev_err(dev, "failed to get regulator supplies\n");
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "failed to get regulator supplies: %d\n",
+				ret);
 		return ret;
 	}
 
diff --git a/drivers/phy/qualcomm/phy-qcom-ufs-i.h b/drivers/phy/qualcomm/phy-qcom-ufs-i.h
index 822c83b8efcd..681644e43248 100644
--- a/drivers/phy/qualcomm/phy-qcom-ufs-i.h
+++ b/drivers/phy/qualcomm/phy-qcom-ufs-i.h
@@ -17,9 +17,9 @@
 
 #include <linux/module.h>
 #include <linux/clk.h>
+#include <linux/phy/phy.h>
 #include <linux/regulator/consumer.h>
 #include <linux/slab.h>
-#include <linux/phy/phy-qcom-ufs.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/delay.h>
diff --git a/drivers/phy/qualcomm/phy-qcom-ufs.c b/drivers/phy/qualcomm/phy-qcom-ufs.c
index c5493ea51282..f2979ccad00a 100644
--- a/drivers/phy/qualcomm/phy-qcom-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-ufs.c
@@ -431,56 +431,6 @@ static void ufs_qcom_phy_disable_ref_clk(struct ufs_qcom_phy *phy)
 	}
 }
 
-#define UFS_REF_CLK_EN	(1 << 5)
-
-static void ufs_qcom_phy_dev_ref_clk_ctrl(struct phy *generic_phy, bool enable)
-{
-	struct ufs_qcom_phy *phy = get_ufs_qcom_phy(generic_phy);
-
-	if (phy->dev_ref_clk_ctrl_mmio &&
-	    (enable ^ phy->is_dev_ref_clk_enabled)) {
-		u32 temp = readl_relaxed(phy->dev_ref_clk_ctrl_mmio);
-
-		if (enable)
-			temp |= UFS_REF_CLK_EN;
-		else
-			temp &= ~UFS_REF_CLK_EN;
-
-		/*
-		 * If we are here to disable this clock immediately after
-		 * entering into hibern8, we need to make sure that device
-		 * ref_clk is active atleast 1us after the hibern8 enter.
-		 */
-		if (!enable)
-			udelay(1);
-
-		writel_relaxed(temp, phy->dev_ref_clk_ctrl_mmio);
-		/* ensure that ref_clk is enabled/disabled before we return */
-		wmb();
-		/*
-		 * If we call hibern8 exit after this, we need to make sure that
-		 * device ref_clk is stable for atleast 1us before the hibern8
-		 * exit command.
-		 */
-		if (enable)
-			udelay(1);
-
-		phy->is_dev_ref_clk_enabled = enable;
-	}
-}
-
-void ufs_qcom_phy_enable_dev_ref_clk(struct phy *generic_phy)
-{
-	ufs_qcom_phy_dev_ref_clk_ctrl(generic_phy, true);
-}
-EXPORT_SYMBOL_GPL(ufs_qcom_phy_enable_dev_ref_clk);
-
-void ufs_qcom_phy_disable_dev_ref_clk(struct phy *generic_phy)
-{
-	ufs_qcom_phy_dev_ref_clk_ctrl(generic_phy, false);
-}
-EXPORT_SYMBOL_GPL(ufs_qcom_phy_disable_dev_ref_clk);
-
 /* Turn ON M-PHY RMMI interface clocks */
 static int ufs_qcom_phy_enable_iface_clk(struct ufs_qcom_phy *phy)
 {
diff --git a/drivers/phy/renesas/Kconfig b/drivers/phy/renesas/Kconfig
index 4bd390c79d21..e340a925bbb1 100644
--- a/drivers/phy/renesas/Kconfig
+++ b/drivers/phy/renesas/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Phy drivers for Renesas platforms
 #
diff --git a/drivers/phy/renesas/Makefile b/drivers/phy/renesas/Makefile
index 4b76fc439ed6..b599ff8a4349 100644
--- a/drivers/phy/renesas/Makefile
+++ b/drivers/phy/renesas/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_PHY_RCAR_GEN2)		+= phy-rcar-gen2.o
 obj-$(CONFIG_PHY_RCAR_GEN3_PCIE)	+= phy-rcar-gen3-pcie.o
 obj-$(CONFIG_PHY_RCAR_GEN3_USB2)	+= phy-rcar-gen3-usb2.o
diff --git a/drivers/phy/renesas/phy-rcar-gen2.c b/drivers/phy/renesas/phy-rcar-gen2.c
index 97d4dd6ea924..72eeb066912d 100644
--- a/drivers/phy/renesas/phy-rcar-gen2.c
+++ b/drivers/phy/renesas/phy-rcar-gen2.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Renesas R-Car Gen2 PHY driver
  *
  * Copyright (C) 2014 Renesas Solutions Corp.
  * Copyright (C) 2014 Cogent Embedded, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c
index fb8f05e39cf7..d0f412c25981 100644
--- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c
+++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Renesas R-Car Gen3 for USB2.0 PHY driver
  *
@@ -6,10 +7,6 @@
  * This is based on the phy-rcar-gen2 driver:
  * Copyright (C) 2014 Renesas Solutions Corp.
  * Copyright (C) 2014 Cogent Embedded, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/extcon-provider.h>
@@ -81,18 +78,29 @@
 #define USB2_ADPCTRL_IDPULLUP		BIT(5)	/* 1 = ID sampling is enabled */
 #define USB2_ADPCTRL_DRVVBUS		BIT(4)
 
-#define RCAR_GEN3_PHY_HAS_DEDICATED_PINS	1
-
 struct rcar_gen3_chan {
 	void __iomem *base;
 	struct extcon_dev *extcon;
 	struct phy *phy;
 	struct regulator *vbus;
 	struct work_struct work;
+	enum usb_dr_mode dr_mode;
 	bool extcon_host;
-	bool has_otg_pins;
+	bool is_otg_channel;
+	bool uses_otg_pins;
 };
 
+/*
+ * Combination about is_otg_channel and uses_otg_pins:
+ *
+ * Parameters				|| Behaviors
+ * is_otg_channel	| uses_otg_pins	|| irqs		| role sysfs
+ * ---------------------+---------------++--------------+------------
+ * true			| true		|| enabled	| enabled
+ * true                 | false		|| disabled	| enabled
+ * false                | any		|| disabled	| disabled
+ */
+
 static void rcar_gen3_phy_usb2_work(struct work_struct *work)
 {
 	struct rcar_gen3_chan *ch = container_of(work, struct rcar_gen3_chan,
@@ -147,6 +155,18 @@ static void rcar_gen3_enable_vbus_ctrl(struct rcar_gen3_chan *ch, int vbus)
 	writel(val, usb2_base + USB2_ADPCTRL);
 }
 
+static void rcar_gen3_control_otg_irq(struct rcar_gen3_chan *ch, int enable)
+{
+	void __iomem *usb2_base = ch->base;
+	u32 val = readl(usb2_base + USB2_OBINTEN);
+
+	if (ch->uses_otg_pins && enable)
+		val |= USB2_OBINT_BITS;
+	else
+		val &= ~USB2_OBINT_BITS;
+	writel(val, usb2_base + USB2_OBINTEN);
+}
+
 static void rcar_gen3_init_for_host(struct rcar_gen3_chan *ch)
 {
 	rcar_gen3_set_linectrl(ch, 1, 1);
@@ -192,20 +212,19 @@ static void rcar_gen3_init_for_a_peri(struct rcar_gen3_chan *ch)
 
 static void rcar_gen3_init_from_a_peri_to_a_host(struct rcar_gen3_chan *ch)
 {
-	void __iomem *usb2_base = ch->base;
-	u32 val;
+	rcar_gen3_control_otg_irq(ch, 0);
 
-	val = readl(usb2_base + USB2_OBINTEN);
-	writel(val & ~USB2_OBINT_BITS, usb2_base + USB2_OBINTEN);
-
-	rcar_gen3_enable_vbus_ctrl(ch, 0);
+	rcar_gen3_enable_vbus_ctrl(ch, 1);
 	rcar_gen3_init_for_host(ch);
 
-	writel(val | USB2_OBINT_BITS, usb2_base + USB2_OBINTEN);
+	rcar_gen3_control_otg_irq(ch, 1);
 }
 
 static bool rcar_gen3_check_id(struct rcar_gen3_chan *ch)
 {
+	if (!ch->uses_otg_pins)
+		return (ch->dr_mode == USB_DR_MODE_HOST) ? false : true;
+
 	return !!(readl(ch->base + USB2_ADPCTRL) & USB2_ADPCTRL_IDDIG);
 }
 
@@ -237,7 +256,7 @@ static ssize_t role_store(struct device *dev, struct device_attribute *attr,
 	bool is_b_device;
 	enum phy_mode cur_mode, new_mode;
 
-	if (!ch->has_otg_pins || !ch->phy->init_count)
+	if (!ch->is_otg_channel || !ch->phy->init_count)
 		return -EIO;
 
 	if (!strncmp(buf, "host", strlen("host")))
@@ -275,7 +294,7 @@ static ssize_t role_show(struct device *dev, struct device_attribute *attr,
 {
 	struct rcar_gen3_chan *ch = dev_get_drvdata(dev);
 
-	if (!ch->has_otg_pins || !ch->phy->init_count)
+	if (!ch->is_otg_channel || !ch->phy->init_count)
 		return -EIO;
 
 	return sprintf(buf, "%s\n", rcar_gen3_is_host(ch) ? "host" :
@@ -291,8 +310,7 @@ static void rcar_gen3_init_otg(struct rcar_gen3_chan *ch)
 	val = readl(usb2_base + USB2_VBCTRL);
 	writel(val | USB2_VBCTRL_DRVVBUSSEL, usb2_base + USB2_VBCTRL);
 	writel(USB2_OBINT_BITS, usb2_base + USB2_OBINTSTA);
-	val = readl(usb2_base + USB2_OBINTEN);
-	writel(val | USB2_OBINT_BITS, usb2_base + USB2_OBINTEN);
+	rcar_gen3_control_otg_irq(ch, 1);
 	val = readl(usb2_base + USB2_ADPCTRL);
 	writel(val | USB2_ADPCTRL_IDPULLUP, usb2_base + USB2_ADPCTRL);
 	val = readl(usb2_base + USB2_LINECTRL1);
@@ -314,7 +332,7 @@ static int rcar_gen3_phy_usb2_init(struct phy *p)
 	writel(USB2_OC_TIMSET_INIT, usb2_base + USB2_OC_TIMSET);
 
 	/* Initialize otg part */
-	if (channel->has_otg_pins)
+	if (channel->is_otg_channel)
 		rcar_gen3_init_otg(channel);
 
 	return 0;
@@ -388,21 +406,10 @@ static irqreturn_t rcar_gen3_phy_usb2_irq(int irq, void *_ch)
 }
 
 static const struct of_device_id rcar_gen3_phy_usb2_match_table[] = {
-	{
-		.compatible = "renesas,usb2-phy-r8a7795",
-		.data = (void *)RCAR_GEN3_PHY_HAS_DEDICATED_PINS,
-	},
-	{
-		.compatible = "renesas,usb2-phy-r8a7796",
-		.data = (void *)RCAR_GEN3_PHY_HAS_DEDICATED_PINS,
-	},
-	{
-		.compatible = "renesas,usb2-phy-r8a77965",
-		.data = (void *)RCAR_GEN3_PHY_HAS_DEDICATED_PINS,
-	},
-	{
-		.compatible = "renesas,rcar-gen3-usb2-phy",
-	},
+	{ .compatible = "renesas,usb2-phy-r8a7795" },
+	{ .compatible = "renesas,usb2-phy-r8a7796" },
+	{ .compatible = "renesas,usb2-phy-r8a77965" },
+	{ .compatible = "renesas,rcar-gen3-usb2-phy" },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, rcar_gen3_phy_usb2_match_table);
@@ -445,10 +452,13 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
 			dev_err(dev, "No irq handler (%d)\n", irq);
 	}
 
-	if (of_usb_get_dr_mode_by_phy(dev->of_node, 0) == USB_DR_MODE_OTG) {
+	channel->dr_mode = of_usb_get_dr_mode_by_phy(dev->of_node, 0);
+	if (channel->dr_mode != USB_DR_MODE_UNKNOWN) {
 		int ret;
 
-		channel->has_otg_pins = (uintptr_t)of_device_get_match_data(dev);
+		channel->is_otg_channel = true;
+		channel->uses_otg_pins = !of_property_read_bool(dev->of_node,
+							"renesas,no-otg-pins");
 		channel->extcon = devm_extcon_dev_allocate(dev,
 							rcar_gen3_phy_cable);
 		if (IS_ERR(channel->extcon))
@@ -490,7 +500,7 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
 		dev_err(dev, "Failed to register PHY provider\n");
 		ret = PTR_ERR(provider);
 		goto error;
-	} else if (channel->has_otg_pins) {
+	} else if (channel->is_otg_channel) {
 		int ret;
 
 		ret = device_create_file(dev, &dev_attr_role);
@@ -510,7 +520,7 @@ static int rcar_gen3_phy_usb2_remove(struct platform_device *pdev)
 {
 	struct rcar_gen3_chan *channel = platform_get_drvdata(pdev);
 
-	if (channel->has_otg_pins)
+	if (channel->is_otg_channel)
 		device_remove_file(&pdev->dev, &dev_attr_role);
 
 	pm_runtime_disable(&pdev->dev);
diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb3.c b/drivers/phy/renesas/phy-rcar-gen3-usb3.c
index 88c83c9b8ff9..566b4cf4ff38 100644
--- a/drivers/phy/renesas/phy-rcar-gen3-usb3.c
+++ b/drivers/phy/renesas/phy-rcar-gen3-usb3.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Renesas R-Car Gen3 for USB3.0 PHY driver
  *
  * Copyright (C) 2017 Renesas Electronics Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
diff --git a/drivers/phy/rockchip/Kconfig b/drivers/phy/rockchip/Kconfig
index 0e15119ddfc6..990204a46eb6 100644
--- a/drivers/phy/rockchip/Kconfig
+++ b/drivers/phy/rockchip/Kconfig
@@ -15,6 +15,14 @@ config PHY_ROCKCHIP_EMMC
 	help
 	  Enable this to support the Rockchip EMMC PHY.
 
+config PHY_ROCKCHIP_INNO_HDMI
+	tristate "Rockchip INNO HDMI PHY Driver"
+	depends on (ARCH_ROCKCHIP || COMPILE_TEST) && OF
+	depends on COMMON_CLK
+	select GENERIC_PHY
+	help
+	  Enable this to support the Rockchip Innosilicon HDMI PHY.
+
 config PHY_ROCKCHIP_INNO_USB2
 	tristate "Rockchip INNO USB2PHY Driver"
 	depends on (ARCH_ROCKCHIP || COMPILE_TEST) && OF
diff --git a/drivers/phy/rockchip/Makefile b/drivers/phy/rockchip/Makefile
index 7f149d989046..fd21cbaf40dd 100644
--- a/drivers/phy/rockchip/Makefile
+++ b/drivers/phy/rockchip/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_PHY_ROCKCHIP_DP)		+= phy-rockchip-dp.o
 obj-$(CONFIG_PHY_ROCKCHIP_EMMC)		+= phy-rockchip-emmc.o
+obj-$(CONFIG_PHY_ROCKCHIP_INNO_HDMI)	+= phy-rockchip-inno-hdmi.o
 obj-$(CONFIG_PHY_ROCKCHIP_INNO_USB2)	+= phy-rockchip-inno-usb2.o
 obj-$(CONFIG_PHY_ROCKCHIP_PCIE)		+= phy-rockchip-pcie.o
 obj-$(CONFIG_PHY_ROCKCHIP_TYPEC)	+= phy-rockchip-typec.o
diff --git a/drivers/phy/rockchip/phy-rockchip-emmc.c b/drivers/phy/rockchip/phy-rockchip-emmc.c
index b237360f95f6..19bf84f0bc67 100644
--- a/drivers/phy/rockchip/phy-rockchip-emmc.c
+++ b/drivers/phy/rockchip/phy-rockchip-emmc.c
@@ -337,8 +337,8 @@ static int rockchip_emmc_phy_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	if (of_property_read_u32(dev->of_node, "reg", &reg_offset)) {
-		dev_err(dev, "missing reg property in node %s\n",
-			dev->of_node->name);
+		dev_err(dev, "missing reg property in node %pOFn\n",
+			dev->of_node);
 		return -EINVAL;
 	}
 
diff --git a/drivers/phy/rockchip/phy-rockchip-inno-hdmi.c b/drivers/phy/rockchip/phy-rockchip-inno-hdmi.c
new file mode 100644
index 000000000000..b10a84cab4a7
--- /dev/null
+++ b/drivers/phy/rockchip/phy-rockchip-inno-hdmi.c
@@ -0,0 +1,1277 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2017 Rockchip Electronics Co. Ltd.
+ *
+ * Author: Zheng Yang <zhengyang@rock-chips.com>
+ *         Heiko Stuebner <heiko@sntech.de>
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/nvmem-consumer.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/phy/phy.h>
+#include <linux/slab.h>
+
+#define UPDATE(x, h, l)		(((x) << (l)) & GENMASK((h), (l)))
+
+/* REG: 0x00 */
+#define RK3228_PRE_PLL_REFCLK_SEL_PCLK			BIT(0)
+/* REG: 0x01 */
+#define RK3228_BYPASS_RXSENSE_EN			BIT(2)
+#define RK3228_BYPASS_PWRON_EN				BIT(1)
+#define RK3228_BYPASS_PLLPD_EN				BIT(0)
+/* REG: 0x02 */
+#define RK3228_BYPASS_PDATA_EN				BIT(4)
+#define RK3228_PDATAEN_DISABLE				BIT(0)
+/* REG: 0x03 */
+#define RK3228_BYPASS_AUTO_TERM_RES_CAL			BIT(7)
+#define RK3228_AUTO_TERM_RES_CAL_SPEED_14_8(x)		UPDATE(x, 6, 0)
+/* REG: 0x04 */
+#define RK3228_AUTO_TERM_RES_CAL_SPEED_7_0(x)		UPDATE(x, 7, 0)
+/* REG: 0xaa */
+#define RK3228_POST_PLL_CTRL_MANUAL			BIT(0)
+/* REG: 0xe0 */
+#define RK3228_POST_PLL_POWER_DOWN			BIT(5)
+#define RK3228_PRE_PLL_POWER_DOWN			BIT(4)
+#define RK3228_RXSENSE_CLK_CH_ENABLE			BIT(3)
+#define RK3228_RXSENSE_DATA_CH2_ENABLE			BIT(2)
+#define RK3228_RXSENSE_DATA_CH1_ENABLE			BIT(1)
+#define RK3228_RXSENSE_DATA_CH0_ENABLE			BIT(0)
+/* REG: 0xe1 */
+#define RK3228_BANDGAP_ENABLE				BIT(4)
+#define RK3228_TMDS_DRIVER_ENABLE			GENMASK(3, 0)
+/* REG: 0xe2 */
+#define RK3228_PRE_PLL_FB_DIV_8_MASK			BIT(7)
+#define RK3228_PRE_PLL_FB_DIV_8(x)			UPDATE((x) >> 8, 7, 7)
+#define RK3228_PCLK_VCO_DIV_5_MASK			BIT(5)
+#define RK3228_PCLK_VCO_DIV_5(x)			UPDATE(x, 5, 5)
+#define RK3228_PRE_PLL_PRE_DIV_MASK			GENMASK(4, 0)
+#define RK3228_PRE_PLL_PRE_DIV(x)			UPDATE(x, 4, 0)
+/* REG: 0xe3 */
+#define RK3228_PRE_PLL_FB_DIV_7_0(x)			UPDATE(x, 7, 0)
+/* REG: 0xe4 */
+#define RK3228_PRE_PLL_PCLK_DIV_B_MASK			GENMASK(6, 5)
+#define RK3228_PRE_PLL_PCLK_DIV_B_SHIFT			5
+#define RK3228_PRE_PLL_PCLK_DIV_B(x)			UPDATE(x, 6, 5)
+#define RK3228_PRE_PLL_PCLK_DIV_A_MASK			GENMASK(4, 0)
+#define RK3228_PRE_PLL_PCLK_DIV_A(x)			UPDATE(x, 4, 0)
+/* REG: 0xe5 */
+#define RK3228_PRE_PLL_PCLK_DIV_C_MASK			GENMASK(6, 5)
+#define RK3228_PRE_PLL_PCLK_DIV_C(x)			UPDATE(x, 6, 5)
+#define RK3228_PRE_PLL_PCLK_DIV_D_MASK			GENMASK(4, 0)
+#define RK3228_PRE_PLL_PCLK_DIV_D(x)			UPDATE(x, 4, 0)
+/* REG: 0xe6 */
+#define RK3228_PRE_PLL_TMDSCLK_DIV_C_MASK		GENMASK(5, 4)
+#define RK3228_PRE_PLL_TMDSCLK_DIV_C(x)			UPDATE(x, 5, 4)
+#define RK3228_PRE_PLL_TMDSCLK_DIV_A_MASK		GENMASK(3, 2)
+#define RK3228_PRE_PLL_TMDSCLK_DIV_A(x)			UPDATE(x, 3, 2)
+#define RK3228_PRE_PLL_TMDSCLK_DIV_B_MASK		GENMASK(1, 0)
+#define RK3228_PRE_PLL_TMDSCLK_DIV_B(x)			UPDATE(x, 1, 0)
+/* REG: 0xe8 */
+#define RK3228_PRE_PLL_LOCK_STATUS			BIT(0)
+/* REG: 0xe9 */
+#define RK3228_POST_PLL_POST_DIV_ENABLE			UPDATE(3, 7, 6)
+#define RK3228_POST_PLL_PRE_DIV_MASK			GENMASK(4, 0)
+#define RK3228_POST_PLL_PRE_DIV(x)			UPDATE(x, 4, 0)
+/* REG: 0xea */
+#define RK3228_POST_PLL_FB_DIV_7_0(x)			UPDATE(x, 7, 0)
+/* REG: 0xeb */
+#define RK3228_POST_PLL_FB_DIV_8_MASK			BIT(7)
+#define RK3228_POST_PLL_FB_DIV_8(x)			UPDATE((x) >> 8, 7, 7)
+#define RK3228_POST_PLL_POST_DIV_MASK			GENMASK(5, 4)
+#define RK3228_POST_PLL_POST_DIV(x)			UPDATE(x, 5, 4)
+#define RK3228_POST_PLL_LOCK_STATUS			BIT(0)
+/* REG: 0xee */
+#define RK3228_TMDS_CH_TA_ENABLE			GENMASK(7, 4)
+/* REG: 0xef */
+#define RK3228_TMDS_CLK_CH_TA(x)			UPDATE(x, 7, 6)
+#define RK3228_TMDS_DATA_CH2_TA(x)			UPDATE(x, 5, 4)
+#define RK3228_TMDS_DATA_CH1_TA(x)			UPDATE(x, 3, 2)
+#define RK3228_TMDS_DATA_CH0_TA(x)			UPDATE(x, 1, 0)
+/* REG: 0xf0 */
+#define RK3228_TMDS_DATA_CH2_PRE_EMPHASIS_MASK		GENMASK(5, 4)
+#define RK3228_TMDS_DATA_CH2_PRE_EMPHASIS(x)		UPDATE(x, 5, 4)
+#define RK3228_TMDS_DATA_CH1_PRE_EMPHASIS_MASK		GENMASK(3, 2)
+#define RK3228_TMDS_DATA_CH1_PRE_EMPHASIS(x)		UPDATE(x, 3, 2)
+#define RK3228_TMDS_DATA_CH0_PRE_EMPHASIS_MASK		GENMASK(1, 0)
+#define RK3228_TMDS_DATA_CH0_PRE_EMPHASIS(x)		UPDATE(x, 1, 0)
+/* REG: 0xf1 */
+#define RK3228_TMDS_CLK_CH_OUTPUT_SWING(x)		UPDATE(x, 7, 4)
+#define RK3228_TMDS_DATA_CH2_OUTPUT_SWING(x)		UPDATE(x, 3, 0)
+/* REG: 0xf2 */
+#define RK3228_TMDS_DATA_CH1_OUTPUT_SWING(x)		UPDATE(x, 7, 4)
+#define RK3228_TMDS_DATA_CH0_OUTPUT_SWING(x)		UPDATE(x, 3, 0)
+
+/* REG: 0x01 */
+#define RK3328_BYPASS_RXSENSE_EN			BIT(2)
+#define RK3328_BYPASS_POWERON_EN			BIT(1)
+#define RK3328_BYPASS_PLLPD_EN				BIT(0)
+/* REG: 0x02 */
+#define RK3328_INT_POL_HIGH				BIT(7)
+#define RK3328_BYPASS_PDATA_EN				BIT(4)
+#define RK3328_PDATA_EN					BIT(0)
+/* REG:0x05 */
+#define RK3328_INT_TMDS_CLK(x)				UPDATE(x, 7, 4)
+#define RK3328_INT_TMDS_D2(x)				UPDATE(x, 3, 0)
+/* REG:0x07 */
+#define RK3328_INT_TMDS_D1(x)				UPDATE(x, 7, 4)
+#define RK3328_INT_TMDS_D0(x)				UPDATE(x, 3, 0)
+/* for all RK3328_INT_TMDS_*, ESD_DET as defined in 0xc8-0xcb */
+#define RK3328_INT_AGND_LOW_PULSE_LOCKED		BIT(3)
+#define RK3328_INT_RXSENSE_LOW_PULSE_LOCKED		BIT(2)
+#define RK3328_INT_VSS_AGND_ESD_DET			BIT(1)
+#define RK3328_INT_AGND_VSS_ESD_DET			BIT(0)
+/* REG: 0xa0 */
+#define RK3328_PCLK_VCO_DIV_5_MASK			BIT(1)
+#define RK3328_PCLK_VCO_DIV_5(x)			UPDATE(x, 1, 1)
+#define RK3328_PRE_PLL_POWER_DOWN			BIT(0)
+/* REG: 0xa1 */
+#define RK3328_PRE_PLL_PRE_DIV_MASK			GENMASK(5, 0)
+#define RK3328_PRE_PLL_PRE_DIV(x)			UPDATE(x, 5, 0)
+/* REG: 0xa2 */
+/* unset means center spread */
+#define RK3328_SPREAD_SPECTRUM_MOD_DOWN			BIT(7)
+#define RK3328_SPREAD_SPECTRUM_MOD_DISABLE		BIT(6)
+#define RK3328_PRE_PLL_FRAC_DIV_DISABLE			UPDATE(3, 5, 4)
+#define RK3328_PRE_PLL_FB_DIV_11_8_MASK			GENMASK(3, 0)
+#define RK3328_PRE_PLL_FB_DIV_11_8(x)			UPDATE((x) >> 8, 3, 0)
+/* REG: 0xa3 */
+#define RK3328_PRE_PLL_FB_DIV_7_0(x)			UPDATE(x, 7, 0)
+/* REG: 0xa4*/
+#define RK3328_PRE_PLL_TMDSCLK_DIV_C_MASK		GENMASK(1, 0)
+#define RK3328_PRE_PLL_TMDSCLK_DIV_C(x)			UPDATE(x, 1, 0)
+#define RK3328_PRE_PLL_TMDSCLK_DIV_B_MASK		GENMASK(3, 2)
+#define RK3328_PRE_PLL_TMDSCLK_DIV_B(x)			UPDATE(x, 3, 2)
+#define RK3328_PRE_PLL_TMDSCLK_DIV_A_MASK		GENMASK(5, 4)
+#define RK3328_PRE_PLL_TMDSCLK_DIV_A(x)			UPDATE(x, 5, 4)
+/* REG: 0xa5 */
+#define RK3328_PRE_PLL_PCLK_DIV_B_SHIFT			5
+#define RK3328_PRE_PLL_PCLK_DIV_B_MASK			GENMASK(6, 5)
+#define RK3328_PRE_PLL_PCLK_DIV_B(x)			UPDATE(x, 6, 5)
+#define RK3328_PRE_PLL_PCLK_DIV_A_MASK			GENMASK(4, 0)
+#define RK3328_PRE_PLL_PCLK_DIV_A(x)			UPDATE(x, 4, 0)
+/* REG: 0xa6 */
+#define RK3328_PRE_PLL_PCLK_DIV_C_SHIFT			5
+#define RK3328_PRE_PLL_PCLK_DIV_C_MASK			GENMASK(6, 5)
+#define RK3328_PRE_PLL_PCLK_DIV_C(x)			UPDATE(x, 6, 5)
+#define RK3328_PRE_PLL_PCLK_DIV_D_MASK			GENMASK(4, 0)
+#define RK3328_PRE_PLL_PCLK_DIV_D(x)			UPDATE(x, 4, 0)
+/* REG: 0xa9 */
+#define RK3328_PRE_PLL_LOCK_STATUS			BIT(0)
+/* REG: 0xaa */
+#define RK3328_POST_PLL_POST_DIV_ENABLE			GENMASK(3, 2)
+#define RK3328_POST_PLL_REFCLK_SEL_TMDS			BIT(1)
+#define RK3328_POST_PLL_POWER_DOWN			BIT(0)
+/* REG:0xab */
+#define RK3328_POST_PLL_FB_DIV_8(x)			UPDATE((x) >> 8, 7, 7)
+#define RK3328_POST_PLL_PRE_DIV(x)			UPDATE(x, 4, 0)
+/* REG: 0xac */
+#define RK3328_POST_PLL_FB_DIV_7_0(x)			UPDATE(x, 7, 0)
+/* REG: 0xad */
+#define RK3328_POST_PLL_POST_DIV_MASK			GENMASK(1, 0)
+#define RK3328_POST_PLL_POST_DIV_2			0x0
+#define RK3328_POST_PLL_POST_DIV_4			0x1
+#define RK3328_POST_PLL_POST_DIV_8			0x3
+/* REG: 0xaf */
+#define RK3328_POST_PLL_LOCK_STATUS			BIT(0)
+/* REG: 0xb0 */
+#define RK3328_BANDGAP_ENABLE				BIT(2)
+/* REG: 0xb2 */
+#define RK3328_TMDS_CLK_DRIVER_EN			BIT(3)
+#define RK3328_TMDS_D2_DRIVER_EN			BIT(2)
+#define RK3328_TMDS_D1_DRIVER_EN			BIT(1)
+#define RK3328_TMDS_D0_DRIVER_EN			BIT(0)
+#define RK3328_TMDS_DRIVER_ENABLE		(RK3328_TMDS_CLK_DRIVER_EN | \
+						RK3328_TMDS_D2_DRIVER_EN | \
+						RK3328_TMDS_D1_DRIVER_EN | \
+						RK3328_TMDS_D0_DRIVER_EN)
+/* REG:0xc5 */
+#define RK3328_BYPASS_TERM_RESISTOR_CALIB		BIT(7)
+#define RK3328_TERM_RESISTOR_CALIB_SPEED_14_8(x)	UPDATE((x) >> 8, 6, 0)
+/* REG:0xc6 */
+#define RK3328_TERM_RESISTOR_CALIB_SPEED_7_0(x)		UPDATE(x, 7, 9)
+/* REG:0xc7 */
+#define RK3328_TERM_RESISTOR_50				UPDATE(0, 2, 1)
+#define RK3328_TERM_RESISTOR_62_5			UPDATE(1, 2, 1)
+#define RK3328_TERM_RESISTOR_75				UPDATE(2, 2, 1)
+#define RK3328_TERM_RESISTOR_100			UPDATE(3, 2, 1)
+/* REG 0xc8 - 0xcb */
+#define RK3328_ESD_DETECT_MASK				GENMASK(7, 6)
+#define RK3328_ESD_DETECT_340MV				(0x0 << 6)
+#define RK3328_ESD_DETECT_280MV				(0x1 << 6)
+#define RK3328_ESD_DETECT_260MV				(0x2 << 6)
+#define RK3328_ESD_DETECT_240MV				(0x3 << 6)
+/* resistors can be used in parallel */
+#define RK3328_TMDS_TERM_RESIST_MASK			GENMASK(5, 0)
+#define RK3328_TMDS_TERM_RESIST_75			BIT(5)
+#define RK3328_TMDS_TERM_RESIST_150			BIT(4)
+#define RK3328_TMDS_TERM_RESIST_300			BIT(3)
+#define RK3328_TMDS_TERM_RESIST_600			BIT(2)
+#define RK3328_TMDS_TERM_RESIST_1000			BIT(1)
+#define RK3328_TMDS_TERM_RESIST_2000			BIT(0)
+/* REG: 0xd1 */
+#define RK3328_PRE_PLL_FRAC_DIV_23_16(x)		UPDATE((x) >> 16, 7, 0)
+/* REG: 0xd2 */
+#define RK3328_PRE_PLL_FRAC_DIV_15_8(x)			UPDATE((x) >> 8, 7, 0)
+/* REG: 0xd3 */
+#define RK3328_PRE_PLL_FRAC_DIV_7_0(x)			UPDATE(x, 7, 0)
+
+struct inno_hdmi_phy_drv_data;
+
+struct inno_hdmi_phy {
+	struct device *dev;
+	struct regmap *regmap;
+	int irq;
+
+	struct phy *phy;
+	struct clk *sysclk;
+	struct clk *refoclk;
+	struct clk *refpclk;
+
+	/* platform data */
+	const struct inno_hdmi_phy_drv_data *plat_data;
+	int chip_version;
+
+	/* clk provider */
+	struct clk_hw hw;
+	struct clk *phyclk;
+	unsigned long pixclock;
+};
+
+struct pre_pll_config {
+	unsigned long pixclock;
+	unsigned long tmdsclock;
+	u8 prediv;
+	u16 fbdiv;
+	u8 tmds_div_a;
+	u8 tmds_div_b;
+	u8 tmds_div_c;
+	u8 pclk_div_a;
+	u8 pclk_div_b;
+	u8 pclk_div_c;
+	u8 pclk_div_d;
+	u8 vco_div_5_en;
+	u32 fracdiv;
+};
+
+struct post_pll_config {
+	unsigned long tmdsclock;
+	u8 prediv;
+	u16 fbdiv;
+	u8 postdiv;
+	u8 version;
+};
+
+struct phy_config {
+	unsigned long	tmdsclock;
+	u8		regs[14];
+};
+
+struct inno_hdmi_phy_ops {
+	int (*init)(struct inno_hdmi_phy *inno);
+	int (*power_on)(struct inno_hdmi_phy *inno,
+			const struct post_pll_config *cfg,
+			const struct phy_config *phy_cfg);
+	void (*power_off)(struct inno_hdmi_phy *inno);
+};
+
+struct inno_hdmi_phy_drv_data {
+	const struct inno_hdmi_phy_ops	*ops;
+	const struct clk_ops		*clk_ops;
+	const struct phy_config		*phy_cfg_table;
+};
+
+static const struct pre_pll_config pre_pll_cfg_table[] = {
+	{ 27000000,  27000000, 1,  90, 3, 2, 2, 10, 3, 3, 4, 0, 0},
+	{ 27000000,  33750000, 1,  90, 1, 3, 3, 10, 3, 3, 4, 0, 0},
+	{ 40000000,  40000000, 1,  80, 2, 2, 2, 12, 2, 2, 2, 0, 0},
+	{ 59341000,  59341000, 1,  98, 3, 1, 2,  1, 3, 3, 4, 0, 0xE6AE6B},
+	{ 59400000,  59400000, 1,  99, 3, 1, 1,  1, 3, 3, 4, 0, 0},
+	{ 59341000,  74176250, 1,  98, 0, 3, 3,  1, 3, 3, 4, 0, 0xE6AE6B},
+	{ 59400000,  74250000, 1,  99, 1, 2, 2,  1, 3, 3, 4, 0, 0},
+	{ 74176000,  74176000, 1,  98, 1, 2, 2,  1, 2, 3, 4, 0, 0xE6AE6B},
+	{ 74250000,  74250000, 1,  99, 1, 2, 2,  1, 2, 3, 4, 0, 0},
+	{ 74176000,  92720000, 4, 494, 1, 2, 2,  1, 3, 3, 4, 0, 0x816817},
+	{ 74250000,  92812500, 4, 495, 1, 2, 2,  1, 3, 3, 4, 0, 0},
+	{148352000, 148352000, 1,  98, 1, 1, 1,  1, 2, 2, 2, 0, 0xE6AE6B},
+	{148500000, 148500000, 1,  99, 1, 1, 1,  1, 2, 2, 2, 0, 0},
+	{148352000, 185440000, 4, 494, 0, 2, 2,  1, 3, 2, 2, 0, 0x816817},
+	{148500000, 185625000, 4, 495, 0, 2, 2,  1, 3, 2, 2, 0, 0},
+	{296703000, 296703000, 1,  98, 0, 1, 1,  1, 0, 2, 2, 0, 0xE6AE6B},
+	{297000000, 297000000, 1,  99, 0, 1, 1,  1, 0, 2, 2, 0, 0},
+	{296703000, 370878750, 4, 494, 1, 2, 0,  1, 3, 1, 1, 0, 0x816817},
+	{297000000, 371250000, 4, 495, 1, 2, 0,  1, 3, 1, 1, 0, 0},
+	{593407000, 296703500, 1,  98, 0, 1, 1,  1, 0, 2, 1, 0, 0xE6AE6B},
+	{594000000, 297000000, 1,  99, 0, 1, 1,  1, 0, 2, 1, 0, 0},
+	{593407000, 370879375, 4, 494, 1, 2, 0,  1, 3, 1, 1, 1, 0x816817},
+	{594000000, 371250000, 4, 495, 1, 2, 0,  1, 3, 1, 1, 1, 0},
+	{593407000, 593407000, 1,  98, 0, 2, 0,  1, 0, 1, 1, 0, 0xE6AE6B},
+	{594000000, 594000000, 1,  99, 0, 2, 0,  1, 0, 1, 1, 0, 0},
+	{ /* sentinel */ }
+};
+
+static const struct post_pll_config post_pll_cfg_table[] = {
+	{33750000,  1, 40, 8, 1},
+	{33750000,  1, 80, 8, 2},
+	{74250000,  1, 40, 8, 1},
+	{74250000, 18, 80, 8, 2},
+	{148500000, 2, 40, 4, 3},
+	{297000000, 4, 40, 2, 3},
+	{594000000, 8, 40, 1, 3},
+	{ /* sentinel */ }
+};
+
+/* phy tuning values for an undocumented set of registers */
+static const struct phy_config rk3228_phy_cfg[] = {
+	{	165000000, {
+			0xaa, 0x00, 0x44, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00,
+		},
+	}, {
+		340000000, {
+			0xaa, 0x15, 0x6a, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00,
+		},
+	}, {
+		594000000, {
+			0xaa, 0x15, 0x7a, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00,
+		},
+	}, { /* sentinel */ },
+};
+
+/* phy tuning values for an undocumented set of registers */
+static const struct phy_config rk3328_phy_cfg[] = {
+	{	165000000, {
+			0x07, 0x0a, 0x0a, 0x0a, 0x00, 0x00, 0x08, 0x08, 0x08,
+			0x00, 0xac, 0xcc, 0xcc, 0xcc,
+		},
+	}, {
+		340000000, {
+			0x0b, 0x0d, 0x0d, 0x0d, 0x07, 0x15, 0x08, 0x08, 0x08,
+			0x3f, 0xac, 0xcc, 0xcd, 0xdd,
+		},
+	}, {
+		594000000, {
+			0x10, 0x1a, 0x1a, 0x1a, 0x07, 0x15, 0x08, 0x08, 0x08,
+			0x00, 0xac, 0xcc, 0xcc, 0xcc,
+		},
+	}, { /* sentinel */ },
+};
+
+static inline struct inno_hdmi_phy *to_inno_hdmi_phy(struct clk_hw *hw)
+{
+	return container_of(hw, struct inno_hdmi_phy, hw);
+}
+
+/*
+ * The register description of the IP block does not use any distinct names
+ * but instead the databook simply numbers the registers in one-increments.
+ * As the registers are obviously 32bit sized, the inno_* functions
+ * translate the databook register names to the actual registers addresses.
+ */
+static inline void inno_write(struct inno_hdmi_phy *inno, u32 reg, u8 val)
+{
+	regmap_write(inno->regmap, reg * 4, val);
+}
+
+static inline u8 inno_read(struct inno_hdmi_phy *inno, u32 reg)
+{
+	u32 val;
+
+	regmap_read(inno->regmap, reg * 4, &val);
+
+	return val;
+}
+
+static inline void inno_update_bits(struct inno_hdmi_phy *inno, u8 reg,
+				    u8 mask, u8 val)
+{
+	regmap_update_bits(inno->regmap, reg * 4, mask, val);
+}
+
+#define inno_poll(inno, reg, val, cond, sleep_us, timeout_us) \
+	regmap_read_poll_timeout((inno)->regmap, (reg) * 4, val, cond, \
+				 sleep_us, timeout_us)
+
+static unsigned long inno_hdmi_phy_get_tmdsclk(struct inno_hdmi_phy *inno,
+					       unsigned long rate)
+{
+	int bus_width = phy_get_bus_width(inno->phy);
+
+	switch (bus_width) {
+	case 4:
+	case 5:
+	case 6:
+	case 10:
+	case 12:
+	case 16:
+		return (u64)rate * bus_width / 8;
+	default:
+		return rate;
+	}
+}
+
+static irqreturn_t inno_hdmi_phy_rk3328_hardirq(int irq, void *dev_id)
+{
+	struct inno_hdmi_phy *inno = dev_id;
+	int intr_stat1, intr_stat2, intr_stat3;
+
+	intr_stat1 = inno_read(inno, 0x04);
+	intr_stat2 = inno_read(inno, 0x06);
+	intr_stat3 = inno_read(inno, 0x08);
+
+	if (intr_stat1)
+		inno_write(inno, 0x04, intr_stat1);
+	if (intr_stat2)
+		inno_write(inno, 0x06, intr_stat2);
+	if (intr_stat3)
+		inno_write(inno, 0x08, intr_stat3);
+
+	if (intr_stat1 || intr_stat2 || intr_stat3)
+		return IRQ_WAKE_THREAD;
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t inno_hdmi_phy_rk3328_irq(int irq, void *dev_id)
+{
+	struct inno_hdmi_phy *inno = dev_id;
+
+	inno_update_bits(inno, 0x02, RK3328_PDATA_EN, 0);
+	usleep_range(10, 20);
+	inno_update_bits(inno, 0x02, RK3328_PDATA_EN, RK3328_PDATA_EN);
+
+	return IRQ_HANDLED;
+}
+
+static int inno_hdmi_phy_power_on(struct phy *phy)
+{
+	struct inno_hdmi_phy *inno = phy_get_drvdata(phy);
+	const struct post_pll_config *cfg = post_pll_cfg_table;
+	const struct phy_config *phy_cfg = inno->plat_data->phy_cfg_table;
+	unsigned long tmdsclock = inno_hdmi_phy_get_tmdsclk(inno,
+							    inno->pixclock);
+	int ret;
+
+	if (!tmdsclock) {
+		dev_err(inno->dev, "TMDS clock is zero!\n");
+		return -EINVAL;
+	}
+
+	if (!inno->plat_data->ops->power_on)
+		return -EINVAL;
+
+	for (; cfg->tmdsclock != 0; cfg++)
+		if (tmdsclock <= cfg->tmdsclock &&
+		    cfg->version & inno->chip_version)
+			break;
+
+	for (; phy_cfg->tmdsclock != 0; phy_cfg++)
+		if (tmdsclock <= phy_cfg->tmdsclock)
+			break;
+
+	if (cfg->tmdsclock == 0 || phy_cfg->tmdsclock == 0)
+		return -EINVAL;
+
+	dev_dbg(inno->dev, "Inno HDMI PHY Power On\n");
+
+	ret = clk_prepare_enable(inno->phyclk);
+	if (ret)
+		return ret;
+
+	ret = inno->plat_data->ops->power_on(inno, cfg, phy_cfg);
+	if (ret) {
+		clk_disable_unprepare(inno->phyclk);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int inno_hdmi_phy_power_off(struct phy *phy)
+{
+	struct inno_hdmi_phy *inno = phy_get_drvdata(phy);
+
+	if (!inno->plat_data->ops->power_off)
+		return -EINVAL;
+
+	inno->plat_data->ops->power_off(inno);
+
+	clk_disable_unprepare(inno->phyclk);
+
+	dev_dbg(inno->dev, "Inno HDMI PHY Power Off\n");
+
+	return 0;
+}
+
+static const struct phy_ops inno_hdmi_phy_ops = {
+	.owner = THIS_MODULE,
+	.power_on = inno_hdmi_phy_power_on,
+	.power_off = inno_hdmi_phy_power_off,
+};
+
+static const
+struct pre_pll_config *inno_hdmi_phy_get_pre_pll_cfg(struct inno_hdmi_phy *inno,
+						     unsigned long rate)
+{
+	const struct pre_pll_config *cfg = pre_pll_cfg_table;
+	unsigned long tmdsclock = inno_hdmi_phy_get_tmdsclk(inno, rate);
+
+	for (; cfg->pixclock != 0; cfg++)
+		if (cfg->pixclock == rate && cfg->tmdsclock == tmdsclock)
+			break;
+
+	if (cfg->pixclock == 0)
+		return ERR_PTR(-EINVAL);
+
+	return cfg;
+}
+
+static int inno_hdmi_phy_rk3228_clk_is_prepared(struct clk_hw *hw)
+{
+	struct inno_hdmi_phy *inno = to_inno_hdmi_phy(hw);
+	u8 status;
+
+	status = inno_read(inno, 0xe0) & RK3228_PRE_PLL_POWER_DOWN;
+	return status ? 0 : 1;
+}
+
+static int inno_hdmi_phy_rk3228_clk_prepare(struct clk_hw *hw)
+{
+	struct inno_hdmi_phy *inno = to_inno_hdmi_phy(hw);
+
+	inno_update_bits(inno, 0xe0, RK3228_PRE_PLL_POWER_DOWN, 0);
+	return 0;
+}
+
+static void inno_hdmi_phy_rk3228_clk_unprepare(struct clk_hw *hw)
+{
+	struct inno_hdmi_phy *inno = to_inno_hdmi_phy(hw);
+
+	inno_update_bits(inno, 0xe0, RK3228_PRE_PLL_POWER_DOWN,
+			 RK3228_PRE_PLL_POWER_DOWN);
+}
+
+static
+unsigned long inno_hdmi_phy_rk3228_clk_recalc_rate(struct clk_hw *hw,
+						   unsigned long parent_rate)
+{
+	struct inno_hdmi_phy *inno = to_inno_hdmi_phy(hw);
+	u8 nd, no_a, no_b, no_d;
+	u64 vco;
+	u16 nf;
+
+	nd = inno_read(inno, 0xe2) & RK3228_PRE_PLL_PRE_DIV_MASK;
+	nf = (inno_read(inno, 0xe2) & RK3228_PRE_PLL_FB_DIV_8_MASK) << 1;
+	nf |= inno_read(inno, 0xe3);
+	vco = parent_rate * nf;
+
+	if (inno_read(inno, 0xe2) & RK3228_PCLK_VCO_DIV_5_MASK) {
+		do_div(vco, nd * 5);
+	} else {
+		no_a = inno_read(inno, 0xe4) & RK3228_PRE_PLL_PCLK_DIV_A_MASK;
+		if (!no_a)
+			no_a = 1;
+		no_b = inno_read(inno, 0xe4) & RK3228_PRE_PLL_PCLK_DIV_B_MASK;
+		no_b >>= RK3228_PRE_PLL_PCLK_DIV_B_SHIFT;
+		no_b += 2;
+		no_d = inno_read(inno, 0xe5) & RK3228_PRE_PLL_PCLK_DIV_D_MASK;
+
+		do_div(vco, (nd * (no_a == 1 ? no_b : no_a) * no_d * 2));
+	}
+
+	inno->pixclock = vco;
+
+	dev_dbg(inno->dev, "%s rate %lu\n", __func__, inno->pixclock);
+
+	return vco;
+}
+
+static long inno_hdmi_phy_rk3228_clk_round_rate(struct clk_hw *hw,
+						unsigned long rate,
+						unsigned long *parent_rate)
+{
+	const struct pre_pll_config *cfg = pre_pll_cfg_table;
+
+	for (; cfg->pixclock != 0; cfg++)
+		if (cfg->pixclock == rate && !cfg->fracdiv)
+			break;
+
+	if (cfg->pixclock == 0)
+		return -EINVAL;
+
+	return cfg->pixclock;
+}
+
+static int inno_hdmi_phy_rk3228_clk_set_rate(struct clk_hw *hw,
+					     unsigned long rate,
+					     unsigned long parent_rate)
+{
+	struct inno_hdmi_phy *inno = to_inno_hdmi_phy(hw);
+	const struct pre_pll_config *cfg = pre_pll_cfg_table;
+	unsigned long tmdsclock = inno_hdmi_phy_get_tmdsclk(inno, rate);
+	u32 v;
+	int ret;
+
+	dev_dbg(inno->dev, "%s rate %lu tmdsclk %lu\n",
+		__func__, rate, tmdsclock);
+
+	cfg = inno_hdmi_phy_get_pre_pll_cfg(inno, rate);
+	if (IS_ERR(cfg))
+		return PTR_ERR(cfg);
+
+	/* Power down PRE-PLL */
+	inno_update_bits(inno, 0xe0, RK3228_PRE_PLL_POWER_DOWN,
+			 RK3228_PRE_PLL_POWER_DOWN);
+
+	inno_update_bits(inno, 0xe2, RK3228_PRE_PLL_FB_DIV_8_MASK |
+			 RK3228_PCLK_VCO_DIV_5_MASK |
+			 RK3228_PRE_PLL_PRE_DIV_MASK,
+			 RK3228_PRE_PLL_FB_DIV_8(cfg->fbdiv) |
+			 RK3228_PCLK_VCO_DIV_5(cfg->vco_div_5_en) |
+			 RK3228_PRE_PLL_PRE_DIV(cfg->prediv));
+	inno_write(inno, 0xe3, RK3228_PRE_PLL_FB_DIV_7_0(cfg->fbdiv));
+	inno_update_bits(inno, 0xe4, RK3228_PRE_PLL_PCLK_DIV_B_MASK |
+			 RK3228_PRE_PLL_PCLK_DIV_A_MASK,
+			 RK3228_PRE_PLL_PCLK_DIV_B(cfg->pclk_div_b) |
+			 RK3228_PRE_PLL_PCLK_DIV_A(cfg->pclk_div_a));
+	inno_update_bits(inno, 0xe5, RK3228_PRE_PLL_PCLK_DIV_C_MASK |
+			 RK3228_PRE_PLL_PCLK_DIV_D_MASK,
+			 RK3228_PRE_PLL_PCLK_DIV_C(cfg->pclk_div_c) |
+			 RK3228_PRE_PLL_PCLK_DIV_D(cfg->pclk_div_d));
+	inno_update_bits(inno, 0xe6, RK3228_PRE_PLL_TMDSCLK_DIV_C_MASK |
+			 RK3228_PRE_PLL_TMDSCLK_DIV_A_MASK |
+			 RK3228_PRE_PLL_TMDSCLK_DIV_B_MASK,
+			 RK3228_PRE_PLL_TMDSCLK_DIV_C(cfg->tmds_div_c) |
+			 RK3228_PRE_PLL_TMDSCLK_DIV_A(cfg->tmds_div_a) |
+			 RK3228_PRE_PLL_TMDSCLK_DIV_B(cfg->tmds_div_b));
+
+	/* Power up PRE-PLL */
+	inno_update_bits(inno, 0xe0, RK3228_PRE_PLL_POWER_DOWN, 0);
+
+	/* Wait for Pre-PLL lock */
+	ret = inno_poll(inno, 0xe8, v, v & RK3228_PRE_PLL_LOCK_STATUS,
+			100, 100000);
+	if (ret) {
+		dev_err(inno->dev, "Pre-PLL locking failed\n");
+		return ret;
+	}
+
+	inno->pixclock = rate;
+
+	return 0;
+}
+
+static const struct clk_ops inno_hdmi_phy_rk3228_clk_ops = {
+	.prepare = inno_hdmi_phy_rk3228_clk_prepare,
+	.unprepare = inno_hdmi_phy_rk3228_clk_unprepare,
+	.is_prepared = inno_hdmi_phy_rk3228_clk_is_prepared,
+	.recalc_rate = inno_hdmi_phy_rk3228_clk_recalc_rate,
+	.round_rate = inno_hdmi_phy_rk3228_clk_round_rate,
+	.set_rate = inno_hdmi_phy_rk3228_clk_set_rate,
+};
+
+static int inno_hdmi_phy_rk3328_clk_is_prepared(struct clk_hw *hw)
+{
+	struct inno_hdmi_phy *inno = to_inno_hdmi_phy(hw);
+	u8 status;
+
+	status = inno_read(inno, 0xa0) & RK3328_PRE_PLL_POWER_DOWN;
+	return status ? 0 : 1;
+}
+
+static int inno_hdmi_phy_rk3328_clk_prepare(struct clk_hw *hw)
+{
+	struct inno_hdmi_phy *inno = to_inno_hdmi_phy(hw);
+
+	inno_update_bits(inno, 0xa0, RK3328_PRE_PLL_POWER_DOWN, 0);
+	return 0;
+}
+
+static void inno_hdmi_phy_rk3328_clk_unprepare(struct clk_hw *hw)
+{
+	struct inno_hdmi_phy *inno = to_inno_hdmi_phy(hw);
+
+	inno_update_bits(inno, 0xa0, RK3328_PRE_PLL_POWER_DOWN,
+			 RK3328_PRE_PLL_POWER_DOWN);
+}
+
+static
+unsigned long inno_hdmi_phy_rk3328_clk_recalc_rate(struct clk_hw *hw,
+						   unsigned long parent_rate)
+{
+	struct inno_hdmi_phy *inno = to_inno_hdmi_phy(hw);
+	unsigned long frac;
+	u8 nd, no_a, no_b, no_c, no_d;
+	u64 vco;
+	u16 nf;
+
+	nd = inno_read(inno, 0xa1) & RK3328_PRE_PLL_PRE_DIV_MASK;
+	nf = ((inno_read(inno, 0xa2) & RK3328_PRE_PLL_FB_DIV_11_8_MASK) << 8);
+	nf |= inno_read(inno, 0xa3);
+	vco = parent_rate * nf;
+
+	if (!(inno_read(inno, 0xa2) & RK3328_PRE_PLL_FRAC_DIV_DISABLE)) {
+		frac = inno_read(inno, 0xd3) |
+		       (inno_read(inno, 0xd2) << 8) |
+		       (inno_read(inno, 0xd1) << 16);
+		vco += DIV_ROUND_CLOSEST(parent_rate * frac, (1 << 24));
+	}
+
+	if (inno_read(inno, 0xa0) & RK3328_PCLK_VCO_DIV_5_MASK) {
+		do_div(vco, nd * 5);
+	} else {
+		no_a = inno_read(inno, 0xa5) & RK3328_PRE_PLL_PCLK_DIV_A_MASK;
+		no_b = inno_read(inno, 0xa5) & RK3328_PRE_PLL_PCLK_DIV_B_MASK;
+		no_b >>= RK3328_PRE_PLL_PCLK_DIV_B_SHIFT;
+		no_b += 2;
+		no_c = inno_read(inno, 0xa6) & RK3328_PRE_PLL_PCLK_DIV_C_MASK;
+		no_c >>= RK3328_PRE_PLL_PCLK_DIV_C_SHIFT;
+		no_c = 1 << no_c;
+		no_d = inno_read(inno, 0xa6) & RK3328_PRE_PLL_PCLK_DIV_D_MASK;
+
+		do_div(vco, (nd * (no_a == 1 ? no_b : no_a) * no_d * 2));
+	}
+
+	inno->pixclock = vco;
+	dev_dbg(inno->dev, "%s rate %lu\n", __func__, inno->pixclock);
+
+	return vco;
+}
+
+static long inno_hdmi_phy_rk3328_clk_round_rate(struct clk_hw *hw,
+						unsigned long rate,
+						unsigned long *parent_rate)
+{
+	const struct pre_pll_config *cfg = pre_pll_cfg_table;
+
+	for (; cfg->pixclock != 0; cfg++)
+		if (cfg->pixclock == rate)
+			break;
+
+	if (cfg->pixclock == 0)
+		return -EINVAL;
+
+	return cfg->pixclock;
+}
+
+static int inno_hdmi_phy_rk3328_clk_set_rate(struct clk_hw *hw,
+					     unsigned long rate,
+					     unsigned long parent_rate)
+{
+	struct inno_hdmi_phy *inno = to_inno_hdmi_phy(hw);
+	const struct pre_pll_config *cfg = pre_pll_cfg_table;
+	unsigned long tmdsclock = inno_hdmi_phy_get_tmdsclk(inno, rate);
+	u32 val;
+	int ret;
+
+	dev_dbg(inno->dev, "%s rate %lu tmdsclk %lu\n",
+		__func__, rate, tmdsclock);
+
+	cfg = inno_hdmi_phy_get_pre_pll_cfg(inno, rate);
+	if (IS_ERR(cfg))
+		return PTR_ERR(cfg);
+
+	inno_update_bits(inno, 0xa0, RK3328_PRE_PLL_POWER_DOWN,
+			 RK3328_PRE_PLL_POWER_DOWN);
+
+	/* Configure pre-pll */
+	inno_update_bits(inno, 0xa0, RK3228_PCLK_VCO_DIV_5_MASK,
+			 RK3228_PCLK_VCO_DIV_5(cfg->vco_div_5_en));
+	inno_write(inno, 0xa1, RK3328_PRE_PLL_PRE_DIV(cfg->prediv));
+
+	val = RK3328_SPREAD_SPECTRUM_MOD_DISABLE;
+	if (!cfg->fracdiv)
+		val |= RK3328_PRE_PLL_FRAC_DIV_DISABLE;
+	inno_write(inno, 0xa2, RK3328_PRE_PLL_FB_DIV_11_8(cfg->fbdiv) | val);
+	inno_write(inno, 0xa3, RK3328_PRE_PLL_FB_DIV_7_0(cfg->fbdiv));
+	inno_write(inno, 0xa5, RK3328_PRE_PLL_PCLK_DIV_A(cfg->pclk_div_a) |
+		   RK3328_PRE_PLL_PCLK_DIV_B(cfg->pclk_div_b));
+	inno_write(inno, 0xa6, RK3328_PRE_PLL_PCLK_DIV_C(cfg->pclk_div_c) |
+		   RK3328_PRE_PLL_PCLK_DIV_D(cfg->pclk_div_d));
+	inno_write(inno, 0xa4, RK3328_PRE_PLL_TMDSCLK_DIV_C(cfg->tmds_div_c) |
+		   RK3328_PRE_PLL_TMDSCLK_DIV_A(cfg->tmds_div_a) |
+		   RK3328_PRE_PLL_TMDSCLK_DIV_B(cfg->tmds_div_b));
+	inno_write(inno, 0xd3, RK3328_PRE_PLL_FRAC_DIV_7_0(cfg->fracdiv));
+	inno_write(inno, 0xd2, RK3328_PRE_PLL_FRAC_DIV_15_8(cfg->fracdiv));
+	inno_write(inno, 0xd1, RK3328_PRE_PLL_FRAC_DIV_23_16(cfg->fracdiv));
+
+	inno_update_bits(inno, 0xa0, RK3328_PRE_PLL_POWER_DOWN, 0);
+
+	/* Wait for Pre-PLL lock */
+	ret = inno_poll(inno, 0xa9, val, val & RK3328_PRE_PLL_LOCK_STATUS,
+			1000, 10000);
+	if (ret) {
+		dev_err(inno->dev, "Pre-PLL locking failed\n");
+		return ret;
+	}
+
+	inno->pixclock = rate;
+
+	return 0;
+}
+
+static const struct clk_ops inno_hdmi_phy_rk3328_clk_ops = {
+	.prepare = inno_hdmi_phy_rk3328_clk_prepare,
+	.unprepare = inno_hdmi_phy_rk3328_clk_unprepare,
+	.is_prepared = inno_hdmi_phy_rk3328_clk_is_prepared,
+	.recalc_rate = inno_hdmi_phy_rk3328_clk_recalc_rate,
+	.round_rate = inno_hdmi_phy_rk3328_clk_round_rate,
+	.set_rate = inno_hdmi_phy_rk3328_clk_set_rate,
+};
+
+static int inno_hdmi_phy_clk_register(struct inno_hdmi_phy *inno)
+{
+	struct device *dev = inno->dev;
+	struct device_node *np = dev->of_node;
+	struct clk_init_data init;
+	const char *parent_name;
+	int ret;
+
+	parent_name = __clk_get_name(inno->refoclk);
+
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+	init.flags = 0;
+	init.name = "pin_hd20_pclk";
+	init.ops = inno->plat_data->clk_ops;
+
+	/* optional override of the clock name */
+	of_property_read_string(np, "clock-output-names", &init.name);
+
+	inno->hw.init = &init;
+
+	inno->phyclk = devm_clk_register(dev, &inno->hw);
+	if (IS_ERR(inno->phyclk)) {
+		ret = PTR_ERR(inno->phyclk);
+		dev_err(dev, "failed to register clock: %d\n", ret);
+		return ret;
+	}
+
+	ret = of_clk_add_provider(np, of_clk_src_simple_get, inno->phyclk);
+	if (ret) {
+		dev_err(dev, "failed to register clock provider: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int inno_hdmi_phy_rk3228_init(struct inno_hdmi_phy *inno)
+{
+	/*
+	 * Use phy internal register control
+	 * rxsense/poweron/pllpd/pdataen signal.
+	 */
+	inno_write(inno, 0x01, RK3228_BYPASS_RXSENSE_EN |
+		   RK3228_BYPASS_PWRON_EN |
+		   RK3228_BYPASS_PLLPD_EN);
+	inno_update_bits(inno, 0x02, RK3228_BYPASS_PDATA_EN,
+			 RK3228_BYPASS_PDATA_EN);
+
+	/* manual power down post-PLL */
+	inno_update_bits(inno, 0xaa, RK3228_POST_PLL_CTRL_MANUAL,
+			 RK3228_POST_PLL_CTRL_MANUAL);
+
+	inno->chip_version = 1;
+
+	return 0;
+}
+
+static int
+inno_hdmi_phy_rk3228_power_on(struct inno_hdmi_phy *inno,
+			      const struct post_pll_config *cfg,
+			      const struct phy_config *phy_cfg)
+{
+	int ret;
+	u32 v;
+
+	inno_update_bits(inno, 0x02, RK3228_PDATAEN_DISABLE,
+			 RK3228_PDATAEN_DISABLE);
+	inno_update_bits(inno, 0xe0, RK3228_PRE_PLL_POWER_DOWN |
+			 RK3228_POST_PLL_POWER_DOWN,
+			 RK3228_PRE_PLL_POWER_DOWN |
+			 RK3228_POST_PLL_POWER_DOWN);
+
+	/* Post-PLL update */
+	inno_update_bits(inno, 0xe9, RK3228_POST_PLL_PRE_DIV_MASK,
+			 RK3228_POST_PLL_PRE_DIV(cfg->prediv));
+	inno_update_bits(inno, 0xeb, RK3228_POST_PLL_FB_DIV_8_MASK,
+			 RK3228_POST_PLL_FB_DIV_8(cfg->fbdiv));
+	inno_write(inno, 0xea, RK3228_POST_PLL_FB_DIV_7_0(cfg->fbdiv));
+
+	if (cfg->postdiv == 1) {
+		inno_update_bits(inno, 0xe9, RK3228_POST_PLL_POST_DIV_ENABLE,
+				 0);
+	} else {
+		int div = cfg->postdiv / 2 - 1;
+
+		inno_update_bits(inno, 0xe9, RK3228_POST_PLL_POST_DIV_ENABLE,
+				 RK3228_POST_PLL_POST_DIV_ENABLE);
+		inno_update_bits(inno, 0xeb, RK3228_POST_PLL_POST_DIV_MASK,
+				 RK3228_POST_PLL_POST_DIV(div));
+	}
+
+	for (v = 0; v < 4; v++)
+		inno_write(inno, 0xef + v, phy_cfg->regs[v]);
+
+	inno_update_bits(inno, 0xe0, RK3228_PRE_PLL_POWER_DOWN |
+			 RK3228_POST_PLL_POWER_DOWN, 0);
+	inno_update_bits(inno, 0xe1, RK3228_BANDGAP_ENABLE,
+			 RK3228_BANDGAP_ENABLE);
+	inno_update_bits(inno, 0xe1, RK3228_TMDS_DRIVER_ENABLE,
+			 RK3228_TMDS_DRIVER_ENABLE);
+
+	/* Wait for post PLL lock */
+	ret = inno_poll(inno, 0xeb, v, v & RK3228_POST_PLL_LOCK_STATUS,
+			100, 100000);
+	if (ret) {
+		dev_err(inno->dev, "Post-PLL locking failed\n");
+		return ret;
+	}
+
+	if (cfg->tmdsclock > 340000000)
+		msleep(100);
+
+	inno_update_bits(inno, 0x02, RK3228_PDATAEN_DISABLE, 0);
+	return 0;
+}
+
+static void inno_hdmi_phy_rk3228_power_off(struct inno_hdmi_phy *inno)
+{
+	inno_update_bits(inno, 0xe1, RK3228_TMDS_DRIVER_ENABLE, 0);
+	inno_update_bits(inno, 0xe1, RK3228_BANDGAP_ENABLE, 0);
+	inno_update_bits(inno, 0xe0, RK3228_POST_PLL_POWER_DOWN,
+			 RK3228_POST_PLL_POWER_DOWN);
+}
+
+static const struct inno_hdmi_phy_ops rk3228_hdmi_phy_ops = {
+	.init = inno_hdmi_phy_rk3228_init,
+	.power_on = inno_hdmi_phy_rk3228_power_on,
+	.power_off = inno_hdmi_phy_rk3228_power_off,
+};
+
+static int inno_hdmi_phy_rk3328_init(struct inno_hdmi_phy *inno)
+{
+	struct nvmem_cell *cell;
+	unsigned char *efuse_buf;
+	size_t len;
+
+	/*
+	 * Use phy internal register control
+	 * rxsense/poweron/pllpd/pdataen signal.
+	 */
+	inno_write(inno, 0x01, RK3328_BYPASS_RXSENSE_EN |
+		   RK3328_BYPASS_POWERON_EN |
+		   RK3328_BYPASS_PLLPD_EN);
+	inno_write(inno, 0x02, RK3328_INT_POL_HIGH | RK3328_BYPASS_PDATA_EN |
+		   RK3328_PDATA_EN);
+
+	/* Disable phy irq */
+	inno_write(inno, 0x05, 0);
+	inno_write(inno, 0x07, 0);
+
+	/* try to read the chip-version */
+	inno->chip_version = 1;
+	cell = nvmem_cell_get(inno->dev, "cpu-version");
+	if (IS_ERR(cell)) {
+		if (PTR_ERR(cell) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+
+		return 0;
+	}
+
+	efuse_buf = nvmem_cell_read(cell, &len);
+	nvmem_cell_put(cell);
+
+	if (IS_ERR(efuse_buf))
+		return 0;
+	if (len == 1)
+		inno->chip_version = efuse_buf[0] + 1;
+	kfree(efuse_buf);
+
+	return 0;
+}
+
+static int
+inno_hdmi_phy_rk3328_power_on(struct inno_hdmi_phy *inno,
+			      const struct post_pll_config *cfg,
+			      const struct phy_config *phy_cfg)
+{
+	int ret;
+	u32 v;
+
+	inno_update_bits(inno, 0x02, RK3328_PDATA_EN, 0);
+	inno_update_bits(inno, 0xaa, RK3328_POST_PLL_POWER_DOWN,
+			 RK3328_POST_PLL_POWER_DOWN);
+
+	inno_write(inno, 0xac, RK3328_POST_PLL_FB_DIV_7_0(cfg->fbdiv));
+	if (cfg->postdiv == 1) {
+		inno_write(inno, 0xaa, RK3328_POST_PLL_REFCLK_SEL_TMDS);
+		inno_write(inno, 0xab, RK3328_POST_PLL_FB_DIV_8(cfg->fbdiv) |
+			   RK3328_POST_PLL_PRE_DIV(cfg->prediv));
+	} else {
+		v = (cfg->postdiv / 2) - 1;
+		v &= RK3328_POST_PLL_POST_DIV_MASK;
+		inno_write(inno, 0xad, v);
+		inno_write(inno, 0xab, RK3328_POST_PLL_FB_DIV_8(cfg->fbdiv) |
+			   RK3328_POST_PLL_PRE_DIV(cfg->prediv));
+		inno_write(inno, 0xaa, RK3328_POST_PLL_POST_DIV_ENABLE |
+			   RK3328_POST_PLL_REFCLK_SEL_TMDS);
+	}
+
+	for (v = 0; v < 14; v++)
+		inno_write(inno, 0xb5 + v, phy_cfg->regs[v]);
+
+	/* set ESD detection threshold for TMDS CLK, D2, D1 and D0 */
+	for (v = 0; v < 4; v++)
+		inno_update_bits(inno, 0xc8 + v, RK3328_ESD_DETECT_MASK,
+				 RK3328_ESD_DETECT_340MV);
+
+	if (phy_cfg->tmdsclock > 340000000) {
+		/* Set termination resistor to 100ohm */
+		v = clk_get_rate(inno->sysclk) / 100000;
+		inno_write(inno, 0xc5, RK3328_TERM_RESISTOR_CALIB_SPEED_14_8(v)
+			   | RK3328_BYPASS_TERM_RESISTOR_CALIB);
+		inno_write(inno, 0xc6, RK3328_TERM_RESISTOR_CALIB_SPEED_7_0(v));
+		inno_write(inno, 0xc7, RK3328_TERM_RESISTOR_100);
+		inno_update_bits(inno, 0xc5,
+				 RK3328_BYPASS_TERM_RESISTOR_CALIB, 0);
+	} else {
+		inno_write(inno, 0xc5, RK3328_BYPASS_TERM_RESISTOR_CALIB);
+
+		/* clk termination resistor is 50ohm (parallel resistors) */
+		if (phy_cfg->tmdsclock > 165000000)
+			inno_update_bits(inno, 0xc8,
+					 RK3328_TMDS_TERM_RESIST_MASK,
+					 RK3328_TMDS_TERM_RESIST_75 |
+					 RK3328_TMDS_TERM_RESIST_150);
+
+		/* data termination resistor for D2, D1 and D0 is 150ohm */
+		for (v = 0; v < 3; v++)
+			inno_update_bits(inno, 0xc9 + v,
+					 RK3328_TMDS_TERM_RESIST_MASK,
+					 RK3328_TMDS_TERM_RESIST_150);
+	}
+
+	inno_update_bits(inno, 0xaa, RK3328_POST_PLL_POWER_DOWN, 0);
+	inno_update_bits(inno, 0xb0, RK3328_BANDGAP_ENABLE,
+			 RK3328_BANDGAP_ENABLE);
+	inno_update_bits(inno, 0xb2, RK3328_TMDS_DRIVER_ENABLE,
+			 RK3328_TMDS_DRIVER_ENABLE);
+
+	/* Wait for post PLL lock */
+	ret = inno_poll(inno, 0xaf, v, v & RK3328_POST_PLL_LOCK_STATUS,
+			1000, 10000);
+	if (ret) {
+		dev_err(inno->dev, "Post-PLL locking failed\n");
+		return ret;
+	}
+
+	if (phy_cfg->tmdsclock > 340000000)
+		msleep(100);
+
+	inno_update_bits(inno, 0x02, RK3328_PDATA_EN, RK3328_PDATA_EN);
+
+	/* Enable PHY IRQ */
+	inno_write(inno, 0x05, RK3328_INT_TMDS_CLK(RK3328_INT_VSS_AGND_ESD_DET)
+		   | RK3328_INT_TMDS_D2(RK3328_INT_VSS_AGND_ESD_DET));
+	inno_write(inno, 0x07, RK3328_INT_TMDS_D1(RK3328_INT_VSS_AGND_ESD_DET)
+		   | RK3328_INT_TMDS_D0(RK3328_INT_VSS_AGND_ESD_DET));
+	return 0;
+}
+
+static void inno_hdmi_phy_rk3328_power_off(struct inno_hdmi_phy *inno)
+{
+	inno_update_bits(inno, 0xb2, RK3328_TMDS_DRIVER_ENABLE, 0);
+	inno_update_bits(inno, 0xb0, RK3328_BANDGAP_ENABLE, 0);
+	inno_update_bits(inno, 0xaa, RK3328_POST_PLL_POWER_DOWN,
+			 RK3328_POST_PLL_POWER_DOWN);
+
+	/* Disable PHY IRQ */
+	inno_write(inno, 0x05, 0);
+	inno_write(inno, 0x07, 0);
+}
+
+static const struct inno_hdmi_phy_ops rk3328_hdmi_phy_ops = {
+	.init = inno_hdmi_phy_rk3328_init,
+	.power_on = inno_hdmi_phy_rk3328_power_on,
+	.power_off = inno_hdmi_phy_rk3328_power_off,
+};
+
+static const struct inno_hdmi_phy_drv_data rk3228_hdmi_phy_drv_data = {
+	.ops = &rk3228_hdmi_phy_ops,
+	.clk_ops = &inno_hdmi_phy_rk3228_clk_ops,
+	.phy_cfg_table = rk3228_phy_cfg,
+};
+
+static const struct inno_hdmi_phy_drv_data rk3328_hdmi_phy_drv_data = {
+	.ops = &rk3328_hdmi_phy_ops,
+	.clk_ops = &inno_hdmi_phy_rk3328_clk_ops,
+	.phy_cfg_table = rk3328_phy_cfg,
+};
+
+static const struct regmap_config inno_hdmi_phy_regmap_config = {
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_stride = 4,
+	.max_register = 0x400,
+};
+
+static void inno_hdmi_phy_action(void *data)
+{
+	struct inno_hdmi_phy *inno = data;
+
+	clk_disable_unprepare(inno->refpclk);
+	clk_disable_unprepare(inno->sysclk);
+}
+
+static int inno_hdmi_phy_probe(struct platform_device *pdev)
+{
+	struct inno_hdmi_phy *inno;
+	struct phy_provider *phy_provider;
+	struct resource *res;
+	void __iomem *regs;
+	int ret;
+
+	inno = devm_kzalloc(&pdev->dev, sizeof(*inno), GFP_KERNEL);
+	if (!inno)
+		return -ENOMEM;
+
+	inno->dev = &pdev->dev;
+
+	inno->plat_data = of_device_get_match_data(inno->dev);
+	if (!inno->plat_data || !inno->plat_data->ops)
+		return -EINVAL;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	regs = devm_ioremap_resource(inno->dev, res);
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
+
+	inno->sysclk = devm_clk_get(inno->dev, "sysclk");
+	if (IS_ERR(inno->sysclk)) {
+		ret = PTR_ERR(inno->sysclk);
+		dev_err(inno->dev, "failed to get sysclk: %d\n", ret);
+		return ret;
+	}
+
+	inno->refpclk = devm_clk_get(inno->dev, "refpclk");
+	if (IS_ERR(inno->refpclk)) {
+		ret = PTR_ERR(inno->refpclk);
+		dev_err(inno->dev, "failed to get ref clock: %d\n", ret);
+		return ret;
+	}
+
+	inno->refoclk = devm_clk_get(inno->dev, "refoclk");
+	if (IS_ERR(inno->refoclk)) {
+		ret = PTR_ERR(inno->refoclk);
+		dev_err(inno->dev, "failed to get oscillator-ref clock: %d\n",
+			ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(inno->sysclk);
+	if (ret) {
+		dev_err(inno->dev, "Cannot enable inno phy sysclk: %d\n", ret);
+		return ret;
+	}
+
+	/*
+	 * Refpclk needs to be on, on at least the rk3328 for still
+	 * unknown reasons.
+	 */
+	ret = clk_prepare_enable(inno->refpclk);
+	if (ret) {
+		dev_err(inno->dev, "failed to enable refpclk\n");
+		clk_disable_unprepare(inno->sysclk);
+		return ret;
+	}
+
+	ret = devm_add_action_or_reset(inno->dev, inno_hdmi_phy_action,
+				       inno);
+	if (ret)
+		return ret;
+
+	inno->regmap = devm_regmap_init_mmio(inno->dev, regs,
+					     &inno_hdmi_phy_regmap_config);
+	if (IS_ERR(inno->regmap))
+		return PTR_ERR(inno->regmap);
+
+	/* only the newer rk3328 hdmiphy has an interrupt */
+	inno->irq = platform_get_irq(pdev, 0);
+	if (inno->irq > 0) {
+		ret = devm_request_threaded_irq(inno->dev, inno->irq,
+						inno_hdmi_phy_rk3328_hardirq,
+						inno_hdmi_phy_rk3328_irq,
+						IRQF_SHARED,
+						dev_name(inno->dev), inno);
+		if (ret)
+			return ret;
+	}
+
+	inno->phy = devm_phy_create(inno->dev, NULL, &inno_hdmi_phy_ops);
+	if (IS_ERR(inno->phy)) {
+		dev_err(inno->dev, "failed to create HDMI PHY\n");
+		return PTR_ERR(inno->phy);
+	}
+
+	phy_set_drvdata(inno->phy, inno);
+	phy_set_bus_width(inno->phy, 8);
+
+	if (inno->plat_data->ops->init) {
+		ret = inno->plat_data->ops->init(inno);
+		if (ret)
+			return ret;
+	}
+
+	ret = inno_hdmi_phy_clk_register(inno);
+	if (ret)
+		return ret;
+
+	phy_provider = devm_of_phy_provider_register(inno->dev,
+						     of_phy_simple_xlate);
+	return PTR_ERR_OR_ZERO(phy_provider);
+}
+
+static int inno_hdmi_phy_remove(struct platform_device *pdev)
+{
+	of_clk_del_provider(pdev->dev.of_node);
+
+	return 0;
+}
+
+static const struct of_device_id inno_hdmi_phy_of_match[] = {
+	{
+		.compatible = "rockchip,rk3228-hdmi-phy",
+		.data = &rk3228_hdmi_phy_drv_data
+	}, {
+		.compatible = "rockchip,rk3328-hdmi-phy",
+		.data = &rk3328_hdmi_phy_drv_data
+	}, { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, inno_hdmi_phy_of_match);
+
+static struct platform_driver inno_hdmi_phy_driver = {
+	.probe  = inno_hdmi_phy_probe,
+	.remove = inno_hdmi_phy_remove,
+	.driver = {
+		.name = "inno-hdmi-phy",
+		.of_match_table = inno_hdmi_phy_of_match,
+	},
+};
+module_platform_driver(inno_hdmi_phy_driver);
+
+MODULE_AUTHOR("Zheng Yang <zhengyang@rock-chips.com>");
+MODULE_DESCRIPTION("Innosilion HDMI 2.0 Transmitter PHY Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
index 5049dac79bd0..24bd2717abdb 100644
--- a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
+++ b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
@@ -1116,8 +1116,8 @@ static int rockchip_usb2phy_probe(struct platform_device *pdev)
 	}
 
 	if (of_property_read_u32(np, "reg", &reg)) {
-		dev_err(dev, "the reg property is not assigned in %s node\n",
-			np->name);
+		dev_err(dev, "the reg property is not assigned in %pOFn node\n",
+			np);
 		return -EINVAL;
 	}
 
@@ -1143,8 +1143,8 @@ static int rockchip_usb2phy_probe(struct platform_device *pdev)
 	}
 
 	if (!rphy->phy_cfg) {
-		dev_err(dev, "no phy-config can be matched with %s node\n",
-			np->name);
+		dev_err(dev, "no phy-config can be matched with %pOFn node\n",
+			np);
 		return -EINVAL;
 	}
 
diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c
index 76a4b58ec771..c57e496f0b0c 100644
--- a/drivers/phy/rockchip/phy-rockchip-typec.c
+++ b/drivers/phy/rockchip/phy-rockchip-typec.c
@@ -1145,8 +1145,8 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev)
 	}
 
 	if (!tcphy->port_cfgs) {
-		dev_err(dev, "no phy-config can be matched with %s node\n",
-			np->name);
+		dev_err(dev, "no phy-config can be matched with %pOFn node\n",
+			np);
 		return -EINVAL;
 	}
 
@@ -1186,8 +1186,8 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev)
 			continue;
 
 		if (IS_ERR(phy)) {
-			dev_err(dev, "failed to create phy: %s\n",
-				child_np->name);
+			dev_err(dev, "failed to create phy: %pOFn\n",
+				child_np);
 			pm_runtime_disable(dev);
 			return PTR_ERR(phy);
 		}
diff --git a/drivers/phy/rockchip/phy-rockchip-usb.c b/drivers/phy/rockchip/phy-rockchip-usb.c
index 3378eeb7a562..b2899c744ad9 100644
--- a/drivers/phy/rockchip/phy-rockchip-usb.c
+++ b/drivers/phy/rockchip/phy-rockchip-usb.c
@@ -36,7 +36,22 @@ static int enable_usb_uart;
 #define HIWORD_UPDATE(val, mask) \
 		((val) | (mask) << 16)
 
-#define UOC_CON0_SIDDQ BIT(13)
+#define UOC_CON0					0x00
+#define UOC_CON0_SIDDQ					BIT(13)
+#define UOC_CON0_DISABLE				BIT(4)
+#define UOC_CON0_COMMON_ON_N				BIT(0)
+
+#define UOC_CON2					0x08
+#define UOC_CON2_SOFT_CON_SEL				BIT(2)
+
+#define UOC_CON3					0x0c
+/* bits present on rk3188 and rk3288 phys */
+#define UOC_CON3_UTMI_TERMSEL_FULLSPEED			BIT(5)
+#define UOC_CON3_UTMI_XCVRSEELCT_FSTRANSC		(1 << 3)
+#define UOC_CON3_UTMI_XCVRSEELCT_MASK			(3 << 3)
+#define UOC_CON3_UTMI_OPMODE_NODRIVING			(1 << 1)
+#define UOC_CON3_UTMI_OPMODE_MASK			(3 << 1)
+#define UOC_CON3_UTMI_SUSPENDN				BIT(0)
 
 struct rockchip_usb_phys {
 	int reg;
@@ -46,7 +61,8 @@ struct rockchip_usb_phys {
 struct rockchip_usb_phy_base;
 struct rockchip_usb_phy_pdata {
 	struct rockchip_usb_phys *phys;
-	int (*init_usb_uart)(struct regmap *grf);
+	int (*init_usb_uart)(struct regmap *grf,
+			     const struct rockchip_usb_phy_pdata *pdata);
 	int usb_uart_phy;
 };
 
@@ -208,8 +224,8 @@ static int rockchip_usb_phy_init(struct rockchip_usb_phy_base *base,
 	rk_phy->np = child;
 
 	if (of_property_read_u32(child, "reg", &reg_offset)) {
-		dev_err(base->dev, "missing reg property in node %s\n",
-			child->name);
+		dev_err(base->dev, "missing reg property in node %pOFn\n",
+			child);
 		return -EINVAL;
 	}
 
@@ -313,28 +329,88 @@ static const struct rockchip_usb_phy_pdata rk3066a_pdata = {
 	},
 };
 
+static int __init rockchip_init_usb_uart_common(struct regmap *grf,
+				const struct rockchip_usb_phy_pdata *pdata)
+{
+	int regoffs = pdata->phys[pdata->usb_uart_phy].reg;
+	int ret;
+	u32 val;
+
+	/*
+	 * COMMON_ON and DISABLE settings are described in the TRM,
+	 * but were not present in the original code.
+	 * Also disable the analog phy components to save power.
+	 */
+	val = HIWORD_UPDATE(UOC_CON0_COMMON_ON_N
+				| UOC_CON0_DISABLE
+				| UOC_CON0_SIDDQ,
+			    UOC_CON0_COMMON_ON_N
+				| UOC_CON0_DISABLE
+				| UOC_CON0_SIDDQ);
+	ret = regmap_write(grf, regoffs + UOC_CON0, val);
+	if (ret)
+		return ret;
+
+	val = HIWORD_UPDATE(UOC_CON2_SOFT_CON_SEL,
+			    UOC_CON2_SOFT_CON_SEL);
+	ret = regmap_write(grf, regoffs + UOC_CON2, val);
+	if (ret)
+		return ret;
+
+	val = HIWORD_UPDATE(UOC_CON3_UTMI_OPMODE_NODRIVING
+				| UOC_CON3_UTMI_XCVRSEELCT_FSTRANSC
+				| UOC_CON3_UTMI_TERMSEL_FULLSPEED,
+			    UOC_CON3_UTMI_SUSPENDN
+				| UOC_CON3_UTMI_OPMODE_MASK
+				| UOC_CON3_UTMI_XCVRSEELCT_MASK
+				| UOC_CON3_UTMI_TERMSEL_FULLSPEED);
+	ret = regmap_write(grf, UOC_CON3, val);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+#define RK3188_UOC0_CON0				0x10c
+#define RK3188_UOC0_CON0_BYPASSSEL			BIT(9)
+#define RK3188_UOC0_CON0_BYPASSDMEN			BIT(8)
+
+/*
+ * Enable the bypass of uart2 data through the otg usb phy.
+ * See description of rk3288-variant for details.
+ */
+static int __init rk3188_init_usb_uart(struct regmap *grf,
+				const struct rockchip_usb_phy_pdata *pdata)
+{
+	u32 val;
+	int ret;
+
+	ret = rockchip_init_usb_uart_common(grf, pdata);
+	if (ret)
+		return ret;
+
+	val = HIWORD_UPDATE(RK3188_UOC0_CON0_BYPASSSEL
+				| RK3188_UOC0_CON0_BYPASSDMEN,
+			    RK3188_UOC0_CON0_BYPASSSEL
+				| RK3188_UOC0_CON0_BYPASSDMEN);
+	ret = regmap_write(grf, RK3188_UOC0_CON0, val);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 static const struct rockchip_usb_phy_pdata rk3188_pdata = {
 	.phys = (struct rockchip_usb_phys[]){
 		{ .reg = 0x10c, .pll_name = "sclk_otgphy0_480m" },
 		{ .reg = 0x11c, .pll_name = "sclk_otgphy1_480m" },
 		{ /* sentinel */ }
 	},
+	.init_usb_uart = rk3188_init_usb_uart,
+	.usb_uart_phy = 0,
 };
 
-#define RK3288_UOC0_CON0				0x320
-#define RK3288_UOC0_CON0_COMMON_ON_N			BIT(0)
-#define RK3288_UOC0_CON0_DISABLE			BIT(4)
-
-#define RK3288_UOC0_CON2				0x328
-#define RK3288_UOC0_CON2_SOFT_CON_SEL			BIT(2)
-
 #define RK3288_UOC0_CON3				0x32c
-#define RK3288_UOC0_CON3_UTMI_SUSPENDN			BIT(0)
-#define RK3288_UOC0_CON3_UTMI_OPMODE_NODRIVING		(1 << 1)
-#define RK3288_UOC0_CON3_UTMI_OPMODE_MASK		(3 << 1)
-#define RK3288_UOC0_CON3_UTMI_XCVRSEELCT_FSTRANSC	(1 << 3)
-#define RK3288_UOC0_CON3_UTMI_XCVRSEELCT_MASK		(3 << 3)
-#define RK3288_UOC0_CON3_UTMI_TERMSEL_FULLSPEED		BIT(5)
 #define RK3288_UOC0_CON3_BYPASSDMEN			BIT(6)
 #define RK3288_UOC0_CON3_BYPASSSEL			BIT(7)
 
@@ -353,40 +429,13 @@ static const struct rockchip_usb_phy_pdata rk3188_pdata = {
  *
  * The actual code in the vendor kernel does some things differently.
  */
-static int __init rk3288_init_usb_uart(struct regmap *grf)
+static int __init rk3288_init_usb_uart(struct regmap *grf,
+				const struct rockchip_usb_phy_pdata *pdata)
 {
 	u32 val;
 	int ret;
 
-	/*
-	 * COMMON_ON and DISABLE settings are described in the TRM,
-	 * but were not present in the original code.
-	 * Also disable the analog phy components to save power.
-	 */
-	val = HIWORD_UPDATE(RK3288_UOC0_CON0_COMMON_ON_N
-				| RK3288_UOC0_CON0_DISABLE
-				| UOC_CON0_SIDDQ,
-			    RK3288_UOC0_CON0_COMMON_ON_N
-				| RK3288_UOC0_CON0_DISABLE
-				| UOC_CON0_SIDDQ);
-	ret = regmap_write(grf, RK3288_UOC0_CON0, val);
-	if (ret)
-		return ret;
-
-	val = HIWORD_UPDATE(RK3288_UOC0_CON2_SOFT_CON_SEL,
-			    RK3288_UOC0_CON2_SOFT_CON_SEL);
-	ret = regmap_write(grf, RK3288_UOC0_CON2, val);
-	if (ret)
-		return ret;
-
-	val = HIWORD_UPDATE(RK3288_UOC0_CON3_UTMI_OPMODE_NODRIVING
-				| RK3288_UOC0_CON3_UTMI_XCVRSEELCT_FSTRANSC
-				| RK3288_UOC0_CON3_UTMI_TERMSEL_FULLSPEED,
-			    RK3288_UOC0_CON3_UTMI_SUSPENDN
-				| RK3288_UOC0_CON3_UTMI_OPMODE_MASK
-				| RK3288_UOC0_CON3_UTMI_XCVRSEELCT_MASK
-				| RK3288_UOC0_CON3_UTMI_TERMSEL_FULLSPEED);
-	ret = regmap_write(grf, RK3288_UOC0_CON3, val);
+	ret = rockchip_init_usb_uart_common(grf, pdata);
 	if (ret)
 		return ret;
 
@@ -516,7 +565,7 @@ static int __init rockchip_init_usb_uart(void)
 		return PTR_ERR(grf);
 	}
 
-	ret = data->init_usb_uart(grf);
+	ret = data->init_usb_uart(grf, data);
 	if (ret) {
 		pr_err("%s: could not init usb_uart, %d\n", __func__, ret);
 		enable_usb_uart = 0;
diff --git a/drivers/phy/socionext/Kconfig b/drivers/phy/socionext/Kconfig
new file mode 100644
index 000000000000..467e8147972b
--- /dev/null
+++ b/drivers/phy/socionext/Kconfig
@@ -0,0 +1,34 @@
+#
+# PHY drivers for Socionext platforms.
+#
+
+config PHY_UNIPHIER_USB2
+	tristate "UniPhier USB2 PHY driver"
+	depends on ARCH_UNIPHIER || COMPILE_TEST
+	depends on OF && HAS_IOMEM
+	select GENERIC_PHY
+	select MFD_SYSCON
+	help
+	  Enable this to support USB PHY implemented on USB2 controller
+	  on UniPhier SoCs. This driver provides interface to interact
+	  with USB 2.0 PHY that is part of the UniPhier SoC.
+	  In case of Pro4, it is necessary to specify this USB2 PHY instead
+	  of USB3 HS-PHY.
+
+config PHY_UNIPHIER_USB3
+	tristate "UniPhier USB3 PHY driver"
+	depends on ARCH_UNIPHIER || COMPILE_TEST
+	depends on OF && HAS_IOMEM
+	select GENERIC_PHY
+	help
+	  Enable this to support USB PHY implemented in USB3 controller
+	  on UniPhier SoCs. This controller supports USB3.0 and lower speed.
+
+config PHY_UNIPHIER_PCIE
+	tristate "Uniphier PHY driver for PCIe controller"
+	depends on (ARCH_UNIPHIER || COMPILE_TEST) && OF
+	default PCIE_UNIPHIER
+	select GENERIC_PHY
+	help
+	  Enable this to support PHY implemented in PCIe controller
+	  on UniPhier SoCs. This driver supports LD20 and PXs3 SoCs.
diff --git a/drivers/phy/socionext/Makefile b/drivers/phy/socionext/Makefile
new file mode 100644
index 000000000000..7dc9095b5bb7
--- /dev/null
+++ b/drivers/phy/socionext/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the phy drivers.
+#
+
+obj-$(CONFIG_PHY_UNIPHIER_USB2)	+= phy-uniphier-usb2.o
+obj-$(CONFIG_PHY_UNIPHIER_USB3)	+= phy-uniphier-usb3hs.o phy-uniphier-usb3ss.o
+obj-$(CONFIG_PHY_UNIPHIER_PCIE)	+= phy-uniphier-pcie.o
diff --git a/drivers/phy/socionext/phy-uniphier-pcie.c b/drivers/phy/socionext/phy-uniphier-pcie.c
new file mode 100644
index 000000000000..93ffbd2940fa
--- /dev/null
+++ b/drivers/phy/socionext/phy-uniphier-pcie.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * phy-uniphier-pcie.c - PHY driver for UniPhier PCIe controller
+ * Copyright 2018, Socionext Inc.
+ * Author: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/iopoll.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/resource.h>
+
+/* PHY */
+#define PCL_PHY_TEST_I		0x2000
+#define PCL_PHY_TEST_O		0x2004
+#define TESTI_DAT_MASK		GENMASK(13, 6)
+#define TESTI_ADR_MASK		GENMASK(5, 1)
+#define TESTI_WR_EN		BIT(0)
+
+#define PCL_PHY_RESET		0x200c
+#define PCL_PHY_RESET_N_MNMODE	BIT(8)	/* =1:manual */
+#define PCL_PHY_RESET_N		BIT(0)	/* =1:deasssert */
+
+/* SG */
+#define SG_USBPCIESEL		0x590
+#define SG_USBPCIESEL_PCIE	BIT(0)
+
+#define PCL_PHY_R00		0
+#define   RX_EQ_ADJ_EN		BIT(3)		/* enable for EQ adjustment */
+#define PCL_PHY_R06		6
+#define   RX_EQ_ADJ		GENMASK(5, 0)	/* EQ adjustment value */
+#define   RX_EQ_ADJ_VAL		0
+#define PCL_PHY_R26		26
+#define   VCO_CTRL		GENMASK(7, 4)	/* Tx VCO adjustment value */
+#define   VCO_CTRL_INIT_VAL	5
+
+struct uniphier_pciephy_priv {
+	void __iomem *base;
+	struct device *dev;
+	struct clk *clk;
+	struct reset_control *rst;
+	const struct uniphier_pciephy_soc_data *data;
+};
+
+struct uniphier_pciephy_soc_data {
+	bool has_syscon;
+};
+
+static void uniphier_pciephy_testio_write(struct uniphier_pciephy_priv *priv,
+					  u32 data)
+{
+	/* need to read TESTO twice after accessing TESTI */
+	writel(data, priv->base + PCL_PHY_TEST_I);
+	readl(priv->base + PCL_PHY_TEST_O);
+	readl(priv->base + PCL_PHY_TEST_O);
+}
+
+static void uniphier_pciephy_set_param(struct uniphier_pciephy_priv *priv,
+				       u32 reg, u32 mask, u32 param)
+{
+	u32 val;
+
+	/* read previous data */
+	val  = FIELD_PREP(TESTI_DAT_MASK, 1);
+	val |= FIELD_PREP(TESTI_ADR_MASK, reg);
+	uniphier_pciephy_testio_write(priv, val);
+	val = readl(priv->base + PCL_PHY_TEST_O);
+
+	/* update value */
+	val &= ~FIELD_PREP(TESTI_DAT_MASK, mask);
+	val  = FIELD_PREP(TESTI_DAT_MASK, mask & param);
+	val |= FIELD_PREP(TESTI_ADR_MASK, reg);
+	uniphier_pciephy_testio_write(priv, val);
+	uniphier_pciephy_testio_write(priv, val | TESTI_WR_EN);
+	uniphier_pciephy_testio_write(priv, val);
+
+	/* read current data as dummy */
+	val  = FIELD_PREP(TESTI_DAT_MASK, 1);
+	val |= FIELD_PREP(TESTI_ADR_MASK, reg);
+	uniphier_pciephy_testio_write(priv, val);
+	readl(priv->base + PCL_PHY_TEST_O);
+}
+
+static void uniphier_pciephy_assert(struct uniphier_pciephy_priv *priv)
+{
+	u32 val;
+
+	val = readl(priv->base + PCL_PHY_RESET);
+	val &= ~PCL_PHY_RESET_N;
+	val |= PCL_PHY_RESET_N_MNMODE;
+	writel(val, priv->base + PCL_PHY_RESET);
+}
+
+static void uniphier_pciephy_deassert(struct uniphier_pciephy_priv *priv)
+{
+	u32 val;
+
+	val = readl(priv->base + PCL_PHY_RESET);
+	val |= PCL_PHY_RESET_N_MNMODE | PCL_PHY_RESET_N;
+	writel(val, priv->base + PCL_PHY_RESET);
+}
+
+static int uniphier_pciephy_init(struct phy *phy)
+{
+	struct uniphier_pciephy_priv *priv = phy_get_drvdata(phy);
+	int ret;
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret)
+		return ret;
+
+	ret = reset_control_deassert(priv->rst);
+	if (ret)
+		goto out_clk_disable;
+
+	uniphier_pciephy_set_param(priv, PCL_PHY_R00,
+				   RX_EQ_ADJ_EN, RX_EQ_ADJ_EN);
+	uniphier_pciephy_set_param(priv, PCL_PHY_R06, RX_EQ_ADJ,
+				   FIELD_PREP(RX_EQ_ADJ, RX_EQ_ADJ_VAL));
+	uniphier_pciephy_set_param(priv, PCL_PHY_R26, VCO_CTRL,
+				   FIELD_PREP(VCO_CTRL, VCO_CTRL_INIT_VAL));
+	usleep_range(1, 10);
+
+	uniphier_pciephy_deassert(priv);
+	usleep_range(1, 10);
+
+	return 0;
+
+out_clk_disable:
+	clk_disable_unprepare(priv->clk);
+
+	return ret;
+}
+
+static int uniphier_pciephy_exit(struct phy *phy)
+{
+	struct uniphier_pciephy_priv *priv = phy_get_drvdata(phy);
+
+	uniphier_pciephy_assert(priv);
+	reset_control_assert(priv->rst);
+	clk_disable_unprepare(priv->clk);
+
+	return 0;
+}
+
+static const struct phy_ops uniphier_pciephy_ops = {
+	.init  = uniphier_pciephy_init,
+	.exit  = uniphier_pciephy_exit,
+	.owner = THIS_MODULE,
+};
+
+static int uniphier_pciephy_probe(struct platform_device *pdev)
+{
+	struct uniphier_pciephy_priv *priv;
+	struct phy_provider *phy_provider;
+	struct device *dev = &pdev->dev;
+	struct regmap *regmap;
+	struct resource *res;
+	struct phy *phy;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->data = of_device_get_match_data(dev);
+	if (WARN_ON(!priv->data))
+		return -EINVAL;
+
+	priv->dev = dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	priv->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(priv->clk))
+		return PTR_ERR(priv->clk);
+
+	priv->rst = devm_reset_control_get_shared(dev, NULL);
+	if (IS_ERR(priv->rst))
+		return PTR_ERR(priv->rst);
+
+	phy = devm_phy_create(dev, dev->of_node, &uniphier_pciephy_ops);
+	if (IS_ERR(phy))
+		return PTR_ERR(phy);
+
+	regmap = syscon_regmap_lookup_by_phandle(dev->of_node,
+						 "socionext,syscon");
+	if (!IS_ERR(regmap) && priv->data->has_syscon)
+		regmap_update_bits(regmap, SG_USBPCIESEL,
+				   SG_USBPCIESEL_PCIE, SG_USBPCIESEL_PCIE);
+
+	phy_set_drvdata(phy, priv);
+	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+
+	return PTR_ERR_OR_ZERO(phy_provider);
+}
+
+static const struct uniphier_pciephy_soc_data uniphier_ld20_data = {
+	.has_syscon = true,
+};
+
+static const struct uniphier_pciephy_soc_data uniphier_pxs3_data = {
+	.has_syscon = false,
+};
+
+static const struct of_device_id uniphier_pciephy_match[] = {
+	{
+		.compatible = "socionext,uniphier-ld20-pcie-phy",
+		.data = &uniphier_ld20_data,
+	},
+	{
+		.compatible = "socionext,uniphier-pxs3-pcie-phy",
+		.data = &uniphier_pxs3_data,
+	},
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, uniphier_pciephy_match);
+
+static struct platform_driver uniphier_pciephy_driver = {
+	.probe = uniphier_pciephy_probe,
+	.driver = {
+		.name = "uniphier-pcie-phy",
+		.of_match_table = uniphier_pciephy_match,
+	},
+};
+module_platform_driver(uniphier_pciephy_driver);
+
+MODULE_AUTHOR("Kunihiko Hayashi <hayashi.kunihiko@socionext.com>");
+MODULE_DESCRIPTION("UniPhier PHY driver for PCIe controller");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/socionext/phy-uniphier-usb2.c b/drivers/phy/socionext/phy-uniphier-usb2.c
new file mode 100644
index 000000000000..3f2086ed4fe4
--- /dev/null
+++ b/drivers/phy/socionext/phy-uniphier-usb2.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * phy-uniphier-usb2.c - PHY driver for UniPhier USB2 controller
+ * Copyright 2015-2018 Socionext Inc.
+ * Author:
+ *      Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
+ */
+
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+
+#define SG_USBPHY1CTRL		0x500
+#define SG_USBPHY1CTRL2		0x504
+#define SG_USBPHY2CTRL		0x508
+#define SG_USBPHY2CTRL2		0x50c	/* LD11 */
+#define SG_USBPHY12PLL		0x50c	/* Pro4 */
+#define SG_USBPHY3CTRL		0x510
+#define SG_USBPHY3CTRL2		0x514
+#define SG_USBPHY4CTRL		0x518	/* Pro4 */
+#define SG_USBPHY4CTRL2		0x51c	/* Pro4 */
+#define SG_USBPHY34PLL		0x51c	/* Pro4 */
+
+struct uniphier_u2phy_param {
+	u32 offset;
+	u32 value;
+};
+
+struct uniphier_u2phy_soc_data {
+	struct uniphier_u2phy_param config0;
+	struct uniphier_u2phy_param config1;
+};
+
+struct uniphier_u2phy_priv {
+	struct regmap *regmap;
+	struct phy *phy;
+	struct regulator *vbus;
+	const struct uniphier_u2phy_soc_data *data;
+	struct uniphier_u2phy_priv *next;
+};
+
+static int uniphier_u2phy_power_on(struct phy *phy)
+{
+	struct uniphier_u2phy_priv *priv = phy_get_drvdata(phy);
+	int ret = 0;
+
+	if (priv->vbus)
+		ret = regulator_enable(priv->vbus);
+
+	return ret;
+}
+
+static int uniphier_u2phy_power_off(struct phy *phy)
+{
+	struct uniphier_u2phy_priv *priv = phy_get_drvdata(phy);
+
+	if (priv->vbus)
+		regulator_disable(priv->vbus);
+
+	return 0;
+}
+
+static int uniphier_u2phy_init(struct phy *phy)
+{
+	struct uniphier_u2phy_priv *priv = phy_get_drvdata(phy);
+
+	if (!priv->data)
+		return 0;
+
+	regmap_write(priv->regmap, priv->data->config0.offset,
+		     priv->data->config0.value);
+	regmap_write(priv->regmap, priv->data->config1.offset,
+		     priv->data->config1.value);
+
+	return 0;
+}
+
+static struct phy *uniphier_u2phy_xlate(struct device *dev,
+					struct of_phandle_args *args)
+{
+	struct uniphier_u2phy_priv *priv = dev_get_drvdata(dev);
+
+	while (priv && args->np != priv->phy->dev.of_node)
+		priv = priv->next;
+
+	if (!priv) {
+		dev_err(dev, "Failed to find appropriate phy\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	return priv->phy;
+}
+
+static const struct phy_ops uniphier_u2phy_ops = {
+	.init      = uniphier_u2phy_init,
+	.power_on  = uniphier_u2phy_power_on,
+	.power_off = uniphier_u2phy_power_off,
+	.owner = THIS_MODULE,
+};
+
+static int uniphier_u2phy_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *parent, *child;
+	struct uniphier_u2phy_priv *priv = NULL, *next = NULL;
+	struct phy_provider *phy_provider;
+	struct regmap *regmap;
+	const struct uniphier_u2phy_soc_data *data;
+	int ret, data_idx, ndatas;
+
+	data = of_device_get_match_data(dev);
+	if (WARN_ON(!data))
+		return -EINVAL;
+
+	/* get number of data */
+	for (ndatas = 0; data[ndatas].config0.offset; ndatas++)
+		;
+
+	parent = of_get_parent(dev->of_node);
+	regmap = syscon_node_to_regmap(parent);
+	of_node_put(parent);
+	if (IS_ERR(regmap)) {
+		dev_err(dev, "Failed to get regmap\n");
+		return PTR_ERR(regmap);
+	}
+
+	for_each_child_of_node(dev->of_node, child) {
+		priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+		if (!priv) {
+			ret = -ENOMEM;
+			goto out_put_child;
+		}
+		priv->regmap = regmap;
+
+		priv->vbus = devm_regulator_get_optional(dev, "vbus");
+		if (IS_ERR(priv->vbus)) {
+			if (PTR_ERR(priv->vbus) == -EPROBE_DEFER) {
+				ret = PTR_ERR(priv->vbus);
+				goto out_put_child;
+			}
+			priv->vbus = NULL;
+		}
+
+		priv->phy = devm_phy_create(dev, child, &uniphier_u2phy_ops);
+		if (IS_ERR(priv->phy)) {
+			dev_err(dev, "Failed to create phy\n");
+			ret = PTR_ERR(priv->phy);
+			goto out_put_child;
+		}
+
+		ret = of_property_read_u32(child, "reg", &data_idx);
+		if (ret) {
+			dev_err(dev, "Failed to get reg property\n");
+			goto out_put_child;
+		}
+
+		if (data_idx < ndatas)
+			priv->data = &data[data_idx];
+		else
+			dev_warn(dev, "No phy configuration: %s\n",
+				 child->full_name);
+
+		phy_set_drvdata(priv->phy, priv);
+		priv->next = next;
+		next = priv;
+	}
+
+	dev_set_drvdata(dev, priv);
+	phy_provider = devm_of_phy_provider_register(dev,
+						     uniphier_u2phy_xlate);
+	return PTR_ERR_OR_ZERO(phy_provider);
+
+out_put_child:
+	of_node_put(child);
+
+	return ret;
+}
+
+static const struct uniphier_u2phy_soc_data uniphier_pro4_data[] = {
+	{
+		.config0 = { SG_USBPHY1CTRL, 0x05142400 },
+		.config1 = { SG_USBPHY12PLL, 0x00010010 },
+	},
+	{
+		.config0 = { SG_USBPHY2CTRL, 0x05142400 },
+		.config1 = { SG_USBPHY12PLL, 0x00010010 },
+	},
+	{
+		.config0 = { SG_USBPHY3CTRL, 0x05142400 },
+		.config1 = { SG_USBPHY34PLL, 0x00010010 },
+	},
+	{
+		.config0 = { SG_USBPHY4CTRL, 0x05142400 },
+		.config1 = { SG_USBPHY34PLL, 0x00010010 },
+	},
+	{ /* sentinel */ }
+};
+
+static const struct uniphier_u2phy_soc_data uniphier_ld11_data[] = {
+	{
+		.config0 = { SG_USBPHY1CTRL,  0x82280000 },
+		.config1 = { SG_USBPHY1CTRL2, 0x00000106 },
+	},
+	{
+		.config0 = { SG_USBPHY2CTRL,  0x82280000 },
+		.config1 = { SG_USBPHY2CTRL2, 0x00000106 },
+	},
+	{
+		.config0 = { SG_USBPHY3CTRL,  0x82280000 },
+		.config1 = { SG_USBPHY3CTRL2, 0x00000106 },
+	},
+	{ /* sentinel */ }
+};
+
+static const struct of_device_id uniphier_u2phy_match[] = {
+	{
+		.compatible = "socionext,uniphier-pro4-usb2-phy",
+		.data = &uniphier_pro4_data,
+	},
+	{
+		.compatible = "socionext,uniphier-ld11-usb2-phy",
+		.data = &uniphier_ld11_data,
+	},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, uniphier_u2phy_match);
+
+static struct platform_driver uniphier_u2phy_driver = {
+	.probe = uniphier_u2phy_probe,
+	.driver = {
+		.name = "uniphier-usb2-phy",
+		.of_match_table = uniphier_u2phy_match,
+	},
+};
+module_platform_driver(uniphier_u2phy_driver);
+
+MODULE_AUTHOR("Kunihiko Hayashi <hayashi.kunihiko@socionext.com>");
+MODULE_DESCRIPTION("UniPhier PHY driver for USB2 controller");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/socionext/phy-uniphier-usb3hs.c b/drivers/phy/socionext/phy-uniphier-usb3hs.c
new file mode 100644
index 000000000000..b1b048be6166
--- /dev/null
+++ b/drivers/phy/socionext/phy-uniphier-usb3hs.c
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * phy-uniphier-usb3hs.c - HS-PHY driver for Socionext UniPhier USB3 controller
+ * Copyright 2015-2018 Socionext Inc.
+ * Author:
+ *      Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
+ * Contributors:
+ *      Motoya Tanigawa <tanigawa.motoya@socionext.com>
+ *      Masami Hiramatsu <masami.hiramatsu@linaro.org>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/nvmem-consumer.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+
+#define HSPHY_CFG0		0x0
+#define HSPHY_CFG0_HS_I_MASK	GENMASK(31, 28)
+#define HSPHY_CFG0_HSDISC_MASK	GENMASK(27, 26)
+#define HSPHY_CFG0_SWING_MASK	GENMASK(17, 16)
+#define HSPHY_CFG0_SEL_T_MASK	GENMASK(15, 12)
+#define HSPHY_CFG0_RTERM_MASK	GENMASK(7, 6)
+#define HSPHY_CFG0_TRIMMASK	(HSPHY_CFG0_HS_I_MASK \
+				 | HSPHY_CFG0_SEL_T_MASK \
+				 | HSPHY_CFG0_RTERM_MASK)
+
+#define HSPHY_CFG1		0x4
+#define HSPHY_CFG1_DAT_EN	BIT(29)
+#define HSPHY_CFG1_ADR_EN	BIT(28)
+#define HSPHY_CFG1_ADR_MASK	GENMASK(27, 16)
+#define HSPHY_CFG1_DAT_MASK	GENMASK(23, 16)
+
+#define PHY_F(regno, msb, lsb) { (regno), (msb), (lsb) }
+
+#define LS_SLEW		PHY_F(10, 6, 6)	/* LS mode slew rate */
+#define FS_LS_DRV	PHY_F(10, 5, 5)	/* FS/LS slew rate */
+
+#define MAX_PHY_PARAMS	2
+
+struct uniphier_u3hsphy_param {
+	struct {
+		int reg_no;
+		int msb;
+		int lsb;
+	} field;
+	u8 value;
+};
+
+struct uniphier_u3hsphy_trim_param {
+	unsigned int rterm;
+	unsigned int sel_t;
+	unsigned int hs_i;
+};
+
+#define trim_param_is_valid(p)	((p)->rterm || (p)->sel_t || (p)->hs_i)
+
+struct uniphier_u3hsphy_priv {
+	struct device *dev;
+	void __iomem *base;
+	struct clk *clk, *clk_parent, *clk_ext;
+	struct reset_control *rst, *rst_parent;
+	struct regulator *vbus;
+	const struct uniphier_u3hsphy_soc_data *data;
+};
+
+struct uniphier_u3hsphy_soc_data {
+	int nparams;
+	const struct uniphier_u3hsphy_param param[MAX_PHY_PARAMS];
+	u32 config0;
+	u32 config1;
+	void (*trim_func)(struct uniphier_u3hsphy_priv *priv, u32 *pconfig,
+			  struct uniphier_u3hsphy_trim_param *pt);
+};
+
+static void uniphier_u3hsphy_trim_ld20(struct uniphier_u3hsphy_priv *priv,
+				       u32 *pconfig,
+				       struct uniphier_u3hsphy_trim_param *pt)
+{
+	*pconfig &= ~HSPHY_CFG0_RTERM_MASK;
+	*pconfig |= FIELD_PREP(HSPHY_CFG0_RTERM_MASK, pt->rterm);
+
+	*pconfig &= ~HSPHY_CFG0_SEL_T_MASK;
+	*pconfig |= FIELD_PREP(HSPHY_CFG0_SEL_T_MASK, pt->sel_t);
+
+	*pconfig &= ~HSPHY_CFG0_HS_I_MASK;
+	*pconfig |= FIELD_PREP(HSPHY_CFG0_HS_I_MASK,  pt->hs_i);
+}
+
+static int uniphier_u3hsphy_get_nvparam(struct uniphier_u3hsphy_priv *priv,
+					const char *name, unsigned int *val)
+{
+	struct nvmem_cell *cell;
+	u8 *buf;
+
+	cell = devm_nvmem_cell_get(priv->dev, name);
+	if (IS_ERR(cell))
+		return PTR_ERR(cell);
+
+	buf = nvmem_cell_read(cell, NULL);
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	*val = *buf;
+
+	kfree(buf);
+
+	return 0;
+}
+
+static int uniphier_u3hsphy_get_nvparams(struct uniphier_u3hsphy_priv *priv,
+					 struct uniphier_u3hsphy_trim_param *pt)
+{
+	int ret;
+
+	ret = uniphier_u3hsphy_get_nvparam(priv, "rterm", &pt->rterm);
+	if (ret)
+		return ret;
+
+	ret = uniphier_u3hsphy_get_nvparam(priv, "sel_t", &pt->sel_t);
+	if (ret)
+		return ret;
+
+	ret = uniphier_u3hsphy_get_nvparam(priv, "hs_i", &pt->hs_i);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int uniphier_u3hsphy_update_config(struct uniphier_u3hsphy_priv *priv,
+					  u32 *pconfig)
+{
+	struct uniphier_u3hsphy_trim_param trim;
+	int ret, trimmed = 0;
+
+	if (priv->data->trim_func) {
+		ret = uniphier_u3hsphy_get_nvparams(priv, &trim);
+		if (ret == -EPROBE_DEFER)
+			return ret;
+
+		/*
+		 * call trim_func only when trimming parameters that aren't
+		 * all-zero can be acquired. All-zero parameters mean nothing
+		 * has been written to nvmem.
+		 */
+		if (!ret && trim_param_is_valid(&trim)) {
+			priv->data->trim_func(priv, pconfig, &trim);
+			trimmed = 1;
+		} else {
+			dev_dbg(priv->dev, "can't get parameter from nvmem\n");
+		}
+	}
+
+	/* use default parameters without trimming values */
+	if (!trimmed) {
+		*pconfig &= ~HSPHY_CFG0_HSDISC_MASK;
+		*pconfig |= FIELD_PREP(HSPHY_CFG0_HSDISC_MASK, 3);
+	}
+
+	return 0;
+}
+
+static void uniphier_u3hsphy_set_param(struct uniphier_u3hsphy_priv *priv,
+				       const struct uniphier_u3hsphy_param *p)
+{
+	u32 val;
+	u32 field_mask = GENMASK(p->field.msb, p->field.lsb);
+	u8 data;
+
+	val = readl(priv->base + HSPHY_CFG1);
+	val &= ~HSPHY_CFG1_ADR_MASK;
+	val |= FIELD_PREP(HSPHY_CFG1_ADR_MASK, p->field.reg_no)
+		| HSPHY_CFG1_ADR_EN;
+	writel(val, priv->base + HSPHY_CFG1);
+
+	val = readl(priv->base + HSPHY_CFG1);
+	val &= ~HSPHY_CFG1_ADR_EN;
+	writel(val, priv->base + HSPHY_CFG1);
+
+	val = readl(priv->base + HSPHY_CFG1);
+	val &= ~FIELD_PREP(HSPHY_CFG1_DAT_MASK, field_mask);
+	data = field_mask & (p->value << p->field.lsb);
+	val |=  FIELD_PREP(HSPHY_CFG1_DAT_MASK, data) | HSPHY_CFG1_DAT_EN;
+	writel(val, priv->base + HSPHY_CFG1);
+
+	val = readl(priv->base + HSPHY_CFG1);
+	val &= ~HSPHY_CFG1_DAT_EN;
+	writel(val, priv->base + HSPHY_CFG1);
+}
+
+static int uniphier_u3hsphy_power_on(struct phy *phy)
+{
+	struct uniphier_u3hsphy_priv *priv = phy_get_drvdata(phy);
+	int ret;
+
+	ret = clk_prepare_enable(priv->clk_ext);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret)
+		goto out_clk_ext_disable;
+
+	ret = reset_control_deassert(priv->rst);
+	if (ret)
+		goto out_clk_disable;
+
+	if (priv->vbus) {
+		ret = regulator_enable(priv->vbus);
+		if (ret)
+			goto out_rst_assert;
+	}
+
+	return 0;
+
+out_rst_assert:
+	reset_control_assert(priv->rst);
+out_clk_disable:
+	clk_disable_unprepare(priv->clk);
+out_clk_ext_disable:
+	clk_disable_unprepare(priv->clk_ext);
+
+	return ret;
+}
+
+static int uniphier_u3hsphy_power_off(struct phy *phy)
+{
+	struct uniphier_u3hsphy_priv *priv = phy_get_drvdata(phy);
+
+	if (priv->vbus)
+		regulator_disable(priv->vbus);
+
+	reset_control_assert(priv->rst);
+	clk_disable_unprepare(priv->clk);
+	clk_disable_unprepare(priv->clk_ext);
+
+	return 0;
+}
+
+static int uniphier_u3hsphy_init(struct phy *phy)
+{
+	struct uniphier_u3hsphy_priv *priv = phy_get_drvdata(phy);
+	u32 config0, config1;
+	int i, ret;
+
+	ret = clk_prepare_enable(priv->clk_parent);
+	if (ret)
+		return ret;
+
+	ret = reset_control_deassert(priv->rst_parent);
+	if (ret)
+		goto out_clk_disable;
+
+	if (!priv->data->config0 && !priv->data->config1)
+		return 0;
+
+	config0 = priv->data->config0;
+	config1 = priv->data->config1;
+
+	ret = uniphier_u3hsphy_update_config(priv, &config0);
+	if (ret)
+		goto out_rst_assert;
+
+	writel(config0, priv->base + HSPHY_CFG0);
+	writel(config1, priv->base + HSPHY_CFG1);
+
+	for (i = 0; i < priv->data->nparams; i++)
+		uniphier_u3hsphy_set_param(priv, &priv->data->param[i]);
+
+	return 0;
+
+out_rst_assert:
+	reset_control_assert(priv->rst_parent);
+out_clk_disable:
+	clk_disable_unprepare(priv->clk_parent);
+
+	return ret;
+}
+
+static int uniphier_u3hsphy_exit(struct phy *phy)
+{
+	struct uniphier_u3hsphy_priv *priv = phy_get_drvdata(phy);
+
+	reset_control_assert(priv->rst_parent);
+	clk_disable_unprepare(priv->clk_parent);
+
+	return 0;
+}
+
+static const struct phy_ops uniphier_u3hsphy_ops = {
+	.init           = uniphier_u3hsphy_init,
+	.exit           = uniphier_u3hsphy_exit,
+	.power_on       = uniphier_u3hsphy_power_on,
+	.power_off      = uniphier_u3hsphy_power_off,
+	.owner          = THIS_MODULE,
+};
+
+static int uniphier_u3hsphy_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct uniphier_u3hsphy_priv *priv;
+	struct phy_provider *phy_provider;
+	struct resource *res;
+	struct phy *phy;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->dev = dev;
+	priv->data = of_device_get_match_data(dev);
+	if (WARN_ON(!priv->data ||
+		    priv->data->nparams > MAX_PHY_PARAMS))
+		return -EINVAL;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	priv->clk = devm_clk_get(dev, "phy");
+	if (IS_ERR(priv->clk))
+		return PTR_ERR(priv->clk);
+
+	priv->clk_parent = devm_clk_get(dev, "link");
+	if (IS_ERR(priv->clk_parent))
+		return PTR_ERR(priv->clk_parent);
+
+	priv->clk_ext = devm_clk_get(dev, "phy-ext");
+	if (IS_ERR(priv->clk_ext)) {
+		if (PTR_ERR(priv->clk_ext) == -ENOENT)
+			priv->clk_ext = NULL;
+		else
+			return PTR_ERR(priv->clk_ext);
+	}
+
+	priv->rst = devm_reset_control_get_shared(dev, "phy");
+	if (IS_ERR(priv->rst))
+		return PTR_ERR(priv->rst);
+
+	priv->rst_parent = devm_reset_control_get_shared(dev, "link");
+	if (IS_ERR(priv->rst_parent))
+		return PTR_ERR(priv->rst_parent);
+
+	priv->vbus = devm_regulator_get_optional(dev, "vbus");
+	if (IS_ERR(priv->vbus)) {
+		if (PTR_ERR(priv->vbus) == -EPROBE_DEFER)
+			return PTR_ERR(priv->vbus);
+		priv->vbus = NULL;
+	}
+
+	phy = devm_phy_create(dev, dev->of_node, &uniphier_u3hsphy_ops);
+	if (IS_ERR(phy))
+		return PTR_ERR(phy);
+
+	phy_set_drvdata(phy, priv);
+	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+
+	return PTR_ERR_OR_ZERO(phy_provider);
+}
+
+static const struct uniphier_u3hsphy_soc_data uniphier_pxs2_data = {
+	.nparams = 0,
+};
+
+static const struct uniphier_u3hsphy_soc_data uniphier_ld20_data = {
+	.nparams = 2,
+	.param = {
+		{ LS_SLEW, 1 },
+		{ FS_LS_DRV, 1 },
+	},
+	.trim_func = uniphier_u3hsphy_trim_ld20,
+	.config0 = 0x92316680,
+	.config1 = 0x00000106,
+};
+
+static const struct uniphier_u3hsphy_soc_data uniphier_pxs3_data = {
+	.nparams = 0,
+	.trim_func = uniphier_u3hsphy_trim_ld20,
+	.config0 = 0x92316680,
+	.config1 = 0x00000106,
+};
+
+static const struct of_device_id uniphier_u3hsphy_match[] = {
+	{
+		.compatible = "socionext,uniphier-pxs2-usb3-hsphy",
+		.data = &uniphier_pxs2_data,
+	},
+	{
+		.compatible = "socionext,uniphier-ld20-usb3-hsphy",
+		.data = &uniphier_ld20_data,
+	},
+	{
+		.compatible = "socionext,uniphier-pxs3-usb3-hsphy",
+		.data = &uniphier_pxs3_data,
+	},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, uniphier_u3hsphy_match);
+
+static struct platform_driver uniphier_u3hsphy_driver = {
+	.probe = uniphier_u3hsphy_probe,
+	.driver	= {
+		.name = "uniphier-usb3-hsphy",
+		.of_match_table	= uniphier_u3hsphy_match,
+	},
+};
+
+module_platform_driver(uniphier_u3hsphy_driver);
+
+MODULE_AUTHOR("Kunihiko Hayashi <hayashi.kunihiko@socionext.com>");
+MODULE_DESCRIPTION("UniPhier HS-PHY driver for USB3 controller");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/socionext/phy-uniphier-usb3ss.c b/drivers/phy/socionext/phy-uniphier-usb3ss.c
new file mode 100644
index 000000000000..4be95679c7d8
--- /dev/null
+++ b/drivers/phy/socionext/phy-uniphier-usb3ss.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * phy-uniphier-usb3ss.c - SS-PHY driver for Socionext UniPhier USB3 controller
+ * Copyright 2015-2018 Socionext Inc.
+ * Author:
+ *      Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
+ * Contributors:
+ *      Motoya Tanigawa <tanigawa.motoya@socionext.com>
+ *      Masami Hiramatsu <masami.hiramatsu@linaro.org>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/reset.h>
+
+#define SSPHY_TESTI		0x0
+#define SSPHY_TESTO		0x4
+#define TESTI_DAT_MASK		GENMASK(13, 6)
+#define TESTI_ADR_MASK		GENMASK(5, 1)
+#define TESTI_WR_EN		BIT(0)
+
+#define PHY_F(regno, msb, lsb) { (regno), (msb), (lsb) }
+
+#define CDR_CPD_TRIM	PHY_F(7, 3, 0)	/* RxPLL charge pump current */
+#define CDR_CPF_TRIM	PHY_F(8, 3, 0)	/* RxPLL charge pump current 2 */
+#define TX_PLL_TRIM	PHY_F(9, 3, 0)	/* TxPLL charge pump current */
+#define BGAP_TRIM	PHY_F(11, 3, 0)	/* Bandgap voltage */
+#define CDR_TRIM	PHY_F(13, 6, 5)	/* Clock Data Recovery setting */
+#define VCO_CTRL	PHY_F(26, 7, 4)	/* VCO control */
+#define VCOPLL_CTRL	PHY_F(27, 2, 0)	/* TxPLL VCO tuning */
+#define VCOPLL_CM	PHY_F(28, 1, 0)	/* TxPLL voltage */
+
+#define MAX_PHY_PARAMS	7
+
+struct uniphier_u3ssphy_param {
+	struct {
+		int reg_no;
+		int msb;
+		int lsb;
+	} field;
+	u8 value;
+};
+
+struct uniphier_u3ssphy_priv {
+	struct device *dev;
+	void __iomem *base;
+	struct clk *clk, *clk_ext, *clk_parent, *clk_parent_gio;
+	struct reset_control *rst, *rst_parent, *rst_parent_gio;
+	struct regulator *vbus;
+	const struct uniphier_u3ssphy_soc_data *data;
+};
+
+struct uniphier_u3ssphy_soc_data {
+	bool is_legacy;
+	int nparams;
+	const struct uniphier_u3ssphy_param param[MAX_PHY_PARAMS];
+};
+
+static void uniphier_u3ssphy_testio_write(struct uniphier_u3ssphy_priv *priv,
+					  u32 data)
+{
+	/* need to read TESTO twice after accessing TESTI */
+	writel(data, priv->base + SSPHY_TESTI);
+	readl(priv->base + SSPHY_TESTO);
+	readl(priv->base + SSPHY_TESTO);
+}
+
+static void uniphier_u3ssphy_set_param(struct uniphier_u3ssphy_priv *priv,
+				       const struct uniphier_u3ssphy_param *p)
+{
+	u32 val;
+	u8 field_mask = GENMASK(p->field.msb, p->field.lsb);
+	u8 data;
+
+	/* read previous data */
+	val  = FIELD_PREP(TESTI_DAT_MASK, 1);
+	val |= FIELD_PREP(TESTI_ADR_MASK, p->field.reg_no);
+	uniphier_u3ssphy_testio_write(priv, val);
+	val = readl(priv->base + SSPHY_TESTO);
+
+	/* update value */
+	val &= ~FIELD_PREP(TESTI_DAT_MASK, field_mask);
+	data = field_mask & (p->value << p->field.lsb);
+	val  = FIELD_PREP(TESTI_DAT_MASK, data);
+	val |= FIELD_PREP(TESTI_ADR_MASK, p->field.reg_no);
+	uniphier_u3ssphy_testio_write(priv, val);
+	uniphier_u3ssphy_testio_write(priv, val | TESTI_WR_EN);
+	uniphier_u3ssphy_testio_write(priv, val);
+
+	/* read current data as dummy */
+	val  = FIELD_PREP(TESTI_DAT_MASK, 1);
+	val |= FIELD_PREP(TESTI_ADR_MASK, p->field.reg_no);
+	uniphier_u3ssphy_testio_write(priv, val);
+	readl(priv->base + SSPHY_TESTO);
+}
+
+static int uniphier_u3ssphy_power_on(struct phy *phy)
+{
+	struct uniphier_u3ssphy_priv *priv = phy_get_drvdata(phy);
+	int ret;
+
+	ret = clk_prepare_enable(priv->clk_ext);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret)
+		goto out_clk_ext_disable;
+
+	ret = reset_control_deassert(priv->rst);
+	if (ret)
+		goto out_clk_disable;
+
+	if (priv->vbus) {
+		ret = regulator_enable(priv->vbus);
+		if (ret)
+			goto out_rst_assert;
+	}
+
+	return 0;
+
+out_rst_assert:
+	reset_control_assert(priv->rst);
+out_clk_disable:
+	clk_disable_unprepare(priv->clk);
+out_clk_ext_disable:
+	clk_disable_unprepare(priv->clk_ext);
+
+	return ret;
+}
+
+static int uniphier_u3ssphy_power_off(struct phy *phy)
+{
+	struct uniphier_u3ssphy_priv *priv = phy_get_drvdata(phy);
+
+	if (priv->vbus)
+		regulator_disable(priv->vbus);
+
+	reset_control_assert(priv->rst);
+	clk_disable_unprepare(priv->clk);
+	clk_disable_unprepare(priv->clk_ext);
+
+	return 0;
+}
+
+static int uniphier_u3ssphy_init(struct phy *phy)
+{
+	struct uniphier_u3ssphy_priv *priv = phy_get_drvdata(phy);
+	int i, ret;
+
+	ret = clk_prepare_enable(priv->clk_parent);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(priv->clk_parent_gio);
+	if (ret)
+		goto out_clk_disable;
+
+	ret = reset_control_deassert(priv->rst_parent);
+	if (ret)
+		goto out_clk_gio_disable;
+
+	ret = reset_control_deassert(priv->rst_parent_gio);
+	if (ret)
+		goto out_rst_assert;
+
+	if (priv->data->is_legacy)
+		return 0;
+
+	for (i = 0; i < priv->data->nparams; i++)
+		uniphier_u3ssphy_set_param(priv, &priv->data->param[i]);
+
+	return 0;
+
+out_rst_assert:
+	reset_control_assert(priv->rst_parent);
+out_clk_gio_disable:
+	clk_disable_unprepare(priv->clk_parent_gio);
+out_clk_disable:
+	clk_disable_unprepare(priv->clk_parent);
+
+	return ret;
+}
+
+static int uniphier_u3ssphy_exit(struct phy *phy)
+{
+	struct uniphier_u3ssphy_priv *priv = phy_get_drvdata(phy);
+
+	reset_control_assert(priv->rst_parent_gio);
+	reset_control_assert(priv->rst_parent);
+	clk_disable_unprepare(priv->clk_parent_gio);
+	clk_disable_unprepare(priv->clk_parent);
+
+	return 0;
+}
+
+static const struct phy_ops uniphier_u3ssphy_ops = {
+	.init           = uniphier_u3ssphy_init,
+	.exit           = uniphier_u3ssphy_exit,
+	.power_on       = uniphier_u3ssphy_power_on,
+	.power_off      = uniphier_u3ssphy_power_off,
+	.owner          = THIS_MODULE,
+};
+
+static int uniphier_u3ssphy_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct uniphier_u3ssphy_priv *priv;
+	struct phy_provider *phy_provider;
+	struct resource *res;
+	struct phy *phy;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->dev = dev;
+	priv->data = of_device_get_match_data(dev);
+	if (WARN_ON(!priv->data ||
+		    priv->data->nparams > MAX_PHY_PARAMS))
+		return -EINVAL;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	if (!priv->data->is_legacy) {
+		priv->clk = devm_clk_get(dev, "phy");
+		if (IS_ERR(priv->clk))
+			return PTR_ERR(priv->clk);
+
+		priv->clk_ext = devm_clk_get(dev, "phy-ext");
+		if (IS_ERR(priv->clk_ext)) {
+			if (PTR_ERR(priv->clk_ext) == -ENOENT)
+				priv->clk_ext = NULL;
+			else
+				return PTR_ERR(priv->clk_ext);
+		}
+
+		priv->rst = devm_reset_control_get_shared(dev, "phy");
+		if (IS_ERR(priv->rst))
+			return PTR_ERR(priv->rst);
+	} else {
+		priv->clk_parent_gio = devm_clk_get(dev, "gio");
+		if (IS_ERR(priv->clk_parent_gio))
+			return PTR_ERR(priv->clk_parent_gio);
+
+		priv->rst_parent_gio =
+			devm_reset_control_get_shared(dev, "gio");
+		if (IS_ERR(priv->rst_parent_gio))
+			return PTR_ERR(priv->rst_parent_gio);
+	}
+
+	priv->clk_parent = devm_clk_get(dev, "link");
+	if (IS_ERR(priv->clk_parent))
+		return PTR_ERR(priv->clk_parent);
+
+	priv->rst_parent = devm_reset_control_get_shared(dev, "link");
+	if (IS_ERR(priv->rst_parent))
+		return PTR_ERR(priv->rst_parent);
+
+	priv->vbus = devm_regulator_get_optional(dev, "vbus");
+	if (IS_ERR(priv->vbus)) {
+		if (PTR_ERR(priv->vbus) == -EPROBE_DEFER)
+			return PTR_ERR(priv->vbus);
+		priv->vbus = NULL;
+	}
+
+	phy = devm_phy_create(dev, dev->of_node, &uniphier_u3ssphy_ops);
+	if (IS_ERR(phy))
+		return PTR_ERR(phy);
+
+	phy_set_drvdata(phy, priv);
+	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+
+	return PTR_ERR_OR_ZERO(phy_provider);
+}
+
+static const struct uniphier_u3ssphy_soc_data uniphier_pro4_data = {
+	.is_legacy = true,
+};
+
+static const struct uniphier_u3ssphy_soc_data uniphier_pxs2_data = {
+	.is_legacy = false,
+	.nparams = 7,
+	.param = {
+		{ CDR_CPD_TRIM, 10 },
+		{ CDR_CPF_TRIM, 3 },
+		{ TX_PLL_TRIM, 5 },
+		{ BGAP_TRIM, 9 },
+		{ CDR_TRIM, 2 },
+		{ VCOPLL_CTRL, 7 },
+		{ VCOPLL_CM, 1 },
+	},
+};
+
+static const struct uniphier_u3ssphy_soc_data uniphier_ld20_data = {
+	.is_legacy = false,
+	.nparams = 3,
+	.param = {
+		{ CDR_CPD_TRIM, 6 },
+		{ CDR_TRIM, 2 },
+		{ VCO_CTRL, 5 },
+	},
+};
+
+static const struct of_device_id uniphier_u3ssphy_match[] = {
+	{
+		.compatible = "socionext,uniphier-pro4-usb3-ssphy",
+		.data = &uniphier_pro4_data,
+	},
+	{
+		.compatible = "socionext,uniphier-pxs2-usb3-ssphy",
+		.data = &uniphier_pxs2_data,
+	},
+	{
+		.compatible = "socionext,uniphier-ld20-usb3-ssphy",
+		.data = &uniphier_ld20_data,
+	},
+	{
+		.compatible = "socionext,uniphier-pxs3-usb3-ssphy",
+		.data = &uniphier_ld20_data,
+	},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, uniphier_u3ssphy_match);
+
+static struct platform_driver uniphier_u3ssphy_driver = {
+	.probe = uniphier_u3ssphy_probe,
+	.driver	= {
+		.name = "uniphier-usb3-ssphy",
+		.of_match_table	= uniphier_u3ssphy_match,
+	},
+};
+
+module_platform_driver(uniphier_u3ssphy_driver);
+
+MODULE_AUTHOR("Kunihiko Hayashi <hayashi.kunihiko@socionext.com>");
+MODULE_DESCRIPTION("UniPhier SS-PHY driver for USB3 controller");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c
index de1b4ebe4de2..5b3b8863363e 100644
--- a/drivers/phy/tegra/xusb.c
+++ b/drivers/phy/tegra/xusb.c
@@ -115,8 +115,8 @@ int tegra_xusb_lane_parse_dt(struct tegra_xusb_lane *lane,
 
 	err = match_string(lane->soc->funcs, lane->soc->num_funcs, function);
 	if (err < 0) {
-		dev_err(dev, "invalid function \"%s\" for lane \"%s\"\n",
-			function, np->name);
+		dev_err(dev, "invalid function \"%s\" for lane \"%pOFn\"\n",
+			function, np);
 		return err;
 	}
 
diff --git a/drivers/phy/ti/phy-twl4030-usb.c b/drivers/phy/ti/phy-twl4030-usb.c
index a44680d64f9b..c267afb68f07 100644
--- a/drivers/phy/ti/phy-twl4030-usb.c
+++ b/drivers/phy/ti/phy-twl4030-usb.c
@@ -144,6 +144,7 @@
 #define PMBR1				0x0D
 #define GPIO_USB_4PIN_ULPI_2430C	(3 << 0)
 
+static irqreturn_t twl4030_usb_irq(int irq, void *_twl);
 /*
  * If VBUS is valid or ID is ground, then we know a
  * cable is present and we need to be runtime-enabled
@@ -395,6 +396,33 @@ static void __twl4030_phy_power(struct twl4030_usb *twl, int on)
 	WARN_ON(twl4030_usb_write_verify(twl, PHY_PWR_CTRL, pwr) < 0);
 }
 
+static int __maybe_unused twl4030_usb_suspend(struct device *dev)
+{
+	struct twl4030_usb *twl = dev_get_drvdata(dev);
+
+	/*
+	 * we need enabled runtime on resume,
+	 * so turn irq off here, so we do not get it early
+	 * note: wakeup on usb plug works independently of this
+	 */
+	dev_dbg(twl->dev, "%s\n", __func__);
+	disable_irq(twl->irq);
+
+	return 0;
+}
+
+static int __maybe_unused twl4030_usb_resume(struct device *dev)
+{
+	struct twl4030_usb *twl = dev_get_drvdata(dev);
+
+	dev_dbg(twl->dev, "%s\n", __func__);
+	enable_irq(twl->irq);
+	/* check whether cable status changed */
+	twl4030_usb_irq(0, twl);
+
+	return 0;
+}
+
 static int __maybe_unused twl4030_usb_runtime_suspend(struct device *dev)
 {
 	struct twl4030_usb *twl = dev_get_drvdata(dev);
@@ -655,6 +683,7 @@ static const struct phy_ops ops = {
 static const struct dev_pm_ops twl4030_usb_pm_ops = {
 	SET_RUNTIME_PM_OPS(twl4030_usb_runtime_suspend,
 			   twl4030_usb_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(twl4030_usb_suspend, twl4030_usb_resume)
 };
 
 static int twl4030_usb_probe(struct platform_device *pdev)
diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c
index 2da567540c2d..7c639006252e 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2012 Intel, Inc.
  * Copyright (C) 2013 Intel, Inc.
@@ -46,7 +47,6 @@
  * exchange is properly mapped during a transfer.
  */
 
-
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
 #include <linux/interrupt.h>
@@ -59,10 +59,11 @@
 #include <linux/bitops.h>
 #include <linux/slab.h>
 #include <linux/io.h>
-#include <linux/goldfish.h>
 #include <linux/dma-mapping.h>
 #include <linux/mm.h>
 #include <linux/acpi.h>
+#include <linux/bug.h>
+#include "goldfish_pipe_qemu.h"
 
 /*
  * Update this when something changes in the driver's behavior so the host
@@ -73,71 +74,6 @@ enum {
 	PIPE_CURRENT_DEVICE_VERSION = 2
 };
 
-/*
- * IMPORTANT: The following constants must match the ones used and defined
- * in external/qemu/hw/goldfish_pipe.c in the Android source tree.
- */
-
-/* List of bitflags returned in status of CMD_POLL command */
-enum PipePollFlags {
-	PIPE_POLL_IN	= 1 << 0,
-	PIPE_POLL_OUT	= 1 << 1,
-	PIPE_POLL_HUP	= 1 << 2
-};
-
-/* Possible status values used to signal errors - see goldfish_pipe_error_convert */
-enum PipeErrors {
-	PIPE_ERROR_INVAL  = -1,
-	PIPE_ERROR_AGAIN  = -2,
-	PIPE_ERROR_NOMEM  = -3,
-	PIPE_ERROR_IO     = -4
-};
-
-/* Bit-flags used to signal events from the emulator */
-enum PipeWakeFlags {
-	PIPE_WAKE_CLOSED = 1 << 0,  /* emulator closed pipe */
-	PIPE_WAKE_READ   = 1 << 1,  /* pipe can now be read from */
-	PIPE_WAKE_WRITE  = 1 << 2  /* pipe can now be written to */
-};
-
-/* Bit flags for the 'flags' field */
-enum PipeFlagsBits {
-	BIT_CLOSED_ON_HOST = 0,  /* pipe closed by host */
-	BIT_WAKE_ON_WRITE  = 1,  /* want to be woken on writes */
-	BIT_WAKE_ON_READ   = 2,  /* want to be woken on reads */
-};
-
-enum PipeRegs {
-	PIPE_REG_CMD = 0,
-
-	PIPE_REG_SIGNAL_BUFFER_HIGH = 4,
-	PIPE_REG_SIGNAL_BUFFER = 8,
-	PIPE_REG_SIGNAL_BUFFER_COUNT = 12,
-
-	PIPE_REG_OPEN_BUFFER_HIGH = 20,
-	PIPE_REG_OPEN_BUFFER = 24,
-
-	PIPE_REG_VERSION = 36,
-
-	PIPE_REG_GET_SIGNALLED = 48,
-};
-
-enum PipeCmdCode {
-	PIPE_CMD_OPEN = 1,	/* to be used by the pipe device itself */
-	PIPE_CMD_CLOSE,
-	PIPE_CMD_POLL,
-	PIPE_CMD_WRITE,
-	PIPE_CMD_WAKE_ON_WRITE,
-	PIPE_CMD_READ,
-	PIPE_CMD_WAKE_ON_READ,
-
-	/*
-	 * TODO(zyy): implement a deferred read/write execution to allow
-	 * parallel processing of pipe operations on the host.
-	 */
-	PIPE_CMD_WAKE_ON_DONE_IO,
-};
-
 enum {
 	MAX_BUFFERS_PER_COMMAND = 336,
 	MAX_SIGNALLED_PIPES = 64,
@@ -145,14 +81,12 @@ enum {
 };
 
 struct goldfish_pipe_dev;
-struct goldfish_pipe;
-struct goldfish_pipe_command;
 
 /* A per-pipe command structure, shared with the host */
 struct goldfish_pipe_command {
-	s32 cmd;		/* PipeCmdCode, guest -> host */
-	s32 id;			/* pipe id, guest -> host */
-	s32 status;		/* command execution status, host -> guest */
+	s32 cmd;	/* PipeCmdCode, guest -> host */
+	s32 id;		/* pipe id, guest -> host */
+	s32 status;	/* command execution status, host -> guest */
 	s32 reserved;	/* to pad to 64-bit boundary */
 	union {
 		/* Parameters for PIPE_CMD_{READ,WRITE} */
@@ -184,19 +118,21 @@ struct open_command_param {
 /* Device-level set of buffers shared with the host */
 struct goldfish_pipe_dev_buffers {
 	struct open_command_param open_command_params;
-	struct signalled_pipe_buffer signalled_pipe_buffers[
-		MAX_SIGNALLED_PIPES];
+	struct signalled_pipe_buffer
+		signalled_pipe_buffers[MAX_SIGNALLED_PIPES];
 };
 
 /* This data type models a given pipe instance */
 struct goldfish_pipe {
 	/* pipe ID - index into goldfish_pipe_dev::pipes array */
 	u32 id;
+
 	/* The wake flags pipe is waiting for
 	 * Note: not protected with any lock, uses atomic operations
 	 *  and barriers to make it thread-safe.
 	 */
 	unsigned long flags;
+
 	/* wake flags host have signalled,
 	 *  - protected by goldfish_pipe_dev::lock
 	 */
@@ -220,8 +156,12 @@ struct goldfish_pipe {
 
 	/* A wake queue for sleeping until host signals an event */
 	wait_queue_head_t wake_queue;
+
 	/* Pointer to the parent goldfish_pipe_dev instance */
 	struct goldfish_pipe_dev *dev;
+
+	/* A buffer of pages, too large to fit into a stack frame */
+	struct page *pages[MAX_BUFFERS_PER_COMMAND];
 };
 
 /* The global driver data. Holds a reference to the i/o page used to
@@ -229,6 +169,9 @@ struct goldfish_pipe {
  * waiting to be awoken.
  */
 struct goldfish_pipe_dev {
+	/* A magic number to check if this is an instance of this struct */
+	void *magic;
+
 	/*
 	 * Global device spinlock. Protects the following members:
 	 *  - pipes, pipes_capacity
@@ -261,15 +204,22 @@ struct goldfish_pipe_dev {
 	/* Head of a doubly linked list of signalled pipes */
 	struct goldfish_pipe *first_signalled_pipe;
 
+	/* ptr to platform device's device struct */
+	struct device *pdev_dev;
+
 	/* Some device-specific data */
 	int irq;
 	int version;
 	unsigned char __iomem *base;
-};
 
-static struct goldfish_pipe_dev pipe_dev[1] = {};
+	/* an irq tasklet to run goldfish_interrupt_task */
+	struct tasklet_struct irq_tasklet;
 
-static int goldfish_cmd_locked(struct goldfish_pipe *pipe, enum PipeCmdCode cmd)
+	struct miscdevice miscdev;
+};
+
+static int goldfish_pipe_cmd_locked(struct goldfish_pipe *pipe,
+				    enum PipeCmdCode cmd)
 {
 	pipe->command_buffer->cmd = cmd;
 	/* failure by default */
@@ -278,13 +228,13 @@ static int goldfish_cmd_locked(struct goldfish_pipe *pipe, enum PipeCmdCode cmd)
 	return pipe->command_buffer->status;
 }
 
-static int goldfish_cmd(struct goldfish_pipe *pipe, enum PipeCmdCode cmd)
+static int goldfish_pipe_cmd(struct goldfish_pipe *pipe, enum PipeCmdCode cmd)
 {
 	int status;
 
 	if (mutex_lock_interruptible(&pipe->lock))
 		return PIPE_ERROR_IO;
-	status = goldfish_cmd_locked(pipe, cmd);
+	status = goldfish_pipe_cmd_locked(pipe, cmd);
 	mutex_unlock(&pipe->lock);
 	return status;
 }
@@ -307,10 +257,12 @@ static int goldfish_pipe_error_convert(int status)
 	}
 }
 
-static int pin_user_pages(unsigned long first_page, unsigned long last_page,
-	unsigned int last_page_size, int is_write,
-	struct page *pages[MAX_BUFFERS_PER_COMMAND],
-	unsigned int *iter_last_page_size)
+static int pin_user_pages(unsigned long first_page,
+			  unsigned long last_page,
+			  unsigned int last_page_size,
+			  int is_write,
+			  struct page *pages[MAX_BUFFERS_PER_COMMAND],
+			  unsigned int *iter_last_page_size)
 {
 	int ret;
 	int requested_pages = ((last_page - first_page) >> PAGE_SHIFT) + 1;
@@ -322,18 +274,18 @@ static int pin_user_pages(unsigned long first_page, unsigned long last_page,
 		*iter_last_page_size = last_page_size;
 	}
 
-	ret = get_user_pages_fast(
-			first_page, requested_pages, !is_write, pages);
+	ret = get_user_pages_fast(first_page, requested_pages, !is_write,
+				  pages);
 	if (ret <= 0)
 		return -EFAULT;
 	if (ret < requested_pages)
 		*iter_last_page_size = PAGE_SIZE;
-	return ret;
 
+	return ret;
 }
 
 static void release_user_pages(struct page **pages, int pages_count,
-	int is_write, s32 consumed_size)
+			       int is_write, s32 consumed_size)
 {
 	int i;
 
@@ -345,12 +297,15 @@ static void release_user_pages(struct page **pages, int pages_count,
 }
 
 /* Populate the call parameters, merging adjacent pages together */
-static void populate_rw_params(
-	struct page **pages, int pages_count,
-	unsigned long address, unsigned long address_end,
-	unsigned long first_page, unsigned long last_page,
-	unsigned int iter_last_page_size, int is_write,
-	struct goldfish_pipe_command *command)
+static void populate_rw_params(struct page **pages,
+			       int pages_count,
+			       unsigned long address,
+			       unsigned long address_end,
+			       unsigned long first_page,
+			       unsigned long last_page,
+			       unsigned int iter_last_page_size,
+			       int is_write,
+			       struct goldfish_pipe_command *command)
 {
 	/*
 	 * Process the first page separately - it's the only page that
@@ -382,55 +337,59 @@ static void populate_rw_params(
 }
 
 static int transfer_max_buffers(struct goldfish_pipe *pipe,
-	unsigned long address, unsigned long address_end, int is_write,
-	unsigned long last_page, unsigned int last_page_size,
-	s32 *consumed_size, int *status)
+				unsigned long address,
+				unsigned long address_end,
+				int is_write,
+				unsigned long last_page,
+				unsigned int last_page_size,
+				s32 *consumed_size,
+				int *status)
 {
-	static struct page *pages[MAX_BUFFERS_PER_COMMAND];
 	unsigned long first_page = address & PAGE_MASK;
 	unsigned int iter_last_page_size;
-	int pages_count = pin_user_pages(first_page, last_page,
-			last_page_size, is_write,
-			pages, &iter_last_page_size);
-
-	if (pages_count < 0)
-		return pages_count;
+	int pages_count;
 
 	/* Serialize access to the pipe command buffers */
 	if (mutex_lock_interruptible(&pipe->lock))
 		return -ERESTARTSYS;
 
-	populate_rw_params(pages, pages_count, address, address_end,
-		first_page, last_page, iter_last_page_size, is_write,
-		pipe->command_buffer);
+	pages_count = pin_user_pages(first_page, last_page,
+				     last_page_size, is_write,
+				     pipe->pages, &iter_last_page_size);
+	if (pages_count < 0) {
+		mutex_unlock(&pipe->lock);
+		return pages_count;
+	}
+
+	populate_rw_params(pipe->pages, pages_count, address, address_end,
+			   first_page, last_page, iter_last_page_size, is_write,
+			   pipe->command_buffer);
 
 	/* Transfer the data */
-	*status = goldfish_cmd_locked(pipe,
+	*status = goldfish_pipe_cmd_locked(pipe,
 				is_write ? PIPE_CMD_WRITE : PIPE_CMD_READ);
 
 	*consumed_size = pipe->command_buffer->rw_params.consumed_size;
 
-	release_user_pages(pages, pages_count, is_write, *consumed_size);
+	release_user_pages(pipe->pages, pages_count, is_write, *consumed_size);
 
 	mutex_unlock(&pipe->lock);
-
 	return 0;
 }
 
 static int wait_for_host_signal(struct goldfish_pipe *pipe, int is_write)
 {
-	u32 wakeBit = is_write ? BIT_WAKE_ON_WRITE : BIT_WAKE_ON_READ;
+	u32 wake_bit = is_write ? BIT_WAKE_ON_WRITE : BIT_WAKE_ON_READ;
 
-	set_bit(wakeBit, &pipe->flags);
+	set_bit(wake_bit, &pipe->flags);
 
 	/* Tell the emulator we're going to wait for a wake event */
-	(void)goldfish_cmd(pipe,
+	goldfish_pipe_cmd(pipe,
 		is_write ? PIPE_CMD_WAKE_ON_WRITE : PIPE_CMD_WAKE_ON_READ);
 
-	while (test_bit(wakeBit, &pipe->flags)) {
-		if (wait_event_interruptible(
-				pipe->wake_queue,
-				!test_bit(wakeBit, &pipe->flags)))
+	while (test_bit(wake_bit, &pipe->flags)) {
+		if (wait_event_interruptible(pipe->wake_queue,
+					     !test_bit(wake_bit, &pipe->flags)))
 			return -ERESTARTSYS;
 
 		if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
@@ -441,7 +400,9 @@ static int wait_for_host_signal(struct goldfish_pipe *pipe, int is_write)
 }
 
 static ssize_t goldfish_pipe_read_write(struct file *filp,
-	char __user *buffer, size_t bufflen, int is_write)
+					char __user *buffer,
+					size_t bufflen,
+					int is_write)
 {
 	struct goldfish_pipe *pipe = filp->private_data;
 	int count = 0, ret = -EINVAL;
@@ -456,7 +417,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp,
 		return 0;
 	/* Check the buffer range for access */
 	if (unlikely(!access_ok(is_write ? VERIFY_WRITE : VERIFY_READ,
-			buffer, bufflen)))
+				buffer, bufflen)))
 		return -EFAULT;
 
 	address = (unsigned long)buffer;
@@ -469,8 +430,8 @@ static ssize_t goldfish_pipe_read_write(struct file *filp,
 		int status;
 
 		ret = transfer_max_buffers(pipe, address, address_end, is_write,
-				last_page, last_page_size, &consumed_size,
-				&status);
+					   last_page, last_page_size,
+					   &consumed_size, &status);
 		if (ret < 0)
 			break;
 
@@ -496,7 +457,8 @@ static ssize_t goldfish_pipe_read_write(struct file *filp,
 			 * err.
 			 */
 			if (status != PIPE_ERROR_AGAIN)
-				pr_info_ratelimited("goldfish_pipe: backend error %d on %s\n",
+				dev_err_ratelimited(pipe->dev->pdev_dev,
+					"backend error %d on %s\n",
 					status, is_write ? "write" : "read");
 			break;
 		}
@@ -522,19 +484,21 @@ static ssize_t goldfish_pipe_read_write(struct file *filp,
 }
 
 static ssize_t goldfish_pipe_read(struct file *filp, char __user *buffer,
-				size_t bufflen, loff_t *ppos)
+				  size_t bufflen, loff_t *ppos)
 {
 	return goldfish_pipe_read_write(filp, buffer, bufflen,
-			/* is_write */ 0);
+					/* is_write */ 0);
 }
 
 static ssize_t goldfish_pipe_write(struct file *filp,
-				const char __user *buffer, size_t bufflen,
-				loff_t *ppos)
+				   const char __user *buffer, size_t bufflen,
+				   loff_t *ppos)
 {
-	return goldfish_pipe_read_write(filp,
-			/* cast away the const */(char __user *)buffer, bufflen,
-			/* is_write */ 1);
+	/* cast away the const */
+	char __user *no_const_buffer = (char __user *)buffer;
+
+	return goldfish_pipe_read_write(filp, no_const_buffer, bufflen,
+					/* is_write */ 1);
 }
 
 static __poll_t goldfish_pipe_poll(struct file *filp, poll_table *wait)
@@ -545,7 +509,7 @@ static __poll_t goldfish_pipe_poll(struct file *filp, poll_table *wait)
 
 	poll_wait(filp, &pipe->wake_queue, wait);
 
-	status = goldfish_cmd(pipe, PIPE_CMD_POLL);
+	status = goldfish_pipe_cmd(pipe, PIPE_CMD_POLL);
 	if (status < 0)
 		return -ERESTARTSYS;
 
@@ -562,7 +526,7 @@ static __poll_t goldfish_pipe_poll(struct file *filp, poll_table *wait)
 }
 
 static void signalled_pipes_add_locked(struct goldfish_pipe_dev *dev,
-	u32 id, u32 flags)
+				       u32 id, u32 flags)
 {
 	struct goldfish_pipe *pipe;
 
@@ -574,8 +538,8 @@ static void signalled_pipes_add_locked(struct goldfish_pipe_dev *dev,
 		return;
 	pipe->signalled_flags |= flags;
 
-	if (pipe->prev_signalled || pipe->next_signalled
-		|| dev->first_signalled_pipe == pipe)
+	if (pipe->prev_signalled || pipe->next_signalled ||
+		dev->first_signalled_pipe == pipe)
 		return;	/* already in the list */
 	pipe->next_signalled = dev->first_signalled_pipe;
 	if (dev->first_signalled_pipe)
@@ -584,7 +548,8 @@ static void signalled_pipes_add_locked(struct goldfish_pipe_dev *dev,
 }
 
 static void signalled_pipes_remove_locked(struct goldfish_pipe_dev *dev,
-	struct goldfish_pipe *pipe) {
+					  struct goldfish_pipe *pipe)
+{
 	if (pipe->prev_signalled)
 		pipe->prev_signalled->next_signalled = pipe->next_signalled;
 	if (pipe->next_signalled)
@@ -623,10 +588,10 @@ static struct goldfish_pipe *signalled_pipes_pop_front(
 	return pipe;
 }
 
-static void goldfish_interrupt_task(unsigned long unused)
+static void goldfish_interrupt_task(unsigned long dev_addr)
 {
-	struct goldfish_pipe_dev *dev = pipe_dev;
 	/* Iterate over the signalled pipes and wake them one by one */
+	struct goldfish_pipe_dev *dev = (struct goldfish_pipe_dev *)dev_addr;
 	struct goldfish_pipe *pipe;
 	int wakes;
 
@@ -646,7 +611,9 @@ static void goldfish_interrupt_task(unsigned long unused)
 		wake_up_interruptible(&pipe->wake_queue);
 	}
 }
-static DECLARE_TASKLET(goldfish_interrupt_tasklet, goldfish_interrupt_task, 0);
+
+static void goldfish_pipe_device_deinit(struct platform_device *pdev,
+					struct goldfish_pipe_dev *dev);
 
 /*
  * The general idea of the interrupt handling:
@@ -668,7 +635,7 @@ static irqreturn_t goldfish_pipe_interrupt(int irq, void *dev_id)
 	unsigned long flags;
 	struct goldfish_pipe_dev *dev = dev_id;
 
-	if (dev != pipe_dev)
+	if (dev->magic != &goldfish_pipe_device_deinit)
 		return IRQ_NONE;
 
 	/* Request the signalled pipes from the device */
@@ -689,7 +656,7 @@ static irqreturn_t goldfish_pipe_interrupt(int irq, void *dev_id)
 
 	spin_unlock_irqrestore(&dev->lock, flags);
 
-	tasklet_schedule(&goldfish_interrupt_tasklet);
+	tasklet_schedule(&dev->irq_tasklet);
 	return IRQ_HANDLED;
 }
 
@@ -702,7 +669,10 @@ static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev)
 			return id;
 
 	{
-		/* Reallocate the array */
+		/* Reallocate the array.
+		 * Since get_free_pipe_id_locked runs with interrupts disabled,
+		 * we don't want to make calls that could lead to sleep.
+		 */
 		u32 new_capacity = 2 * dev->pipes_capacity;
 		struct goldfish_pipe **pipes =
 			kcalloc(new_capacity, sizeof(*pipes), GFP_ATOMIC);
@@ -717,6 +687,14 @@ static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev)
 	return id;
 }
 
+/* A helper function to get the instance of goldfish_pipe_dev from file */
+static struct goldfish_pipe_dev *to_goldfish_pipe_dev(struct file *file)
+{
+	struct miscdevice *miscdev = file->private_data;
+
+	return container_of(miscdev, struct goldfish_pipe_dev, miscdev);
+}
+
 /**
  *	goldfish_pipe_open - open a channel to the AVD
  *	@inode: inode of device
@@ -730,14 +708,15 @@ static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev)
  */
 static int goldfish_pipe_open(struct inode *inode, struct file *file)
 {
-	struct goldfish_pipe_dev *dev = pipe_dev;
+	struct goldfish_pipe_dev *dev = to_goldfish_pipe_dev(file);
 	unsigned long flags;
 	int id;
 	int status;
 
 	/* Allocate new pipe kernel object */
 	struct goldfish_pipe *pipe = kzalloc(sizeof(*pipe), GFP_KERNEL);
-	if (pipe == NULL)
+
+	if (!pipe)
 		return -ENOMEM;
 
 	pipe->dev = dev;
@@ -748,6 +727,7 @@ static int goldfish_pipe_open(struct inode *inode, struct file *file)
 	 * Command buffer needs to be allocated on its own page to make sure
 	 * it is physically contiguous in host's address space.
 	 */
+	BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE);
 	pipe->command_buffer =
 		(struct goldfish_pipe_command *)__get_free_page(GFP_KERNEL);
 	if (!pipe->command_buffer) {
@@ -772,7 +752,7 @@ static int goldfish_pipe_open(struct inode *inode, struct file *file)
 			MAX_BUFFERS_PER_COMMAND;
 	dev->buffers->open_command_params.command_buffer_ptr =
 			(u64)(unsigned long)__pa(pipe->command_buffer);
-	status = goldfish_cmd_locked(pipe, PIPE_CMD_OPEN);
+	status = goldfish_pipe_cmd_locked(pipe, PIPE_CMD_OPEN);
 	spin_unlock_irqrestore(&dev->lock, flags);
 	if (status < 0)
 		goto err_cmd;
@@ -798,7 +778,7 @@ static int goldfish_pipe_release(struct inode *inode, struct file *filp)
 	struct goldfish_pipe_dev *dev = pipe->dev;
 
 	/* The guest is closing the channel, so tell the emulator right now */
-	(void)goldfish_cmd(pipe, PIPE_CMD_CLOSE);
+	goldfish_pipe_cmd(pipe, PIPE_CMD_CLOSE);
 
 	spin_lock_irqsave(&dev->lock, flags);
 	dev->pipes[pipe->id] = NULL;
@@ -820,36 +800,55 @@ static const struct file_operations goldfish_pipe_fops = {
 	.release = goldfish_pipe_release,
 };
 
-static struct miscdevice goldfish_pipe_dev = {
-	.minor = MISC_DYNAMIC_MINOR,
-	.name = "goldfish_pipe",
-	.fops = &goldfish_pipe_fops,
-};
+static void init_miscdevice(struct miscdevice *miscdev)
+{
+	memset(miscdev, 0, sizeof(*miscdev));
+
+	miscdev->minor = MISC_DYNAMIC_MINOR;
+	miscdev->name = "goldfish_pipe";
+	miscdev->fops = &goldfish_pipe_fops;
+}
+
+static void write_pa_addr(void *addr, void __iomem *portl, void __iomem *porth)
+{
+	const unsigned long paddr = __pa(addr);
+
+	writel(upper_32_bits(paddr), porth);
+	writel(lower_32_bits(paddr), portl);
+}
 
-static int goldfish_pipe_device_init(struct platform_device *pdev)
+static int goldfish_pipe_device_init(struct platform_device *pdev,
+				     struct goldfish_pipe_dev *dev)
 {
-	char *page;
-	struct goldfish_pipe_dev *dev = pipe_dev;
-	int err = devm_request_irq(&pdev->dev, dev->irq,
-				goldfish_pipe_interrupt,
-				IRQF_SHARED, "goldfish_pipe", dev);
+	int err;
+
+	tasklet_init(&dev->irq_tasklet, &goldfish_interrupt_task,
+		     (unsigned long)dev);
+
+	err = devm_request_irq(&pdev->dev, dev->irq,
+			       goldfish_pipe_interrupt,
+			       IRQF_SHARED, "goldfish_pipe", dev);
 	if (err) {
 		dev_err(&pdev->dev, "unable to allocate IRQ for v2\n");
 		return err;
 	}
 
-	err = misc_register(&goldfish_pipe_dev);
+	init_miscdevice(&dev->miscdev);
+	err = misc_register(&dev->miscdev);
 	if (err) {
 		dev_err(&pdev->dev, "unable to register v2 device\n");
 		return err;
 	}
 
+	dev->pdev_dev = &pdev->dev;
 	dev->first_signalled_pipe = NULL;
 	dev->pipes_capacity = INITIAL_PIPES_CAPACITY;
 	dev->pipes = kcalloc(dev->pipes_capacity, sizeof(*dev->pipes),
-					GFP_KERNEL);
-	if (!dev->pipes)
+			     GFP_KERNEL);
+	if (!dev->pipes) {
+		misc_deregister(&dev->miscdev);
 		return -ENOMEM;
+	}
 
 	/*
 	 * We're going to pass two buffers, open_command_params and
@@ -857,75 +856,67 @@ static int goldfish_pipe_device_init(struct platform_device *pdev)
 	 * needs to be contained in a single physical page. The easiest choice
 	 * is to just allocate a page and place the buffers in it.
 	 */
-	if (WARN_ON(sizeof(*dev->buffers) > PAGE_SIZE))
-		return -ENOMEM;
-
-	page = (char *)__get_free_page(GFP_KERNEL);
-	if (!page) {
+	BUILD_BUG_ON(sizeof(struct goldfish_pipe_dev_buffers) > PAGE_SIZE);
+	dev->buffers = (struct goldfish_pipe_dev_buffers *)
+		__get_free_page(GFP_KERNEL);
+	if (!dev->buffers) {
 		kfree(dev->pipes);
+		misc_deregister(&dev->miscdev);
 		return -ENOMEM;
 	}
-	dev->buffers = (struct goldfish_pipe_dev_buffers *)page;
 
 	/* Send the buffer addresses to the host */
-	{
-		u64 paddr = __pa(&dev->buffers->signalled_pipe_buffers);
-
-		writel((u32)(unsigned long)(paddr >> 32),
-			dev->base + PIPE_REG_SIGNAL_BUFFER_HIGH);
-		writel((u32)(unsigned long)paddr,
-			dev->base + PIPE_REG_SIGNAL_BUFFER);
-		writel((u32)MAX_SIGNALLED_PIPES,
-			dev->base + PIPE_REG_SIGNAL_BUFFER_COUNT);
-
-		paddr = __pa(&dev->buffers->open_command_params);
-		writel((u32)(unsigned long)(paddr >> 32),
-			dev->base + PIPE_REG_OPEN_BUFFER_HIGH);
-		writel((u32)(unsigned long)paddr,
-			dev->base + PIPE_REG_OPEN_BUFFER);
-	}
+	write_pa_addr(&dev->buffers->signalled_pipe_buffers,
+		      dev->base + PIPE_REG_SIGNAL_BUFFER,
+		      dev->base + PIPE_REG_SIGNAL_BUFFER_HIGH);
+
+	writel(MAX_SIGNALLED_PIPES,
+	       dev->base + PIPE_REG_SIGNAL_BUFFER_COUNT);
+
+	write_pa_addr(&dev->buffers->open_command_params,
+		      dev->base + PIPE_REG_OPEN_BUFFER,
+		      dev->base + PIPE_REG_OPEN_BUFFER_HIGH);
+
+	platform_set_drvdata(pdev, dev);
 	return 0;
 }
 
-static void goldfish_pipe_device_deinit(struct platform_device *pdev)
+static void goldfish_pipe_device_deinit(struct platform_device *pdev,
+					struct goldfish_pipe_dev *dev)
 {
-	struct goldfish_pipe_dev *dev = pipe_dev;
-
-	misc_deregister(&goldfish_pipe_dev);
+	misc_deregister(&dev->miscdev);
+	tasklet_kill(&dev->irq_tasklet);
 	kfree(dev->pipes);
 	free_page((unsigned long)dev->buffers);
 }
 
 static int goldfish_pipe_probe(struct platform_device *pdev)
 {
-	int err;
 	struct resource *r;
-	struct goldfish_pipe_dev *dev = pipe_dev;
+	struct goldfish_pipe_dev *dev;
 
-	if (WARN_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE))
+	dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
+	if (!dev)
 		return -ENOMEM;
 
-	/* not thread safe, but this should not happen */
-	WARN_ON(dev->base != NULL);
-
+	dev->magic = &goldfish_pipe_device_deinit;
 	spin_lock_init(&dev->lock);
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (r == NULL || resource_size(r) < PAGE_SIZE) {
+	if (!r || resource_size(r) < PAGE_SIZE) {
 		dev_err(&pdev->dev, "can't allocate i/o page\n");
 		return -EINVAL;
 	}
 	dev->base = devm_ioremap(&pdev->dev, r->start, PAGE_SIZE);
-	if (dev->base == NULL) {
+	if (!dev->base) {
 		dev_err(&pdev->dev, "ioremap failed\n");
 		return -EINVAL;
 	}
 
 	r = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (r == NULL) {
-		err = -EINVAL;
-		goto error;
-	}
+	if (!r)
+		return -EINVAL;
+
 	dev->irq = r->start;
 
 	/*
@@ -935,25 +926,19 @@ static int goldfish_pipe_probe(struct platform_device *pdev)
 	 *  reading device version back: this allows the host implementation to
 	 *  detect the old driver (if there was no version write before read).
 	 */
-	writel((u32)PIPE_DRIVER_VERSION, dev->base + PIPE_REG_VERSION);
+	writel(PIPE_DRIVER_VERSION, dev->base + PIPE_REG_VERSION);
 	dev->version = readl(dev->base + PIPE_REG_VERSION);
 	if (WARN_ON(dev->version < PIPE_CURRENT_DEVICE_VERSION))
 		return -EINVAL;
 
-	err = goldfish_pipe_device_init(pdev);
-	if (!err)
-		return 0;
-
-error:
-	dev->base = NULL;
-	return err;
+	return goldfish_pipe_device_init(pdev, dev);
 }
 
 static int goldfish_pipe_remove(struct platform_device *pdev)
 {
-	struct goldfish_pipe_dev *dev = pipe_dev;
-	goldfish_pipe_device_deinit(pdev);
-	dev->base = NULL;
+	struct goldfish_pipe_dev *dev = platform_get_drvdata(pdev);
+
+	goldfish_pipe_device_deinit(pdev, dev);
 	return 0;
 }
 
@@ -981,4 +966,4 @@ static struct platform_driver goldfish_pipe_driver = {
 
 module_platform_driver(goldfish_pipe_driver);
 MODULE_AUTHOR("David Turner <digit@google.com>");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/goldfish/goldfish_pipe_qemu.h b/drivers/platform/goldfish/goldfish_pipe_qemu.h
new file mode 100644
index 000000000000..b4d78c108afd
--- /dev/null
+++ b/drivers/platform/goldfish/goldfish_pipe_qemu.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * IMPORTANT: The following constants must match the ones used and defined in
+ * external/qemu/include/hw/misc/goldfish_pipe.h
+ */
+
+#ifndef GOLDFISH_PIPE_QEMU_H
+#define GOLDFISH_PIPE_QEMU_H
+
+/* List of bitflags returned in status of CMD_POLL command */
+enum PipePollFlags {
+	PIPE_POLL_IN	= 1 << 0,
+	PIPE_POLL_OUT	= 1 << 1,
+	PIPE_POLL_HUP	= 1 << 2
+};
+
+/* Possible status values used to signal errors */
+enum PipeErrors {
+	PIPE_ERROR_INVAL	= -1,
+	PIPE_ERROR_AGAIN	= -2,
+	PIPE_ERROR_NOMEM	= -3,
+	PIPE_ERROR_IO		= -4
+};
+
+/* Bit-flags used to signal events from the emulator */
+enum PipeWakeFlags {
+	/* emulator closed pipe */
+	PIPE_WAKE_CLOSED		= 1 << 0,
+
+	/* pipe can now be read from */
+	PIPE_WAKE_READ			= 1 << 1,
+
+	/* pipe can now be written to */
+	PIPE_WAKE_WRITE			= 1 << 2,
+
+	/* unlock this pipe's DMA buffer */
+	PIPE_WAKE_UNLOCK_DMA		= 1 << 3,
+
+	/* unlock DMA buffer of the pipe shared to this pipe */
+	PIPE_WAKE_UNLOCK_DMA_SHARED	= 1 << 4,
+};
+
+/* Possible pipe closing reasons */
+enum PipeCloseReason {
+	/* guest sent a close command */
+	PIPE_CLOSE_GRACEFUL		= 0,
+
+	/* guest rebooted, we're closing the pipes */
+	PIPE_CLOSE_REBOOT		= 1,
+
+	/* close old pipes on snapshot load */
+	PIPE_CLOSE_LOAD_SNAPSHOT	= 2,
+
+	/* some unrecoverable error on the pipe */
+	PIPE_CLOSE_ERROR		= 3,
+};
+
+/* Bit flags for the 'flags' field */
+enum PipeFlagsBits {
+	BIT_CLOSED_ON_HOST = 0,  /* pipe closed by host */
+	BIT_WAKE_ON_WRITE  = 1,  /* want to be woken on writes */
+	BIT_WAKE_ON_READ   = 2,  /* want to be woken on reads */
+};
+
+enum PipeRegs {
+	PIPE_REG_CMD = 0,
+
+	PIPE_REG_SIGNAL_BUFFER_HIGH = 4,
+	PIPE_REG_SIGNAL_BUFFER = 8,
+	PIPE_REG_SIGNAL_BUFFER_COUNT = 12,
+
+	PIPE_REG_OPEN_BUFFER_HIGH = 20,
+	PIPE_REG_OPEN_BUFFER = 24,
+
+	PIPE_REG_VERSION = 36,
+
+	PIPE_REG_GET_SIGNALLED = 48,
+};
+
+enum PipeCmdCode {
+	/* to be used by the pipe device itself */
+	PIPE_CMD_OPEN		= 1,
+
+	PIPE_CMD_CLOSE,
+	PIPE_CMD_POLL,
+	PIPE_CMD_WRITE,
+	PIPE_CMD_WAKE_ON_WRITE,
+	PIPE_CMD_READ,
+	PIPE_CMD_WAKE_ON_READ,
+
+	/*
+	 * TODO(zyy): implement a deferred read/write execution to allow
+	 * parallel processing of pipe operations on the host.
+	 */
+	PIPE_CMD_WAKE_ON_DONE_IO,
+};
+
+#endif /* GOLDFISH_PIPE_QEMU_H */
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 0c1aa6c314f5..bdac939de223 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -867,6 +867,8 @@ config INTEL_CHT_INT33FE
 	tristate "Intel Cherry Trail ACPI INT33FE Driver"
 	depends on X86 && ACPI && I2C && REGULATOR
 	depends on CHARGER_BQ24190=y || (CHARGER_BQ24190=m && m)
+	depends on USB_ROLES_INTEL_XHCI=y || (USB_ROLES_INTEL_XHCI=m && m)
+	depends on TYPEC_MUX_PI3USB30532=y || (TYPEC_MUX_PI3USB30532=m && m)
 	---help---
 	  This driver add support for the INT33FE ACPI device found on
 	  some Intel Cherry Trail devices.
diff --git a/drivers/platform/x86/intel_cht_int33fe.c b/drivers/platform/x86/intel_cht_int33fe.c
index 7166f1cf8a1d..f40b1c192106 100644
--- a/drivers/platform/x86/intel_cht_int33fe.c
+++ b/drivers/platform/x86/intel_cht_int33fe.c
@@ -35,7 +35,7 @@ struct cht_int33fe_data {
 	struct i2c_client *fusb302;
 	struct i2c_client *pi3usb30532;
 	/* Contain a list-head must be per device */
-	struct device_connection connections[3];
+	struct device_connection connections[5];
 };
 
 /*
@@ -175,19 +175,20 @@ static int cht_int33fe_probe(struct platform_device *pdev)
 			return -EPROBE_DEFER; /* Wait for i2c-adapter to load */
 	}
 
-	data->connections[0].endpoint[0] = "i2c-fusb302";
+	data->connections[0].endpoint[0] = "port0";
 	data->connections[0].endpoint[1] = "i2c-pi3usb30532";
 	data->connections[0].id = "typec-switch";
-	data->connections[1].endpoint[0] = "i2c-fusb302";
+	data->connections[1].endpoint[0] = "port0";
 	data->connections[1].endpoint[1] = "i2c-pi3usb30532";
 	data->connections[1].id = "typec-mux";
-	data->connections[2].endpoint[0] = "i2c-fusb302";
-	data->connections[2].endpoint[1] = "intel_xhci_usb_sw-role-switch";
-	data->connections[2].id = "usb-role-switch";
+	data->connections[2].endpoint[0] = "port0";
+	data->connections[2].endpoint[1] = "i2c-pi3usb30532";
+	data->connections[2].id = "idff01m01";
+	data->connections[3].endpoint[0] = "i2c-fusb302";
+	data->connections[3].endpoint[1] = "intel_xhci_usb_sw-role-switch";
+	data->connections[3].id = "usb-role-switch";
 
-	device_connection_add(&data->connections[0]);
-	device_connection_add(&data->connections[1]);
-	device_connection_add(&data->connections[2]);
+	device_connections_add(data->connections);
 
 	memset(&board_info, 0, sizeof(board_info));
 	strlcpy(board_info.type, "typec_fusb302", I2C_NAME_SIZE);
@@ -218,9 +219,7 @@ out_unregister_max17047:
 	if (data->max17047)
 		i2c_unregister_device(data->max17047);
 
-	device_connection_remove(&data->connections[2]);
-	device_connection_remove(&data->connections[1]);
-	device_connection_remove(&data->connections[0]);
+	device_connections_remove(data->connections);
 
 	return -EPROBE_DEFER; /* Wait for the i2c-adapter to load */
 }
@@ -234,9 +233,7 @@ static int cht_int33fe_remove(struct platform_device *pdev)
 	if (data->max17047)
 		i2c_unregister_device(data->max17047);
 
-	device_connection_remove(&data->connections[2]);
-	device_connection_remove(&data->connections[1]);
-	device_connection_remove(&data->connections[0]);
+	device_connections_remove(data->connections);
 
 	return 0;
 }
diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c
index 80582c8f98fa..0e202d4273fb 100644
--- a/drivers/power/supply/twl4030_charger.c
+++ b/drivers/power/supply/twl4030_charger.c
@@ -1022,12 +1022,13 @@ static int twl4030_bci_probe(struct platform_device *pdev)
 	if (bci->dev->of_node) {
 		struct device_node *phynode;
 
-		phynode = of_find_compatible_node(bci->dev->of_node->parent,
-						  NULL, "ti,twl4030-usb");
+		phynode = of_get_compatible_child(bci->dev->of_node->parent,
+						  "ti,twl4030-usb");
 		if (phynode) {
 			bci->usb_nb.notifier_call = twl4030_bci_usb_ncb;
 			bci->transceiver = devm_usb_get_phy_by_node(
 				bci->dev, phynode, &bci->usb_nb);
+			of_node_put(phynode);
 			if (IS_ERR(bci->transceiver)) {
 				ret = PTR_ERR(bci->transceiver);
 				if (ret == -EPROBE_DEFER)
diff --git a/drivers/s390/crypto/Makefile b/drivers/s390/crypto/Makefile
index fd5e215c66b7..6ccd93d0b1cb 100644
--- a/drivers/s390/crypto/Makefile
+++ b/drivers/s390/crypto/Makefile
@@ -15,3 +15,7 @@ obj-$(CONFIG_ZCRYPT) += zcrypt_cex2c.o zcrypt_cex2a.o zcrypt_cex4.o
 # pkey kernel module
 pkey-objs := pkey_api.o
 obj-$(CONFIG_PKEY) += pkey.o
+
+# adjunct processor matrix
+vfio_ap-objs := vfio_ap_drv.o vfio_ap_ops.o
+obj-$(CONFIG_VFIO_AP) += vfio_ap.o
diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
new file mode 100644
index 000000000000..7667b38728f0
--- /dev/null
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * VFIO based AP device driver
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include "vfio_ap_private.h"
+
+#define VFIO_AP_ROOT_NAME "vfio_ap"
+#define VFIO_AP_DEV_TYPE_NAME "ap_matrix"
+#define VFIO_AP_DEV_NAME "matrix"
+
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018");
+MODULE_LICENSE("GPL v2");
+
+static struct ap_driver vfio_ap_drv;
+
+static struct device_type vfio_ap_dev_type = {
+	.name = VFIO_AP_DEV_TYPE_NAME,
+};
+
+struct ap_matrix_dev *matrix_dev;
+
+/* Only type 10 adapters (CEX4 and later) are supported
+ * by the AP matrix device driver
+ */
+static struct ap_device_id ap_queue_ids[] = {
+	{ .dev_type = AP_DEVICE_TYPE_CEX4,
+	  .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+	{ .dev_type = AP_DEVICE_TYPE_CEX5,
+	  .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+	{ .dev_type = AP_DEVICE_TYPE_CEX6,
+	  .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+	{ /* end of sibling */ },
+};
+
+MODULE_DEVICE_TABLE(vfio_ap, ap_queue_ids);
+
+static int vfio_ap_queue_dev_probe(struct ap_device *apdev)
+{
+	return 0;
+}
+
+static void vfio_ap_queue_dev_remove(struct ap_device *apdev)
+{
+	/* Nothing to do yet */
+}
+
+static void vfio_ap_matrix_dev_release(struct device *dev)
+{
+	struct ap_matrix_dev *matrix_dev = dev_get_drvdata(dev);
+
+	kfree(matrix_dev);
+}
+
+static int vfio_ap_matrix_dev_create(void)
+{
+	int ret;
+	struct device *root_device;
+
+	root_device = root_device_register(VFIO_AP_ROOT_NAME);
+	if (IS_ERR(root_device))
+		return PTR_ERR(root_device);
+
+	matrix_dev = kzalloc(sizeof(*matrix_dev), GFP_KERNEL);
+	if (!matrix_dev) {
+		ret = -ENOMEM;
+		goto matrix_alloc_err;
+	}
+
+	/* Fill in config info via PQAP(QCI), if available */
+	if (test_facility(12)) {
+		ret = ap_qci(&matrix_dev->info);
+		if (ret)
+			goto matrix_alloc_err;
+	}
+
+	mutex_init(&matrix_dev->lock);
+	INIT_LIST_HEAD(&matrix_dev->mdev_list);
+
+	matrix_dev->device.type = &vfio_ap_dev_type;
+	dev_set_name(&matrix_dev->device, "%s", VFIO_AP_DEV_NAME);
+	matrix_dev->device.parent = root_device;
+	matrix_dev->device.release = vfio_ap_matrix_dev_release;
+	matrix_dev->device.driver = &vfio_ap_drv.driver;
+
+	ret = device_register(&matrix_dev->device);
+	if (ret)
+		goto matrix_reg_err;
+
+	return 0;
+
+matrix_reg_err:
+	put_device(&matrix_dev->device);
+matrix_alloc_err:
+	root_device_unregister(root_device);
+
+	return ret;
+}
+
+static void vfio_ap_matrix_dev_destroy(void)
+{
+	device_unregister(&matrix_dev->device);
+	root_device_unregister(matrix_dev->device.parent);
+}
+
+static int __init vfio_ap_init(void)
+{
+	int ret;
+
+	/* If there are no AP instructions, there is nothing to pass through. */
+	if (!ap_instructions_available())
+		return -ENODEV;
+
+	ret = vfio_ap_matrix_dev_create();
+	if (ret)
+		return ret;
+
+	memset(&vfio_ap_drv, 0, sizeof(vfio_ap_drv));
+	vfio_ap_drv.probe = vfio_ap_queue_dev_probe;
+	vfio_ap_drv.remove = vfio_ap_queue_dev_remove;
+	vfio_ap_drv.ids = ap_queue_ids;
+
+	ret = ap_driver_register(&vfio_ap_drv, THIS_MODULE, VFIO_AP_DRV_NAME);
+	if (ret) {
+		vfio_ap_matrix_dev_destroy();
+		return ret;
+	}
+
+	ret = vfio_ap_mdev_register();
+	if (ret) {
+		ap_driver_unregister(&vfio_ap_drv);
+		vfio_ap_matrix_dev_destroy();
+
+		return ret;
+	}
+
+	return 0;
+}
+
+static void __exit vfio_ap_exit(void)
+{
+	vfio_ap_mdev_unregister();
+	ap_driver_unregister(&vfio_ap_drv);
+	vfio_ap_matrix_dev_destroy();
+}
+
+module_init(vfio_ap_init);
+module_exit(vfio_ap_exit);
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
new file mode 100644
index 000000000000..272ef427dcc0
--- /dev/null
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -0,0 +1,939 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Adjunct processor matrix VFIO device driver callbacks.
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
+ *	      Halil Pasic <pasic@linux.ibm.com>
+ *	      Pierre Morel <pmorel@linux.ibm.com>
+ */
+#include <linux/string.h>
+#include <linux/vfio.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/ctype.h>
+#include <linux/bitops.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <asm/kvm.h>
+#include <asm/zcrypt.h>
+
+#include "vfio_ap_private.h"
+
+#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
+#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"
+
+static void vfio_ap_matrix_init(struct ap_config_info *info,
+				struct ap_matrix *matrix)
+{
+	matrix->apm_max = info->apxa ? info->Na : 63;
+	matrix->aqm_max = info->apxa ? info->Nd : 15;
+	matrix->adm_max = info->apxa ? info->Nd : 15;
+}
+
+static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
+{
+	struct ap_matrix_mdev *matrix_mdev;
+
+	if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
+		return -EPERM;
+
+	matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL);
+	if (!matrix_mdev) {
+		atomic_inc(&matrix_dev->available_instances);
+		return -ENOMEM;
+	}
+
+	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
+	mdev_set_drvdata(mdev, matrix_mdev);
+	mutex_lock(&matrix_dev->lock);
+	list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
+	mutex_unlock(&matrix_dev->lock);
+
+	return 0;
+}
+
+static int vfio_ap_mdev_remove(struct mdev_device *mdev)
+{
+	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+
+	if (matrix_mdev->kvm)
+		return -EBUSY;
+
+	mutex_lock(&matrix_dev->lock);
+	list_del(&matrix_mdev->node);
+	mutex_unlock(&matrix_dev->lock);
+
+	kfree(matrix_mdev);
+	mdev_set_drvdata(mdev, NULL);
+	atomic_inc(&matrix_dev->available_instances);
+
+	return 0;
+}
+
+static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+	return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT);
+}
+
+static MDEV_TYPE_ATTR_RO(name);
+
+static ssize_t available_instances_show(struct kobject *kobj,
+					struct device *dev, char *buf)
+{
+	return sprintf(buf, "%d\n",
+		       atomic_read(&matrix_dev->available_instances));
+}
+
+static MDEV_TYPE_ATTR_RO(available_instances);
+
+static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
+			       char *buf)
+{
+	return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING);
+}
+
+static MDEV_TYPE_ATTR_RO(device_api);
+
+static struct attribute *vfio_ap_mdev_type_attrs[] = {
+	&mdev_type_attr_name.attr,
+	&mdev_type_attr_device_api.attr,
+	&mdev_type_attr_available_instances.attr,
+	NULL,
+};
+
+static struct attribute_group vfio_ap_mdev_hwvirt_type_group = {
+	.name = VFIO_AP_MDEV_TYPE_HWVIRT,
+	.attrs = vfio_ap_mdev_type_attrs,
+};
+
+static struct attribute_group *vfio_ap_mdev_type_groups[] = {
+	&vfio_ap_mdev_hwvirt_type_group,
+	NULL,
+};
+
+struct vfio_ap_queue_reserved {
+	unsigned long *apid;
+	unsigned long *apqi;
+	bool reserved;
+};
+
+/**
+ * vfio_ap_has_queue
+ *
+ * @dev: an AP queue device
+ * @data: a struct vfio_ap_queue_reserved reference
+ *
+ * Flags whether the AP queue device (@dev) has a queue ID containing the APQN,
+ * apid or apqi specified in @data:
+ *
+ * - If @data contains both an apid and apqi value, then @data will be flagged
+ *   as reserved if the APID and APQI fields for the AP queue device matches
+ *
+ * - If @data contains only an apid value, @data will be flagged as
+ *   reserved if the APID field in the AP queue device matches
+ *
+ * - If @data contains only an apqi value, @data will be flagged as
+ *   reserved if the APQI field in the AP queue device matches
+ *
+ * Returns 0 to indicate the input to function succeeded. Returns -EINVAL if
+ * @data does not contain either an apid or apqi.
+ */
+static int vfio_ap_has_queue(struct device *dev, void *data)
+{
+	struct vfio_ap_queue_reserved *qres = data;
+	struct ap_queue *ap_queue = to_ap_queue(dev);
+	ap_qid_t qid;
+	unsigned long id;
+
+	if (qres->apid && qres->apqi) {
+		qid = AP_MKQID(*qres->apid, *qres->apqi);
+		if (qid == ap_queue->qid)
+			qres->reserved = true;
+	} else if (qres->apid && !qres->apqi) {
+		id = AP_QID_CARD(ap_queue->qid);
+		if (id == *qres->apid)
+			qres->reserved = true;
+	} else if (!qres->apid && qres->apqi) {
+		id = AP_QID_QUEUE(ap_queue->qid);
+		if (id == *qres->apqi)
+			qres->reserved = true;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * vfio_ap_verify_queue_reserved
+ *
+ * @matrix_dev: a mediated matrix device
+ * @apid: an AP adapter ID
+ * @apqi: an AP queue index
+ *
+ * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device
+ * driver according to the following rules:
+ *
+ * - If both @apid and @apqi are not NULL, then there must be an AP queue
+ *   device bound to the vfio_ap driver with the APQN identified by @apid and
+ *   @apqi
+ *
+ * - If only @apid is not NULL, then there must be an AP queue device bound
+ *   to the vfio_ap driver with an APQN containing @apid
+ *
+ * - If only @apqi is not NULL, then there must be an AP queue device bound
+ *   to the vfio_ap driver with an APQN containing @apqi
+ *
+ * Returns 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
+ */
+static int vfio_ap_verify_queue_reserved(unsigned long *apid,
+					 unsigned long *apqi)
+{
+	int ret;
+	struct vfio_ap_queue_reserved qres;
+
+	qres.apid = apid;
+	qres.apqi = apqi;
+	qres.reserved = false;
+
+	ret = driver_for_each_device(matrix_dev->device.driver, NULL, &qres,
+				     vfio_ap_has_queue);
+	if (ret)
+		return ret;
+
+	if (qres.reserved)
+		return 0;
+
+	return -EADDRNOTAVAIL;
+}
+
+static int
+vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev,
+					     unsigned long apid)
+{
+	int ret;
+	unsigned long apqi;
+	unsigned long nbits = matrix_mdev->matrix.aqm_max + 1;
+
+	if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits)
+		return vfio_ap_verify_queue_reserved(&apid, NULL);
+
+	for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) {
+		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * vfio_ap_mdev_verify_no_sharing
+ *
+ * Verifies that the APQNs derived from the cross product of the AP adapter IDs
+ * and AP queue indexes comprising the AP matrix are not configured for another
+ * mediated device. AP queue sharing is not allowed.
+ *
+ * @matrix_mdev: the mediated matrix device
+ *
+ * Returns 0 if the APQNs are not shared, otherwise; returns -EADDRINUSE.
+ */
+static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
+{
+	struct ap_matrix_mdev *lstdev;
+	DECLARE_BITMAP(apm, AP_DEVICES);
+	DECLARE_BITMAP(aqm, AP_DOMAINS);
+
+	list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) {
+		if (matrix_mdev == lstdev)
+			continue;
+
+		memset(apm, 0, sizeof(apm));
+		memset(aqm, 0, sizeof(aqm));
+
+		/*
+		 * We work on full longs, as we can only exclude the leftover
+		 * bits in non-inverse order. The leftover is all zeros.
+		 */
+		if (!bitmap_and(apm, matrix_mdev->matrix.apm,
+				lstdev->matrix.apm, AP_DEVICES))
+			continue;
+
+		if (!bitmap_and(aqm, matrix_mdev->matrix.aqm,
+				lstdev->matrix.aqm, AP_DOMAINS))
+			continue;
+
+		return -EADDRINUSE;
+	}
+
+	return 0;
+}
+
+/**
+ * assign_adapter_store
+ *
+ * @dev:	the matrix device
+ * @attr:	the mediated matrix device's assign_adapter attribute
+ * @buf:	a buffer containing the AP adapter number (APID) to
+ *		be assigned
+ * @count:	the number of bytes in @buf
+ *
+ * Parses the APID from @buf and sets the corresponding bit in the mediated
+ * matrix device's APM.
+ *
+ * Returns the number of bytes processed if the APID is valid; otherwise,
+ * returns one of the following errors:
+ *
+ *	1. -EINVAL
+ *	   The APID is not a valid number
+ *
+ *	2. -ENODEV
+ *	   The APID exceeds the maximum value configured for the system
+ *
+ *	3. -EADDRNOTAVAIL
+ *	   An APQN derived from the cross product of the APID being assigned
+ *	   and the APQIs previously assigned is not bound to the vfio_ap device
+ *	   driver; or, if no APQIs have yet been assigned, the APID is not
+ *	   contained in an APQN bound to the vfio_ap device driver.
+ *
+ *	4. -EADDRINUSE
+ *	   An APQN derived from the cross product of the APID being assigned
+ *	   and the APQIs previously assigned is being used by another mediated
+ *	   matrix device
+ */
+static ssize_t assign_adapter_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	int ret;
+	unsigned long apid;
+	struct mdev_device *mdev = mdev_from_dev(dev);
+	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+
+	/* If the guest is running, disallow assignment of adapter */
+	if (matrix_mdev->kvm)
+		return -EBUSY;
+
+	ret = kstrtoul(buf, 0, &apid);
+	if (ret)
+		return ret;
+
+	if (apid > matrix_mdev->matrix.apm_max)
+		return -ENODEV;
+
+	/*
+	 * Set the bit in the AP mask (APM) corresponding to the AP adapter
+	 * number (APID). The bits in the mask, from most significant to least
+	 * significant bit, correspond to APIDs 0-255.
+	 */
+	mutex_lock(&matrix_dev->lock);
+
+	ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid);
+	if (ret)
+		goto done;
+
+	set_bit_inv(apid, matrix_mdev->matrix.apm);
+
+	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
+	if (ret)
+		goto share_err;
+
+	ret = count;
+	goto done;
+
+share_err:
+	clear_bit_inv(apid, matrix_mdev->matrix.apm);
+done:
+	mutex_unlock(&matrix_dev->lock);
+
+	return ret;
+}
+static DEVICE_ATTR_WO(assign_adapter);
+
+/**
+ * unassign_adapter_store
+ *
+ * @dev:	the matrix device
+ * @attr:	the mediated matrix device's unassign_adapter attribute
+ * @buf:	a buffer containing the adapter number (APID) to be unassigned
+ * @count:	the number of bytes in @buf
+ *
+ * Parses the APID from @buf and clears the corresponding bit in the mediated
+ * matrix device's APM.
+ *
+ * Returns the number of bytes processed if the APID is valid; otherwise,
+ * returns one of the following errors:
+ *	-EINVAL if the APID is not a number
+ *	-ENODEV if the APID it exceeds the maximum value configured for the
+ *		system
+ */
+static ssize_t unassign_adapter_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	int ret;
+	unsigned long apid;
+	struct mdev_device *mdev = mdev_from_dev(dev);
+	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+
+	/* If the guest is running, disallow un-assignment of adapter */
+	if (matrix_mdev->kvm)
+		return -EBUSY;
+
+	ret = kstrtoul(buf, 0, &apid);
+	if (ret)
+		return ret;
+
+	if (apid > matrix_mdev->matrix.apm_max)
+		return -ENODEV;
+
+	mutex_lock(&matrix_dev->lock);
+	clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm);
+	mutex_unlock(&matrix_dev->lock);
+
+	return count;
+}
+static DEVICE_ATTR_WO(unassign_adapter);
+
+static int
+vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
+					     unsigned long apqi)
+{
+	int ret;
+	unsigned long apid;
+	unsigned long nbits = matrix_mdev->matrix.apm_max + 1;
+
+	if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits)
+		return vfio_ap_verify_queue_reserved(NULL, &apqi);
+
+	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) {
+		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * assign_domain_store
+ *
+ * @dev:	the matrix device
+ * @attr:	the mediated matrix device's assign_domain attribute
+ * @buf:	a buffer containing the AP queue index (APQI) of the domain to
+ *		be assigned
+ * @count:	the number of bytes in @buf
+ *
+ * Parses the APQI from @buf and sets the corresponding bit in the mediated
+ * matrix device's AQM.
+ *
+ * Returns the number of bytes processed if the APQI is valid; otherwise returns
+ * one of the following errors:
+ *
+ *	1. -EINVAL
+ *	   The APQI is not a valid number
+ *
+ *	2. -ENODEV
+ *	   The APQI exceeds the maximum value configured for the system
+ *
+ *	3. -EADDRNOTAVAIL
+ *	   An APQN derived from the cross product of the APQI being assigned
+ *	   and the APIDs previously assigned is not bound to the vfio_ap device
+ *	   driver; or, if no APIDs have yet been assigned, the APQI is not
+ *	   contained in an APQN bound to the vfio_ap device driver.
+ *
+ *	4. -EADDRINUSE
+ *	   An APQN derived from the cross product of the APQI being assigned
+ *	   and the APIDs previously assigned is being used by another mediated
+ *	   matrix device
+ */
+static ssize_t assign_domain_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	int ret;
+	unsigned long apqi;
+	struct mdev_device *mdev = mdev_from_dev(dev);
+	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	unsigned long max_apqi = matrix_mdev->matrix.aqm_max;
+
+	/* If the guest is running, disallow assignment of domain */
+	if (matrix_mdev->kvm)
+		return -EBUSY;
+
+	ret = kstrtoul(buf, 0, &apqi);
+	if (ret)
+		return ret;
+	if (apqi > max_apqi)
+		return -ENODEV;
+
+	mutex_lock(&matrix_dev->lock);
+
+	ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi);
+	if (ret)
+		goto done;
+
+	set_bit_inv(apqi, matrix_mdev->matrix.aqm);
+
+	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
+	if (ret)
+		goto share_err;
+
+	ret = count;
+	goto done;
+
+share_err:
+	clear_bit_inv(apqi, matrix_mdev->matrix.aqm);
+done:
+	mutex_unlock(&matrix_dev->lock);
+
+	return ret;
+}
+static DEVICE_ATTR_WO(assign_domain);
+
+
+/**
+ * unassign_domain_store
+ *
+ * @dev:	the matrix device
+ * @attr:	the mediated matrix device's unassign_domain attribute
+ * @buf:	a buffer containing the AP queue index (APQI) of the domain to
+ *		be unassigned
+ * @count:	the number of bytes in @buf
+ *
+ * Parses the APQI from @buf and clears the corresponding bit in the
+ * mediated matrix device's AQM.
+ *
+ * Returns the number of bytes processed if the APQI is valid; otherwise,
+ * returns one of the following errors:
+ *	-EINVAL if the APQI is not a number
+ *	-ENODEV if the APQI exceeds the maximum value configured for the system
+ */
+static ssize_t unassign_domain_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	int ret;
+	unsigned long apqi;
+	struct mdev_device *mdev = mdev_from_dev(dev);
+	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+
+	/* If the guest is running, disallow un-assignment of domain */
+	if (matrix_mdev->kvm)
+		return -EBUSY;
+
+	ret = kstrtoul(buf, 0, &apqi);
+	if (ret)
+		return ret;
+
+	if (apqi > matrix_mdev->matrix.aqm_max)
+		return -ENODEV;
+
+	mutex_lock(&matrix_dev->lock);
+	clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm);
+	mutex_unlock(&matrix_dev->lock);
+
+	return count;
+}
+static DEVICE_ATTR_WO(unassign_domain);
+
+/**
+ * assign_control_domain_store
+ *
+ * @dev:	the matrix device
+ * @attr:	the mediated matrix device's assign_control_domain attribute
+ * @buf:	a buffer containing the domain ID to be assigned
+ * @count:	the number of bytes in @buf
+ *
+ * Parses the domain ID from @buf and sets the corresponding bit in the mediated
+ * matrix device's ADM.
+ *
+ * Returns the number of bytes processed if the domain ID is valid; otherwise,
+ * returns one of the following errors:
+ *	-EINVAL if the ID is not a number
+ *	-ENODEV if the ID exceeds the maximum value configured for the system
+ */
+static ssize_t assign_control_domain_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	int ret;
+	unsigned long id;
+	struct mdev_device *mdev = mdev_from_dev(dev);
+	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+
+	/* If the guest is running, disallow assignment of control domain */
+	if (matrix_mdev->kvm)
+		return -EBUSY;
+
+	ret = kstrtoul(buf, 0, &id);
+	if (ret)
+		return ret;
+
+	if (id > matrix_mdev->matrix.adm_max)
+		return -ENODEV;
+
+	/* Set the bit in the ADM (bitmask) corresponding to the AP control
+	 * domain number (id). The bits in the mask, from most significant to
+	 * least significant, correspond to IDs 0 up to the one less than the
+	 * number of control domains that can be assigned.
+	 */
+	mutex_lock(&matrix_dev->lock);
+	set_bit_inv(id, matrix_mdev->matrix.adm);
+	mutex_unlock(&matrix_dev->lock);
+
+	return count;
+}
+static DEVICE_ATTR_WO(assign_control_domain);
+
+/**
+ * unassign_control_domain_store
+ *
+ * @dev:	the matrix device
+ * @attr:	the mediated matrix device's unassign_control_domain attribute
+ * @buf:	a buffer containing the domain ID to be unassigned
+ * @count:	the number of bytes in @buf
+ *
+ * Parses the domain ID from @buf and clears the corresponding bit in the
+ * mediated matrix device's ADM.
+ *
+ * Returns the number of bytes processed if the domain ID is valid; otherwise,
+ * returns one of the following errors:
+ *	-EINVAL if the ID is not a number
+ *	-ENODEV if the ID exceeds the maximum value configured for the system
+ */
+static ssize_t unassign_control_domain_store(struct device *dev,
+					     struct device_attribute *attr,
+					     const char *buf, size_t count)
+{
+	int ret;
+	unsigned long domid;
+	struct mdev_device *mdev = mdev_from_dev(dev);
+	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	unsigned long max_domid =  matrix_mdev->matrix.adm_max;
+
+	/* If the guest is running, disallow un-assignment of control domain */
+	if (matrix_mdev->kvm)
+		return -EBUSY;
+
+	ret = kstrtoul(buf, 0, &domid);
+	if (ret)
+		return ret;
+	if (domid > max_domid)
+		return -ENODEV;
+
+	mutex_lock(&matrix_dev->lock);
+	clear_bit_inv(domid, matrix_mdev->matrix.adm);
+	mutex_unlock(&matrix_dev->lock);
+
+	return count;
+}
+static DEVICE_ATTR_WO(unassign_control_domain);
+
+static ssize_t control_domains_show(struct device *dev,
+				    struct device_attribute *dev_attr,
+				    char *buf)
+{
+	unsigned long id;
+	int nchars = 0;
+	int n;
+	char *bufpos = buf;
+	struct mdev_device *mdev = mdev_from_dev(dev);
+	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	unsigned long max_domid = matrix_mdev->matrix.adm_max;
+
+	mutex_lock(&matrix_dev->lock);
+	for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) {
+		n = sprintf(bufpos, "%04lx\n", id);
+		bufpos += n;
+		nchars += n;
+	}
+	mutex_unlock(&matrix_dev->lock);
+
+	return nchars;
+}
+static DEVICE_ATTR_RO(control_domains);
+
+static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct mdev_device *mdev = mdev_from_dev(dev);
+	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	char *bufpos = buf;
+	unsigned long apid;
+	unsigned long apqi;
+	unsigned long apid1;
+	unsigned long apqi1;
+	unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1;
+	unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1;
+	int nchars = 0;
+	int n;
+
+	apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits);
+	apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits);
+
+	mutex_lock(&matrix_dev->lock);
+
+	if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) {
+		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
+			for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
+					     naqm_bits) {
+				n = sprintf(bufpos, "%02lx.%04lx\n", apid,
+					    apqi);
+				bufpos += n;
+				nchars += n;
+			}
+		}
+	} else if (apid1 < napm_bits) {
+		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
+			n = sprintf(bufpos, "%02lx.\n", apid);
+			bufpos += n;
+			nchars += n;
+		}
+	} else if (apqi1 < naqm_bits) {
+		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) {
+			n = sprintf(bufpos, ".%04lx\n", apqi);
+			bufpos += n;
+			nchars += n;
+		}
+	}
+
+	mutex_unlock(&matrix_dev->lock);
+
+	return nchars;
+}
+static DEVICE_ATTR_RO(matrix);
+
+static struct attribute *vfio_ap_mdev_attrs[] = {
+	&dev_attr_assign_adapter.attr,
+	&dev_attr_unassign_adapter.attr,
+	&dev_attr_assign_domain.attr,
+	&dev_attr_unassign_domain.attr,
+	&dev_attr_assign_control_domain.attr,
+	&dev_attr_unassign_control_domain.attr,
+	&dev_attr_control_domains.attr,
+	&dev_attr_matrix.attr,
+	NULL,
+};
+
+static struct attribute_group vfio_ap_mdev_attr_group = {
+	.attrs = vfio_ap_mdev_attrs
+};
+
+static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
+	&vfio_ap_mdev_attr_group,
+	NULL
+};
+
+/**
+ * vfio_ap_mdev_set_kvm
+ *
+ * @matrix_mdev: a mediated matrix device
+ * @kvm: reference to KVM instance
+ *
+ * Verifies no other mediated matrix device has @kvm and sets a reference to
+ * it in @matrix_mdev->kvm.
+ *
+ * Return 0 if no other mediated matrix device has a reference to @kvm;
+ * otherwise, returns an -EPERM.
+ */
+static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
+				struct kvm *kvm)
+{
+	struct ap_matrix_mdev *m;
+
+	mutex_lock(&matrix_dev->lock);
+
+	list_for_each_entry(m, &matrix_dev->mdev_list, node) {
+		if ((m != matrix_mdev) && (m->kvm == kvm)) {
+			mutex_unlock(&matrix_dev->lock);
+			return -EPERM;
+		}
+	}
+
+	matrix_mdev->kvm = kvm;
+	mutex_unlock(&matrix_dev->lock);
+
+	return 0;
+}
+
+static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
+				       unsigned long action, void *data)
+{
+	int ret;
+	struct ap_matrix_mdev *matrix_mdev;
+
+	if (action != VFIO_GROUP_NOTIFY_SET_KVM)
+		return NOTIFY_OK;
+
+	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
+
+	if (!data) {
+		matrix_mdev->kvm = NULL;
+		return NOTIFY_OK;
+	}
+
+	ret = vfio_ap_mdev_set_kvm(matrix_mdev, data);
+	if (ret)
+		return NOTIFY_DONE;
+
+	/* If there is no CRYCB pointer, then we can't copy the masks */
+	if (!matrix_mdev->kvm->arch.crypto.crycbd)
+		return NOTIFY_DONE;
+
+	kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm,
+				  matrix_mdev->matrix.aqm,
+				  matrix_mdev->matrix.adm);
+
+	return NOTIFY_OK;
+}
+
+static int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi,
+				    unsigned int retry)
+{
+	struct ap_queue_status status;
+
+	do {
+		status = ap_zapq(AP_MKQID(apid, apqi));
+		switch (status.response_code) {
+		case AP_RESPONSE_NORMAL:
+			return 0;
+		case AP_RESPONSE_RESET_IN_PROGRESS:
+		case AP_RESPONSE_BUSY:
+			msleep(20);
+			break;
+		default:
+			/* things are really broken, give up */
+			return -EIO;
+		}
+	} while (retry--);
+
+	return -EBUSY;
+}
+
+static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev)
+{
+	int ret;
+	int rc = 0;
+	unsigned long apid, apqi;
+	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+
+	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm,
+			     matrix_mdev->matrix.apm_max + 1) {
+		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
+				     matrix_mdev->matrix.aqm_max + 1) {
+			ret = vfio_ap_mdev_reset_queue(apid, apqi, 1);
+			/*
+			 * Regardless whether a queue turns out to be busy, or
+			 * is not operational, we need to continue resetting
+			 * the remaining queues.
+			 */
+			if (ret)
+				rc = ret;
+		}
+	}
+
+	return rc;
+}
+
+static int vfio_ap_mdev_open(struct mdev_device *mdev)
+{
+	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	unsigned long events;
+	int ret;
+
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
+	events = VFIO_GROUP_NOTIFY_SET_KVM;
+
+	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
+				     &events, &matrix_mdev->group_notifier);
+	if (ret) {
+		module_put(THIS_MODULE);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void vfio_ap_mdev_release(struct mdev_device *mdev)
+{
+	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+
+	if (matrix_mdev->kvm)
+		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
+
+	vfio_ap_mdev_reset_queues(mdev);
+	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
+				 &matrix_mdev->group_notifier);
+	matrix_mdev->kvm = NULL;
+	module_put(THIS_MODULE);
+}
+
+static int vfio_ap_mdev_get_device_info(unsigned long arg)
+{
+	unsigned long minsz;
+	struct vfio_device_info info;
+
+	minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+	if (copy_from_user(&info, (void __user *)arg, minsz))
+		return -EFAULT;
+
+	if (info.argsz < minsz)
+		return -EINVAL;
+
+	info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET;
+	info.num_regions = 0;
+	info.num_irqs = 0;
+
+	return copy_to_user((void __user *)arg, &info, minsz);
+}
+
+static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
+				    unsigned int cmd, unsigned long arg)
+{
+	int ret;
+
+	switch (cmd) {
+	case VFIO_DEVICE_GET_INFO:
+		ret = vfio_ap_mdev_get_device_info(arg);
+		break;
+	case VFIO_DEVICE_RESET:
+		ret = vfio_ap_mdev_reset_queues(mdev);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
+static const struct mdev_parent_ops vfio_ap_matrix_ops = {
+	.owner			= THIS_MODULE,
+	.supported_type_groups	= vfio_ap_mdev_type_groups,
+	.mdev_attr_groups	= vfio_ap_mdev_attr_groups,
+	.create			= vfio_ap_mdev_create,
+	.remove			= vfio_ap_mdev_remove,
+	.open			= vfio_ap_mdev_open,
+	.release		= vfio_ap_mdev_release,
+	.ioctl			= vfio_ap_mdev_ioctl,
+};
+
+int vfio_ap_mdev_register(void)
+{
+	atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);
+
+	return mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops);
+}
+
+void vfio_ap_mdev_unregister(void)
+{
+	mdev_unregister_device(&matrix_dev->device);
+}
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
new file mode 100644
index 000000000000..5675492233c7
--- /dev/null
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Private data and functions for adjunct processor VFIO matrix driver.
+ *
+ * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
+ *	      Halil Pasic <pasic@linux.ibm.com>
+ *
+ * Copyright IBM Corp. 2018
+ */
+
+#ifndef _VFIO_AP_PRIVATE_H_
+#define _VFIO_AP_PRIVATE_H_
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/mdev.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+
+#include "ap_bus.h"
+
+#define VFIO_AP_MODULE_NAME "vfio_ap"
+#define VFIO_AP_DRV_NAME "vfio_ap"
+
+/**
+ * ap_matrix_dev - the AP matrix device structure
+ * @device:	generic device structure associated with the AP matrix device
+ * @available_instances: number of mediated matrix devices that can be created
+ * @info:	the struct containing the output from the PQAP(QCI) instruction
+ * mdev_list:	the list of mediated matrix devices created
+ * lock:	mutex for locking the AP matrix device. This lock will be
+ *		taken every time we fiddle with state managed by the vfio_ap
+ *		driver, be it using @mdev_list or writing the state of a
+ *		single ap_matrix_mdev device. It's quite coarse but we don't
+ *		expect much contention.
+ */
+struct ap_matrix_dev {
+	struct device device;
+	atomic_t available_instances;
+	struct ap_config_info info;
+	struct list_head mdev_list;
+	struct mutex lock;
+};
+
+extern struct ap_matrix_dev *matrix_dev;
+
+/**
+ * The AP matrix is comprised of three bit masks identifying the adapters,
+ * queues (domains) and control domains that belong to an AP matrix. The bits i
+ * each mask, from least significant to most significant bit, correspond to IDs
+ * 0 to 255. When a bit is set, the corresponding ID belongs to the matrix.
+ *
+ * @apm_max: max adapter number in @apm
+ * @apm identifies the AP adapters in the matrix
+ * @aqm_max: max domain number in @aqm
+ * @aqm identifies the AP queues (domains) in the matrix
+ * @adm_max: max domain number in @adm
+ * @adm identifies the AP control domains in the matrix
+ */
+struct ap_matrix {
+	unsigned long apm_max;
+	DECLARE_BITMAP(apm, 256);
+	unsigned long aqm_max;
+	DECLARE_BITMAP(aqm, 256);
+	unsigned long adm_max;
+	DECLARE_BITMAP(adm, 256);
+};
+
+/**
+ * struct ap_matrix_mdev - the mediated matrix device structure
+ * @list:	allows the ap_matrix_mdev struct to be added to a list
+ * @matrix:	the adapters, usage domains and control domains assigned to the
+ *		mediated matrix device.
+ * @group_notifier: notifier block used for specifying callback function for
+ *		    handling the VFIO_GROUP_NOTIFY_SET_KVM event
+ * @kvm:	the struct holding guest's state
+ */
+struct ap_matrix_mdev {
+	struct list_head node;
+	struct ap_matrix matrix;
+	struct notifier_block group_notifier;
+	struct kvm *kvm;
+};
+
+extern int vfio_ap_mdev_register(void);
+extern void vfio_ap_mdev_unregister(void);
+
+#endif /* _VFIO_AP_PRIVATE_H_ */
diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c
index 4a6b2b350ace..3aeadb14aae1 100644
--- a/drivers/scsi/ufs/ufs-qcom.c
+++ b/drivers/scsi/ufs/ufs-qcom.c
@@ -16,7 +16,6 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/phy/phy.h>
-#include <linux/phy/phy-qcom-ufs.h>
 
 #include "ufshcd.h"
 #include "ufshcd-pltfrm.h"
@@ -191,22 +190,9 @@ out:
 
 static int ufs_qcom_link_startup_post_change(struct ufs_hba *hba)
 {
-	struct ufs_qcom_host *host = ufshcd_get_variant(hba);
-	struct phy *phy = host->generic_phy;
 	u32 tx_lanes;
-	int err = 0;
-
-	err = ufs_qcom_get_connected_tx_lanes(hba, &tx_lanes);
-	if (err)
-		goto out;
 
-	err = ufs_qcom_phy_set_tx_lane_enable(phy, tx_lanes);
-	if (err)
-		dev_err(hba->dev, "%s: ufs_qcom_phy_set_tx_lane_enable failed\n",
-			__func__);
-
-out:
-	return err;
+	return ufs_qcom_get_connected_tx_lanes(hba, &tx_lanes);
 }
 
 static int ufs_qcom_check_hibern8(struct ufs_hba *hba)
@@ -934,10 +920,8 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba,
 {
 	u32 val;
 	struct ufs_qcom_host *host = ufshcd_get_variant(hba);
-	struct phy *phy = host->generic_phy;
 	struct ufs_qcom_dev_params ufs_qcom_cap;
 	int ret = 0;
-	int res = 0;
 
 	if (!dev_req_params) {
 		pr_err("%s: incoming dev_req_params is NULL\n", __func__);
@@ -1004,12 +988,6 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba,
 		}
 
 		val = ~(MAX_U32 << dev_req_params->lane_tx);
-		res = ufs_qcom_phy_set_tx_lane_enable(phy, val);
-		if (res) {
-			dev_err(hba->dev, "%s: ufs_qcom_phy_set_tx_lane_enable() failed res = %d\n",
-				__func__, res);
-			ret = res;
-		}
 
 		/* cache the power mode parameters to use internally */
 		memcpy(&host->dev_req_params,
@@ -1266,10 +1244,6 @@ static int ufs_qcom_init(struct ufs_hba *hba)
 		}
 	}
 
-	/* update phy revision information before calling phy_init() */
-	ufs_qcom_phy_save_controller_version(host->generic_phy,
-		host->hw_ver.major, host->hw_ver.minor, host->hw_ver.step);
-
 	err = ufs_qcom_init_lane_clks(host);
 	if (err)
 		goto out_variant_clear;
diff --git a/drivers/scsi/ufs/ufs-qcom.h b/drivers/scsi/ufs/ufs-qcom.h
index 295f4bef6a0e..c114826316eb 100644
--- a/drivers/scsi/ufs/ufs-qcom.h
+++ b/drivers/scsi/ufs/ufs-qcom.h
@@ -129,11 +129,6 @@ enum {
 	MASK_CLK_NS_REG                     = 0xFFFC00,
 };
 
-enum ufs_qcom_phy_init_type {
-	UFS_PHY_INIT_FULL,
-	UFS_PHY_INIT_CFG_RESTORE,
-};
-
 /* QCOM UFS debug print bit mask */
 #define UFS_QCOM_DBG_PRINT_REGS_EN	BIT(0)
 #define UFS_QCOM_DBG_PRINT_ICE_REGS_EN	BIT(1)
diff --git a/drivers/slimbus/core.c b/drivers/slimbus/core.c
index 95b00d28ad6e..55eda5863a6b 100644
--- a/drivers/slimbus/core.c
+++ b/drivers/slimbus/core.c
@@ -9,6 +9,7 @@
 #include <linux/init.h>
 #include <linux/idr.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/slimbus.h>
 #include "slimbus.h"
@@ -32,6 +33,10 @@ static int slim_device_match(struct device *dev, struct device_driver *drv)
 	struct slim_device *sbdev = to_slim_device(dev);
 	struct slim_driver *sbdrv = to_slim_driver(drv);
 
+	/* Attempt an OF style match first */
+	if (of_driver_match_device(dev, drv))
+		return 1;
+
 	return !!slim_match(sbdrv->id_table, sbdev);
 }
 
@@ -39,8 +44,23 @@ static int slim_device_probe(struct device *dev)
 {
 	struct slim_device	*sbdev = to_slim_device(dev);
 	struct slim_driver	*sbdrv = to_slim_driver(dev->driver);
+	int ret;
 
-	return sbdrv->probe(sbdev);
+	ret = sbdrv->probe(sbdev);
+	if (ret)
+		return ret;
+
+	/* try getting the logical address after probe */
+	ret = slim_get_logical_addr(sbdev);
+	if (!ret) {
+		if (sbdrv->device_status)
+			sbdrv->device_status(sbdev, sbdev->status);
+	} else {
+		dev_err(&sbdev->dev, "Failed to get logical address\n");
+		ret = -EPROBE_DEFER;
+	}
+
+	return ret;
 }
 
 static int slim_device_remove(struct device *dev)
@@ -57,11 +77,24 @@ static int slim_device_remove(struct device *dev)
 	return 0;
 }
 
+static int slim_device_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct slim_device *sbdev = to_slim_device(dev);
+	int ret;
+
+	ret = of_device_uevent_modalias(dev, env);
+	if (ret != -ENODEV)
+		return ret;
+
+	return add_uevent_var(env, "MODALIAS=slim:%s", dev_name(&sbdev->dev));
+}
+
 struct bus_type slimbus_bus = {
 	.name		= "slimbus",
 	.match		= slim_device_match,
 	.probe		= slim_device_probe,
 	.remove		= slim_device_remove,
+	.uevent		= slim_device_uevent,
 };
 EXPORT_SYMBOL_GPL(slimbus_bus);
 
@@ -77,7 +110,7 @@ EXPORT_SYMBOL_GPL(slimbus_bus);
 int __slim_driver_register(struct slim_driver *drv, struct module *owner)
 {
 	/* ID table and probe are mandatory */
-	if (!drv->id_table || !drv->probe)
+	if (!(drv->driver.of_match_table || drv->id_table) || !drv->probe)
 		return -EINVAL;
 
 	drv->driver.bus = &slimbus_bus;
diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c
index 8be4d6786c61..7218fb963d0a 100644
--- a/drivers/slimbus/qcom-ngd-ctrl.c
+++ b/drivers/slimbus/qcom-ngd-ctrl.c
@@ -1004,6 +1004,7 @@ static int qcom_slim_ngd_get_laddr(struct slim_controller *ctrl,
 				   struct slim_eaddr *ea, u8 *laddr)
 {
 	struct slim_val_inf msg =  {0};
+	u8 failed_ea[6] = {0, 0, 0, 0, 0, 0};
 	struct slim_msg_txn txn;
 	u8 wbuf[10] = {0};
 	u8 rbuf[10] = {0};
@@ -1034,6 +1035,9 @@ static int qcom_slim_ngd_get_laddr(struct slim_controller *ctrl,
 		return ret;
 	}
 
+	if (!memcmp(rbuf, failed_ea, 6))
+		return -ENXIO;
+
 	*laddr = rbuf[6];
 
 	return ret;
@@ -1234,8 +1238,17 @@ static int qcom_slim_ngd_enable(struct qcom_slim_ngd_ctrl *ctrl, bool enable)
 			pm_runtime_resume(ctrl->dev);
 		pm_runtime_mark_last_busy(ctrl->dev);
 		pm_runtime_put(ctrl->dev);
+
+		ret = slim_register_controller(&ctrl->ctrl);
+		if (ret) {
+			dev_err(ctrl->dev, "error adding slim controller\n");
+			return ret;
+		}
+
+		dev_info(ctrl->dev, "SLIM controller Registered\n");
 	} else {
 		qcom_slim_qmi_exit(ctrl);
+		slim_unregister_controller(&ctrl->ctrl);
 	}
 
 	return 0;
@@ -1342,7 +1355,6 @@ static int of_qcom_slim_ngd_register(struct device *parent,
 		ngd->base = ctrl->base + ngd->id * data->offset +
 					(ngd->id - 1) * data->size;
 		ctrl->ngd = ngd;
-		platform_driver_register(&qcom_slim_ngd_driver);
 
 		return 0;
 	}
@@ -1357,11 +1369,6 @@ static int qcom_slim_ngd_probe(struct platform_device *pdev)
 	int ret;
 
 	ctrl->ctrl.dev = dev;
-	ret = slim_register_controller(&ctrl->ctrl);
-	if (ret) {
-		dev_err(dev, "error adding slim controller\n");
-		return ret;
-	}
 
 	pm_runtime_use_autosuspend(dev);
 	pm_runtime_set_autosuspend_delay(dev, QCOM_SLIM_NGD_AUTOSUSPEND);
@@ -1371,7 +1378,7 @@ static int qcom_slim_ngd_probe(struct platform_device *pdev)
 	ret = qcom_slim_ngd_qmi_svc_event_init(ctrl);
 	if (ret) {
 		dev_err(&pdev->dev, "QMI service registration failed:%d", ret);
-		goto err;
+		return ret;
 	}
 
 	INIT_WORK(&ctrl->m_work, qcom_slim_ngd_master_worker);
@@ -1383,14 +1390,12 @@ static int qcom_slim_ngd_probe(struct platform_device *pdev)
 	}
 
 	return 0;
-err:
-	slim_unregister_controller(&ctrl->ctrl);
 wq_err:
 	qcom_slim_ngd_qmi_svc_event_deinit(&ctrl->qmi);
 	if (ctrl->mwq)
 		destroy_workqueue(ctrl->mwq);
 
-	return 0;
+	return ret;
 }
 
 static int qcom_slim_ngd_ctrl_probe(struct platform_device *pdev)
@@ -1441,6 +1446,7 @@ static int qcom_slim_ngd_ctrl_probe(struct platform_device *pdev)
 	init_completion(&ctrl->reconf);
 	init_completion(&ctrl->qmi.qmi_comp);
 
+	platform_driver_register(&qcom_slim_ngd_driver);
 	return of_qcom_slim_ngd_register(dev, ctrl);
 }
 
@@ -1456,7 +1462,7 @@ static int qcom_slim_ngd_remove(struct platform_device *pdev)
 	struct qcom_slim_ngd_ctrl *ctrl = platform_get_drvdata(pdev);
 
 	pm_runtime_disable(&pdev->dev);
-	slim_unregister_controller(&ctrl->ctrl);
+	qcom_slim_ngd_enable(ctrl, false);
 	qcom_slim_ngd_exit_dma(ctrl);
 	qcom_slim_ngd_qmi_svc_event_deinit(&ctrl->qmi);
 	if (ctrl->mwq)
@@ -1467,7 +1473,7 @@ static int qcom_slim_ngd_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static int qcom_slim_ngd_runtime_idle(struct device *dev)
+static int __maybe_unused qcom_slim_ngd_runtime_idle(struct device *dev)
 {
 	struct qcom_slim_ngd_ctrl *ctrl = dev_get_drvdata(dev);
 
@@ -1477,8 +1483,7 @@ static int qcom_slim_ngd_runtime_idle(struct device *dev)
 	return -EAGAIN;
 }
 
-#ifdef CONFIG_PM
-static int qcom_slim_ngd_runtime_suspend(struct device *dev)
+static int __maybe_unused qcom_slim_ngd_runtime_suspend(struct device *dev)
 {
 	struct qcom_slim_ngd_ctrl *ctrl = dev_get_drvdata(dev);
 	int ret = 0;
@@ -1491,7 +1496,6 @@ static int qcom_slim_ngd_runtime_suspend(struct device *dev)
 
 	return ret;
 }
-#endif
 
 static const struct dev_pm_ops qcom_slim_ngd_dev_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
diff --git a/drivers/soc/dove/pmu.c b/drivers/soc/dove/pmu.c
index 5abb08ffb74d..ffc5311c0ed8 100644
--- a/drivers/soc/dove/pmu.c
+++ b/drivers/soc/dove/pmu.c
@@ -383,7 +383,7 @@ int __init dove_init_pmu(void)
 
 	domains_node = of_get_child_by_name(np_pmu, "domains");
 	if (!domains_node) {
-		pr_err("%s: failed to find domains sub-node\n", np_pmu->name);
+		pr_err("%pOFn: failed to find domains sub-node\n", np_pmu);
 		return 0;
 	}
 
@@ -396,7 +396,7 @@ int __init dove_init_pmu(void)
 	pmu->pmc_base = of_iomap(pmu->of_node, 0);
 	pmu->pmu_base = of_iomap(pmu->of_node, 1);
 	if (!pmu->pmc_base || !pmu->pmu_base) {
-		pr_err("%s: failed to map PMU\n", np_pmu->name);
+		pr_err("%pOFn: failed to map PMU\n", np_pmu);
 		iounmap(pmu->pmu_base);
 		iounmap(pmu->pmc_base);
 		kfree(pmu);
@@ -414,7 +414,7 @@ int __init dove_init_pmu(void)
 			break;
 
 		domain->pmu = pmu;
-		domain->base.name = kstrdup(np->name, GFP_KERNEL);
+		domain->base.name = kasprintf(GFP_KERNEL, "%pOFn", np);
 		if (!domain->base.name) {
 			kfree(domain);
 			break;
@@ -444,7 +444,7 @@ int __init dove_init_pmu(void)
 	/* Loss of the interrupt controller is not a fatal error. */
 	parent_irq = irq_of_parse_and_map(pmu->of_node, 0);
 	if (!parent_irq) {
-		pr_err("%s: no interrupt specified\n", np_pmu->name);
+		pr_err("%pOFn: no interrupt specified\n", np_pmu);
 	} else {
 		ret = dove_init_pmu_irq(pmu, parent_irq);
 		if (ret)
diff --git a/drivers/soc/fsl/dpio/dpio-service.c b/drivers/soc/fsl/dpio/dpio-service.c
index 9b17f72349ed..321a92613a7e 100644
--- a/drivers/soc/fsl/dpio/dpio-service.c
+++ b/drivers/soc/fsl/dpio/dpio-service.c
@@ -310,6 +310,37 @@ int dpaa2_io_service_rearm(struct dpaa2_io *d,
 EXPORT_SYMBOL_GPL(dpaa2_io_service_rearm);
 
 /**
+ * dpaa2_io_service_pull_fq() - pull dequeue functions from a fq.
+ * @d: the given DPIO service.
+ * @fqid: the given frame queue id.
+ * @s: the dpaa2_io_store object for the result.
+ *
+ * Return 0 for success, or error code for failure.
+ */
+int dpaa2_io_service_pull_fq(struct dpaa2_io *d, u32 fqid,
+			     struct dpaa2_io_store *s)
+{
+	struct qbman_pull_desc pd;
+	int err;
+
+	qbman_pull_desc_clear(&pd);
+	qbman_pull_desc_set_storage(&pd, s->vaddr, s->paddr, 1);
+	qbman_pull_desc_set_numframes(&pd, (u8)s->max);
+	qbman_pull_desc_set_fq(&pd, fqid);
+
+	d = service_select(d);
+	if (!d)
+		return -ENODEV;
+	s->swp = d->swp;
+	err = qbman_swp_pull(d->swp, &pd);
+	if (err)
+		s->swp = NULL;
+
+	return err;
+}
+EXPORT_SYMBOL(dpaa2_io_service_pull_fq);
+
+/**
  * dpaa2_io_service_pull_channel() - pull dequeue functions from a channel.
  * @d: the given DPIO service.
  * @channelid: the given channel id.
@@ -342,6 +373,33 @@ int dpaa2_io_service_pull_channel(struct dpaa2_io *d, u32 channelid,
 EXPORT_SYMBOL_GPL(dpaa2_io_service_pull_channel);
 
 /**
+ * dpaa2_io_service_enqueue_fq() - Enqueue a frame to a frame queue.
+ * @d: the given DPIO service.
+ * @fqid: the given frame queue id.
+ * @fd: the frame descriptor which is enqueued.
+ *
+ * Return 0 for successful enqueue, -EBUSY if the enqueue ring is not ready,
+ * or -ENODEV if there is no dpio service.
+ */
+int dpaa2_io_service_enqueue_fq(struct dpaa2_io *d,
+				u32 fqid,
+				const struct dpaa2_fd *fd)
+{
+	struct qbman_eq_desc ed;
+
+	d = service_select(d);
+	if (!d)
+		return -ENODEV;
+
+	qbman_eq_desc_clear(&ed);
+	qbman_eq_desc_set_no_orp(&ed, 0);
+	qbman_eq_desc_set_fq(&ed, fqid);
+
+	return qbman_swp_enqueue(d->swp, &ed, fd);
+}
+EXPORT_SYMBOL(dpaa2_io_service_enqueue_fq);
+
+/**
  * dpaa2_io_service_enqueue_qd() - Enqueue a frame to a QD.
  * @d: the given DPIO service.
  * @qdid: the given queuing destination id.
diff --git a/drivers/soc/fsl/qe/qe_tdm.c b/drivers/soc/fsl/qe/qe_tdm.c
index f744c214f680..f78c34647ca2 100644
--- a/drivers/soc/fsl/qe/qe_tdm.c
+++ b/drivers/soc/fsl/qe/qe_tdm.c
@@ -131,7 +131,7 @@ int ucc_of_parse_tdm(struct device_node *np, struct ucc_tdm *utdm,
 
 	pdev = of_find_device_by_node(np2);
 	if (!pdev) {
-		pr_err("%s: failed to lookup pdev\n", np2->name);
+		pr_err("%pOFn: failed to lookup pdev\n", np2);
 		of_node_put(np2);
 		return -EINVAL;
 	}
@@ -153,7 +153,7 @@ int ucc_of_parse_tdm(struct device_node *np, struct ucc_tdm *utdm,
 	pdev = of_find_device_by_node(np2);
 	if (!pdev) {
 		ret = -EINVAL;
-		pr_err("%s: failed to lookup pdev\n", np2->name);
+		pr_err("%pOFn: failed to lookup pdev\n", np2);
 		of_node_put(np2);
 		goto err_miss_siram_property;
 	}
diff --git a/drivers/soc/qcom/apr.c b/drivers/soc/qcom/apr.c
index 57af8a537332..4bda793ba6ae 100644
--- a/drivers/soc/qcom/apr.c
+++ b/drivers/soc/qcom/apr.c
@@ -219,7 +219,7 @@ static int apr_add_device(struct device *dev, struct device_node *np,
 	adev->domain_id = id->domain_id;
 	adev->version = id->svc_version;
 	if (np)
-		strncpy(adev->name, np->name, APR_NAME_SIZE);
+		snprintf(adev->name, APR_NAME_SIZE, "%pOFn", np);
 	else
 		strncpy(adev->name, id->name, APR_NAME_SIZE);
 
diff --git a/drivers/soc/rockchip/pm_domains.c b/drivers/soc/rockchip/pm_domains.c
index 6dff8682155f..6f86a726bb45 100644
--- a/drivers/soc/rockchip/pm_domains.c
+++ b/drivers/soc/rockchip/pm_domains.c
@@ -392,21 +392,21 @@ static int rockchip_pm_add_one_domain(struct rockchip_pmu *pmu,
 	error = of_property_read_u32(node, "reg", &id);
 	if (error) {
 		dev_err(pmu->dev,
-			"%s: failed to retrieve domain id (reg): %d\n",
-			node->name, error);
+			"%pOFn: failed to retrieve domain id (reg): %d\n",
+			node, error);
 		return -EINVAL;
 	}
 
 	if (id >= pmu->info->num_domains) {
-		dev_err(pmu->dev, "%s: invalid domain id %d\n",
-			node->name, id);
+		dev_err(pmu->dev, "%pOFn: invalid domain id %d\n",
+			node, id);
 		return -EINVAL;
 	}
 
 	pd_info = &pmu->info->domain_info[id];
 	if (!pd_info) {
-		dev_err(pmu->dev, "%s: undefined domain id %d\n",
-			node->name, id);
+		dev_err(pmu->dev, "%pOFn: undefined domain id %d\n",
+			node, id);
 		return -EINVAL;
 	}
 
@@ -424,8 +424,8 @@ static int rockchip_pm_add_one_domain(struct rockchip_pmu *pmu,
 		if (!pd->clks)
 			return -ENOMEM;
 	} else {
-		dev_dbg(pmu->dev, "%s: doesn't have clocks: %d\n",
-			node->name, pd->num_clks);
+		dev_dbg(pmu->dev, "%pOFn: doesn't have clocks: %d\n",
+			node, pd->num_clks);
 		pd->num_clks = 0;
 	}
 
@@ -434,8 +434,8 @@ static int rockchip_pm_add_one_domain(struct rockchip_pmu *pmu,
 		if (IS_ERR(pd->clks[i].clk)) {
 			error = PTR_ERR(pd->clks[i].clk);
 			dev_err(pmu->dev,
-				"%s: failed to get clk at index %d: %d\n",
-				node->name, i, error);
+				"%pOFn: failed to get clk at index %d: %d\n",
+				node, i, error);
 			return error;
 		}
 	}
@@ -486,8 +486,8 @@ static int rockchip_pm_add_one_domain(struct rockchip_pmu *pmu,
 	error = rockchip_pd_power(pd, true);
 	if (error) {
 		dev_err(pmu->dev,
-			"failed to power on domain '%s': %d\n",
-			node->name, error);
+			"failed to power on domain '%pOFn': %d\n",
+			node, error);
 		goto err_unprepare_clocks;
 	}
 
@@ -575,24 +575,24 @@ static int rockchip_pm_add_subdomain(struct rockchip_pmu *pmu,
 		error = of_property_read_u32(parent, "reg", &idx);
 		if (error) {
 			dev_err(pmu->dev,
-				"%s: failed to retrieve domain id (reg): %d\n",
-				parent->name, error);
+				"%pOFn: failed to retrieve domain id (reg): %d\n",
+				parent, error);
 			goto err_out;
 		}
 		parent_domain = pmu->genpd_data.domains[idx];
 
 		error = rockchip_pm_add_one_domain(pmu, np);
 		if (error) {
-			dev_err(pmu->dev, "failed to handle node %s: %d\n",
-				np->name, error);
+			dev_err(pmu->dev, "failed to handle node %pOFn: %d\n",
+				np, error);
 			goto err_out;
 		}
 
 		error = of_property_read_u32(np, "reg", &idx);
 		if (error) {
 			dev_err(pmu->dev,
-				"%s: failed to retrieve domain id (reg): %d\n",
-				np->name, error);
+				"%pOFn: failed to retrieve domain id (reg): %d\n",
+				np, error);
 			goto err_out;
 		}
 		child_domain = pmu->genpd_data.domains[idx];
@@ -683,16 +683,16 @@ static int rockchip_pm_domain_probe(struct platform_device *pdev)
 	for_each_available_child_of_node(np, node) {
 		error = rockchip_pm_add_one_domain(pmu, node);
 		if (error) {
-			dev_err(dev, "failed to handle node %s: %d\n",
-				node->name, error);
+			dev_err(dev, "failed to handle node %pOFn: %d\n",
+				node, error);
 			of_node_put(node);
 			goto err_out;
 		}
 
 		error = rockchip_pm_add_subdomain(pmu, node);
 		if (error < 0) {
-			dev_err(dev, "failed to handle subdomain node %s: %d\n",
-				node->name, error);
+			dev_err(dev, "failed to handle subdomain node %pOFn: %d\n",
+				node, error);
 			of_node_put(node);
 			goto err_out;
 		}
diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c
index 2d6f3fcf3211..acbe63e925d5 100644
--- a/drivers/soc/tegra/pmc.c
+++ b/drivers/soc/tegra/pmc.c
@@ -796,7 +796,7 @@ static void tegra_powergate_add(struct tegra_pmc *pmc, struct device_node *np)
 
 	id = tegra_powergate_lookup(pmc, np->name);
 	if (id < 0) {
-		pr_err("powergate lookup failed for %s: %d\n", np->name, id);
+		pr_err("powergate lookup failed for %pOFn: %d\n", np, id);
 		goto free_mem;
 	}
 
@@ -816,13 +816,13 @@ static void tegra_powergate_add(struct tegra_pmc *pmc, struct device_node *np)
 
 	err = tegra_powergate_of_get_clks(pg, np);
 	if (err < 0) {
-		pr_err("failed to get clocks for %s: %d\n", np->name, err);
+		pr_err("failed to get clocks for %pOFn: %d\n", np, err);
 		goto set_available;
 	}
 
 	err = tegra_powergate_of_get_resets(pg, np, off);
 	if (err < 0) {
-		pr_err("failed to get resets for %s: %d\n", np->name, err);
+		pr_err("failed to get resets for %pOFn: %d\n", np, err);
 		goto remove_clks;
 	}
 
@@ -851,15 +851,15 @@ static void tegra_powergate_add(struct tegra_pmc *pmc, struct device_node *np)
 
 	err = pm_genpd_init(&pg->genpd, NULL, off);
 	if (err < 0) {
-		pr_err("failed to initialise PM domain %s: %d\n", np->name,
+		pr_err("failed to initialise PM domain %pOFn: %d\n", np,
 		       err);
 		goto remove_resets;
 	}
 
 	err = of_genpd_add_provider_simple(np, &pg->genpd);
 	if (err < 0) {
-		pr_err("failed to add PM domain provider for %s: %d\n",
-		       np->name, err);
+		pr_err("failed to add PM domain provider for %pOFn: %d\n",
+		       np, err);
 		goto remove_genpd;
 	}
 
diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c
index 224d7ddeeb76..bbd4e5bc8707 100644
--- a/drivers/soc/ti/knav_dma.c
+++ b/drivers/soc/ti/knav_dma.c
@@ -544,15 +544,15 @@ static void __iomem *pktdma_get_regs(struct knav_dma_device *dma,
 
 	ret = of_address_to_resource(node, index, &res);
 	if (ret) {
-		dev_err(dev, "Can't translate of node(%s) address for index(%d)\n",
-			node->name, index);
+		dev_err(dev, "Can't translate of node(%pOFn) address for index(%d)\n",
+			node, index);
 		return ERR_PTR(ret);
 	}
 
 	regs = devm_ioremap_resource(kdev->dev, &res);
 	if (IS_ERR(regs))
-		dev_err(dev, "Failed to map register base for index(%d) node(%s)\n",
-			index, node->name);
+		dev_err(dev, "Failed to map register base for index(%d) node(%pOFn)\n",
+			index, node);
 	if (_size)
 		*_size = resource_size(&res);
 
diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c
index 6755f2af5619..b5d5673c255c 100644
--- a/drivers/soc/ti/knav_qmss_queue.c
+++ b/drivers/soc/ti/knav_qmss_queue.c
@@ -1382,15 +1382,15 @@ static void __iomem *knav_queue_map_reg(struct knav_device *kdev,
 
 	ret = of_address_to_resource(node, index, &res);
 	if (ret) {
-		dev_err(kdev->dev, "Can't translate of node(%s) address for index(%d)\n",
-			node->name, index);
+		dev_err(kdev->dev, "Can't translate of node(%pOFn) address for index(%d)\n",
+			node, index);
 		return ERR_PTR(ret);
 	}
 
 	regs = devm_ioremap_resource(kdev->dev, &res);
 	if (IS_ERR(regs))
-		dev_err(kdev->dev, "Failed to map register base for index(%d) node(%s)\n",
-			index, node->name);
+		dev_err(kdev->dev, "Failed to map register base for index(%d) node(%pOFn)\n",
+			index, node);
 	return regs;
 }
 
diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c
index dcc0ff9f0c22..1cbfedfc20ef 100644
--- a/drivers/soundwire/bus.c
+++ b/drivers/soundwire/bus.c
@@ -35,6 +35,11 @@ int sdw_add_bus_master(struct sdw_bus *bus)
 	INIT_LIST_HEAD(&bus->slaves);
 	INIT_LIST_HEAD(&bus->m_rt_list);
 
+	/*
+	 * Initialize multi_link flag
+	 * TODO: populate this flag by reading property from FW node
+	 */
+	bus->multi_link = false;
 	if (bus->ops->read_prop) {
 		ret = bus->ops->read_prop(bus);
 		if (ret < 0) {
@@ -175,6 +180,7 @@ static inline int do_transfer_defer(struct sdw_bus *bus,
 
 	defer->msg = msg;
 	defer->length = msg->len;
+	init_completion(&defer->complete);
 
 	for (i = 0; i <= retry; i++) {
 		resp = bus->ops->xfer_msg_defer(bus, msg, defer);
diff --git a/drivers/soundwire/bus.h b/drivers/soundwire/bus.h
index 3b15c4e25a3a..c77de05b8100 100644
--- a/drivers/soundwire/bus.h
+++ b/drivers/soundwire/bus.h
@@ -4,6 +4,8 @@
 #ifndef __SDW_BUS_H
 #define __SDW_BUS_H
 
+#define DEFAULT_BANK_SWITCH_TIMEOUT 3000
+
 #if IS_ENABLED(CONFIG_ACPI)
 int sdw_acpi_find_slaves(struct sdw_bus *bus);
 #else
@@ -99,6 +101,7 @@ struct sdw_slave_runtime {
  * this stream, can be zero.
  * @slave_rt_list: Slave runtime list
  * @port_list: List of Master Ports configured for this stream, can be zero.
+ * @stream_node: sdw_stream_runtime master_list node
  * @bus_node: sdw_bus m_rt_list node
  */
 struct sdw_master_runtime {
@@ -108,6 +111,7 @@ struct sdw_master_runtime {
 	unsigned int ch_count;
 	struct list_head slave_rt_list;
 	struct list_head port_list;
+	struct list_head stream_node;
 	struct list_head bus_node;
 };
 
diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index 0a8990e758f9..c5ee97ee7886 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -398,6 +398,69 @@ static int intel_config_stream(struct sdw_intel *sdw,
 }
 
 /*
+ * bank switch routines
+ */
+
+static int intel_pre_bank_switch(struct sdw_bus *bus)
+{
+	struct sdw_cdns *cdns = bus_to_cdns(bus);
+	struct sdw_intel *sdw = cdns_to_intel(cdns);
+	void __iomem *shim = sdw->res->shim;
+	int sync_reg;
+
+	/* Write to register only for multi-link */
+	if (!bus->multi_link)
+		return 0;
+
+	/* Read SYNC register */
+	sync_reg = intel_readl(shim, SDW_SHIM_SYNC);
+	sync_reg |= SDW_SHIM_SYNC_CMDSYNC << sdw->instance;
+	intel_writel(shim, SDW_SHIM_SYNC, sync_reg);
+
+	return 0;
+}
+
+static int intel_post_bank_switch(struct sdw_bus *bus)
+{
+	struct sdw_cdns *cdns = bus_to_cdns(bus);
+	struct sdw_intel *sdw = cdns_to_intel(cdns);
+	void __iomem *shim = sdw->res->shim;
+	int sync_reg, ret;
+
+	/* Write to register only for multi-link */
+	if (!bus->multi_link)
+		return 0;
+
+	/* Read SYNC register */
+	sync_reg = intel_readl(shim, SDW_SHIM_SYNC);
+
+	/*
+	 * post_bank_switch() ops is called from the bus in loop for
+	 * all the Masters in the steam with the expectation that
+	 * we trigger the bankswitch for the only first Master in the list
+	 * and do nothing for the other Masters
+	 *
+	 * So, set the SYNCGO bit only if CMDSYNC bit is set for any Master.
+	 */
+	if (!(sync_reg & SDW_SHIM_SYNC_CMDSYNC_MASK))
+		return 0;
+
+	/*
+	 * Set SyncGO bit to synchronously trigger a bank switch for
+	 * all the masters. A write to SYNCGO bit clears CMDSYNC bit for all
+	 * the Masters.
+	 */
+	sync_reg |= SDW_SHIM_SYNC_SYNCGO;
+
+	ret = intel_clear_bit(shim, SDW_SHIM_SYNC, sync_reg,
+					SDW_SHIM_SYNC_SYNCGO);
+	if (ret < 0)
+		dev_err(sdw->cdns.dev, "Post bank switch failed: %d", ret);
+
+	return ret;
+}
+
+/*
  * DAI routines
  */
 
@@ -750,6 +813,8 @@ static struct sdw_master_ops sdw_intel_ops = {
 	.xfer_msg_defer = cdns_xfer_msg_defer,
 	.reset_page_addr = cdns_reset_page_addr,
 	.set_bus_conf = cdns_bus_conf,
+	.pre_bank_switch = intel_pre_bank_switch,
+	.post_bank_switch = intel_post_bank_switch,
 };
 
 /*
@@ -780,9 +845,6 @@ static int intel_probe(struct platform_device *pdev)
 	sdw_intel_ops.read_prop = intel_prop_read;
 	sdw->cdns.bus.ops = &sdw_intel_ops;
 
-	sdw_intel_ops.read_prop = intel_prop_read;
-	sdw->cdns.bus.ops = &sdw_intel_ops;
-
 	platform_set_drvdata(pdev, sdw);
 
 	ret = sdw_add_bus_master(&sdw->cdns.bus);
diff --git a/drivers/soundwire/intel_init.c b/drivers/soundwire/intel_init.c
index d1ea6b4d0ad3..5c8a20d99878 100644
--- a/drivers/soundwire/intel_init.c
+++ b/drivers/soundwire/intel_init.c
@@ -151,7 +151,7 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
 	struct acpi_device *adev;
 
 	if (acpi_bus_get_device(handle, &adev)) {
-		dev_err(&adev->dev, "Couldn't find ACPI handle\n");
+		pr_err("%s: Couldn't find ACPI handle\n", __func__);
 		return AE_NOT_FOUND;
 	}
 
diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c
index e5c7e1ef6318..bd879b1a76c8 100644
--- a/drivers/soundwire/stream.c
+++ b/drivers/soundwire/stream.c
@@ -626,9 +626,10 @@ static int sdw_program_params(struct sdw_bus *bus)
 	return ret;
 }
 
-static int sdw_bank_switch(struct sdw_bus *bus)
+static int sdw_bank_switch(struct sdw_bus *bus, int m_rt_count)
 {
 	int col_index, row_index;
+	bool multi_link;
 	struct sdw_msg *wr_msg;
 	u8 *wbuf = NULL;
 	int ret = 0;
@@ -638,6 +639,8 @@ static int sdw_bank_switch(struct sdw_bus *bus)
 	if (!wr_msg)
 		return -ENOMEM;
 
+	bus->defer_msg.msg = wr_msg;
+
 	wbuf = kzalloc(sizeof(*wbuf), GFP_KERNEL);
 	if (!wbuf) {
 		ret = -ENOMEM;
@@ -658,17 +661,29 @@ static int sdw_bank_switch(struct sdw_bus *bus)
 					SDW_MSG_FLAG_WRITE, wbuf);
 	wr_msg->ssp_sync = true;
 
-	ret = sdw_transfer(bus, wr_msg);
+	/*
+	 * Set the multi_link flag only when both the hardware supports
+	 * and there is a stream handled by multiple masters
+	 */
+	multi_link = bus->multi_link && (m_rt_count > 1);
+
+	if (multi_link)
+		ret = sdw_transfer_defer(bus, wr_msg, &bus->defer_msg);
+	else
+		ret = sdw_transfer(bus, wr_msg);
+
 	if (ret < 0) {
 		dev_err(bus->dev, "Slave frame_ctrl reg write failed");
 		goto error;
 	}
 
-	kfree(wr_msg);
-	kfree(wbuf);
-	bus->defer_msg.msg = NULL;
-	bus->params.curr_bank = !bus->params.curr_bank;
-	bus->params.next_bank = !bus->params.next_bank;
+	if (!multi_link) {
+		kfree(wr_msg);
+		kfree(wbuf);
+		bus->defer_msg.msg = NULL;
+		bus->params.curr_bank = !bus->params.curr_bank;
+		bus->params.next_bank = !bus->params.next_bank;
+	}
 
 	return 0;
 
@@ -679,37 +694,138 @@ error_1:
 	return ret;
 }
 
+/**
+ * sdw_ml_sync_bank_switch: Multilink register bank switch
+ *
+ * @bus: SDW bus instance
+ *
+ * Caller function should free the buffers on error
+ */
+static int sdw_ml_sync_bank_switch(struct sdw_bus *bus)
+{
+	unsigned long time_left;
+
+	if (!bus->multi_link)
+		return 0;
+
+	/* Wait for completion of transfer */
+	time_left = wait_for_completion_timeout(&bus->defer_msg.complete,
+						bus->bank_switch_timeout);
+
+	if (!time_left) {
+		dev_err(bus->dev, "Controller Timed out on bank switch");
+		return -ETIMEDOUT;
+	}
+
+	bus->params.curr_bank = !bus->params.curr_bank;
+	bus->params.next_bank = !bus->params.next_bank;
+
+	if (bus->defer_msg.msg) {
+		kfree(bus->defer_msg.msg->buf);
+		kfree(bus->defer_msg.msg);
+	}
+
+	return 0;
+}
+
 static int do_bank_switch(struct sdw_stream_runtime *stream)
 {
-	struct sdw_master_runtime *m_rt = stream->m_rt;
+	struct sdw_master_runtime *m_rt = NULL;
 	const struct sdw_master_ops *ops;
-	struct sdw_bus *bus = m_rt->bus;
+	struct sdw_bus *bus = NULL;
+	bool multi_link = false;
 	int ret = 0;
 
-	ops = bus->ops;
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+		ops = bus->ops;
+
+		if (bus->multi_link) {
+			multi_link = true;
+			mutex_lock(&bus->msg_lock);
+		}
+
+		/* Pre-bank switch */
+		if (ops->pre_bank_switch) {
+			ret = ops->pre_bank_switch(bus);
+			if (ret < 0) {
+				dev_err(bus->dev,
+					"Pre bank switch op failed: %d", ret);
+				goto msg_unlock;
+			}
+		}
 
-	/* Pre-bank switch */
-	if (ops->pre_bank_switch) {
-		ret = ops->pre_bank_switch(bus);
+		/*
+		 * Perform Bank switch operation.
+		 * For multi link cases, the actual bank switch is
+		 * synchronized across all Masters and happens later as a
+		 * part of post_bank_switch ops.
+		 */
+		ret = sdw_bank_switch(bus, stream->m_rt_count);
 		if (ret < 0) {
-			dev_err(bus->dev, "Pre bank switch op failed: %d", ret);
-			return ret;
+			dev_err(bus->dev, "Bank switch failed: %d", ret);
+			goto error;
+
 		}
 	}
 
-	/* Bank switch */
-	ret = sdw_bank_switch(bus);
-	if (ret < 0) {
-		dev_err(bus->dev, "Bank switch failed: %d", ret);
-		return ret;
-	}
+	/*
+	 * For multi link cases, it is expected that the bank switch is
+	 * triggered by the post_bank_switch for the first Master in the list
+	 * and for the other Masters the post_bank_switch() should return doing
+	 * nothing.
+	 */
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+		ops = bus->ops;
+
+		/* Post-bank switch */
+		if (ops->post_bank_switch) {
+			ret = ops->post_bank_switch(bus);
+			if (ret < 0) {
+				dev_err(bus->dev,
+					"Post bank switch op failed: %d", ret);
+				goto error;
+			}
+		} else if (bus->multi_link && stream->m_rt_count > 1) {
+			dev_err(bus->dev,
+				"Post bank switch ops not implemented");
+			goto error;
+		}
+
+		/* Set the bank switch timeout to default, if not set */
+		if (!bus->bank_switch_timeout)
+			bus->bank_switch_timeout = DEFAULT_BANK_SWITCH_TIMEOUT;
 
-	/* Post-bank switch */
-	if (ops->post_bank_switch) {
-		ret = ops->post_bank_switch(bus);
+		/* Check if bank switch was successful */
+		ret = sdw_ml_sync_bank_switch(bus);
 		if (ret < 0) {
 			dev_err(bus->dev,
-					"Post bank switch op failed: %d", ret);
+				"multi link bank switch failed: %d", ret);
+			goto error;
+		}
+
+		mutex_unlock(&bus->msg_lock);
+	}
+
+	return ret;
+
+error:
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+
+		bus = m_rt->bus;
+
+		kfree(bus->defer_msg.msg->buf);
+		kfree(bus->defer_msg.msg);
+	}
+
+msg_unlock:
+
+	if (multi_link) {
+		list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+			bus = m_rt->bus;
+			if (mutex_is_locked(&bus->msg_lock))
+				mutex_unlock(&bus->msg_lock);
 		}
 	}
 
@@ -747,12 +863,29 @@ struct sdw_stream_runtime *sdw_alloc_stream(char *stream_name)
 		return NULL;
 
 	stream->name = stream_name;
+	INIT_LIST_HEAD(&stream->master_list);
 	stream->state = SDW_STREAM_ALLOCATED;
+	stream->m_rt_count = 0;
 
 	return stream;
 }
 EXPORT_SYMBOL(sdw_alloc_stream);
 
+static struct sdw_master_runtime
+*sdw_find_master_rt(struct sdw_bus *bus,
+			struct sdw_stream_runtime *stream)
+{
+	struct sdw_master_runtime *m_rt = NULL;
+
+	/* Retrieve Bus handle if already available */
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		if (m_rt->bus == bus)
+			return m_rt;
+	}
+
+	return NULL;
+}
+
 /**
  * sdw_alloc_master_rt() - Allocates and initialize Master runtime handle
  *
@@ -769,12 +902,11 @@ static struct sdw_master_runtime
 {
 	struct sdw_master_runtime *m_rt;
 
-	m_rt = stream->m_rt;
-
 	/*
 	 * check if Master is already allocated (as a result of Slave adding
 	 * it first), if so skip allocation and go to configure
 	 */
+	m_rt = sdw_find_master_rt(bus, stream);
 	if (m_rt)
 		goto stream_config;
 
@@ -785,7 +917,7 @@ static struct sdw_master_runtime
 	/* Initialization of Master runtime handle */
 	INIT_LIST_HEAD(&m_rt->port_list);
 	INIT_LIST_HEAD(&m_rt->slave_rt_list);
-	stream->m_rt = m_rt;
+	list_add_tail(&m_rt->stream_node, &stream->master_list);
 
 	list_add_tail(&m_rt->bus_node, &bus->m_rt_list);
 
@@ -843,17 +975,21 @@ static void sdw_slave_port_release(struct sdw_bus *bus,
 			struct sdw_stream_runtime *stream)
 {
 	struct sdw_port_runtime *p_rt, *_p_rt;
-	struct sdw_master_runtime *m_rt = stream->m_rt;
+	struct sdw_master_runtime *m_rt;
 	struct sdw_slave_runtime *s_rt;
 
-	list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) {
-		if (s_rt->slave != slave)
-			continue;
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) {
 
-		list_for_each_entry_safe(p_rt, _p_rt,
-				&s_rt->port_list, port_node) {
-			list_del(&p_rt->port_node);
-			kfree(p_rt);
+			if (s_rt->slave != slave)
+				continue;
+
+			list_for_each_entry_safe(p_rt, _p_rt,
+					&s_rt->port_list, port_node) {
+
+				list_del(&p_rt->port_node);
+				kfree(p_rt);
+			}
 		}
 	}
 }
@@ -870,16 +1006,18 @@ static void sdw_release_slave_stream(struct sdw_slave *slave,
 			struct sdw_stream_runtime *stream)
 {
 	struct sdw_slave_runtime *s_rt, *_s_rt;
-	struct sdw_master_runtime *m_rt = stream->m_rt;
+	struct sdw_master_runtime *m_rt;
 
-	/* Retrieve Slave runtime handle */
-	list_for_each_entry_safe(s_rt, _s_rt,
-			&m_rt->slave_rt_list, m_rt_node) {
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		/* Retrieve Slave runtime handle */
+		list_for_each_entry_safe(s_rt, _s_rt,
+					&m_rt->slave_rt_list, m_rt_node) {
 
-		if (s_rt->slave == slave) {
-			list_del(&s_rt->m_rt_node);
-			kfree(s_rt);
-			return;
+			if (s_rt->slave == slave) {
+				list_del(&s_rt->m_rt_node);
+				kfree(s_rt);
+				return;
+			}
 		}
 	}
 }
@@ -887,6 +1025,7 @@ static void sdw_release_slave_stream(struct sdw_slave *slave,
 /**
  * sdw_release_master_stream() - Free Master runtime handle
  *
+ * @m_rt: Master runtime node
  * @stream: Stream runtime handle.
  *
  * This function is to be called with bus_lock held
@@ -894,9 +1033,9 @@ static void sdw_release_slave_stream(struct sdw_slave *slave,
  * handle. If this is called first then sdw_release_slave_stream() will have
  * no effect as Slave(s) runtime handle would already be freed up.
  */
-static void sdw_release_master_stream(struct sdw_stream_runtime *stream)
+static void sdw_release_master_stream(struct sdw_master_runtime *m_rt,
+			struct sdw_stream_runtime *stream)
 {
-	struct sdw_master_runtime *m_rt = stream->m_rt;
 	struct sdw_slave_runtime *s_rt, *_s_rt;
 
 	list_for_each_entry_safe(s_rt, _s_rt, &m_rt->slave_rt_list, m_rt_node) {
@@ -904,7 +1043,9 @@ static void sdw_release_master_stream(struct sdw_stream_runtime *stream)
 		sdw_release_slave_stream(s_rt->slave, stream);
 	}
 
+	list_del(&m_rt->stream_node);
 	list_del(&m_rt->bus_node);
+	kfree(m_rt);
 }
 
 /**
@@ -918,13 +1059,23 @@ static void sdw_release_master_stream(struct sdw_stream_runtime *stream)
 int sdw_stream_remove_master(struct sdw_bus *bus,
 		struct sdw_stream_runtime *stream)
 {
+	struct sdw_master_runtime *m_rt, *_m_rt;
+
 	mutex_lock(&bus->bus_lock);
 
-	sdw_release_master_stream(stream);
-	sdw_master_port_release(bus, stream->m_rt);
-	stream->state = SDW_STREAM_RELEASED;
-	kfree(stream->m_rt);
-	stream->m_rt = NULL;
+	list_for_each_entry_safe(m_rt, _m_rt,
+			&stream->master_list, stream_node) {
+
+		if (m_rt->bus != bus)
+			continue;
+
+		sdw_master_port_release(bus, m_rt);
+		sdw_release_master_stream(m_rt, stream);
+		stream->m_rt_count--;
+	}
+
+	if (list_empty(&stream->master_list))
+		stream->state = SDW_STREAM_RELEASED;
 
 	mutex_unlock(&bus->bus_lock);
 
@@ -1107,6 +1258,18 @@ int sdw_stream_add_master(struct sdw_bus *bus,
 
 	mutex_lock(&bus->bus_lock);
 
+	/*
+	 * For multi link streams, add the second master only if
+	 * the bus supports it.
+	 * Check if bus->multi_link is set
+	 */
+	if (!bus->multi_link && stream->m_rt_count > 0) {
+		dev_err(bus->dev,
+			"Multilink not supported, link %d", bus->link_id);
+		ret = -EINVAL;
+		goto unlock;
+	}
+
 	m_rt = sdw_alloc_master_rt(bus, stream_config, stream);
 	if (!m_rt) {
 		dev_err(bus->dev,
@@ -1124,10 +1287,12 @@ int sdw_stream_add_master(struct sdw_bus *bus,
 	if (ret)
 		goto stream_error;
 
+	stream->m_rt_count++;
+
 	goto unlock;
 
 stream_error:
-	sdw_release_master_stream(stream);
+	sdw_release_master_stream(m_rt, stream);
 unlock:
 	mutex_unlock(&bus->bus_lock);
 	return ret;
@@ -1205,7 +1370,7 @@ stream_error:
 	 * we hit error so cleanup the stream, release all Slave(s) and
 	 * Master runtime
 	 */
-	sdw_release_master_stream(stream);
+	sdw_release_master_stream(m_rt, stream);
 error:
 	mutex_unlock(&slave->bus->bus_lock);
 	return ret;
@@ -1245,33 +1410,82 @@ struct sdw_dpn_prop *sdw_get_slave_dpn_prop(struct sdw_slave *slave,
 	return NULL;
 }
 
+/**
+ * sdw_acquire_bus_lock: Acquire bus lock for all Master runtime(s)
+ *
+ * @stream: SoundWire stream
+ *
+ * Acquire bus_lock for each of the master runtime(m_rt) part of this
+ * stream to reconfigure the bus.
+ * NOTE: This function is called from SoundWire stream ops and is
+ * expected that a global lock is held before acquiring bus_lock.
+ */
+static void sdw_acquire_bus_lock(struct sdw_stream_runtime *stream)
+{
+	struct sdw_master_runtime *m_rt = NULL;
+	struct sdw_bus *bus = NULL;
+
+	/* Iterate for all Master(s) in Master list */
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+
+		mutex_lock(&bus->bus_lock);
+	}
+}
+
+/**
+ * sdw_release_bus_lock: Release bus lock for all Master runtime(s)
+ *
+ * @stream: SoundWire stream
+ *
+ * Release the previously held bus_lock after reconfiguring the bus.
+ * NOTE: This function is called from SoundWire stream ops and is
+ * expected that a global lock is held before releasing bus_lock.
+ */
+static void sdw_release_bus_lock(struct sdw_stream_runtime *stream)
+{
+	struct sdw_master_runtime *m_rt = NULL;
+	struct sdw_bus *bus = NULL;
+
+	/* Iterate for all Master(s) in Master list */
+	list_for_each_entry_reverse(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+		mutex_unlock(&bus->bus_lock);
+	}
+}
+
 static int _sdw_prepare_stream(struct sdw_stream_runtime *stream)
 {
-	struct sdw_master_runtime *m_rt = stream->m_rt;
-	struct sdw_bus *bus = m_rt->bus;
+	struct sdw_master_runtime *m_rt = NULL;
+	struct sdw_bus *bus = NULL;
 	struct sdw_master_prop *prop = NULL;
 	struct sdw_bus_params params;
 	int ret;
 
-	prop = &bus->prop;
-	memcpy(&params, &bus->params, sizeof(params));
+	/* Prepare  Master(s) and Slave(s) port(s) associated with stream */
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+		prop = &bus->prop;
+		memcpy(&params, &bus->params, sizeof(params));
 
-	/* TODO: Support Asynchronous mode */
-	if ((prop->max_freq % stream->params.rate) != 0) {
-		dev_err(bus->dev, "Async mode not supported");
-		return -EINVAL;
-	}
+		/* TODO: Support Asynchronous mode */
+		if ((prop->max_freq % stream->params.rate) != 0) {
+			dev_err(bus->dev, "Async mode not supported");
+			return -EINVAL;
+		}
 
-	/* Increment cumulative bus bandwidth */
-	/* TODO: Update this during Device-Device support */
-	bus->params.bandwidth += m_rt->stream->params.rate *
-		m_rt->ch_count * m_rt->stream->params.bps;
+		/* Increment cumulative bus bandwidth */
+		/* TODO: Update this during Device-Device support */
+		bus->params.bandwidth += m_rt->stream->params.rate *
+			m_rt->ch_count * m_rt->stream->params.bps;
+
+		/* Program params */
+		ret = sdw_program_params(bus);
+		if (ret < 0) {
+			dev_err(bus->dev, "Program params failed: %d", ret);
+			goto restore_params;
+		}
 
-	/* Program params */
-	ret = sdw_program_params(bus);
-	if (ret < 0) {
-		dev_err(bus->dev, "Program params failed: %d", ret);
-		goto restore_params;
 	}
 
 	ret = do_bank_switch(stream);
@@ -1280,12 +1494,16 @@ static int _sdw_prepare_stream(struct sdw_stream_runtime *stream)
 		goto restore_params;
 	}
 
-	/* Prepare port(s) on the new clock configuration */
-	ret = sdw_prep_deprep_ports(m_rt, true);
-	if (ret < 0) {
-		dev_err(bus->dev, "Prepare port(s) failed ret = %d",
-				ret);
-		return ret;
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+
+		/* Prepare port(s) on the new clock configuration */
+		ret = sdw_prep_deprep_ports(m_rt, true);
+		if (ret < 0) {
+			dev_err(bus->dev, "Prepare port(s) failed ret = %d",
+					ret);
+			return ret;
+		}
 	}
 
 	stream->state = SDW_STREAM_PREPARED;
@@ -1313,35 +1531,40 @@ int sdw_prepare_stream(struct sdw_stream_runtime *stream)
 		return -EINVAL;
 	}
 
-	mutex_lock(&stream->m_rt->bus->bus_lock);
+	sdw_acquire_bus_lock(stream);
 
 	ret = _sdw_prepare_stream(stream);
 	if (ret < 0)
 		pr_err("Prepare for stream:%s failed: %d", stream->name, ret);
 
-	mutex_unlock(&stream->m_rt->bus->bus_lock);
+	sdw_release_bus_lock(stream);
 	return ret;
 }
 EXPORT_SYMBOL(sdw_prepare_stream);
 
 static int _sdw_enable_stream(struct sdw_stream_runtime *stream)
 {
-	struct sdw_master_runtime *m_rt = stream->m_rt;
-	struct sdw_bus *bus = m_rt->bus;
+	struct sdw_master_runtime *m_rt = NULL;
+	struct sdw_bus *bus = NULL;
 	int ret;
 
-	/* Program params */
-	ret = sdw_program_params(bus);
-	if (ret < 0) {
-		dev_err(bus->dev, "Program params failed: %d", ret);
-		return ret;
-	}
+	/* Enable Master(s) and Slave(s) port(s) associated with stream */
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
 
-	/* Enable port(s) */
-	ret = sdw_enable_disable_ports(m_rt, true);
-	if (ret < 0) {
-		dev_err(bus->dev, "Enable port(s) failed ret: %d", ret);
-		return ret;
+		/* Program params */
+		ret = sdw_program_params(bus);
+		if (ret < 0) {
+			dev_err(bus->dev, "Program params failed: %d", ret);
+			return ret;
+		}
+
+		/* Enable port(s) */
+		ret = sdw_enable_disable_ports(m_rt, true);
+		if (ret < 0) {
+			dev_err(bus->dev, "Enable port(s) failed ret: %d", ret);
+			return ret;
+		}
 	}
 
 	ret = do_bank_switch(stream);
@@ -1370,37 +1593,42 @@ int sdw_enable_stream(struct sdw_stream_runtime *stream)
 		return -EINVAL;
 	}
 
-	mutex_lock(&stream->m_rt->bus->bus_lock);
+	sdw_acquire_bus_lock(stream);
 
 	ret = _sdw_enable_stream(stream);
 	if (ret < 0)
 		pr_err("Enable for stream:%s failed: %d", stream->name, ret);
 
-	mutex_unlock(&stream->m_rt->bus->bus_lock);
+	sdw_release_bus_lock(stream);
 	return ret;
 }
 EXPORT_SYMBOL(sdw_enable_stream);
 
 static int _sdw_disable_stream(struct sdw_stream_runtime *stream)
 {
-	struct sdw_master_runtime *m_rt = stream->m_rt;
-	struct sdw_bus *bus = m_rt->bus;
+	struct sdw_master_runtime *m_rt = NULL;
+	struct sdw_bus *bus = NULL;
 	int ret;
 
-	/* Disable port(s) */
-	ret = sdw_enable_disable_ports(m_rt, false);
-	if (ret < 0) {
-		dev_err(bus->dev, "Disable port(s) failed: %d", ret);
-		return ret;
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+		/* Disable port(s) */
+		ret = sdw_enable_disable_ports(m_rt, false);
+		if (ret < 0) {
+			dev_err(bus->dev, "Disable port(s) failed: %d", ret);
+			return ret;
+		}
 	}
-
 	stream->state = SDW_STREAM_DISABLED;
 
-	/* Program params */
-	ret = sdw_program_params(bus);
-	if (ret < 0) {
-		dev_err(bus->dev, "Program params failed: %d", ret);
-		return ret;
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+		/* Program params */
+		ret = sdw_program_params(bus);
+		if (ret < 0) {
+			dev_err(bus->dev, "Program params failed: %d", ret);
+			return ret;
+		}
 	}
 
 	return do_bank_switch(stream);
@@ -1422,43 +1650,46 @@ int sdw_disable_stream(struct sdw_stream_runtime *stream)
 		return -EINVAL;
 	}
 
-	mutex_lock(&stream->m_rt->bus->bus_lock);
+	sdw_acquire_bus_lock(stream);
 
 	ret = _sdw_disable_stream(stream);
 	if (ret < 0)
 		pr_err("Disable for stream:%s failed: %d", stream->name, ret);
 
-	mutex_unlock(&stream->m_rt->bus->bus_lock);
+	sdw_release_bus_lock(stream);
 	return ret;
 }
 EXPORT_SYMBOL(sdw_disable_stream);
 
 static int _sdw_deprepare_stream(struct sdw_stream_runtime *stream)
 {
-	struct sdw_master_runtime *m_rt = stream->m_rt;
-	struct sdw_bus *bus = m_rt->bus;
+	struct sdw_master_runtime *m_rt = NULL;
+	struct sdw_bus *bus = NULL;
 	int ret = 0;
 
-	/* De-prepare port(s) */
-	ret = sdw_prep_deprep_ports(m_rt, false);
-	if (ret < 0) {
-		dev_err(bus->dev, "De-prepare port(s) failed: %d", ret);
-		return ret;
-	}
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+		/* De-prepare port(s) */
+		ret = sdw_prep_deprep_ports(m_rt, false);
+		if (ret < 0) {
+			dev_err(bus->dev, "De-prepare port(s) failed: %d", ret);
+			return ret;
+		}
 
-	stream->state = SDW_STREAM_DEPREPARED;
+		/* TODO: Update this during Device-Device support */
+		bus->params.bandwidth -= m_rt->stream->params.rate *
+			m_rt->ch_count * m_rt->stream->params.bps;
 
-	/* TODO: Update this during Device-Device support */
-	bus->params.bandwidth -= m_rt->stream->params.rate *
-		m_rt->ch_count * m_rt->stream->params.bps;
+		/* Program params */
+		ret = sdw_program_params(bus);
+		if (ret < 0) {
+			dev_err(bus->dev, "Program params failed: %d", ret);
+			return ret;
+		}
 
-	/* Program params */
-	ret = sdw_program_params(bus);
-	if (ret < 0) {
-		dev_err(bus->dev, "Program params failed: %d", ret);
-		return ret;
 	}
 
+	stream->state = SDW_STREAM_DEPREPARED;
 	return do_bank_switch(stream);
 }
 
@@ -1478,13 +1709,12 @@ int sdw_deprepare_stream(struct sdw_stream_runtime *stream)
 		return -EINVAL;
 	}
 
-	mutex_lock(&stream->m_rt->bus->bus_lock);
-
+	sdw_acquire_bus_lock(stream);
 	ret = _sdw_deprepare_stream(stream);
 	if (ret < 0)
 		pr_err("De-prepare for stream:%d failed: %d", ret, ret);
 
-	mutex_unlock(&stream->m_rt->bus->bus_lock);
+	sdw_release_bus_lock(stream);
 	return ret;
 }
 EXPORT_SYMBOL(sdw_deprepare_stream);
diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c
index 546a47156101..8cf0617d4ea0 100644
--- a/drivers/staging/erofs/namei.c
+++ b/drivers/staging/erofs/namei.c
@@ -223,18 +223,13 @@ static struct dentry *erofs_lookup(struct inode *dir,
 	if (err == -ENOENT) {
 		/* negative dentry */
 		inode = NULL;
-		goto negative_out;
-	} else if (unlikely(err))
-		return ERR_PTR(err);
-
-	debugln("%s, %s (nid %llu) found, d_type %u", __func__,
-		dentry->d_name.name, nid, d_type);
-
-	inode = erofs_iget(dir->i_sb, nid, d_type == EROFS_FT_DIR);
-	if (IS_ERR(inode))
-		return ERR_CAST(inode);
-
-negative_out:
+	} else if (unlikely(err)) {
+		inode = ERR_PTR(err);
+	} else {
+		debugln("%s, %s (nid %llu) found, d_type %u", __func__,
+			dentry->d_name.name, nid, d_type);
+		inode = erofs_iget(dir->i_sb, nid, d_type == EROFS_FT_DIR);
+	}
 	return d_splice_alias(inode, dentry);
 }
 
diff --git a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
index 9f18be14dda6..f38f1f74fcd6 100644
--- a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
+++ b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
@@ -49,9 +49,9 @@ struct rtllib_tkip_data {
 	u32 dot11RSNAStatsTKIPLocalMICFailures;
 
 	int key_idx;
-	struct crypto_skcipher *rx_tfm_arc4;
+	struct crypto_sync_skcipher *rx_tfm_arc4;
 	struct crypto_shash *rx_tfm_michael;
-	struct crypto_skcipher *tx_tfm_arc4;
+	struct crypto_sync_skcipher *tx_tfm_arc4;
 	struct crypto_shash *tx_tfm_michael;
 	/* scratch buffers for virt_to_page() (crypto API) */
 	u8 rx_hdr[16];
@@ -66,8 +66,7 @@ static void *rtllib_tkip_init(int key_idx)
 	if (priv == NULL)
 		goto fail;
 	priv->key_idx = key_idx;
-	priv->tx_tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0,
-						  CRYPTO_ALG_ASYNC);
+	priv->tx_tfm_arc4 = crypto_alloc_sync_skcipher("ecb(arc4)", 0, 0);
 	if (IS_ERR(priv->tx_tfm_arc4)) {
 		pr_debug("Could not allocate crypto API arc4\n");
 		priv->tx_tfm_arc4 = NULL;
@@ -81,8 +80,7 @@ static void *rtllib_tkip_init(int key_idx)
 		goto fail;
 	}
 
-	priv->rx_tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0,
-						  CRYPTO_ALG_ASYNC);
+	priv->rx_tfm_arc4 = crypto_alloc_sync_skcipher("ecb(arc4)", 0, 0);
 	if (IS_ERR(priv->rx_tfm_arc4)) {
 		pr_debug("Could not allocate crypto API arc4\n");
 		priv->rx_tfm_arc4 = NULL;
@@ -100,9 +98,9 @@ static void *rtllib_tkip_init(int key_idx)
 fail:
 	if (priv) {
 		crypto_free_shash(priv->tx_tfm_michael);
-		crypto_free_skcipher(priv->tx_tfm_arc4);
+		crypto_free_sync_skcipher(priv->tx_tfm_arc4);
 		crypto_free_shash(priv->rx_tfm_michael);
-		crypto_free_skcipher(priv->rx_tfm_arc4);
+		crypto_free_sync_skcipher(priv->rx_tfm_arc4);
 		kfree(priv);
 	}
 
@@ -116,9 +114,9 @@ static void rtllib_tkip_deinit(void *priv)
 
 	if (_priv) {
 		crypto_free_shash(_priv->tx_tfm_michael);
-		crypto_free_skcipher(_priv->tx_tfm_arc4);
+		crypto_free_sync_skcipher(_priv->tx_tfm_arc4);
 		crypto_free_shash(_priv->rx_tfm_michael);
-		crypto_free_skcipher(_priv->rx_tfm_arc4);
+		crypto_free_sync_skcipher(_priv->rx_tfm_arc4);
 	}
 	kfree(priv);
 }
@@ -337,7 +335,7 @@ static int rtllib_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	*pos++ = (tkey->tx_iv32 >> 24) & 0xff;
 
 	if (!tcb_desc->bHwSec) {
-		SKCIPHER_REQUEST_ON_STACK(req, tkey->tx_tfm_arc4);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, tkey->tx_tfm_arc4);
 
 		icv = skb_put(skb, 4);
 		crc = ~crc32_le(~0, pos, len);
@@ -349,8 +347,8 @@ static int rtllib_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 		sg_init_one(&sg, pos, len+4);
 
 
-		crypto_skcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
-		skcipher_request_set_tfm(req, tkey->tx_tfm_arc4);
+		crypto_sync_skcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
+		skcipher_request_set_sync_tfm(req, tkey->tx_tfm_arc4);
 		skcipher_request_set_callback(req, 0, NULL, NULL);
 		skcipher_request_set_crypt(req, &sg, &sg, len + 4, NULL);
 		ret = crypto_skcipher_encrypt(req);
@@ -420,7 +418,7 @@ static int rtllib_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	pos += 8;
 
 	if (!tcb_desc->bHwSec || (skb->cb[0] == 1)) {
-		SKCIPHER_REQUEST_ON_STACK(req, tkey->rx_tfm_arc4);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, tkey->rx_tfm_arc4);
 
 		if ((iv32 < tkey->rx_iv32 ||
 		    (iv32 == tkey->rx_iv32 && iv16 <= tkey->rx_iv16)) &&
@@ -447,8 +445,8 @@ static int rtllib_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 
 		sg_init_one(&sg, pos, plen+4);
 
-		crypto_skcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
-		skcipher_request_set_tfm(req, tkey->rx_tfm_arc4);
+		crypto_sync_skcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
+		skcipher_request_set_sync_tfm(req, tkey->rx_tfm_arc4);
 		skcipher_request_set_callback(req, 0, NULL, NULL);
 		skcipher_request_set_crypt(req, &sg, &sg, plen + 4, NULL);
 		err = crypto_skcipher_decrypt(req);
@@ -664,9 +662,9 @@ static int rtllib_tkip_set_key(void *key, int len, u8 *seq, void *priv)
 	struct rtllib_tkip_data *tkey = priv;
 	int keyidx;
 	struct crypto_shash *tfm = tkey->tx_tfm_michael;
-	struct crypto_skcipher *tfm2 = tkey->tx_tfm_arc4;
+	struct crypto_sync_skcipher *tfm2 = tkey->tx_tfm_arc4;
 	struct crypto_shash *tfm3 = tkey->rx_tfm_michael;
-	struct crypto_skcipher *tfm4 = tkey->rx_tfm_arc4;
+	struct crypto_sync_skcipher *tfm4 = tkey->rx_tfm_arc4;
 
 	keyidx = tkey->key_idx;
 	memset(tkey, 0, sizeof(*tkey));
diff --git a/drivers/staging/rtl8192e/rtllib_crypt_wep.c b/drivers/staging/rtl8192e/rtllib_crypt_wep.c
index b3343a5d0fd6..d11ec39171d5 100644
--- a/drivers/staging/rtl8192e/rtllib_crypt_wep.c
+++ b/drivers/staging/rtl8192e/rtllib_crypt_wep.c
@@ -27,8 +27,8 @@ struct prism2_wep_data {
 	u8 key[WEP_KEY_LEN + 1];
 	u8 key_len;
 	u8 key_idx;
-	struct crypto_skcipher *tx_tfm;
-	struct crypto_skcipher *rx_tfm;
+	struct crypto_sync_skcipher *tx_tfm;
+	struct crypto_sync_skcipher *rx_tfm;
 };
 
 
@@ -41,13 +41,13 @@ static void *prism2_wep_init(int keyidx)
 		goto fail;
 	priv->key_idx = keyidx;
 
-	priv->tx_tfm = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+	priv->tx_tfm = crypto_alloc_sync_skcipher("ecb(arc4)", 0, 0);
 	if (IS_ERR(priv->tx_tfm)) {
 		pr_debug("rtllib_crypt_wep: could not allocate crypto API arc4\n");
 		priv->tx_tfm = NULL;
 		goto fail;
 	}
-	priv->rx_tfm = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+	priv->rx_tfm = crypto_alloc_sync_skcipher("ecb(arc4)", 0, 0);
 	if (IS_ERR(priv->rx_tfm)) {
 		pr_debug("rtllib_crypt_wep: could not allocate crypto API arc4\n");
 		priv->rx_tfm = NULL;
@@ -61,8 +61,8 @@ static void *prism2_wep_init(int keyidx)
 
 fail:
 	if (priv) {
-		crypto_free_skcipher(priv->tx_tfm);
-		crypto_free_skcipher(priv->rx_tfm);
+		crypto_free_sync_skcipher(priv->tx_tfm);
+		crypto_free_sync_skcipher(priv->rx_tfm);
 		kfree(priv);
 	}
 	return NULL;
@@ -74,8 +74,8 @@ static void prism2_wep_deinit(void *priv)
 	struct prism2_wep_data *_priv = priv;
 
 	if (_priv) {
-		crypto_free_skcipher(_priv->tx_tfm);
-		crypto_free_skcipher(_priv->rx_tfm);
+		crypto_free_sync_skcipher(_priv->tx_tfm);
+		crypto_free_sync_skcipher(_priv->rx_tfm);
 	}
 	kfree(priv);
 }
@@ -135,7 +135,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	memcpy(key + 3, wep->key, wep->key_len);
 
 	if (!tcb_desc->bHwSec) {
-		SKCIPHER_REQUEST_ON_STACK(req, wep->tx_tfm);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, wep->tx_tfm);
 
 		/* Append little-endian CRC32 and encrypt it to produce ICV */
 		crc = ~crc32_le(~0, pos, len);
@@ -146,8 +146,8 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 		icv[3] = crc >> 24;
 
 		sg_init_one(&sg, pos, len+4);
-		crypto_skcipher_setkey(wep->tx_tfm, key, klen);
-		skcipher_request_set_tfm(req, wep->tx_tfm);
+		crypto_sync_skcipher_setkey(wep->tx_tfm, key, klen);
+		skcipher_request_set_sync_tfm(req, wep->tx_tfm);
 		skcipher_request_set_callback(req, 0, NULL, NULL);
 		skcipher_request_set_crypt(req, &sg, &sg, len + 4, NULL);
 		err = crypto_skcipher_encrypt(req);
@@ -199,11 +199,11 @@ static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	plen = skb->len - hdr_len - 8;
 
 	if (!tcb_desc->bHwSec) {
-		SKCIPHER_REQUEST_ON_STACK(req, wep->rx_tfm);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, wep->rx_tfm);
 
 		sg_init_one(&sg, pos, plen+4);
-		crypto_skcipher_setkey(wep->rx_tfm, key, klen);
-		skcipher_request_set_tfm(req, wep->rx_tfm);
+		crypto_sync_skcipher_setkey(wep->rx_tfm, key, klen);
+		skcipher_request_set_sync_tfm(req, wep->rx_tfm);
 		skcipher_request_set_callback(req, 0, NULL, NULL);
 		skcipher_request_set_crypt(req, &sg, &sg, plen + 4, NULL);
 		err = crypto_skcipher_decrypt(req);
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c
index 1088fa0aee0e..829fa4bd253c 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c
@@ -53,9 +53,9 @@ struct ieee80211_tkip_data {
 
 	int key_idx;
 
-	struct crypto_skcipher *rx_tfm_arc4;
+	struct crypto_sync_skcipher *rx_tfm_arc4;
 	struct crypto_shash *rx_tfm_michael;
-	struct crypto_skcipher *tx_tfm_arc4;
+	struct crypto_sync_skcipher *tx_tfm_arc4;
 	struct crypto_shash *tx_tfm_michael;
 
 	/* scratch buffers for virt_to_page() (crypto API) */
@@ -71,8 +71,7 @@ static void *ieee80211_tkip_init(int key_idx)
 		goto fail;
 	priv->key_idx = key_idx;
 
-	priv->tx_tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0,
-			CRYPTO_ALG_ASYNC);
+	priv->tx_tfm_arc4 = crypto_alloc_sync_skcipher("ecb(arc4)", 0, 0);
 	if (IS_ERR(priv->tx_tfm_arc4)) {
 		printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
 				"crypto API arc4\n");
@@ -88,8 +87,7 @@ static void *ieee80211_tkip_init(int key_idx)
 		goto fail;
 	}
 
-	priv->rx_tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0,
-			CRYPTO_ALG_ASYNC);
+	priv->rx_tfm_arc4 = crypto_alloc_sync_skcipher("ecb(arc4)", 0, 0);
 	if (IS_ERR(priv->rx_tfm_arc4)) {
 		printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
 				"crypto API arc4\n");
@@ -110,9 +108,9 @@ static void *ieee80211_tkip_init(int key_idx)
 fail:
 	if (priv) {
 		crypto_free_shash(priv->tx_tfm_michael);
-		crypto_free_skcipher(priv->tx_tfm_arc4);
+		crypto_free_sync_skcipher(priv->tx_tfm_arc4);
 		crypto_free_shash(priv->rx_tfm_michael);
-		crypto_free_skcipher(priv->rx_tfm_arc4);
+		crypto_free_sync_skcipher(priv->rx_tfm_arc4);
 		kfree(priv);
 	}
 
@@ -126,9 +124,9 @@ static void ieee80211_tkip_deinit(void *priv)
 
 	if (_priv) {
 		crypto_free_shash(_priv->tx_tfm_michael);
-		crypto_free_skcipher(_priv->tx_tfm_arc4);
+		crypto_free_sync_skcipher(_priv->tx_tfm_arc4);
 		crypto_free_shash(_priv->rx_tfm_michael);
-		crypto_free_skcipher(_priv->rx_tfm_arc4);
+		crypto_free_sync_skcipher(_priv->rx_tfm_arc4);
 	}
 	kfree(priv);
 }
@@ -340,7 +338,7 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	*pos++ = (tkey->tx_iv32 >> 24) & 0xff;
 
 	if (!tcb_desc->bHwSec) {
-		SKCIPHER_REQUEST_ON_STACK(req, tkey->tx_tfm_arc4);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, tkey->tx_tfm_arc4);
 
 		icv = skb_put(skb, 4);
 		crc = ~crc32_le(~0, pos, len);
@@ -348,9 +346,9 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 		icv[1] = crc >> 8;
 		icv[2] = crc >> 16;
 		icv[3] = crc >> 24;
-		crypto_skcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
+		crypto_sync_skcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
 		sg_init_one(&sg, pos, len+4);
-		skcipher_request_set_tfm(req, tkey->tx_tfm_arc4);
+		skcipher_request_set_sync_tfm(req, tkey->tx_tfm_arc4);
 		skcipher_request_set_callback(req, 0, NULL, NULL);
 		skcipher_request_set_crypt(req, &sg, &sg, len + 4, NULL);
 		ret = crypto_skcipher_encrypt(req);
@@ -418,7 +416,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	pos += 8;
 
 	if (!tcb_desc->bHwSec) {
-		SKCIPHER_REQUEST_ON_STACK(req, tkey->rx_tfm_arc4);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, tkey->rx_tfm_arc4);
 
 		if (iv32 < tkey->rx_iv32 ||
 		(iv32 == tkey->rx_iv32 && iv16 <= tkey->rx_iv16)) {
@@ -440,10 +438,10 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 
 		plen = skb->len - hdr_len - 12;
 
-		crypto_skcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
+		crypto_sync_skcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
 		sg_init_one(&sg, pos, plen+4);
 
-		skcipher_request_set_tfm(req, tkey->rx_tfm_arc4);
+		skcipher_request_set_sync_tfm(req, tkey->rx_tfm_arc4);
 		skcipher_request_set_callback(req, 0, NULL, NULL);
 		skcipher_request_set_crypt(req, &sg, &sg, plen + 4, NULL);
 
@@ -663,9 +661,9 @@ static int ieee80211_tkip_set_key(void *key, int len, u8 *seq, void *priv)
 	struct ieee80211_tkip_data *tkey = priv;
 	int keyidx;
 	struct crypto_shash *tfm = tkey->tx_tfm_michael;
-	struct crypto_skcipher *tfm2 = tkey->tx_tfm_arc4;
+	struct crypto_sync_skcipher *tfm2 = tkey->tx_tfm_arc4;
 	struct crypto_shash *tfm3 = tkey->rx_tfm_michael;
-	struct crypto_skcipher *tfm4 = tkey->rx_tfm_arc4;
+	struct crypto_sync_skcipher *tfm4 = tkey->rx_tfm_arc4;
 
 	keyidx = tkey->key_idx;
 	memset(tkey, 0, sizeof(*tkey));
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_wep.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_wep.c
index b9f86be9e52b..d4a1bf0caa7a 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_wep.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_wep.c
@@ -32,8 +32,8 @@ struct prism2_wep_data {
 	u8 key[WEP_KEY_LEN + 1];
 	u8 key_len;
 	u8 key_idx;
-	struct crypto_skcipher *tx_tfm;
-	struct crypto_skcipher *rx_tfm;
+	struct crypto_sync_skcipher *tx_tfm;
+	struct crypto_sync_skcipher *rx_tfm;
 };
 
 
@@ -46,10 +46,10 @@ static void *prism2_wep_init(int keyidx)
 		return NULL;
 	priv->key_idx = keyidx;
 
-	priv->tx_tfm = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+	priv->tx_tfm = crypto_alloc_sync_skcipher("ecb(arc4)", 0, 0);
 	if (IS_ERR(priv->tx_tfm))
 		goto free_priv;
-	priv->rx_tfm = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+	priv->rx_tfm = crypto_alloc_sync_skcipher("ecb(arc4)", 0, 0);
 	if (IS_ERR(priv->rx_tfm))
 		goto free_tx;
 
@@ -58,7 +58,7 @@ static void *prism2_wep_init(int keyidx)
 
 	return priv;
 free_tx:
-	crypto_free_skcipher(priv->tx_tfm);
+	crypto_free_sync_skcipher(priv->tx_tfm);
 free_priv:
 	kfree(priv);
 	return NULL;
@@ -70,8 +70,8 @@ static void prism2_wep_deinit(void *priv)
 	struct prism2_wep_data *_priv = priv;
 
 	if (_priv) {
-		crypto_free_skcipher(_priv->tx_tfm);
-		crypto_free_skcipher(_priv->rx_tfm);
+		crypto_free_sync_skcipher(_priv->tx_tfm);
+		crypto_free_sync_skcipher(_priv->rx_tfm);
 	}
 	kfree(priv);
 }
@@ -128,7 +128,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	memcpy(key + 3, wep->key, wep->key_len);
 
 	if (!tcb_desc->bHwSec) {
-		SKCIPHER_REQUEST_ON_STACK(req, wep->tx_tfm);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, wep->tx_tfm);
 
 		/* Append little-endian CRC32 and encrypt it to produce ICV */
 		crc = ~crc32_le(~0, pos, len);
@@ -138,10 +138,10 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 		icv[2] = crc >> 16;
 		icv[3] = crc >> 24;
 
-		crypto_skcipher_setkey(wep->tx_tfm, key, klen);
+		crypto_sync_skcipher_setkey(wep->tx_tfm, key, klen);
 		sg_init_one(&sg, pos, len+4);
 
-		skcipher_request_set_tfm(req, wep->tx_tfm);
+		skcipher_request_set_sync_tfm(req, wep->tx_tfm);
 		skcipher_request_set_callback(req, 0, NULL, NULL);
 		skcipher_request_set_crypt(req, &sg, &sg, len + 4, NULL);
 
@@ -193,12 +193,12 @@ static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	plen = skb->len - hdr_len - 8;
 
 	if (!tcb_desc->bHwSec) {
-		SKCIPHER_REQUEST_ON_STACK(req, wep->rx_tfm);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, wep->rx_tfm);
 
-		crypto_skcipher_setkey(wep->rx_tfm, key, klen);
+		crypto_sync_skcipher_setkey(wep->rx_tfm, key, klen);
 		sg_init_one(&sg, pos, plen+4);
 
-		skcipher_request_set_tfm(req, wep->rx_tfm);
+		skcipher_request_set_sync_tfm(req, wep->rx_tfm);
 		skcipher_request_set_callback(req, 0, NULL, NULL);
 		skcipher_request_set_crypt(req, &sg, &sg, plen + 4, NULL);
 
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 0e69edc77d18..5422523c03f8 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -432,7 +432,7 @@ source "drivers/thermal/samsung/Kconfig"
 endmenu
 
 menu "STMicroelectronics thermal drivers"
-depends on ARCH_STI && OF
+depends on (ARCH_STI || ARCH_STM32) && OF
 source "drivers/thermal/st/Kconfig"
 endmenu
 
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 610344eb3e03..82bb50dc6423 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -53,7 +53,7 @@ obj-$(CONFIG_TI_SOC_THERMAL)	+= ti-soc-thermal/
 obj-$(CONFIG_INT340X_THERMAL)  += int340x_thermal/
 obj-$(CONFIG_INTEL_BXT_PMIC_THERMAL) += intel_bxt_pmic_thermal.o
 obj-$(CONFIG_INTEL_PCH_THERMAL)	+= intel_pch_thermal.o
-obj-$(CONFIG_ST_THERMAL)	+= st/
+obj-y				+= st/
 obj-$(CONFIG_QCOM_TSENS)	+= qcom/
 obj-y				+= tegra/
 obj-$(CONFIG_HISI_THERMAL)     += hisi_thermal.o
diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c
index 2c2f6d93034e..92f67d40f2e9 100644
--- a/drivers/thermal/armada_thermal.c
+++ b/drivers/thermal/armada_thermal.c
@@ -526,8 +526,8 @@ static int armada_thermal_probe_legacy(struct platform_device *pdev,
 
 	/* First memory region points towards the status register */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (IS_ERR(res))
-		return PTR_ERR(res);
+	if (!res)
+		return -EIO;
 
 	/*
 	 * Edit the resource start address and length to map over all the
diff --git a/drivers/thermal/da9062-thermal.c b/drivers/thermal/da9062-thermal.c
index dd8dd947b7f0..01b0cb994457 100644
--- a/drivers/thermal/da9062-thermal.c
+++ b/drivers/thermal/da9062-thermal.c
@@ -106,7 +106,7 @@ static void da9062_thermal_poll_on(struct work_struct *work)
 					   THERMAL_EVENT_UNSPECIFIED);
 
 		delay = msecs_to_jiffies(thermal->zone->passive_delay);
-		schedule_delayed_work(&thermal->work, delay);
+		queue_delayed_work(system_freezable_wq, &thermal->work, delay);
 		return;
 	}
 
@@ -125,7 +125,7 @@ static irqreturn_t da9062_thermal_irq_handler(int irq, void *data)
 	struct da9062_thermal *thermal = data;
 
 	disable_irq_nosync(thermal->irq);
-	schedule_delayed_work(&thermal->work, 0);
+	queue_delayed_work(system_freezable_wq, &thermal->work, 0);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index 761d0559c268..c4111a98f1a7 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -55,25 +55,39 @@
 #define HI3660_TEMP_STEP		(205)
 #define HI3660_TEMP_LAG			(4000)
 
-#define HI6220_DEFAULT_SENSOR		2
-#define HI3660_DEFAULT_SENSOR		1
+#define HI6220_CLUSTER0_SENSOR		2
+#define HI6220_CLUSTER1_SENSOR		1
+
+#define HI3660_LITTLE_SENSOR		0
+#define HI3660_BIG_SENSOR		1
+#define HI3660_G3D_SENSOR		2
+#define HI3660_MODEM_SENSOR		3
+
+struct hisi_thermal_data;
 
 struct hisi_thermal_sensor {
+	struct hisi_thermal_data *data;
 	struct thermal_zone_device *tzd;
+	const char *irq_name;
 	uint32_t id;
 	uint32_t thres_temp;
 };
 
+struct hisi_thermal_ops {
+	int (*get_temp)(struct hisi_thermal_sensor *sensor);
+	int (*enable_sensor)(struct hisi_thermal_sensor *sensor);
+	int (*disable_sensor)(struct hisi_thermal_sensor *sensor);
+	int (*irq_handler)(struct hisi_thermal_sensor *sensor);
+	int (*probe)(struct hisi_thermal_data *data);
+};
+
 struct hisi_thermal_data {
-	int (*get_temp)(struct hisi_thermal_data *data);
-	int (*enable_sensor)(struct hisi_thermal_data *data);
-	int (*disable_sensor)(struct hisi_thermal_data *data);
-	int (*irq_handler)(struct hisi_thermal_data *data);
+	const struct hisi_thermal_ops *ops;
+	struct hisi_thermal_sensor *sensor;
 	struct platform_device *pdev;
 	struct clk *clk;
-	struct hisi_thermal_sensor sensor;
 	void __iomem *regs;
-	int irq;
+	int nr_sensors;
 };
 
 /*
@@ -266,30 +280,40 @@ static inline void hi6220_thermal_hdak_set(void __iomem *addr, int value)
 	       (value << 4), addr + HI6220_TEMP0_CFG);
 }
 
-static int hi6220_thermal_irq_handler(struct hisi_thermal_data *data)
+static int hi6220_thermal_irq_handler(struct hisi_thermal_sensor *sensor)
 {
+	struct hisi_thermal_data *data = sensor->data;
+
 	hi6220_thermal_alarm_clear(data->regs, 1);
 	return 0;
 }
 
-static int hi3660_thermal_irq_handler(struct hisi_thermal_data *data)
+static int hi3660_thermal_irq_handler(struct hisi_thermal_sensor *sensor)
 {
-	hi3660_thermal_alarm_clear(data->regs, data->sensor.id, 1);
+	struct hisi_thermal_data *data = sensor->data;
+
+	hi3660_thermal_alarm_clear(data->regs, sensor->id, 1);
 	return 0;
 }
 
-static int hi6220_thermal_get_temp(struct hisi_thermal_data *data)
+static int hi6220_thermal_get_temp(struct hisi_thermal_sensor *sensor)
 {
+	struct hisi_thermal_data *data = sensor->data;
+
 	return hi6220_thermal_get_temperature(data->regs);
 }
 
-static int hi3660_thermal_get_temp(struct hisi_thermal_data *data)
+static int hi3660_thermal_get_temp(struct hisi_thermal_sensor *sensor)
 {
-	return hi3660_thermal_get_temperature(data->regs, data->sensor.id);
+	struct hisi_thermal_data *data = sensor->data;
+
+	return hi3660_thermal_get_temperature(data->regs, sensor->id);
 }
 
-static int hi6220_thermal_disable_sensor(struct hisi_thermal_data *data)
+static int hi6220_thermal_disable_sensor(struct hisi_thermal_sensor *sensor)
 {
+	struct hisi_thermal_data *data = sensor->data;
+
 	/* disable sensor module */
 	hi6220_thermal_enable(data->regs, 0);
 	hi6220_thermal_alarm_enable(data->regs, 0);
@@ -300,16 +324,18 @@ static int hi6220_thermal_disable_sensor(struct hisi_thermal_data *data)
 	return 0;
 }
 
-static int hi3660_thermal_disable_sensor(struct hisi_thermal_data *data)
+static int hi3660_thermal_disable_sensor(struct hisi_thermal_sensor *sensor)
 {
+	struct hisi_thermal_data *data = sensor->data;
+
 	/* disable sensor module */
-	hi3660_thermal_alarm_enable(data->regs, data->sensor.id, 0);
+	hi3660_thermal_alarm_enable(data->regs, sensor->id, 0);
 	return 0;
 }
 
-static int hi6220_thermal_enable_sensor(struct hisi_thermal_data *data)
+static int hi6220_thermal_enable_sensor(struct hisi_thermal_sensor *sensor)
 {
-	struct hisi_thermal_sensor *sensor = &data->sensor;
+	struct hisi_thermal_data *data = sensor->data;
 	int ret;
 
 	/* enable clock for tsensor */
@@ -345,10 +371,10 @@ static int hi6220_thermal_enable_sensor(struct hisi_thermal_data *data)
 	return 0;
 }
 
-static int hi3660_thermal_enable_sensor(struct hisi_thermal_data *data)
+static int hi3660_thermal_enable_sensor(struct hisi_thermal_sensor *sensor)
 {
 	unsigned int value;
-	struct hisi_thermal_sensor *sensor = &data->sensor;
+	struct hisi_thermal_data *data = sensor->data;
 
 	/* disable interrupt */
 	hi3660_thermal_alarm_enable(data->regs, sensor->id, 0);
@@ -371,21 +397,8 @@ static int hi6220_thermal_probe(struct hisi_thermal_data *data)
 {
 	struct platform_device *pdev = data->pdev;
 	struct device *dev = &pdev->dev;
-	struct resource *res;
 	int ret;
 
-	data->get_temp = hi6220_thermal_get_temp;
-	data->enable_sensor = hi6220_thermal_enable_sensor;
-	data->disable_sensor = hi6220_thermal_disable_sensor;
-	data->irq_handler = hi6220_thermal_irq_handler;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	data->regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR(data->regs)) {
-		dev_err(dev, "failed to get io address\n");
-		return PTR_ERR(data->regs);
-	}
-
 	data->clk = devm_clk_get(dev, "thermal_clk");
 	if (IS_ERR(data->clk)) {
 		ret = PTR_ERR(data->clk);
@@ -394,11 +407,14 @@ static int hi6220_thermal_probe(struct hisi_thermal_data *data)
 		return ret;
 	}
 
-	data->irq = platform_get_irq(pdev, 0);
-	if (data->irq < 0)
-		return data->irq;
+	data->sensor = devm_kzalloc(dev, sizeof(*data->sensor), GFP_KERNEL);
+	if (!data->sensor)
+		return -ENOMEM;
 
-	data->sensor.id = HI6220_DEFAULT_SENSOR;
+	data->sensor[0].id = HI6220_CLUSTER0_SENSOR;
+	data->sensor[0].irq_name = "tsensor_intr";
+	data->sensor[0].data = data;
+	data->nr_sensors = 1;
 
 	return 0;
 }
@@ -407,38 +423,34 @@ static int hi3660_thermal_probe(struct hisi_thermal_data *data)
 {
 	struct platform_device *pdev = data->pdev;
 	struct device *dev = &pdev->dev;
-	struct resource *res;
 
-	data->get_temp = hi3660_thermal_get_temp;
-	data->enable_sensor = hi3660_thermal_enable_sensor;
-	data->disable_sensor = hi3660_thermal_disable_sensor;
-	data->irq_handler = hi3660_thermal_irq_handler;
+	data->nr_sensors = 2;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	data->regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR(data->regs)) {
-		dev_err(dev, "failed to get io address\n");
-		return PTR_ERR(data->regs);
-	}
+	data->sensor = devm_kzalloc(dev, sizeof(*data->sensor) *
+				    data->nr_sensors, GFP_KERNEL);
+	if (!data->sensor)
+		return -ENOMEM;
 
-	data->irq = platform_get_irq(pdev, 0);
-	if (data->irq < 0)
-		return data->irq;
+	data->sensor[0].id = HI3660_BIG_SENSOR;
+	data->sensor[0].irq_name = "tsensor_a73";
+	data->sensor[0].data = data;
 
-	data->sensor.id = HI3660_DEFAULT_SENSOR;
+	data->sensor[1].id = HI3660_LITTLE_SENSOR;
+	data->sensor[1].irq_name = "tsensor_a53";
+	data->sensor[1].data = data;
 
 	return 0;
 }
 
 static int hisi_thermal_get_temp(void *__data, int *temp)
 {
-	struct hisi_thermal_data *data = __data;
-	struct hisi_thermal_sensor *sensor = &data->sensor;
+	struct hisi_thermal_sensor *sensor = __data;
+	struct hisi_thermal_data *data = sensor->data;
 
-	*temp = data->get_temp(data);
+	*temp = data->ops->get_temp(sensor);
 
-	dev_dbg(&data->pdev->dev, "id=%d, temp=%d, thres=%d\n",
-		sensor->id, *temp, sensor->thres_temp);
+	dev_dbg(&data->pdev->dev, "tzd=%p, id=%d, temp=%d, thres=%d\n",
+		sensor->tzd, sensor->id, *temp, sensor->thres_temp);
 
 	return 0;
 }
@@ -449,38 +461,39 @@ static const struct thermal_zone_of_device_ops hisi_of_thermal_ops = {
 
 static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev)
 {
-	struct hisi_thermal_data *data = dev;
-	struct hisi_thermal_sensor *sensor = &data->sensor;
+	struct hisi_thermal_sensor *sensor = dev;
+	struct hisi_thermal_data *data = sensor->data;
 	int temp = 0;
 
-	data->irq_handler(data);
+	data->ops->irq_handler(sensor);
 
-	hisi_thermal_get_temp(data, &temp);
+	hisi_thermal_get_temp(sensor, &temp);
 
 	if (temp >= sensor->thres_temp) {
-		dev_crit(&data->pdev->dev, "THERMAL ALARM: %d > %d\n",
-			 temp, sensor->thres_temp);
+		dev_crit(&data->pdev->dev,
+			 "sensor <%d> THERMAL ALARM: %d > %d\n",
+			 sensor->id, temp, sensor->thres_temp);
 
-		thermal_zone_device_update(data->sensor.tzd,
+		thermal_zone_device_update(sensor->tzd,
 					   THERMAL_EVENT_UNSPECIFIED);
 
 	} else {
-		dev_crit(&data->pdev->dev, "THERMAL ALARM stopped: %d < %d\n",
-			 temp, sensor->thres_temp);
+		dev_crit(&data->pdev->dev,
+			 "sensor <%d> THERMAL ALARM stopped: %d < %d\n",
+			 sensor->id, temp, sensor->thres_temp);
 	}
 
 	return IRQ_HANDLED;
 }
 
 static int hisi_thermal_register_sensor(struct platform_device *pdev,
-					struct hisi_thermal_data *data,
 					struct hisi_thermal_sensor *sensor)
 {
 	int ret, i;
 	const struct thermal_trip *trip;
 
 	sensor->tzd = devm_thermal_zone_of_sensor_register(&pdev->dev,
-							   sensor->id, data,
+							   sensor->id, sensor,
 							   &hisi_of_thermal_ops);
 	if (IS_ERR(sensor->tzd)) {
 		ret = PTR_ERR(sensor->tzd);
@@ -502,14 +515,30 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev,
 	return 0;
 }
 
+static const struct hisi_thermal_ops hi6220_ops = {
+	.get_temp	= hi6220_thermal_get_temp,
+	.enable_sensor	= hi6220_thermal_enable_sensor,
+	.disable_sensor	= hi6220_thermal_disable_sensor,
+	.irq_handler	= hi6220_thermal_irq_handler,
+	.probe		= hi6220_thermal_probe,
+};
+
+static const struct hisi_thermal_ops hi3660_ops = {
+	.get_temp	= hi3660_thermal_get_temp,
+	.enable_sensor	= hi3660_thermal_enable_sensor,
+	.disable_sensor	= hi3660_thermal_disable_sensor,
+	.irq_handler	= hi3660_thermal_irq_handler,
+	.probe		= hi3660_thermal_probe,
+};
+
 static const struct of_device_id of_hisi_thermal_match[] = {
 	{
 		.compatible = "hisilicon,tsensor",
-		.data = hi6220_thermal_probe
+		.data = &hi6220_ops,
 	},
 	{
 		.compatible = "hisilicon,hi3660-tsensor",
-		.data = hi3660_thermal_probe
+		.data = &hi3660_ops,
 	},
 	{ /* end */ }
 };
@@ -527,9 +556,9 @@ static void hisi_thermal_toggle_sensor(struct hisi_thermal_sensor *sensor,
 static int hisi_thermal_probe(struct platform_device *pdev)
 {
 	struct hisi_thermal_data *data;
-	int (*platform_probe)(struct hisi_thermal_data *);
 	struct device *dev = &pdev->dev;
-	int ret;
+	struct resource *res;
+	int i, ret;
 
 	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
@@ -537,41 +566,50 @@ static int hisi_thermal_probe(struct platform_device *pdev)
 
 	data->pdev = pdev;
 	platform_set_drvdata(pdev, data);
+	data->ops = of_device_get_match_data(dev);
 
-	platform_probe = of_device_get_match_data(dev);
-	if (!platform_probe) {
-		dev_err(dev, "failed to get probe func\n");
-		return -EINVAL;
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	data->regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(data->regs)) {
+		dev_err(dev, "failed to get io address\n");
+		return PTR_ERR(data->regs);
 	}
 
-	ret = platform_probe(data);
+	ret = data->ops->probe(data);
 	if (ret)
 		return ret;
 
-	ret = hisi_thermal_register_sensor(pdev, data,
-					   &data->sensor);
-	if (ret) {
-		dev_err(dev, "failed to register thermal sensor: %d\n", ret);
-		return ret;
-	}
+	for (i = 0; i < data->nr_sensors; i++) {
+		struct hisi_thermal_sensor *sensor = &data->sensor[i];
 
-	ret = data->enable_sensor(data);
-	if (ret) {
-		dev_err(dev, "Failed to setup the sensor: %d\n", ret);
-		return ret;
-	}
+		ret = hisi_thermal_register_sensor(pdev, sensor);
+		if (ret) {
+			dev_err(dev, "failed to register thermal sensor: %d\n",
+				ret);
+			return ret;
+		}
+
+		ret = platform_get_irq_byname(pdev, sensor->irq_name);
+		if (ret < 0)
+			return ret;
 
-	if (data->irq) {
-		ret = devm_request_threaded_irq(dev, data->irq, NULL,
-				hisi_thermal_alarm_irq_thread,
-				IRQF_ONESHOT, "hisi_thermal", data);
+		ret = devm_request_threaded_irq(dev, ret, NULL,
+						hisi_thermal_alarm_irq_thread,
+						IRQF_ONESHOT, sensor->irq_name,
+						sensor);
 		if (ret < 0) {
-			dev_err(dev, "failed to request alarm irq: %d\n", ret);
+			dev_err(dev, "Failed to request alarm irq: %d\n", ret);
 			return ret;
 		}
-	}
 
-	hisi_thermal_toggle_sensor(&data->sensor, true);
+		ret = data->ops->enable_sensor(sensor);
+		if (ret) {
+			dev_err(dev, "Failed to setup the sensor: %d\n", ret);
+			return ret;
+		}
+
+		hisi_thermal_toggle_sensor(sensor, true);
+	}
 
 	return 0;
 }
@@ -579,11 +617,14 @@ static int hisi_thermal_probe(struct platform_device *pdev)
 static int hisi_thermal_remove(struct platform_device *pdev)
 {
 	struct hisi_thermal_data *data = platform_get_drvdata(pdev);
-	struct hisi_thermal_sensor *sensor = &data->sensor;
+	int i;
 
-	hisi_thermal_toggle_sensor(sensor, false);
+	for (i = 0; i < data->nr_sensors; i++) {
+		struct hisi_thermal_sensor *sensor = &data->sensor[i];
 
-	data->disable_sensor(data);
+		hisi_thermal_toggle_sensor(sensor, false);
+		data->ops->disable_sensor(sensor);
+	}
 
 	return 0;
 }
@@ -592,8 +633,10 @@ static int hisi_thermal_remove(struct platform_device *pdev)
 static int hisi_thermal_suspend(struct device *dev)
 {
 	struct hisi_thermal_data *data = dev_get_drvdata(dev);
+	int i;
 
-	data->disable_sensor(data);
+	for (i = 0; i < data->nr_sensors; i++)
+		data->ops->disable_sensor(&data->sensor[i]);
 
 	return 0;
 }
@@ -601,8 +644,12 @@ static int hisi_thermal_suspend(struct device *dev)
 static int hisi_thermal_resume(struct device *dev)
 {
 	struct hisi_thermal_data *data = dev_get_drvdata(dev);
+	int i, ret = 0;
+
+	for (i = 0; i < data->nr_sensors; i++)
+		ret |= data->ops->enable_sensor(&data->sensor[i]);
 
-	return data->enable_sensor(data);
+	return ret;
 }
 #endif
 
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index aa452acb60b6..15661549eb67 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -725,7 +725,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 	} else {
 		ret = imx_init_from_tempmon_data(pdev);
 		if (ret) {
-			dev_err(&pdev->dev, "failed to init from from fsl,tempmon-data\n");
+			dev_err(&pdev->dev, "failed to init from fsl,tempmon-data\n");
 			return ret;
 		}
 	}
@@ -762,9 +762,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 		if (ret != -EPROBE_DEFER)
 			dev_err(&pdev->dev,
 				"failed to get thermal clk: %d\n", ret);
-		cpufreq_cooling_unregister(data->cdev);
-		cpufreq_cpu_put(data->policy);
-		return ret;
+		goto cpufreq_put;
 	}
 
 	/*
@@ -777,9 +775,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 	ret = clk_prepare_enable(data->thermal_clk);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to enable thermal clk: %d\n", ret);
-		cpufreq_cooling_unregister(data->cdev);
-		cpufreq_cpu_put(data->policy);
-		return ret;
+		goto cpufreq_put;
 	}
 
 	data->tz = thermal_zone_device_register("imx_thermal_zone",
@@ -792,10 +788,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 		ret = PTR_ERR(data->tz);
 		dev_err(&pdev->dev,
 			"failed to register thermal zone device %d\n", ret);
-		clk_disable_unprepare(data->thermal_clk);
-		cpufreq_cooling_unregister(data->cdev);
-		cpufreq_cpu_put(data->policy);
-		return ret;
+		goto clk_disable;
 	}
 
 	dev_info(&pdev->dev, "%s CPU temperature grade - max:%dC"
@@ -827,14 +820,20 @@ static int imx_thermal_probe(struct platform_device *pdev)
 			0, "imx_thermal", data);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret);
-		clk_disable_unprepare(data->thermal_clk);
-		thermal_zone_device_unregister(data->tz);
-		cpufreq_cooling_unregister(data->cdev);
-		cpufreq_cpu_put(data->policy);
-		return ret;
+		goto thermal_zone_unregister;
 	}
 
 	return 0;
+
+thermal_zone_unregister:
+	thermal_zone_device_unregister(data->tz);
+clk_disable:
+	clk_disable_unprepare(data->thermal_clk);
+cpufreq_put:
+	cpufreq_cooling_unregister(data->cdev);
+	cpufreq_cpu_put(data->policy);
+
+	return ret;
 }
 
 static int imx_thermal_remove(struct platform_device *pdev)
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index 4f2816559205..4bfdb4a1e47d 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -19,23 +19,34 @@
 /***   Private data structures to represent thermal device tree data ***/
 
 /**
- * struct __thermal_bind_param - a match between trip and cooling device
+ * struct __thermal_cooling_bind_param - a cooling device for a trip point
  * @cooling_device: a pointer to identify the referred cooling device
- * @trip_id: the trip point index
- * @usage: the percentage (from 0 to 100) of cooling contribution
  * @min: minimum cooling state used at this trip point
  * @max: maximum cooling state used at this trip point
  */
 
-struct __thermal_bind_params {
+struct __thermal_cooling_bind_param {
 	struct device_node *cooling_device;
-	unsigned int trip_id;
-	unsigned int usage;
 	unsigned long min;
 	unsigned long max;
 };
 
 /**
+ * struct __thermal_bind_param - a match between trip and cooling device
+ * @tcbp: a pointer to an array of cooling devices
+ * @count: number of elements in array
+ * @trip_id: the trip point index
+ * @usage: the percentage (from 0 to 100) of cooling contribution
+ */
+
+struct __thermal_bind_params {
+	struct __thermal_cooling_bind_param *tcbp;
+	unsigned int count;
+	unsigned int trip_id;
+	unsigned int usage;
+};
+
+/**
  * struct __thermal_zone - internal representation of a thermal zone
  * @mode: current thermal zone device mode (enabled/disabled)
  * @passive_delay: polling interval while passive cooling is activated
@@ -192,25 +203,31 @@ static int of_thermal_bind(struct thermal_zone_device *thermal,
 			   struct thermal_cooling_device *cdev)
 {
 	struct __thermal_zone *data = thermal->devdata;
-	int i;
+	struct __thermal_bind_params *tbp;
+	struct __thermal_cooling_bind_param *tcbp;
+	int i, j;
 
 	if (!data || IS_ERR(data))
 		return -ENODEV;
 
 	/* find where to bind */
 	for (i = 0; i < data->num_tbps; i++) {
-		struct __thermal_bind_params *tbp = data->tbps + i;
+		tbp = data->tbps + i;
 
-		if (tbp->cooling_device == cdev->np) {
-			int ret;
+		for (j = 0; j < tbp->count; j++) {
+			tcbp = tbp->tcbp + j;
 
-			ret = thermal_zone_bind_cooling_device(thermal,
+			if (tcbp->cooling_device == cdev->np) {
+				int ret;
+
+				ret = thermal_zone_bind_cooling_device(thermal,
 						tbp->trip_id, cdev,
-						tbp->max,
-						tbp->min,
+						tcbp->max,
+						tcbp->min,
 						tbp->usage);
-			if (ret)
-				return ret;
+				if (ret)
+					return ret;
+			}
 		}
 	}
 
@@ -221,22 +238,28 @@ static int of_thermal_unbind(struct thermal_zone_device *thermal,
 			     struct thermal_cooling_device *cdev)
 {
 	struct __thermal_zone *data = thermal->devdata;
-	int i;
+	struct __thermal_bind_params *tbp;
+	struct __thermal_cooling_bind_param *tcbp;
+	int i, j;
 
 	if (!data || IS_ERR(data))
 		return -ENODEV;
 
 	/* find where to unbind */
 	for (i = 0; i < data->num_tbps; i++) {
-		struct __thermal_bind_params *tbp = data->tbps + i;
+		tbp = data->tbps + i;
+
+		for (j = 0; j < tbp->count; j++) {
+			tcbp = tbp->tcbp + j;
 
-		if (tbp->cooling_device == cdev->np) {
-			int ret;
+			if (tcbp->cooling_device == cdev->np) {
+				int ret;
 
-			ret = thermal_zone_unbind_cooling_device(thermal,
-						tbp->trip_id, cdev);
-			if (ret)
-				return ret;
+				ret = thermal_zone_unbind_cooling_device(thermal,
+							tbp->trip_id, cdev);
+				if (ret)
+					return ret;
+			}
 		}
 	}
 
@@ -486,8 +509,8 @@ thermal_zone_of_sensor_register(struct device *dev, int sensor_id, void *data,
 		if (sensor_specs.args_count >= 1) {
 			id = sensor_specs.args[0];
 			WARN(sensor_specs.args_count > 1,
-			     "%s: too many cells in sensor specifier %d\n",
-			     sensor_specs.np->name, sensor_specs.args_count);
+			     "%pOFn: too many cells in sensor specifier %d\n",
+			     sensor_specs.np, sensor_specs.args_count);
 		} else {
 			id = 0;
 		}
@@ -655,8 +678,9 @@ static int thermal_of_populate_bind_params(struct device_node *np,
 					   int ntrips)
 {
 	struct of_phandle_args cooling_spec;
+	struct __thermal_cooling_bind_param *__tcbp;
 	struct device_node *trip;
-	int ret, i;
+	int ret, i, count;
 	u32 prop;
 
 	/* Default weight. Usage is optional */
@@ -683,20 +707,44 @@ static int thermal_of_populate_bind_params(struct device_node *np,
 		goto end;
 	}
 
-	ret = of_parse_phandle_with_args(np, "cooling-device", "#cooling-cells",
-					 0, &cooling_spec);
-	if (ret < 0) {
-		pr_err("missing cooling_device property\n");
+	count = of_count_phandle_with_args(np, "cooling-device",
+					   "#cooling-cells");
+	if (!count) {
+		pr_err("Add a cooling_device property with at least one device\n");
 		goto end;
 	}
-	__tbp->cooling_device = cooling_spec.np;
-	if (cooling_spec.args_count >= 2) { /* at least min and max */
-		__tbp->min = cooling_spec.args[0];
-		__tbp->max = cooling_spec.args[1];
-	} else {
-		pr_err("wrong reference to cooling device, missing limits\n");
+
+	__tcbp = kcalloc(count, sizeof(*__tcbp), GFP_KERNEL);
+	if (!__tcbp)
+		goto end;
+
+	for (i = 0; i < count; i++) {
+		ret = of_parse_phandle_with_args(np, "cooling-device",
+				"#cooling-cells", i, &cooling_spec);
+		if (ret < 0) {
+			pr_err("Invalid cooling-device entry\n");
+			goto free_tcbp;
+		}
+
+		__tcbp[i].cooling_device = cooling_spec.np;
+
+		if (cooling_spec.args_count >= 2) { /* at least min and max */
+			__tcbp[i].min = cooling_spec.args[0];
+			__tcbp[i].max = cooling_spec.args[1];
+		} else {
+			pr_err("wrong reference to cooling device, missing limits\n");
+		}
 	}
 
+	__tbp->tcbp = __tcbp;
+	__tbp->count = count;
+
+	goto end;
+
+free_tcbp:
+	for (i = i - 1; i >= 0; i--)
+		of_node_put(__tcbp[i].cooling_device);
+	kfree(__tcbp);
 end:
 	of_node_put(trip);
 
@@ -903,8 +951,16 @@ finish:
 	return tz;
 
 free_tbps:
-	for (i = i - 1; i >= 0; i--)
-		of_node_put(tz->tbps[i].cooling_device);
+	for (i = i - 1; i >= 0; i--) {
+		struct __thermal_bind_params *tbp = tz->tbps + i;
+		int j;
+
+		for (j = 0; j < tbp->count; j++)
+			of_node_put(tbp->tcbp[j].cooling_device);
+
+		kfree(tbp->tcbp);
+	}
+
 	kfree(tz->tbps);
 free_trips:
 	for (i = 0; i < tz->ntrips; i++)
@@ -920,10 +976,18 @@ free_tz:
 
 static inline void of_thermal_free_zone(struct __thermal_zone *tz)
 {
-	int i;
+	struct __thermal_bind_params *tbp;
+	int i, j;
+
+	for (i = 0; i < tz->num_tbps; i++) {
+		tbp = tz->tbps + i;
+
+		for (j = 0; j < tbp->count; j++)
+			of_node_put(tbp->tcbp[j].cooling_device);
+
+		kfree(tbp->tcbp);
+	}
 
-	for (i = 0; i < tz->num_tbps; i++)
-		of_node_put(tz->tbps[i].cooling_device);
 	kfree(tz->tbps);
 	for (i = 0; i < tz->ntrips; i++)
 		of_node_put(tz->trips[i].np);
@@ -963,8 +1027,8 @@ int __init of_parse_thermal_zones(void)
 
 		tz = thermal_of_build_thermal_zone(child);
 		if (IS_ERR(tz)) {
-			pr_err("failed to build thermal zone %s: %ld\n",
-			       child->name,
+			pr_err("failed to build thermal zone %pOFn: %ld\n",
+			       child,
 			       PTR_ERR(tz));
 			continue;
 		}
@@ -998,7 +1062,7 @@ int __init of_parse_thermal_zones(void)
 						    tz->passive_delay,
 						    tz->polling_delay);
 		if (IS_ERR(zone)) {
-			pr_err("Failed to build %s zone %ld\n", child->name,
+			pr_err("Failed to build %pOFn zone %ld\n", child,
 			       PTR_ERR(zone));
 			kfree(tzp);
 			kfree(ops);
diff --git a/drivers/thermal/qcom-spmi-temp-alarm.c b/drivers/thermal/qcom-spmi-temp-alarm.c
index ad4f3a8d6560..b2d5d5bf4a9b 100644
--- a/drivers/thermal/qcom-spmi-temp-alarm.c
+++ b/drivers/thermal/qcom-spmi-temp-alarm.c
@@ -23,6 +23,8 @@
 #include <linux/regmap.h>
 #include <linux/thermal.h>
 
+#include "thermal_core.h"
+
 #define QPNP_TM_REG_TYPE		0x04
 #define QPNP_TM_REG_SUBTYPE		0x05
 #define QPNP_TM_REG_STATUS		0x08
@@ -37,9 +39,11 @@
 #define STATUS_GEN2_STATE_MASK		GENMASK(6, 4)
 #define STATUS_GEN2_STATE_SHIFT		4
 
-#define SHUTDOWN_CTRL1_OVERRIDE_MASK	GENMASK(7, 6)
+#define SHUTDOWN_CTRL1_OVERRIDE_S2	BIT(6)
 #define SHUTDOWN_CTRL1_THRESHOLD_MASK	GENMASK(1, 0)
 
+#define SHUTDOWN_CTRL1_RATE_25HZ	BIT(3)
+
 #define ALARM_CTRL_FORCE_ENABLE		BIT(7)
 
 /*
@@ -56,12 +60,19 @@
 #define TEMP_THRESH_STEP		5000	/* Threshold step: 5 C */
 
 #define THRESH_MIN			0
+#define THRESH_MAX			3
+
+/* Stage 2 Threshold Min: 125 C */
+#define STAGE2_THRESHOLD_MIN		125000
+/* Stage 2 Threshold Max: 140 C */
+#define STAGE2_THRESHOLD_MAX		140000
 
 /* Temperature in Milli Celsius reported during stage 0 if no ADC is present */
 #define DEFAULT_TEMP			37000
 
 struct qpnp_tm_chip {
 	struct regmap			*map;
+	struct device			*dev;
 	struct thermal_zone_device	*tz_dev;
 	unsigned int			subtype;
 	long				temp;
@@ -69,6 +80,10 @@ struct qpnp_tm_chip {
 	unsigned int			stage;
 	unsigned int			prev_stage;
 	unsigned int			base;
+	/* protects .thresh, .stage and chip registers */
+	struct mutex			lock;
+	bool				initialized;
+
 	struct iio_channel		*adc;
 };
 
@@ -125,6 +140,8 @@ static int qpnp_tm_update_temp_no_adc(struct qpnp_tm_chip *chip)
 	unsigned int stage, stage_new, stage_old;
 	int ret;
 
+	WARN_ON(!mutex_is_locked(&chip->lock));
+
 	ret = qpnp_tm_get_temp_stage(chip);
 	if (ret < 0)
 		return ret;
@@ -163,8 +180,15 @@ static int qpnp_tm_get_temp(void *data, int *temp)
 	if (!temp)
 		return -EINVAL;
 
+	if (!chip->initialized) {
+		*temp = DEFAULT_TEMP;
+		return 0;
+	}
+
 	if (!chip->adc) {
+		mutex_lock(&chip->lock);
 		ret = qpnp_tm_update_temp_no_adc(chip);
+		mutex_unlock(&chip->lock);
 		if (ret < 0)
 			return ret;
 	} else {
@@ -180,8 +204,72 @@ static int qpnp_tm_get_temp(void *data, int *temp)
 	return 0;
 }
 
+static int qpnp_tm_update_critical_trip_temp(struct qpnp_tm_chip *chip,
+					     int temp)
+{
+	u8 reg;
+	bool disable_s2_shutdown = false;
+
+	WARN_ON(!mutex_is_locked(&chip->lock));
+
+	/*
+	 * Default: S2 and S3 shutdown enabled, thresholds at
+	 * 105C/125C/145C, monitoring at 25Hz
+	 */
+	reg = SHUTDOWN_CTRL1_RATE_25HZ;
+
+	if (temp == THERMAL_TEMP_INVALID ||
+	    temp < STAGE2_THRESHOLD_MIN) {
+		chip->thresh = THRESH_MIN;
+		goto skip;
+	}
+
+	if (temp <= STAGE2_THRESHOLD_MAX) {
+		chip->thresh = THRESH_MAX -
+			((STAGE2_THRESHOLD_MAX - temp) /
+			 TEMP_THRESH_STEP);
+		disable_s2_shutdown = true;
+	} else {
+		chip->thresh = THRESH_MAX;
+
+		if (chip->adc)
+			disable_s2_shutdown = true;
+		else
+			dev_warn(chip->dev,
+				 "No ADC is configured and critical temperature is above the maximum stage 2 threshold of 140 C! Configuring stage 2 shutdown at 140 C.\n");
+	}
+
+skip:
+	reg |= chip->thresh;
+	if (disable_s2_shutdown)
+		reg |= SHUTDOWN_CTRL1_OVERRIDE_S2;
+
+	return qpnp_tm_write(chip, QPNP_TM_REG_SHUTDOWN_CTRL1, reg);
+}
+
+static int qpnp_tm_set_trip_temp(void *data, int trip, int temp)
+{
+	struct qpnp_tm_chip *chip = data;
+	const struct thermal_trip *trip_points;
+	int ret;
+
+	trip_points = of_thermal_get_trip_points(chip->tz_dev);
+	if (!trip_points)
+		return -EINVAL;
+
+	if (trip_points[trip].type != THERMAL_TRIP_CRITICAL)
+		return 0;
+
+	mutex_lock(&chip->lock);
+	ret = qpnp_tm_update_critical_trip_temp(chip, temp);
+	mutex_unlock(&chip->lock);
+
+	return ret;
+}
+
 static const struct thermal_zone_of_device_ops qpnp_tm_sensor_ops = {
 	.get_temp = qpnp_tm_get_temp,
+	.set_trip_temp = qpnp_tm_set_trip_temp,
 };
 
 static irqreturn_t qpnp_tm_isr(int irq, void *data)
@@ -193,6 +281,29 @@ static irqreturn_t qpnp_tm_isr(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+static int qpnp_tm_get_critical_trip_temp(struct qpnp_tm_chip *chip)
+{
+	int ntrips;
+	const struct thermal_trip *trips;
+	int i;
+
+	ntrips = of_thermal_get_ntrips(chip->tz_dev);
+	if (ntrips <= 0)
+		return THERMAL_TEMP_INVALID;
+
+	trips = of_thermal_get_trip_points(chip->tz_dev);
+	if (!trips)
+		return THERMAL_TEMP_INVALID;
+
+	for (i = 0; i < ntrips; i++) {
+		if (of_thermal_is_trip_valid(chip->tz_dev, i) &&
+		    trips[i].type == THERMAL_TRIP_CRITICAL)
+			return trips[i].temperature;
+	}
+
+	return THERMAL_TEMP_INVALID;
+}
+
 /*
  * This function initializes the internal temp value based on only the
  * current thermal stage and threshold. Setup threshold control and
@@ -203,17 +314,20 @@ static int qpnp_tm_init(struct qpnp_tm_chip *chip)
 	unsigned int stage;
 	int ret;
 	u8 reg = 0;
+	int crit_temp;
+
+	mutex_lock(&chip->lock);
 
 	ret = qpnp_tm_read(chip, QPNP_TM_REG_SHUTDOWN_CTRL1, &reg);
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	chip->thresh = reg & SHUTDOWN_CTRL1_THRESHOLD_MASK;
 	chip->temp = DEFAULT_TEMP;
 
 	ret = qpnp_tm_get_temp_stage(chip);
 	if (ret < 0)
-		return ret;
+		goto out;
 	chip->stage = ret;
 
 	stage = chip->subtype == QPNP_TM_SUBTYPE_GEN1
@@ -224,21 +338,19 @@ static int qpnp_tm_init(struct qpnp_tm_chip *chip)
 			     (stage - 1) * TEMP_STAGE_STEP +
 			     TEMP_THRESH_MIN;
 
-	/*
-	 * Set threshold and disable software override of stage 2 and 3
-	 * shutdowns.
-	 */
-	chip->thresh = THRESH_MIN;
-	reg &= ~(SHUTDOWN_CTRL1_OVERRIDE_MASK | SHUTDOWN_CTRL1_THRESHOLD_MASK);
-	reg |= chip->thresh & SHUTDOWN_CTRL1_THRESHOLD_MASK;
-	ret = qpnp_tm_write(chip, QPNP_TM_REG_SHUTDOWN_CTRL1, reg);
+	crit_temp = qpnp_tm_get_critical_trip_temp(chip);
+	ret = qpnp_tm_update_critical_trip_temp(chip, crit_temp);
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	/* Enable the thermal alarm PMIC module in always-on mode. */
 	reg = ALARM_CTRL_FORCE_ENABLE;
 	ret = qpnp_tm_write(chip, QPNP_TM_REG_ALARM_CTRL, reg);
 
+	chip->initialized = true;
+
+out:
+	mutex_unlock(&chip->lock);
 	return ret;
 }
 
@@ -257,6 +369,9 @@ static int qpnp_tm_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	dev_set_drvdata(&pdev->dev, chip);
+	chip->dev = &pdev->dev;
+
+	mutex_init(&chip->lock);
 
 	chip->map = dev_get_regmap(pdev->dev.parent, NULL);
 	if (!chip->map)
@@ -302,6 +417,18 @@ static int qpnp_tm_probe(struct platform_device *pdev)
 
 	chip->subtype = subtype;
 
+	/*
+	 * Register the sensor before initializing the hardware to be able to
+	 * read the trip points. get_temp() returns the default temperature
+	 * before the hardware initialization is completed.
+	 */
+	chip->tz_dev = devm_thermal_zone_of_sensor_register(
+		&pdev->dev, 0, chip, &qpnp_tm_sensor_ops);
+	if (IS_ERR(chip->tz_dev)) {
+		dev_err(&pdev->dev, "failed to register sensor\n");
+		return PTR_ERR(chip->tz_dev);
+	}
+
 	ret = qpnp_tm_init(chip);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "init failed\n");
@@ -313,12 +440,7 @@ static int qpnp_tm_probe(struct platform_device *pdev)
 	if (ret < 0)
 		return ret;
 
-	chip->tz_dev = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, chip,
-							&qpnp_tm_sensor_ops);
-	if (IS_ERR(chip->tz_dev)) {
-		dev_err(&pdev->dev, "failed to register sensor\n");
-		return PTR_ERR(chip->tz_dev);
-	}
+	thermal_zone_device_update(chip->tz_dev, THERMAL_EVENT_UNSPECIFIED);
 
 	return 0;
 }
diff --git a/drivers/thermal/qcom/tsens-8916.c b/drivers/thermal/qcom/tsens-8916.c
index fdf561b8b81d..c6dd620ac029 100644
--- a/drivers/thermal/qcom/tsens-8916.c
+++ b/drivers/thermal/qcom/tsens-8916.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #include <linux/platform_device.h>
@@ -109,5 +100,6 @@ static const struct tsens_ops ops_8916 = {
 const struct tsens_data data_8916 = {
 	.num_sensors	= 5,
 	.ops		= &ops_8916,
+	.reg_offsets	= { [SROT_CTRL_OFFSET] = 0x0 },
 	.hw_ids		= (unsigned int []){0, 1, 2, 4, 5 },
 };
diff --git a/drivers/thermal/qcom/tsens-8960.c b/drivers/thermal/qcom/tsens-8960.c
index 0451277d3a8f..0f0adb302a7b 100644
--- a/drivers/thermal/qcom/tsens-8960.c
+++ b/drivers/thermal/qcom/tsens-8960.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #include <linux/platform_device.h>
@@ -69,7 +60,7 @@ static int suspend_8960(struct tsens_device *tmdev)
 {
 	int ret;
 	unsigned int mask;
-	struct regmap *map = tmdev->map;
+	struct regmap *map = tmdev->tm_map;
 
 	ret = regmap_read(map, THRESHOLD_ADDR, &tmdev->ctx.threshold);
 	if (ret)
@@ -94,7 +85,7 @@ static int suspend_8960(struct tsens_device *tmdev)
 static int resume_8960(struct tsens_device *tmdev)
 {
 	int ret;
-	struct regmap *map = tmdev->map;
+	struct regmap *map = tmdev->tm_map;
 
 	ret = regmap_update_bits(map, CNTL_ADDR, SW_RST, SW_RST);
 	if (ret)
@@ -126,12 +117,12 @@ static int enable_8960(struct tsens_device *tmdev, int id)
 	int ret;
 	u32 reg, mask;
 
-	ret = regmap_read(tmdev->map, CNTL_ADDR, &reg);
+	ret = regmap_read(tmdev->tm_map, CNTL_ADDR, &reg);
 	if (ret)
 		return ret;
 
 	mask = BIT(id + SENSOR0_SHIFT);
-	ret = regmap_write(tmdev->map, CNTL_ADDR, reg | SW_RST);
+	ret = regmap_write(tmdev->tm_map, CNTL_ADDR, reg | SW_RST);
 	if (ret)
 		return ret;
 
@@ -140,7 +131,7 @@ static int enable_8960(struct tsens_device *tmdev, int id)
 	else
 		reg |= mask | SLP_CLK_ENA_8660 | EN;
 
-	ret = regmap_write(tmdev->map, CNTL_ADDR, reg);
+	ret = regmap_write(tmdev->tm_map, CNTL_ADDR, reg);
 	if (ret)
 		return ret;
 
@@ -157,7 +148,7 @@ static void disable_8960(struct tsens_device *tmdev)
 	mask <<= SENSOR0_SHIFT;
 	mask |= EN;
 
-	ret = regmap_read(tmdev->map, CNTL_ADDR, &reg_cntl);
+	ret = regmap_read(tmdev->tm_map, CNTL_ADDR, &reg_cntl);
 	if (ret)
 		return;
 
@@ -168,7 +159,7 @@ static void disable_8960(struct tsens_device *tmdev)
 	else
 		reg_cntl &= ~SLP_CLK_ENA_8660;
 
-	regmap_write(tmdev->map, CNTL_ADDR, reg_cntl);
+	regmap_write(tmdev->tm_map, CNTL_ADDR, reg_cntl);
 }
 
 static int init_8960(struct tsens_device *tmdev)
@@ -176,8 +167,8 @@ static int init_8960(struct tsens_device *tmdev)
 	int ret, i;
 	u32 reg_cntl;
 
-	tmdev->map = dev_get_regmap(tmdev->dev, NULL);
-	if (!tmdev->map)
+	tmdev->tm_map = dev_get_regmap(tmdev->dev, NULL);
+	if (!tmdev->tm_map)
 		return -ENODEV;
 
 	/*
@@ -193,14 +184,14 @@ static int init_8960(struct tsens_device *tmdev)
 	}
 
 	reg_cntl = SW_RST;
-	ret = regmap_update_bits(tmdev->map, CNTL_ADDR, SW_RST, reg_cntl);
+	ret = regmap_update_bits(tmdev->tm_map, CNTL_ADDR, SW_RST, reg_cntl);
 	if (ret)
 		return ret;
 
 	if (tmdev->num_sensors > 1) {
 		reg_cntl |= SLP_CLK_ENA | (MEASURE_PERIOD << 18);
 		reg_cntl &= ~SW_RST;
-		ret = regmap_update_bits(tmdev->map, CONFIG_ADDR,
+		ret = regmap_update_bits(tmdev->tm_map, CONFIG_ADDR,
 					 CONFIG_MASK, CONFIG);
 	} else {
 		reg_cntl |= SLP_CLK_ENA_8660 | (MEASURE_PERIOD << 16);
@@ -209,12 +200,12 @@ static int init_8960(struct tsens_device *tmdev)
 	}
 
 	reg_cntl |= GENMASK(tmdev->num_sensors - 1, 0) << SENSOR0_SHIFT;
-	ret = regmap_write(tmdev->map, CNTL_ADDR, reg_cntl);
+	ret = regmap_write(tmdev->tm_map, CNTL_ADDR, reg_cntl);
 	if (ret)
 		return ret;
 
 	reg_cntl |= EN;
-	ret = regmap_write(tmdev->map, CNTL_ADDR, reg_cntl);
+	ret = regmap_write(tmdev->tm_map, CNTL_ADDR, reg_cntl);
 	if (ret)
 		return ret;
 
@@ -261,12 +252,12 @@ static int get_temp_8960(struct tsens_device *tmdev, int id, int *temp)
 
 	timeout = jiffies + usecs_to_jiffies(TIMEOUT_US);
 	do {
-		ret = regmap_read(tmdev->map, INT_STATUS_ADDR, &trdy);
+		ret = regmap_read(tmdev->tm_map, INT_STATUS_ADDR, &trdy);
 		if (ret)
 			return ret;
 		if (!(trdy & TRDY_MASK))
 			continue;
-		ret = regmap_read(tmdev->map, s->status, &code);
+		ret = regmap_read(tmdev->tm_map, s->status, &code);
 		if (ret)
 			return ret;
 		*temp = code_to_mdegC(code, s);
diff --git a/drivers/thermal/qcom/tsens-8974.c b/drivers/thermal/qcom/tsens-8974.c
index 9baf77e8cbe3..3d3fda3d731b 100644
--- a/drivers/thermal/qcom/tsens-8974.c
+++ b/drivers/thermal/qcom/tsens-8974.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #include <linux/platform_device.h>
@@ -241,4 +232,5 @@ static const struct tsens_ops ops_8974 = {
 const struct tsens_data data_8974 = {
 	.num_sensors	= 11,
 	.ops		= &ops_8974,
+	.reg_offsets	= { [SROT_CTRL_OFFSET] = 0x0 },
 };
diff --git a/drivers/thermal/qcom/tsens-common.c b/drivers/thermal/qcom/tsens-common.c
index 6207d8d92351..3be4be2e0465 100644
--- a/drivers/thermal/qcom/tsens-common.c
+++ b/drivers/thermal/qcom/tsens-common.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #include <linux/err.h>
@@ -21,7 +12,11 @@
 #include <linux/regmap.h>
 #include "tsens.h"
 
-#define S0_ST_ADDR		0x1030
+/* SROT */
+#define TSENS_EN		BIT(0)
+
+/* TM */
+#define STATUS_OFFSET		0x30
 #define SN_ADDR_OFFSET		0x4
 #define SN_ST_TEMP_MASK		0x3ff
 #define CAL_DEGC_PT1		30
@@ -107,8 +102,8 @@ int get_temp_common(struct tsens_device *tmdev, int id, int *temp)
 	unsigned int status_reg;
 	int last_temp = 0, ret;
 
-	status_reg = S0_ST_ADDR + s->hw_id * SN_ADDR_OFFSET;
-	ret = regmap_read(tmdev->map, status_reg, &code);
+	status_reg = tmdev->tm_offset + STATUS_OFFSET + s->hw_id * SN_ADDR_OFFSET;
+	ret = regmap_read(tmdev->tm_map, status_reg, &code);
 	if (ret)
 		return ret;
 	last_temp = code & SN_ST_TEMP_MASK;
@@ -126,29 +121,52 @@ static const struct regmap_config tsens_config = {
 
 int __init init_common(struct tsens_device *tmdev)
 {
-	void __iomem *base;
+	void __iomem *tm_base, *srot_base;
 	struct resource *res;
+	u32 code;
+	int ret;
 	struct platform_device *op = of_find_device_by_node(tmdev->dev->of_node);
+	u16 ctrl_offset = tmdev->reg_offsets[SROT_CTRL_OFFSET];
 
 	if (!op)
 		return -EINVAL;
 
-	/* The driver only uses the TM register address space for now */
 	if (op->num_resources > 1) {
+		/* DT with separate SROT and TM address space */
 		tmdev->tm_offset = 0;
+		res = platform_get_resource(op, IORESOURCE_MEM, 1);
+		srot_base = devm_ioremap_resource(&op->dev, res);
+		if (IS_ERR(srot_base))
+			return PTR_ERR(srot_base);
+
+		tmdev->srot_map = devm_regmap_init_mmio(tmdev->dev,
+							srot_base, &tsens_config);
+		if (IS_ERR(tmdev->srot_map))
+			return PTR_ERR(tmdev->srot_map);
+
 	} else {
 		/* old DTs where SROT and TM were in a contiguous 2K block */
 		tmdev->tm_offset = 0x1000;
 	}
 
 	res = platform_get_resource(op, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(&op->dev, res);
-	if (IS_ERR(base))
-		return PTR_ERR(base);
-
-	tmdev->map = devm_regmap_init_mmio(tmdev->dev, base, &tsens_config);
-	if (IS_ERR(tmdev->map))
-		return PTR_ERR(tmdev->map);
+	tm_base = devm_ioremap_resource(&op->dev, res);
+	if (IS_ERR(tm_base))
+		return PTR_ERR(tm_base);
+
+	tmdev->tm_map = devm_regmap_init_mmio(tmdev->dev, tm_base, &tsens_config);
+	if (IS_ERR(tmdev->tm_map))
+		return PTR_ERR(tmdev->tm_map);
+
+	if (tmdev->srot_map) {
+		ret = regmap_read(tmdev->srot_map, ctrl_offset, &code);
+		if (ret)
+			return ret;
+		if (!(code & TSENS_EN)) {
+			dev_err(tmdev->dev, "tsens device is not enabled\n");
+			return -ENODEV;
+		}
+	}
 
 	return 0;
 }
diff --git a/drivers/thermal/qcom/tsens-v2.c b/drivers/thermal/qcom/tsens-v2.c
index 44da02f594ac..381a212872bf 100644
--- a/drivers/thermal/qcom/tsens-v2.c
+++ b/drivers/thermal/qcom/tsens-v2.c
@@ -21,7 +21,7 @@ static int get_temp_tsens_v2(struct tsens_device *tmdev, int id, int *temp)
 	int ret;
 
 	status_reg = tmdev->tm_offset + STATUS_OFFSET + s->hw_id * 4;
-	ret = regmap_read(tmdev->map, status_reg, &code);
+	ret = regmap_read(tmdev->tm_map, status_reg, &code);
 	if (ret)
 		return ret;
 	last_temp = code & LAST_TEMP_MASK;
@@ -29,7 +29,7 @@ static int get_temp_tsens_v2(struct tsens_device *tmdev, int id, int *temp)
 		goto done;
 
 	/* Try a second time */
-	ret = regmap_read(tmdev->map, status_reg, &code);
+	ret = regmap_read(tmdev->tm_map, status_reg, &code);
 	if (ret)
 		return ret;
 	if (code & STATUS_VALID_BIT) {
@@ -40,7 +40,7 @@ static int get_temp_tsens_v2(struct tsens_device *tmdev, int id, int *temp)
 	}
 
 	/* Try a third/last time */
-	ret = regmap_read(tmdev->map, status_reg, &code);
+	ret = regmap_read(tmdev->tm_map, status_reg, &code);
 	if (ret)
 		return ret;
 	if (code & STATUS_VALID_BIT) {
@@ -68,10 +68,12 @@ static const struct tsens_ops ops_generic_v2 = {
 
 const struct tsens_data data_tsens_v2 = {
 	.ops            = &ops_generic_v2,
+	.reg_offsets	= { [SROT_CTRL_OFFSET] = 0x4 },
 };
 
 /* Kept around for backward compatibility with old msm8996.dtsi */
 const struct tsens_data data_8996 = {
 	.num_sensors	= 13,
 	.ops		= &ops_generic_v2,
+	.reg_offsets	= { [SROT_CTRL_OFFSET] = 0x4 },
 };
diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
index a2c9bfae3d86..f1ec9bbe4717 100644
--- a/drivers/thermal/qcom/tsens.c
+++ b/drivers/thermal/qcom/tsens.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #include <linux/err.h>
@@ -89,11 +80,6 @@ static int tsens_register(struct tsens_device *tmdev)
 {
 	int i;
 	struct thermal_zone_device *tzd;
-	u32 *hw_id, n = tmdev->num_sensors;
-
-	hw_id = devm_kcalloc(tmdev->dev, n, sizeof(u32), GFP_KERNEL);
-	if (!hw_id)
-		return -ENOMEM;
 
 	for (i = 0;  i < tmdev->num_sensors; i++) {
 		tmdev->sensor[i].tmdev = tmdev;
@@ -158,6 +144,9 @@ static int tsens_probe(struct platform_device *pdev)
 		else
 			tmdev->sensor[i].hw_id = i;
 	}
+	for (i = 0; i < REG_ARRAY_SIZE; i++) {
+		tmdev->reg_offsets[i] = data->reg_offsets[i];
+	}
 
 	if (!tmdev->ops || !tmdev->ops->init || !tmdev->ops->get_temp)
 		return -EINVAL;
diff --git a/drivers/thermal/qcom/tsens.h b/drivers/thermal/qcom/tsens.h
index 14331eb45a86..7b7feee5dc46 100644
--- a/drivers/thermal/qcom/tsens.h
+++ b/drivers/thermal/qcom/tsens.h
@@ -1,15 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
+
 #ifndef __QCOM_TSENS_H__
 #define __QCOM_TSENS_H__
 
@@ -55,15 +48,23 @@ struct tsens_ops {
 	int (*get_trend)(struct tsens_device *, int, enum thermal_trend *);
 };
 
+enum reg_list {
+	SROT_CTRL_OFFSET,
+
+	REG_ARRAY_SIZE,
+};
+
 /**
  * struct tsens_data - tsens instance specific data
  * @num_sensors: Max number of sensors supported by platform
  * @ops: operations the tsens instance supports
  * @hw_ids: Subset of sensors ids supported by platform, if not the first n
+ * @reg_offsets: Register offsets for commonly used registers
  */
 struct tsens_data {
 	const u32		num_sensors;
 	const struct tsens_ops	*ops;
+	const u16		reg_offsets[REG_ARRAY_SIZE];
 	unsigned int		*hw_ids;
 };
 
@@ -76,8 +77,10 @@ struct tsens_context {
 struct tsens_device {
 	struct device			*dev;
 	u32				num_sensors;
-	struct regmap			*map;
+	struct regmap			*tm_map;
+	struct regmap			*srot_map;
 	u32				tm_offset;
+	u16				reg_offsets[REG_ARRAY_SIZE];
 	struct tsens_context		ctx;
 	const struct tsens_ops		*ops;
 	struct tsens_sensor		sensor[0];
diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c
index 450ed66edf58..18c711b19514 100644
--- a/drivers/thermal/qoriq_thermal.c
+++ b/drivers/thermal/qoriq_thermal.c
@@ -119,8 +119,8 @@ static int qoriq_tmu_get_sensor_id(void)
 	if (sensor_specs.args_count >= 1) {
 		id = sensor_specs.args[0];
 		WARN(sensor_specs.args_count > 1,
-				"%s: too many cells in sensor specifier %d\n",
-				sensor_specs.np->name, sensor_specs.args_count);
+				"%pOFn: too many cells in sensor specifier %d\n",
+				sensor_specs.np, sensor_specs.args_count);
 	} else {
 		id = 0;
 	}
@@ -294,6 +294,7 @@ static SIMPLE_DEV_PM_OPS(qoriq_tmu_pm_ops,
 
 static const struct of_device_id qoriq_tmu_match[] = {
 	{ .compatible = "fsl,qoriq-tmu", },
+	{ .compatible = "fsl,imx8mq-tmu", },
 	{},
 };
 MODULE_DEVICE_TABLE(of, qoriq_tmu_match);
diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c
index 7aed5337bdd3..75786cc8e2f9 100644
--- a/drivers/thermal/rcar_gen3_thermal.c
+++ b/drivers/thermal/rcar_gen3_thermal.c
@@ -318,9 +318,11 @@ static void rcar_gen3_thermal_init(struct rcar_gen3_thermal_tsc *tsc)
 }
 
 static const struct of_device_id rcar_gen3_thermal_dt_ids[] = {
+	{ .compatible = "renesas,r8a774a1-thermal", },
 	{ .compatible = "renesas,r8a7795-thermal", },
 	{ .compatible = "renesas,r8a7796-thermal", },
 	{ .compatible = "renesas,r8a77965-thermal", },
+	{ .compatible = "renesas,r8a77980-thermal", },
 	{},
 };
 MODULE_DEVICE_TABLE(of, rcar_gen3_thermal_dt_ids);
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index 78f932822d38..8014a207d8d9 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -113,6 +113,10 @@ static const struct of_device_id rcar_thermal_dt_ids[] = {
 		 .data = &rcar_gen2_thermal,
 	},
 	{
+		.compatible = "renesas,thermal-r8a77970",
+		.data = &rcar_gen3_thermal,
+	},
+	{
 		.compatible = "renesas,thermal-r8a77995",
 		.data = &rcar_gen3_thermal,
 	},
@@ -434,8 +438,8 @@ static irqreturn_t rcar_thermal_irq(int irq, void *data)
 	rcar_thermal_for_each_priv(priv, common) {
 		if (rcar_thermal_had_changed(priv, status)) {
 			rcar_thermal_irq_disable(priv);
-			schedule_delayed_work(&priv->work,
-					      msecs_to_jiffies(300));
+			queue_delayed_work(system_freezable_wq, &priv->work,
+					   msecs_to_jiffies(300));
 		}
 	}
 
@@ -453,6 +457,7 @@ static int rcar_thermal_remove(struct platform_device *pdev)
 
 	rcar_thermal_for_each_priv(priv, common) {
 		rcar_thermal_irq_disable(priv);
+		cancel_delayed_work_sync(&priv->work);
 		if (priv->chip->use_of_thermal)
 			thermal_remove_hwmon_sysfs(priv->zone);
 		else
@@ -492,7 +497,7 @@ static int rcar_thermal_probe(struct platform_device *pdev)
 	pm_runtime_get_sync(dev);
 
 	for (i = 0; i < chip->nirqs; i++) {
-		irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+		irq = platform_get_resource(pdev, IORESOURCE_IRQ, i);
 		if (!irq)
 			continue;
 		if (!common->base) {
diff --git a/drivers/thermal/st/Kconfig b/drivers/thermal/st/Kconfig
index 490fdbe22eea..b80f9a9e4f8f 100644
--- a/drivers/thermal/st/Kconfig
+++ b/drivers/thermal/st/Kconfig
@@ -1,3 +1,7 @@
+#
+# STMicroelectronics thermal drivers configuration
+#
+
 config ST_THERMAL
        tristate "Thermal sensors on STMicroelectronics STi series of SoCs"
        help
@@ -10,3 +14,13 @@ config ST_THERMAL_SYSCFG
 config ST_THERMAL_MEMMAP
 	select ST_THERMAL
 	tristate "STi series memory mapped access based thermal sensors"
+
+config STM32_THERMAL
+       tristate "Thermal framework support on STMicroelectronics STM32 series of SoCs"
+       depends on MACH_STM32MP157
+       default y
+       help
+	Support for thermal framework on STMicroelectronics STM32 series of
+	SoCs. This thermal driver allows to access to general thermal framework
+	functionalities and to acces to SoC sensor functionalities. This
+	configuration is fully dependent of MACH_STM32MP157.
diff --git a/drivers/thermal/st/Makefile b/drivers/thermal/st/Makefile
index b38878977bd8..b2b9e9b96296 100644
--- a/drivers/thermal/st/Makefile
+++ b/drivers/thermal/st/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_ST_THERMAL)		:= st_thermal.o
 obj-$(CONFIG_ST_THERMAL_SYSCFG)		+= st_thermal_syscfg.o
 obj-$(CONFIG_ST_THERMAL_MEMMAP)		+= st_thermal_memmap.o
+obj-$(CONFIG_STM32_THERMAL)		:= stm_thermal.o
+\ No newline at end of file
diff --git a/drivers/thermal/st/stm_thermal.c b/drivers/thermal/st/stm_thermal.c
new file mode 100644
index 000000000000..47623da0f91b
--- /dev/null
+++ b/drivers/thermal/st/stm_thermal.c
@@ -0,0 +1,760 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) STMicroelectronics 2018 - All Rights Reserved
+ * Author: David Hernandez Sanchez <david.hernandezsanchez@st.com> for
+ * STMicroelectronics.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/thermal.h>
+
+#include "../thermal_core.h"
+#include "../thermal_hwmon.h"
+
+/* DTS register offsets */
+#define DTS_CFGR1_OFFSET	0x0
+#define DTS_T0VALR1_OFFSET	0x8
+#define DTS_RAMPVALR_OFFSET	0X10
+#define DTS_ITR1_OFFSET		0x14
+#define DTS_DR_OFFSET		0x1C
+#define DTS_SR_OFFSET		0x20
+#define DTS_ITENR_OFFSET	0x24
+#define DTS_CIFR_OFFSET		0x28
+
+/* DTS_CFGR1 register mask definitions */
+#define HSREF_CLK_DIV_MASK	GENMASK(30, 24)
+#define TS1_SMP_TIME_MASK	GENMASK(19, 16)
+#define TS1_INTRIG_SEL_MASK	GENMASK(11, 8)
+
+/* DTS_T0VALR1 register mask definitions */
+#define TS1_T0_MASK		GENMASK(17, 16)
+#define TS1_FMT0_MASK		GENMASK(15, 0)
+
+/* DTS_RAMPVALR register mask definitions */
+#define TS1_RAMP_COEFF_MASK	GENMASK(15, 0)
+
+/* DTS_ITR1 register mask definitions */
+#define TS1_HITTHD_MASK		GENMASK(31, 16)
+#define TS1_LITTHD_MASK		GENMASK(15, 0)
+
+/* DTS_DR register mask definitions */
+#define TS1_MFREQ_MASK		GENMASK(15, 0)
+
+/* Less significant bit position definitions */
+#define TS1_T0_POS		16
+#define TS1_SMP_TIME_POS	16
+#define TS1_HITTHD_POS		16
+#define HSREF_CLK_DIV_POS	24
+
+/* DTS_CFGR1 bit definitions */
+#define TS1_EN			BIT(0)
+#define TS1_START		BIT(4)
+#define REFCLK_SEL		BIT(20)
+#define REFCLK_LSE		REFCLK_SEL
+#define Q_MEAS_OPT		BIT(21)
+#define CALIBRATION_CONTROL	Q_MEAS_OPT
+
+/* DTS_SR bit definitions */
+#define TS_RDY			BIT(15)
+/* Bit definitions below are common for DTS_SR, DTS_ITENR and DTS_CIFR */
+#define HIGH_THRESHOLD		BIT(2)
+#define LOW_THRESHOLD		BIT(1)
+
+/* Constants */
+#define ADJUST			100
+#define ONE_MHZ			1000000
+#define POLL_TIMEOUT		5000
+#define STARTUP_TIME		40
+#define TS1_T0_VAL0		30
+#define TS1_T0_VAL1		130
+#define NO_HW_TRIG		0
+
+/* The Thermal Framework expects millidegrees */
+#define mcelsius(temp)		((temp) * 1000)
+
+/* The Sensor expects oC degrees */
+#define celsius(temp)		((temp) / 1000)
+
+struct stm_thermal_sensor {
+	struct device *dev;
+	struct thermal_zone_device *th_dev;
+	enum thermal_device_mode mode;
+	struct clk *clk;
+	int high_temp;
+	int low_temp;
+	int temp_critical;
+	int temp_passive;
+	unsigned int low_temp_enabled;
+	int num_trips;
+	int irq;
+	unsigned int irq_enabled;
+	void __iomem *base;
+	int t0, fmt0, ramp_coeff;
+};
+
+static irqreturn_t stm_thermal_alarm_irq(int irq, void *sdata)
+{
+	struct stm_thermal_sensor *sensor = sdata;
+
+	disable_irq_nosync(irq);
+	sensor->irq_enabled = false;
+
+	return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t stm_thermal_alarm_irq_thread(int irq, void *sdata)
+{
+	u32 value;
+	struct stm_thermal_sensor *sensor = sdata;
+
+	/* read IT reason in SR and clear flags */
+	value = readl_relaxed(sensor->base + DTS_SR_OFFSET);
+
+	if ((value & LOW_THRESHOLD) == LOW_THRESHOLD)
+		writel_relaxed(LOW_THRESHOLD, sensor->base + DTS_CIFR_OFFSET);
+
+	if ((value & HIGH_THRESHOLD) == HIGH_THRESHOLD)
+		writel_relaxed(HIGH_THRESHOLD, sensor->base + DTS_CIFR_OFFSET);
+
+	thermal_zone_device_update(sensor->th_dev, THERMAL_EVENT_UNSPECIFIED);
+
+	return IRQ_HANDLED;
+}
+
+static int stm_sensor_power_on(struct stm_thermal_sensor *sensor)
+{
+	int ret;
+	u32 value;
+
+	/* Enable sensor */
+	value = readl_relaxed(sensor->base + DTS_CFGR1_OFFSET);
+	value |= TS1_EN;
+	writel_relaxed(value, sensor->base + DTS_CFGR1_OFFSET);
+
+	/*
+	 * The DTS block can be enabled by setting TSx_EN bit in
+	 * DTS_CFGRx register. It requires a startup time of
+	 * 40μs. Use 5 ms as arbitrary timeout.
+	 */
+	ret = readl_poll_timeout(sensor->base + DTS_SR_OFFSET,
+				 value, (value & TS_RDY),
+				 STARTUP_TIME, POLL_TIMEOUT);
+	if (ret)
+		return ret;
+
+	/* Start continuous measuring */
+	value = readl_relaxed(sensor->base +
+			      DTS_CFGR1_OFFSET);
+	value |= TS1_START;
+	writel_relaxed(value, sensor->base +
+		       DTS_CFGR1_OFFSET);
+
+	return 0;
+}
+
+static int stm_sensor_power_off(struct stm_thermal_sensor *sensor)
+{
+	u32 value;
+
+	/* Stop measuring */
+	value = readl_relaxed(sensor->base + DTS_CFGR1_OFFSET);
+	value &= ~TS1_START;
+	writel_relaxed(value, sensor->base + DTS_CFGR1_OFFSET);
+
+	/* Ensure stop is taken into account */
+	usleep_range(STARTUP_TIME, POLL_TIMEOUT);
+
+	/* Disable sensor */
+	value = readl_relaxed(sensor->base + DTS_CFGR1_OFFSET);
+	value &= ~TS1_EN;
+	writel_relaxed(value, sensor->base + DTS_CFGR1_OFFSET);
+
+	/* Ensure disable is taken into account */
+	return readl_poll_timeout(sensor->base + DTS_SR_OFFSET, value,
+				  !(value & TS_RDY),
+				  STARTUP_TIME, POLL_TIMEOUT);
+}
+
+static int stm_thermal_calibration(struct stm_thermal_sensor *sensor)
+{
+	u32 value, clk_freq;
+	u32 prescaler;
+
+	/* Figure out prescaler value for PCLK during calibration */
+	clk_freq = clk_get_rate(sensor->clk);
+	if (!clk_freq)
+		return -EINVAL;
+
+	prescaler = 0;
+	clk_freq /= ONE_MHZ;
+	if (clk_freq) {
+		while (prescaler <= clk_freq)
+			prescaler++;
+	}
+
+	value = readl_relaxed(sensor->base + DTS_CFGR1_OFFSET);
+
+	/* Clear prescaler */
+	value &= ~HSREF_CLK_DIV_MASK;
+
+	/* Set prescaler. pclk_freq/prescaler < 1MHz */
+	value |= (prescaler << HSREF_CLK_DIV_POS);
+
+	/* Select PCLK as reference clock */
+	value &= ~REFCLK_SEL;
+
+	/* Set maximal sampling time for better precision */
+	value |= TS1_SMP_TIME_MASK;
+
+	/* Measure with calibration */
+	value &= ~CALIBRATION_CONTROL;
+
+	/* select trigger */
+	value &= ~TS1_INTRIG_SEL_MASK;
+	value |= NO_HW_TRIG;
+
+	writel_relaxed(value, sensor->base + DTS_CFGR1_OFFSET);
+
+	return 0;
+}
+
+/* Fill in DTS structure with factory sensor values */
+static int stm_thermal_read_factory_settings(struct stm_thermal_sensor *sensor)
+{
+	/* Retrieve engineering calibration temperature */
+	sensor->t0 = readl_relaxed(sensor->base + DTS_T0VALR1_OFFSET) &
+					TS1_T0_MASK;
+	if (!sensor->t0)
+		sensor->t0 = TS1_T0_VAL0;
+	else
+		sensor->t0 = TS1_T0_VAL1;
+
+	/* Retrieve fmt0 and put it on Hz */
+	sensor->fmt0 = ADJUST * readl_relaxed(sensor->base + DTS_T0VALR1_OFFSET)
+					      & TS1_FMT0_MASK;
+
+	/* Retrieve ramp coefficient */
+	sensor->ramp_coeff = readl_relaxed(sensor->base + DTS_RAMPVALR_OFFSET) &
+					   TS1_RAMP_COEFF_MASK;
+
+	if (!sensor->fmt0 || !sensor->ramp_coeff) {
+		dev_err(sensor->dev, "%s: wrong setting\n", __func__);
+		return -EINVAL;
+	}
+
+	dev_dbg(sensor->dev, "%s: T0 = %doC, FMT0 = %dHz, RAMP_COEFF = %dHz/oC",
+		__func__, sensor->t0, sensor->fmt0, sensor->ramp_coeff);
+
+	return 0;
+}
+
+static int stm_thermal_calculate_threshold(struct stm_thermal_sensor *sensor,
+					   int temp, u32 *th)
+{
+	int freqM;
+	u32 sampling_time;
+
+	/* Retrieve the number of periods to sample */
+	sampling_time = (readl_relaxed(sensor->base + DTS_CFGR1_OFFSET) &
+			TS1_SMP_TIME_MASK) >> TS1_SMP_TIME_POS;
+
+	/* Figure out the CLK_PTAT frequency for a given temperature */
+	freqM = ((temp - sensor->t0) * sensor->ramp_coeff)
+		 + sensor->fmt0;
+
+	dev_dbg(sensor->dev, "%s: freqM for threshold = %d Hz",
+		__func__, freqM);
+
+	/* Figure out the threshold sample number */
+	*th = clk_get_rate(sensor->clk);
+	if (!*th)
+		return -EINVAL;
+
+	*th = *th / freqM;
+
+	*th *= sampling_time;
+
+	return 0;
+}
+
+static int stm_thermal_set_threshold(struct stm_thermal_sensor *sensor)
+{
+	u32 value, th;
+	int ret;
+
+	value = readl_relaxed(sensor->base + DTS_ITR1_OFFSET);
+
+	/* Erase threshold content */
+	value &= ~(TS1_LITTHD_MASK | TS1_HITTHD_MASK);
+
+	/* Retrieve the sample threshold number th for a given temperature */
+	ret = stm_thermal_calculate_threshold(sensor, sensor->high_temp, &th);
+	if (ret)
+		return ret;
+
+	value |= th & TS1_LITTHD_MASK;
+
+	if (sensor->low_temp_enabled) {
+		/* Retrieve the sample threshold */
+		ret = stm_thermal_calculate_threshold(sensor, sensor->low_temp,
+						      &th);
+		if (ret)
+			return ret;
+
+		value |= (TS1_HITTHD_MASK  & (th << TS1_HITTHD_POS));
+	}
+
+	/* Write value on the Low interrupt threshold */
+	writel_relaxed(value, sensor->base + DTS_ITR1_OFFSET);
+
+	return 0;
+}
+
+/* Disable temperature interrupt */
+static int stm_disable_irq(struct stm_thermal_sensor *sensor)
+{
+	u32 value;
+
+	/* Disable IT generation for low and high thresholds */
+	value = readl_relaxed(sensor->base + DTS_ITENR_OFFSET);
+	writel_relaxed(value & ~(LOW_THRESHOLD | HIGH_THRESHOLD),
+		       sensor->base + DTS_ITENR_OFFSET);
+
+	dev_dbg(sensor->dev, "%s: IT disabled on sensor side", __func__);
+
+	return 0;
+}
+
+/* Enable temperature interrupt */
+static int stm_enable_irq(struct stm_thermal_sensor *sensor)
+{
+	u32 value;
+
+	/*
+	 * Code below enables High temperature threshold using a low threshold
+	 * sampling value
+	 */
+
+	/* Make sure LOW_THRESHOLD IT is clear before enabling */
+	writel_relaxed(LOW_THRESHOLD, sensor->base + DTS_CIFR_OFFSET);
+
+	/* Enable IT generation for low threshold */
+	value = readl_relaxed(sensor->base + DTS_ITENR_OFFSET);
+	value |= LOW_THRESHOLD;
+
+	/* Enable the low temperature threshold if needed */
+	if (sensor->low_temp_enabled) {
+		/* Make sure HIGH_THRESHOLD IT is clear before enabling */
+		writel_relaxed(HIGH_THRESHOLD, sensor->base + DTS_CIFR_OFFSET);
+
+		/* Enable IT generation for high threshold */
+		value |= HIGH_THRESHOLD;
+	}
+
+	/* Enable thresholds */
+	writel_relaxed(value, sensor->base + DTS_ITENR_OFFSET);
+
+	dev_dbg(sensor->dev, "%s: IT enabled on sensor side", __func__);
+
+	return 0;
+}
+
+static int stm_thermal_update_threshold(struct stm_thermal_sensor *sensor)
+{
+	int ret;
+
+	sensor->mode = THERMAL_DEVICE_DISABLED;
+
+	ret = stm_sensor_power_off(sensor);
+	if (ret)
+		return ret;
+
+	ret = stm_disable_irq(sensor);
+	if (ret)
+		return ret;
+
+	ret = stm_thermal_set_threshold(sensor);
+	if (ret)
+		return ret;
+
+	ret = stm_enable_irq(sensor);
+	if (ret)
+		return ret;
+
+	ret = stm_sensor_power_on(sensor);
+	if (ret)
+		return ret;
+
+	sensor->mode = THERMAL_DEVICE_ENABLED;
+
+	return 0;
+}
+
+/* Callback to get temperature from HW */
+static int stm_thermal_get_temp(void *data, int *temp)
+{
+	struct stm_thermal_sensor *sensor = data;
+	u32 sampling_time;
+	int freqM, ret;
+
+	if (sensor->mode != THERMAL_DEVICE_ENABLED)
+		return -EAGAIN;
+
+	/* Retrieve the number of samples */
+	ret = readl_poll_timeout(sensor->base + DTS_DR_OFFSET, freqM,
+				 (freqM & TS1_MFREQ_MASK), STARTUP_TIME,
+				 POLL_TIMEOUT);
+
+	if (ret)
+		return ret;
+
+	if (!freqM)
+		return -ENODATA;
+
+	/* Retrieve the number of periods sampled */
+	sampling_time = (readl_relaxed(sensor->base + DTS_CFGR1_OFFSET) &
+			TS1_SMP_TIME_MASK) >> TS1_SMP_TIME_POS;
+
+	/* Figure out the number of samples per period */
+	freqM /= sampling_time;
+
+	/* Figure out the CLK_PTAT frequency */
+	freqM = clk_get_rate(sensor->clk) / freqM;
+	if (!freqM)
+		return -EINVAL;
+
+	dev_dbg(sensor->dev, "%s: freqM=%d\n", __func__, freqM);
+
+	/* Figure out the temperature in mili celsius */
+	*temp = mcelsius(sensor->t0 + ((freqM - sensor->fmt0) /
+			 sensor->ramp_coeff));
+
+	dev_dbg(sensor->dev, "%s: temperature = %d millicelsius",
+		__func__, *temp);
+
+	/* Update thresholds */
+	if (sensor->num_trips > 1) {
+		/* Update alarm threshold value to next higher trip point */
+		if (sensor->high_temp == sensor->temp_passive &&
+		    celsius(*temp) >= sensor->temp_passive) {
+			sensor->high_temp = sensor->temp_critical;
+			sensor->low_temp = sensor->temp_passive;
+			sensor->low_temp_enabled = true;
+			ret = stm_thermal_update_threshold(sensor);
+			if (ret)
+				return ret;
+		}
+
+		if (sensor->high_temp == sensor->temp_critical &&
+		    celsius(*temp) < sensor->temp_passive) {
+			sensor->high_temp = sensor->temp_passive;
+			sensor->low_temp_enabled = false;
+			ret = stm_thermal_update_threshold(sensor);
+			if (ret)
+				return ret;
+		}
+
+		/*
+		 * Re-enable alarm IRQ if temperature below critical
+		 * temperature
+		 */
+		if (!sensor->irq_enabled &&
+		    (celsius(*temp) < sensor->temp_critical)) {
+			sensor->irq_enabled = true;
+			enable_irq(sensor->irq);
+		}
+	}
+
+	return 0;
+}
+
+/* Registers DTS irq to be visible by GIC */
+static int stm_register_irq(struct stm_thermal_sensor *sensor)
+{
+	struct device *dev = sensor->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	int ret;
+
+	sensor->irq = platform_get_irq(pdev, 0);
+	if (sensor->irq < 0) {
+		dev_err(dev, "%s: Unable to find IRQ\n", __func__);
+		return sensor->irq;
+	}
+
+	ret = devm_request_threaded_irq(dev, sensor->irq,
+					stm_thermal_alarm_irq,
+					stm_thermal_alarm_irq_thread,
+					IRQF_ONESHOT,
+					dev->driver->name, sensor);
+	if (ret) {
+		dev_err(dev, "%s: Failed to register IRQ %d\n", __func__,
+			sensor->irq);
+		return ret;
+	}
+
+	sensor->irq_enabled = true;
+
+	dev_dbg(dev, "%s: thermal IRQ registered", __func__);
+
+	return 0;
+}
+
+static int stm_thermal_sensor_off(struct stm_thermal_sensor *sensor)
+{
+	int ret;
+
+	ret = stm_sensor_power_off(sensor);
+	if (ret)
+		return ret;
+
+	clk_disable_unprepare(sensor->clk);
+
+	return 0;
+}
+
+static int stm_thermal_prepare(struct stm_thermal_sensor *sensor)
+{
+	int ret;
+	struct device *dev = sensor->dev;
+
+	ret = clk_prepare_enable(sensor->clk);
+	if (ret)
+		return ret;
+
+	ret = stm_thermal_calibration(sensor);
+	if (ret)
+		goto thermal_unprepare;
+
+	/* Set threshold(s) for IRQ */
+	ret = stm_thermal_set_threshold(sensor);
+	if (ret)
+		goto thermal_unprepare;
+
+	ret = stm_enable_irq(sensor);
+	if (ret)
+		goto thermal_unprepare;
+
+	ret = stm_sensor_power_on(sensor);
+	if (ret) {
+		dev_err(dev, "%s: failed to power on sensor\n", __func__);
+		goto irq_disable;
+	}
+
+	return 0;
+
+irq_disable:
+	stm_disable_irq(sensor);
+
+thermal_unprepare:
+	clk_disable_unprepare(sensor->clk);
+
+	return ret;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int stm_thermal_suspend(struct device *dev)
+{
+	int ret;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct stm_thermal_sensor *sensor = platform_get_drvdata(pdev);
+
+	ret = stm_thermal_sensor_off(sensor);
+	if (ret)
+		return ret;
+
+	sensor->mode = THERMAL_DEVICE_DISABLED;
+
+	return 0;
+}
+
+static int stm_thermal_resume(struct device *dev)
+{
+	int ret;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct stm_thermal_sensor *sensor = platform_get_drvdata(pdev);
+
+	ret = stm_thermal_prepare(sensor);
+	if (ret)
+		return ret;
+
+	sensor->mode = THERMAL_DEVICE_ENABLED;
+
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+SIMPLE_DEV_PM_OPS(stm_thermal_pm_ops, stm_thermal_suspend, stm_thermal_resume);
+
+static const struct thermal_zone_of_device_ops stm_tz_ops = {
+	.get_temp	= stm_thermal_get_temp,
+};
+
+static const struct of_device_id stm_thermal_of_match[] = {
+		{ .compatible = "st,stm32-thermal"},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, stm_thermal_of_match);
+
+static int stm_thermal_probe(struct platform_device *pdev)
+{
+	struct stm_thermal_sensor *sensor;
+	struct resource *res;
+	const struct thermal_trip *trip;
+	void __iomem *base;
+	int ret, i;
+
+	if (!pdev->dev.of_node) {
+		dev_err(&pdev->dev, "%s: device tree node not found\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	sensor = devm_kzalloc(&pdev->dev, sizeof(*sensor), GFP_KERNEL);
+	if (!sensor)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, sensor);
+
+	sensor->dev = &pdev->dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	/* Populate sensor */
+	sensor->base = base;
+
+	ret = stm_thermal_read_factory_settings(sensor);
+	if (ret)
+		return ret;
+
+	sensor->clk = devm_clk_get(&pdev->dev, "pclk");
+	if (IS_ERR(sensor->clk)) {
+		dev_err(&pdev->dev, "%s: failed to fetch PCLK clock\n",
+			__func__);
+		return PTR_ERR(sensor->clk);
+	}
+
+	/* Register IRQ into GIC */
+	ret = stm_register_irq(sensor);
+	if (ret)
+		return ret;
+
+	sensor->th_dev = devm_thermal_zone_of_sensor_register(&pdev->dev, 0,
+							      sensor,
+							      &stm_tz_ops);
+
+	if (IS_ERR(sensor->th_dev)) {
+		dev_err(&pdev->dev, "%s: thermal zone sensor registering KO\n",
+			__func__);
+		ret = PTR_ERR(sensor->th_dev);
+		return ret;
+	}
+
+	if (!sensor->th_dev->ops->get_crit_temp) {
+		/* Critical point must be provided */
+		ret = -EINVAL;
+		goto err_tz;
+	}
+
+	ret = sensor->th_dev->ops->get_crit_temp(sensor->th_dev,
+			&sensor->temp_critical);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"Not able to read critical_temp: %d\n", ret);
+		goto err_tz;
+	}
+
+	sensor->temp_critical = celsius(sensor->temp_critical);
+
+	/* Set thresholds for IRQ */
+	sensor->high_temp = sensor->temp_critical;
+
+	trip = of_thermal_get_trip_points(sensor->th_dev);
+	sensor->num_trips = of_thermal_get_ntrips(sensor->th_dev);
+
+	/* Find out passive temperature if it exists */
+	for (i = (sensor->num_trips - 1); i >= 0;  i--) {
+		if (trip[i].type == THERMAL_TRIP_PASSIVE) {
+			sensor->temp_passive = celsius(trip[i].temperature);
+			/* Update high temperature threshold */
+			sensor->high_temp = sensor->temp_passive;
+			}
+	}
+
+	/*
+	 * Ensure low_temp_enabled flag is disabled.
+	 * By disabling low_temp_enabled, low threshold IT will not be
+	 * configured neither enabled because it is not needed as high
+	 * threshold is set on the lowest temperature trip point after
+	 * probe.
+	 */
+	sensor->low_temp_enabled = false;
+
+	/* Configure and enable HW sensor */
+	ret = stm_thermal_prepare(sensor);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"Not able to enable sensor: %d\n", ret);
+		goto err_tz;
+	}
+
+	/*
+	 * Thermal_zone doesn't enable hwmon as default,
+	 * enable it here
+	 */
+	sensor->th_dev->tzp->no_hwmon = false;
+	ret = thermal_add_hwmon_sysfs(sensor->th_dev);
+	if (ret)
+		goto err_tz;
+
+	sensor->mode = THERMAL_DEVICE_ENABLED;
+
+	dev_info(&pdev->dev, "%s: Driver initialized successfully\n",
+		 __func__);
+
+	return 0;
+
+err_tz:
+	thermal_zone_of_sensor_unregister(&pdev->dev, sensor->th_dev);
+	return ret;
+}
+
+static int stm_thermal_remove(struct platform_device *pdev)
+{
+	struct stm_thermal_sensor *sensor = platform_get_drvdata(pdev);
+
+	stm_thermal_sensor_off(sensor);
+	thermal_remove_hwmon_sysfs(sensor->th_dev);
+	thermal_zone_of_sensor_unregister(&pdev->dev, sensor->th_dev);
+
+	return 0;
+}
+
+static struct platform_driver stm_thermal_driver = {
+	.driver = {
+		.name	= "stm_thermal",
+		.pm     = &stm_thermal_pm_ops,
+		.of_match_table = stm_thermal_of_match,
+	},
+	.probe		= stm_thermal_probe,
+	.remove		= stm_thermal_remove,
+};
+module_platform_driver(stm_thermal_driver);
+
+MODULE_DESCRIPTION("STMicroelectronics STM32 Thermal Sensor Driver");
+MODULE_AUTHOR("David Hernandez Sanchez <david.hernandezsanchez@st.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:stm_thermal");
diff --git a/drivers/thunderbolt/cap.c b/drivers/thunderbolt/cap.c
index c2277b8ee88d..9553305c63ea 100644
--- a/drivers/thunderbolt/cap.c
+++ b/drivers/thunderbolt/cap.c
@@ -1,8 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Thunderbolt Cactus Ridge driver - capabilities lookup
+ * Thunderbolt driver - capabilities lookup
  *
  * Copyright (c) 2014 Andreas Noever <andreas.noever@gmail.com>
+ * Copyright (C) 2018, Intel Corporation
  */
 
 #include <linux/slab.h>
diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c
index 37a7f4c735d0..73b386de4d15 100644
--- a/drivers/thunderbolt/ctl.c
+++ b/drivers/thunderbolt/ctl.c
@@ -1,8 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Thunderbolt Cactus Ridge driver - control channel and configuration commands
+ * Thunderbolt driver - control channel and configuration commands
  *
  * Copyright (c) 2014 Andreas Noever <andreas.noever@gmail.com>
+ * Copyright (C) 2018, Intel Corporation
  */
 
 #include <linux/crc32.h>
@@ -631,7 +632,7 @@ struct tb_ctl *tb_ctl_alloc(struct tb_nhi *nhi, event_cb cb, void *cb_data)
 		ctl->rx_packets[i]->frame.callback = tb_ctl_rx_callback;
 	}
 
-	tb_ctl_info(ctl, "control channel created\n");
+	tb_ctl_dbg(ctl, "control channel created\n");
 	return ctl;
 err:
 	tb_ctl_free(ctl);
@@ -662,8 +663,7 @@ void tb_ctl_free(struct tb_ctl *ctl)
 		tb_ctl_pkg_free(ctl->rx_packets[i]);
 
 
-	if (ctl->frame_pool)
-		dma_pool_destroy(ctl->frame_pool);
+	dma_pool_destroy(ctl->frame_pool);
 	kfree(ctl);
 }
 
@@ -673,7 +673,7 @@ void tb_ctl_free(struct tb_ctl *ctl)
 void tb_ctl_start(struct tb_ctl *ctl)
 {
 	int i;
-	tb_ctl_info(ctl, "control channel starting...\n");
+	tb_ctl_dbg(ctl, "control channel starting...\n");
 	tb_ring_start(ctl->tx); /* is used to ack hotplug packets, start first */
 	tb_ring_start(ctl->rx);
 	for (i = 0; i < TB_CTL_RX_PKG_COUNT; i++)
@@ -702,7 +702,7 @@ void tb_ctl_stop(struct tb_ctl *ctl)
 	if (!list_empty(&ctl->request_queue))
 		tb_ctl_WARN(ctl, "dangling request in request_queue\n");
 	INIT_LIST_HEAD(&ctl->request_queue);
-	tb_ctl_info(ctl, "control channel stopped\n");
+	tb_ctl_dbg(ctl, "control channel stopped\n");
 }
 
 /* public interface, commands */
diff --git a/drivers/thunderbolt/ctl.h b/drivers/thunderbolt/ctl.h
index 3062e0b5f71e..2f1a1e111110 100644
--- a/drivers/thunderbolt/ctl.h
+++ b/drivers/thunderbolt/ctl.h
@@ -1,8 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Thunderbolt Cactus Ridge driver - control channel and configuration commands
+ * Thunderbolt driver - control channel and configuration commands
  *
  * Copyright (c) 2014 Andreas Noever <andreas.noever@gmail.com>
+ * Copyright (C) 2018, Intel Corporation
  */
 
 #ifndef _TB_CFG
diff --git a/drivers/thunderbolt/dma_port.c b/drivers/thunderbolt/dma_port.c
index f2701194f810..847dd07a7b17 100644
--- a/drivers/thunderbolt/dma_port.c
+++ b/drivers/thunderbolt/dma_port.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Thunderbolt DMA configuration based mailbox support
  *
  * Copyright (C) 2017, Intel Corporation
  * Authors: Michael Jamet <michael.jamet@intel.com>
  *          Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/delay.h>
diff --git a/drivers/thunderbolt/dma_port.h b/drivers/thunderbolt/dma_port.h
index c4a69e0fbff7..7deadd97ce31 100644
--- a/drivers/thunderbolt/dma_port.h
+++ b/drivers/thunderbolt/dma_port.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Thunderbolt DMA configuration based mailbox support
  *
  * Copyright (C) 2017, Intel Corporation
  * Authors: Michael Jamet <michael.jamet@intel.com>
  *          Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef DMA_PORT_H_
diff --git a/drivers/thunderbolt/domain.c b/drivers/thunderbolt/domain.c
index 092381e2accf..93e562f18d40 100644
--- a/drivers/thunderbolt/domain.c
+++ b/drivers/thunderbolt/domain.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Thunderbolt bus support
  *
  * Copyright (C) 2017, Intel Corporation
- * Author:  Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
  */
 
 #include <linux/device.h>
diff --git a/drivers/thunderbolt/eeprom.c b/drivers/thunderbolt/eeprom.c
index 3e8caf22c294..81e8ac4c5805 100644
--- a/drivers/thunderbolt/eeprom.c
+++ b/drivers/thunderbolt/eeprom.c
@@ -1,8 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Thunderbolt Cactus Ridge driver - eeprom access
+ * Thunderbolt driver - eeprom access
  *
  * Copyright (c) 2014 Andreas Noever <andreas.noever@gmail.com>
+ * Copyright (C) 2018, Intel Corporation
  */
 
 #include <linux/crc32.h>
@@ -540,7 +541,7 @@ int tb_drom_read(struct tb_switch *sw)
 		return res;
 	size &= 0x3ff;
 	size += TB_DROM_DATA_START;
-	tb_sw_info(sw, "reading drom (length: %#x)\n", size);
+	tb_sw_dbg(sw, "reading drom (length: %#x)\n", size);
 	if (size < sizeof(*header)) {
 		tb_sw_warn(sw, "drom too small, aborting\n");
 		return -EIO;
diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c
index 28fc4ce75edb..e3fc920af682 100644
--- a/drivers/thunderbolt/icm.c
+++ b/drivers/thunderbolt/icm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Internal Thunderbolt Connection Manager. This is a firmware running on
  * the Thunderbolt host controller performing most of the low-level
@@ -6,10 +7,6 @@
  * Copyright (C) 2017, Intel Corporation
  * Authors: Michael Jamet <michael.jamet@intel.com>
  *          Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/delay.h>
diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
index 5cd6bdfa068f..9aa44f9762a3 100644
--- a/drivers/thunderbolt/nhi.c
+++ b/drivers/thunderbolt/nhi.c
@@ -1,10 +1,11 @@
 /*
- * Thunderbolt Cactus Ridge driver - NHI driver
+ * Thunderbolt driver - NHI driver
  *
  * The NHI (native host interface) is the pci device that allows us to send and
  * receive frames from the thunderbolt bus.
  *
  * Copyright (c) 2014 Andreas Noever <andreas.noever@gmail.com>
+ * Copyright (C) 2018, Intel Corporation
  */
 
 #include <linux/pm_runtime.h>
@@ -95,9 +96,9 @@ static void ring_interrupt_active(struct tb_ring *ring, bool active)
 	else
 		new = old & ~mask;
 
-	dev_info(&ring->nhi->pdev->dev,
-		 "%s interrupt at register %#x bit %d (%#x -> %#x)\n",
-		 active ? "enabling" : "disabling", reg, bit, old, new);
+	dev_dbg(&ring->nhi->pdev->dev,
+		"%s interrupt at register %#x bit %d (%#x -> %#x)\n",
+		active ? "enabling" : "disabling", reg, bit, old, new);
 
 	if (new == old)
 		dev_WARN(&ring->nhi->pdev->dev,
@@ -476,8 +477,9 @@ static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size,
 				     void *poll_data)
 {
 	struct tb_ring *ring = NULL;
-	dev_info(&nhi->pdev->dev, "allocating %s ring %d of size %d\n",
-		 transmit ? "TX" : "RX", hop, size);
+
+	dev_dbg(&nhi->pdev->dev, "allocating %s ring %d of size %d\n",
+		transmit ? "TX" : "RX", hop, size);
 
 	/* Tx Ring 2 is reserved for E2E workaround */
 	if (transmit && hop == RING_E2E_UNUSED_HOPID)
@@ -585,8 +587,8 @@ void tb_ring_start(struct tb_ring *ring)
 		dev_WARN(&ring->nhi->pdev->dev, "ring already started\n");
 		goto err;
 	}
-	dev_info(&ring->nhi->pdev->dev, "starting %s %d\n",
-		 RING_TYPE(ring), ring->hop);
+	dev_dbg(&ring->nhi->pdev->dev, "starting %s %d\n",
+		RING_TYPE(ring), ring->hop);
 
 	if (ring->flags & RING_FLAG_FRAME) {
 		/* Means 4096 */
@@ -647,8 +649,8 @@ void tb_ring_stop(struct tb_ring *ring)
 {
 	spin_lock_irq(&ring->nhi->lock);
 	spin_lock(&ring->lock);
-	dev_info(&ring->nhi->pdev->dev, "stopping %s %d\n",
-		 RING_TYPE(ring), ring->hop);
+	dev_dbg(&ring->nhi->pdev->dev, "stopping %s %d\n",
+		RING_TYPE(ring), ring->hop);
 	if (ring->nhi->going_away)
 		goto err;
 	if (!ring->running) {
@@ -716,10 +718,8 @@ void tb_ring_free(struct tb_ring *ring)
 	ring->descriptors_dma = 0;
 
 
-	dev_info(&ring->nhi->pdev->dev,
-		 "freeing %s %d\n",
-		 RING_TYPE(ring),
-		 ring->hop);
+	dev_dbg(&ring->nhi->pdev->dev, "freeing %s %d\n", RING_TYPE(ring),
+		ring->hop);
 
 	/**
 	 * ring->work can no longer be scheduled (it is scheduled only
@@ -931,7 +931,8 @@ static int nhi_runtime_resume(struct device *dev)
 static void nhi_shutdown(struct tb_nhi *nhi)
 {
 	int i;
-	dev_info(&nhi->pdev->dev, "shutdown\n");
+
+	dev_dbg(&nhi->pdev->dev, "shutdown\n");
 
 	for (i = 0; i < nhi->hop_count; i++) {
 		if (nhi->tx_rings[i])
@@ -1059,7 +1060,7 @@ static int nhi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return -ENODEV;
 	}
 
-	dev_info(&nhi->pdev->dev, "NHI initialized, starting thunderbolt\n");
+	dev_dbg(&nhi->pdev->dev, "NHI initialized, starting thunderbolt\n");
 
 	res = tb_domain_add(tb);
 	if (res) {
diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h
index 1696a4560948..1b5d47ecd3ed 100644
--- a/drivers/thunderbolt/nhi.h
+++ b/drivers/thunderbolt/nhi.h
@@ -1,8 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Thunderbolt Cactus Ridge driver - NHI driver
+ * Thunderbolt driver - NHI driver
  *
  * Copyright (c) 2014 Andreas Noever <andreas.noever@gmail.com>
+ * Copyright (C) 2018, Intel Corporation
  */
 
 #ifndef DSL3510_H_
diff --git a/drivers/thunderbolt/nhi_regs.h b/drivers/thunderbolt/nhi_regs.h
index b3e49d19c01e..a60bd98c1d04 100644
--- a/drivers/thunderbolt/nhi_regs.h
+++ b/drivers/thunderbolt/nhi_regs.h
@@ -3,6 +3,7 @@
  * Thunderbolt driver - NHI registers
  *
  * Copyright (c) 2014 Andreas Noever <andreas.noever@gmail.com>
+ * Copyright (C) 2018, Intel Corporation
  */
 
 #ifndef NHI_REGS_H_
diff --git a/drivers/thunderbolt/path.c b/drivers/thunderbolt/path.c
index ff49ad880bfd..a11956522bac 100644
--- a/drivers/thunderbolt/path.c
+++ b/drivers/thunderbolt/path.c
@@ -13,19 +13,19 @@
 
 static void tb_dump_hop(struct tb_port *port, struct tb_regs_hop *hop)
 {
-	tb_port_info(port, " Hop through port %d to hop %d (%s)\n",
-		     hop->out_port, hop->next_hop,
-		     hop->enable ? "enabled" : "disabled");
-	tb_port_info(port, "  Weight: %d Priority: %d Credits: %d Drop: %d\n",
-		     hop->weight, hop->priority,
-		     hop->initial_credits, hop->drop_packages);
-	tb_port_info(port, "   Counter enabled: %d Counter index: %d\n",
-		     hop->counter_enable, hop->counter);
-	tb_port_info(port, "  Flow Control (In/Eg): %d/%d Shared Buffer (In/Eg): %d/%d\n",
-		     hop->ingress_fc, hop->egress_fc,
-		     hop->ingress_shared_buffer, hop->egress_shared_buffer);
-	tb_port_info(port, "  Unknown1: %#x Unknown2: %#x Unknown3: %#x\n",
-		     hop->unknown1, hop->unknown2, hop->unknown3);
+	tb_port_dbg(port, " Hop through port %d to hop %d (%s)\n",
+		    hop->out_port, hop->next_hop,
+		    hop->enable ? "enabled" : "disabled");
+	tb_port_dbg(port, "  Weight: %d Priority: %d Credits: %d Drop: %d\n",
+		    hop->weight, hop->priority,
+		    hop->initial_credits, hop->drop_packages);
+	tb_port_dbg(port, "   Counter enabled: %d Counter index: %d\n",
+		    hop->counter_enable, hop->counter);
+	tb_port_dbg(port, "  Flow Control (In/Eg): %d/%d Shared Buffer (In/Eg): %d/%d\n",
+		    hop->ingress_fc, hop->egress_fc,
+		    hop->ingress_shared_buffer, hop->egress_shared_buffer);
+	tb_port_dbg(port, "  Unknown1: %#x Unknown2: %#x Unknown3: %#x\n",
+		    hop->unknown1, hop->unknown2, hop->unknown3);
 }
 
 /**
diff --git a/drivers/thunderbolt/property.c b/drivers/thunderbolt/property.c
index 8fe913a95b4a..b2f0d6386cee 100644
--- a/drivers/thunderbolt/property.c
+++ b/drivers/thunderbolt/property.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Thunderbolt XDomain property support
  *
  * Copyright (C) 2017, Intel Corporation
  * Authors: Michael Jamet <michael.jamet@intel.com>
  *          Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/err.h>
diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
index 7442bc4c6433..52ff854f0d6c 100644
--- a/drivers/thunderbolt/switch.c
+++ b/drivers/thunderbolt/switch.c
@@ -1,8 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Thunderbolt Cactus Ridge driver - switch/port utility functions
+ * Thunderbolt driver - switch/port utility functions
  *
  * Copyright (c) 2014 Andreas Noever <andreas.noever@gmail.com>
+ * Copyright (C) 2018, Intel Corporation
  */
 
 #include <linux/delay.h>
@@ -436,15 +437,15 @@ static const char *tb_port_type(struct tb_regs_port_header *port)
 
 static void tb_dump_port(struct tb *tb, struct tb_regs_port_header *port)
 {
-	tb_info(tb,
-		" Port %d: %x:%x (Revision: %d, TB Version: %d, Type: %s (%#x))\n",
-		port->port_number, port->vendor_id, port->device_id,
-		port->revision, port->thunderbolt_version, tb_port_type(port),
-		port->type);
-	tb_info(tb, "  Max hop id (in/out): %d/%d\n",
-		port->max_in_hop_id, port->max_out_hop_id);
-	tb_info(tb, "  Max counters: %d\n", port->max_counters);
-	tb_info(tb, "  NFC Credits: %#x\n", port->nfc_credits);
+	tb_dbg(tb,
+	       " Port %d: %x:%x (Revision: %d, TB Version: %d, Type: %s (%#x))\n",
+	       port->port_number, port->vendor_id, port->device_id,
+	       port->revision, port->thunderbolt_version, tb_port_type(port),
+	       port->type);
+	tb_dbg(tb, "  Max hop id (in/out): %d/%d\n",
+	       port->max_in_hop_id, port->max_out_hop_id);
+	tb_dbg(tb, "  Max counters: %d\n", port->max_counters);
+	tb_dbg(tb, "  NFC Credits: %#x\n", port->nfc_credits);
 }
 
 /**
@@ -605,20 +606,18 @@ static int tb_init_port(struct tb_port *port)
 
 static void tb_dump_switch(struct tb *tb, struct tb_regs_switch_header *sw)
 {
-	tb_info(tb,
-		" Switch: %x:%x (Revision: %d, TB Version: %d)\n",
-		sw->vendor_id, sw->device_id, sw->revision,
-		sw->thunderbolt_version);
-	tb_info(tb, "  Max Port Number: %d\n", sw->max_port_number);
-	tb_info(tb, "  Config:\n");
-	tb_info(tb,
+	tb_dbg(tb, " Switch: %x:%x (Revision: %d, TB Version: %d)\n",
+	       sw->vendor_id, sw->device_id, sw->revision,
+	       sw->thunderbolt_version);
+	tb_dbg(tb, "  Max Port Number: %d\n", sw->max_port_number);
+	tb_dbg(tb, "  Config:\n");
+	tb_dbg(tb,
 		"   Upstream Port Number: %d Depth: %d Route String: %#llx Enabled: %d, PlugEventsDelay: %dms\n",
-		sw->upstream_port_number, sw->depth,
-		(((u64) sw->route_hi) << 32) | sw->route_lo,
-		sw->enabled, sw->plug_events_delay);
-	tb_info(tb,
-		"   unknown1: %#x unknown4: %#x\n",
-		sw->__unknown1, sw->__unknown4);
+	       sw->upstream_port_number, sw->depth,
+	       (((u64) sw->route_hi) << 32) | sw->route_lo,
+	       sw->enabled, sw->plug_events_delay);
+	tb_dbg(tb, "   unknown1: %#x unknown4: %#x\n",
+	       sw->__unknown1, sw->__unknown4);
 }
 
 /**
@@ -634,7 +633,7 @@ int tb_switch_reset(struct tb *tb, u64 route)
 		header.route_lo = route,
 		header.enabled = true,
 	};
-	tb_info(tb, "resetting switch at %llx\n", route);
+	tb_dbg(tb, "resetting switch at %llx\n", route);
 	res.err = tb_cfg_write(tb->ctl, ((u32 *) &header) + 2, route,
 			0, 2, 2, 2);
 	if (res.err)
@@ -1139,7 +1138,7 @@ struct tb_switch *tb_switch_alloc(struct tb *tb, struct device *parent,
 	if (tb_cfg_read(tb->ctl, &sw->config, route, 0, TB_CFG_SWITCH, 0, 5))
 		goto err_free_sw_ports;
 
-	tb_info(tb, "current switch config:\n");
+	tb_dbg(tb, "current switch config:\n");
 	tb_dump_switch(tb, &sw->config);
 
 	/* configure switch */
@@ -1246,9 +1245,8 @@ int tb_switch_configure(struct tb_switch *sw)
 	int ret;
 
 	route = tb_route(sw);
-	tb_info(tb,
-		"initializing Switch at %#llx (depth: %d, up port: %d)\n",
-		route, tb_route_length(route), sw->config.upstream_port_number);
+	tb_dbg(tb, "initializing Switch at %#llx (depth: %d, up port: %d)\n",
+	       route, tb_route_length(route), sw->config.upstream_port_number);
 
 	if (sw->config.vendor_id != PCI_VENDOR_ID_INTEL)
 		tb_sw_warn(sw, "unknown switch vendor id %#x\n",
@@ -1386,13 +1384,13 @@ int tb_switch_add(struct tb_switch *sw)
 			tb_sw_warn(sw, "tb_eeprom_read_rom failed\n");
 			return ret;
 		}
-		tb_sw_info(sw, "uid: %#llx\n", sw->uid);
+		tb_sw_dbg(sw, "uid: %#llx\n", sw->uid);
 
 		tb_switch_set_uuid(sw);
 
 		for (i = 0; i <= sw->config.max_port_number; i++) {
 			if (sw->ports[i].disabled) {
-				tb_port_info(&sw->ports[i], "disabled by eeprom\n");
+				tb_port_dbg(&sw->ports[i], "disabled by eeprom\n");
 				continue;
 			}
 			ret = tb_init_port(&sw->ports[i]);
@@ -1405,6 +1403,14 @@ int tb_switch_add(struct tb_switch *sw)
 	if (ret)
 		return ret;
 
+	if (tb_route(sw)) {
+		dev_info(&sw->dev, "new device found, vendor=%#x device=%#x\n",
+			 sw->vendor, sw->device);
+		if (sw->vendor_name && sw->device_name)
+			dev_info(&sw->dev, "%s %s\n", sw->vendor_name,
+				 sw->device_name);
+	}
+
 	ret = tb_switch_nvm_add(sw);
 	if (ret) {
 		device_del(&sw->dev);
@@ -1456,6 +1462,9 @@ void tb_switch_remove(struct tb_switch *sw)
 		tb_plug_events_active(sw, false);
 
 	tb_switch_nvm_remove(sw);
+
+	if (tb_route(sw))
+		dev_info(&sw->dev, "device disconnected\n");
 	device_unregister(&sw->dev);
 }
 
@@ -1483,7 +1492,7 @@ void tb_sw_set_unplugged(struct tb_switch *sw)
 int tb_switch_resume(struct tb_switch *sw)
 {
 	int i, err;
-	tb_sw_info(sw, "resuming switch\n");
+	tb_sw_dbg(sw, "resuming switch\n");
 
 	/*
 	 * Check for UID of the connected switches except for root
diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c
index 1424581fd9af..30e02c716f6c 100644
--- a/drivers/thunderbolt/tb.c
+++ b/drivers/thunderbolt/tb.c
@@ -404,10 +404,10 @@ static int tb_suspend_noirq(struct tb *tb)
 {
 	struct tb_cm *tcm = tb_priv(tb);
 
-	tb_info(tb, "suspending...\n");
+	tb_dbg(tb, "suspending...\n");
 	tb_switch_suspend(tb->root_switch);
 	tcm->hotplug_active = false; /* signal tb_handle_hotplug to quit */
-	tb_info(tb, "suspend finished\n");
+	tb_dbg(tb, "suspend finished\n");
 
 	return 0;
 }
@@ -417,7 +417,7 @@ static int tb_resume_noirq(struct tb *tb)
 	struct tb_cm *tcm = tb_priv(tb);
 	struct tb_pci_tunnel *tunnel, *n;
 
-	tb_info(tb, "resuming...\n");
+	tb_dbg(tb, "resuming...\n");
 
 	/* remove any pci devices the firmware might have setup */
 	tb_switch_reset(tb, 0);
@@ -432,12 +432,12 @@ static int tb_resume_noirq(struct tb *tb)
 		 * the pcie links need some time to get going.
 		 * 100ms works for me...
 		 */
-		tb_info(tb, "tunnels restarted, sleeping for 100ms\n");
+		tb_dbg(tb, "tunnels restarted, sleeping for 100ms\n");
 		msleep(100);
 	}
 	 /* Allow tb_handle_hotplug to progress events */
 	tcm->hotplug_active = true;
-	tb_info(tb, "resume finished\n");
+	tb_dbg(tb, "resume finished\n");
 
 	return 0;
 }
diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h
index 5067d69d0501..52584c4003e3 100644
--- a/drivers/thunderbolt/tb.h
+++ b/drivers/thunderbolt/tb.h
@@ -1,8 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Thunderbolt Cactus Ridge driver - bus logic (NHI independent)
+ * Thunderbolt driver - bus logic (NHI independent)
  *
  * Copyright (c) 2014 Andreas Noever <andreas.noever@gmail.com>
+ * Copyright (C) 2018, Intel Corporation
  */
 
 #ifndef TB_H_
@@ -327,7 +328,7 @@ static inline int tb_port_write(struct tb_port *port, const void *buffer,
 #define tb_WARN(tb, fmt, arg...) dev_WARN(&(tb)->nhi->pdev->dev, fmt, ## arg)
 #define tb_warn(tb, fmt, arg...) dev_warn(&(tb)->nhi->pdev->dev, fmt, ## arg)
 #define tb_info(tb, fmt, arg...) dev_info(&(tb)->nhi->pdev->dev, fmt, ## arg)
-
+#define tb_dbg(tb, fmt, arg...) dev_dbg(&(tb)->nhi->pdev->dev, fmt, ## arg)
 
 #define __TB_SW_PRINT(level, sw, fmt, arg...)           \
 	do {                                            \
@@ -338,7 +339,7 @@ static inline int tb_port_write(struct tb_port *port, const void *buffer,
 #define tb_sw_WARN(sw, fmt, arg...) __TB_SW_PRINT(tb_WARN, sw, fmt, ##arg)
 #define tb_sw_warn(sw, fmt, arg...) __TB_SW_PRINT(tb_warn, sw, fmt, ##arg)
 #define tb_sw_info(sw, fmt, arg...) __TB_SW_PRINT(tb_info, sw, fmt, ##arg)
-
+#define tb_sw_dbg(sw, fmt, arg...) __TB_SW_PRINT(tb_dbg, sw, fmt, ##arg)
 
 #define __TB_PORT_PRINT(level, _port, fmt, arg...)                      \
 	do {                                                            \
@@ -352,6 +353,8 @@ static inline int tb_port_write(struct tb_port *port, const void *buffer,
 	__TB_PORT_PRINT(tb_warn, port, fmt, ##arg)
 #define tb_port_info(port, fmt, arg...) \
 	__TB_PORT_PRINT(tb_info, port, fmt, ##arg)
+#define tb_port_dbg(port, fmt, arg...) \
+	__TB_PORT_PRINT(tb_dbg, port, fmt, ##arg)
 
 struct tb *icm_probe(struct tb_nhi *nhi);
 struct tb *tb_probe(struct tb_nhi *nhi);
diff --git a/drivers/thunderbolt/tb_msgs.h b/drivers/thunderbolt/tb_msgs.h
index 2487e162c885..02c84aa3d018 100644
--- a/drivers/thunderbolt/tb_msgs.h
+++ b/drivers/thunderbolt/tb_msgs.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Thunderbolt control channel messages
  *
  * Copyright (C) 2014 Andreas Noever <andreas.noever@gmail.com>
  * Copyright (C) 2017, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef _TB_MSGS
diff --git a/drivers/thunderbolt/tb_regs.h b/drivers/thunderbolt/tb_regs.h
index 693b0353c3fe..6f1ff04ee195 100644
--- a/drivers/thunderbolt/tb_regs.h
+++ b/drivers/thunderbolt/tb_regs.h
@@ -1,12 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Thunderbolt Cactus Ridge driver - Port/Switch config area registers
+ * Thunderbolt driver - Port/Switch config area registers
  *
  * Every thunderbolt device consists (logically) of a switch with multiple
  * ports. Every port contains up to four config regions (HOPS, PORT, SWITCH,
  * COUNTERS) which are used to configure the device.
  *
  * Copyright (c) 2014 Andreas Noever <andreas.noever@gmail.com>
+ * Copyright (C) 2018, Intel Corporation
  */
 
 #ifndef _TB_REGS
diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c
index db8bece63327..e27dd8beb94b 100644
--- a/drivers/thunderbolt/xdomain.c
+++ b/drivers/thunderbolt/xdomain.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Thunderbolt XDomain discovery protocol support
  *
  * Copyright (C) 2017, Intel Corporation
  * Authors: Michael Jamet <michael.jamet@intel.com>
  *          Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/device.h>
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 70a7981b94b3..85644669fbe7 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -274,6 +274,8 @@ static struct class uio_class = {
 	.dev_groups = uio_groups,
 };
 
+static bool uio_class_registered;
+
 /*
  * device functions
  */
@@ -668,7 +670,7 @@ static vm_fault_t uio_vma_fault(struct vm_fault *vmf)
 	struct page *page;
 	unsigned long offset;
 	void *addr;
-	int ret = 0;
+	vm_fault_t ret = 0;
 	int mi;
 
 	mutex_lock(&idev->info_lock);
@@ -736,7 +738,8 @@ static int uio_mmap_physical(struct vm_area_struct *vma)
 		return -EINVAL;
 
 	vma->vm_ops = &uio_physical_vm_ops;
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	if (idev->info->mem[mi].memtype == UIO_MEM_PHYS)
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
 	/*
 	 * We cannot use the vm_iomap_memory() helper here,
@@ -793,18 +796,19 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
 	}
 
 	switch (idev->info->mem[mi].memtype) {
-		case UIO_MEM_PHYS:
-			ret = uio_mmap_physical(vma);
-			break;
-		case UIO_MEM_LOGICAL:
-		case UIO_MEM_VIRTUAL:
-			ret = uio_mmap_logical(vma);
-			break;
-		default:
-			ret = -EINVAL;
+	case UIO_MEM_IOVA:
+	case UIO_MEM_PHYS:
+		ret = uio_mmap_physical(vma);
+		break;
+	case UIO_MEM_LOGICAL:
+	case UIO_MEM_VIRTUAL:
+		ret = uio_mmap_logical(vma);
+		break;
+	default:
+		ret = -EINVAL;
 	}
 
-out:
+ out:
 	mutex_unlock(&idev->info_lock);
 	return ret;
 }
@@ -876,6 +880,9 @@ static int init_uio_class(void)
 		printk(KERN_ERR "class_register failed for uio\n");
 		goto err_class_register;
 	}
+
+	uio_class_registered = true;
+
 	return 0;
 
 err_class_register:
@@ -886,6 +893,7 @@ exit:
 
 static void release_uio_class(void)
 {
+	uio_class_registered = false;
 	class_unregister(&uio_class);
 	uio_major_cleanup();
 }
@@ -912,6 +920,9 @@ int __uio_register_device(struct module *owner,
 	struct uio_device *idev;
 	int ret = 0;
 
+	if (!uio_class_registered)
+		return -EPROBE_DEFER;
+
 	if (!parent || !info || !info->name || !info->version)
 		return -EINVAL;
 
diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c
index e1134a4d97f3..003badaef5f3 100644
--- a/drivers/uio/uio_dmem_genirq.c
+++ b/drivers/uio/uio_dmem_genirq.c
@@ -163,7 +163,8 @@ static int uio_dmem_genirq_probe(struct platform_device *pdev)
 			dev_err(&pdev->dev, "unable to kmalloc\n");
 			goto bad2;
 		}
-		uioinfo->name = pdev->dev.of_node->name;
+		uioinfo->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%pOFn",
+					       pdev->dev.of_node);
 		uioinfo->version = "devicetree";
 
 		/* Multiple IRQs are not supported */
diff --git a/drivers/uio/uio_fsl_elbc_gpcm.c b/drivers/uio/uio_fsl_elbc_gpcm.c
index bbc17effae5e..9cc37fe07d35 100644
--- a/drivers/uio/uio_fsl_elbc_gpcm.c
+++ b/drivers/uio/uio_fsl_elbc_gpcm.c
@@ -382,8 +382,7 @@ static int uio_fsl_elbc_gpcm_probe(struct platform_device *pdev)
 	}
 
 	/* set all UIO data */
-	if (node->name)
-		info->mem[0].name = kstrdup(node->name, GFP_KERNEL);
+	info->mem[0].name = kasprintf(GFP_KERNEL, "%pOFn", node);
 	info->mem[0].addr = res.start;
 	info->mem[0].size = resource_size(&res);
 	info->mem[0].memtype = UIO_MEM_PHYS;
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index e401be8321ab..c2493d011225 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -17,7 +17,6 @@
  * # echo -n "ed963694-e847-4b2a-85af-bc9cfc11d6f3" \
  *    > /sys/bus/vmbus/drivers/uio_hv_generic/bind
  */
-#define DEBUG 1
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/device.h>
@@ -33,13 +32,13 @@
 
 #include "../hv/hyperv_vmbus.h"
 
-#define DRIVER_VERSION	"0.02.0"
+#define DRIVER_VERSION	"0.02.1"
 #define DRIVER_AUTHOR	"Stephen Hemminger <sthemmin at microsoft.com>"
 #define DRIVER_DESC	"Generic UIO driver for VMBus devices"
 
 #define HV_RING_SIZE	 512	/* pages */
-#define SEND_BUFFER_SIZE (15 * 1024 * 1024)
-#define RECV_BUFFER_SIZE (15 * 1024 * 1024)
+#define SEND_BUFFER_SIZE (16 * 1024 * 1024)
+#define RECV_BUFFER_SIZE (31 * 1024 * 1024)
 
 /*
  * List of resources to be mapped to user space
@@ -56,6 +55,7 @@ enum hv_uio_map {
 struct hv_uio_private_data {
 	struct uio_info info;
 	struct hv_device *device;
+	atomic_t refcnt;
 
 	void	*recv_buf;
 	u32	recv_gpadl;
@@ -129,13 +129,12 @@ static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj,
 {
 	struct vmbus_channel *channel
 		= container_of(kobj, struct vmbus_channel, kobj);
-	struct hv_device *dev = channel->primary_channel->device_obj;
-	u16 q_idx = channel->offermsg.offer.sub_channel_index;
+	void *ring_buffer = page_address(channel->ringbuffer_page);
 
-	dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n",
-		q_idx, vma_pages(vma), vma->vm_pgoff);
+	if (channel->state != CHANNEL_OPENED_STATE)
+		return -ENODEV;
 
-	return vm_iomap_memory(vma, virt_to_phys(channel->ringbuffer_pages),
+	return vm_iomap_memory(vma, virt_to_phys(ring_buffer),
 			       channel->ringbuffer_pagecount << PAGE_SHIFT);
 }
 
@@ -176,58 +175,104 @@ hv_uio_new_channel(struct vmbus_channel *new_sc)
 	}
 }
 
+/* free the reserved buffers for send and receive */
 static void
 hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata)
 {
-	if (pdata->send_gpadl)
+	if (pdata->send_gpadl) {
 		vmbus_teardown_gpadl(dev->channel, pdata->send_gpadl);
-	vfree(pdata->send_buf);
+		pdata->send_gpadl = 0;
+		vfree(pdata->send_buf);
+	}
 
-	if (pdata->recv_gpadl)
+	if (pdata->recv_gpadl) {
 		vmbus_teardown_gpadl(dev->channel, pdata->recv_gpadl);
-	vfree(pdata->recv_buf);
+		pdata->recv_gpadl = 0;
+		vfree(pdata->recv_buf);
+	}
+}
+
+/* VMBus primary channel is opened on first use */
+static int
+hv_uio_open(struct uio_info *info, struct inode *inode)
+{
+	struct hv_uio_private_data *pdata
+		= container_of(info, struct hv_uio_private_data, info);
+	struct hv_device *dev = pdata->device;
+	int ret;
+
+	if (atomic_inc_return(&pdata->refcnt) != 1)
+		return 0;
+
+	ret = vmbus_connect_ring(dev->channel,
+				 hv_uio_channel_cb, dev->channel);
+
+	if (ret == 0)
+		dev->channel->inbound.ring_buffer->interrupt_mask = 1;
+	else
+		atomic_dec(&pdata->refcnt);
+
+	return ret;
+}
+
+/* VMBus primary channel is closed on last close */
+static int
+hv_uio_release(struct uio_info *info, struct inode *inode)
+{
+	struct hv_uio_private_data *pdata
+		= container_of(info, struct hv_uio_private_data, info);
+	struct hv_device *dev = pdata->device;
+	int ret = 0;
+
+	if (atomic_dec_and_test(&pdata->refcnt))
+		ret = vmbus_disconnect_ring(dev->channel);
+
+	return ret;
 }
 
 static int
 hv_uio_probe(struct hv_device *dev,
 	     const struct hv_vmbus_device_id *dev_id)
 {
+	struct vmbus_channel *channel = dev->channel;
 	struct hv_uio_private_data *pdata;
+	void *ring_buffer;
 	int ret;
 
+	/* Communicating with host has to be via shared memory not hypercall */
+	if (!channel->offermsg.monitor_allocated) {
+		dev_err(&dev->device, "vmbus channel requires hypercall\n");
+		return -ENOTSUPP;
+	}
+
 	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
 		return -ENOMEM;
 
-	ret = vmbus_open(dev->channel, HV_RING_SIZE * PAGE_SIZE,
-			 HV_RING_SIZE * PAGE_SIZE, NULL, 0,
-			 hv_uio_channel_cb, dev->channel);
+	ret = vmbus_alloc_ring(channel, HV_RING_SIZE * PAGE_SIZE,
+			       HV_RING_SIZE * PAGE_SIZE);
 	if (ret)
 		goto fail;
 
-	/* Communicating with host has to be via shared memory not hypercall */
-	if (!dev->channel->offermsg.monitor_allocated) {
-		dev_err(&dev->device, "vmbus channel requires hypercall\n");
-		ret = -ENOTSUPP;
-		goto fail_close;
-	}
-
-	dev->channel->inbound.ring_buffer->interrupt_mask = 1;
-	set_channel_read_mode(dev->channel, HV_CALL_ISR);
+	set_channel_read_mode(channel, HV_CALL_ISR);
 
 	/* Fill general uio info */
 	pdata->info.name = "uio_hv_generic";
 	pdata->info.version = DRIVER_VERSION;
 	pdata->info.irqcontrol = hv_uio_irqcontrol;
+	pdata->info.open = hv_uio_open;
+	pdata->info.release = hv_uio_release;
 	pdata->info.irq = UIO_IRQ_CUSTOM;
+	atomic_set(&pdata->refcnt, 0);
 
 	/* mem resources */
 	pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings";
+	ring_buffer = page_address(channel->ringbuffer_page);
 	pdata->info.mem[TXRX_RING_MAP].addr
-		= (uintptr_t)dev->channel->ringbuffer_pages;
+		= (uintptr_t)virt_to_phys(ring_buffer);
 	pdata->info.mem[TXRX_RING_MAP].size
-		= dev->channel->ringbuffer_pagecount << PAGE_SHIFT;
-	pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_LOGICAL;
+		= channel->ringbuffer_pagecount << PAGE_SHIFT;
+	pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_IOVA;
 
 	pdata->info.mem[INT_PAGE_MAP].name = "int_page";
 	pdata->info.mem[INT_PAGE_MAP].addr
@@ -247,7 +292,7 @@ hv_uio_probe(struct hv_device *dev,
 		goto fail_close;
 	}
 
-	ret = vmbus_establish_gpadl(dev->channel, pdata->recv_buf,
+	ret = vmbus_establish_gpadl(channel, pdata->recv_buf,
 				    RECV_BUFFER_SIZE, &pdata->recv_gpadl);
 	if (ret)
 		goto fail_close;
@@ -261,14 +306,13 @@ hv_uio_probe(struct hv_device *dev,
 	pdata->info.mem[RECV_BUF_MAP].size = RECV_BUFFER_SIZE;
 	pdata->info.mem[RECV_BUF_MAP].memtype = UIO_MEM_VIRTUAL;
 
-
 	pdata->send_buf = vzalloc(SEND_BUFFER_SIZE);
 	if (pdata->send_buf == NULL) {
 		ret = -ENOMEM;
 		goto fail_close;
 	}
 
-	ret = vmbus_establish_gpadl(dev->channel, pdata->send_buf,
+	ret = vmbus_establish_gpadl(channel, pdata->send_buf,
 				    SEND_BUFFER_SIZE, &pdata->send_gpadl);
 	if (ret)
 		goto fail_close;
@@ -290,10 +334,10 @@ hv_uio_probe(struct hv_device *dev,
 		goto fail_close;
 	}
 
-	vmbus_set_chn_rescind_callback(dev->channel, hv_uio_rescind);
-	vmbus_set_sc_create_callback(dev->channel, hv_uio_new_channel);
+	vmbus_set_chn_rescind_callback(channel, hv_uio_rescind);
+	vmbus_set_sc_create_callback(channel, hv_uio_new_channel);
 
-	ret = sysfs_create_bin_file(&dev->channel->kobj, &ring_buffer_bin_attr);
+	ret = sysfs_create_bin_file(&channel->kobj, &ring_buffer_bin_attr);
 	if (ret)
 		dev_notice(&dev->device,
 			   "sysfs create ring bin file failed; %d\n", ret);
@@ -304,7 +348,6 @@ hv_uio_probe(struct hv_device *dev,
 
 fail_close:
 	hv_uio_cleanup(dev, pdata);
-	vmbus_close(dev->channel);
 fail:
 	kfree(pdata);
 
@@ -322,7 +365,8 @@ hv_uio_remove(struct hv_device *dev)
 	uio_unregister_device(&pdata->info);
 	hv_uio_cleanup(dev, pdata);
 	hv_set_drvdata(dev, NULL);
-	vmbus_close(dev->channel);
+
+	vmbus_free_ring(dev->channel);
 	kfree(pdata);
 	return 0;
 }
diff --git a/drivers/uio/uio_pdrv_genirq.c b/drivers/uio/uio_pdrv_genirq.c
index f598ecddc8a7..6c759934bff3 100644
--- a/drivers/uio/uio_pdrv_genirq.c
+++ b/drivers/uio/uio_pdrv_genirq.c
@@ -118,7 +118,8 @@ static int uio_pdrv_genirq_probe(struct platform_device *pdev)
 			dev_err(&pdev->dev, "unable to kmalloc\n");
 			return -ENOMEM;
 		}
-		uioinfo->name = pdev->dev.of_node->name;
+		uioinfo->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%pOFn",
+					       pdev->dev.of_node);
 		uioinfo->version = "devicetree";
 		/* Multiple IRQs are not supported */
 	}
diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c
index 19f5f5f2a48a..09b37c0d075d 100644
--- a/drivers/usb/chipidea/ci_hdrc_imx.c
+++ b/drivers/usb/chipidea/ci_hdrc_imx.c
@@ -364,8 +364,7 @@ static void ci_hdrc_imx_shutdown(struct platform_device *pdev)
 	ci_hdrc_imx_remove(pdev);
 }
 
-#ifdef CONFIG_PM
-static int imx_controller_suspend(struct device *dev)
+static int __maybe_unused imx_controller_suspend(struct device *dev)
 {
 	struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
 
@@ -377,7 +376,7 @@ static int imx_controller_suspend(struct device *dev)
 	return 0;
 }
 
-static int imx_controller_resume(struct device *dev)
+static int __maybe_unused imx_controller_resume(struct device *dev)
 {
 	struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
 	int ret = 0;
@@ -408,8 +407,7 @@ clk_disable:
 	return ret;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int ci_hdrc_imx_suspend(struct device *dev)
+static int __maybe_unused ci_hdrc_imx_suspend(struct device *dev)
 {
 	int ret;
 
@@ -431,7 +429,7 @@ static int ci_hdrc_imx_suspend(struct device *dev)
 	return imx_controller_suspend(dev);
 }
 
-static int ci_hdrc_imx_resume(struct device *dev)
+static int __maybe_unused ci_hdrc_imx_resume(struct device *dev)
 {
 	struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
 	int ret;
@@ -445,9 +443,8 @@ static int ci_hdrc_imx_resume(struct device *dev)
 
 	return ret;
 }
-#endif /* CONFIG_PM_SLEEP */
 
-static int ci_hdrc_imx_runtime_suspend(struct device *dev)
+static int __maybe_unused ci_hdrc_imx_runtime_suspend(struct device *dev)
 {
 	struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
 	int ret;
@@ -466,13 +463,11 @@ static int ci_hdrc_imx_runtime_suspend(struct device *dev)
 	return imx_controller_suspend(dev);
 }
 
-static int ci_hdrc_imx_runtime_resume(struct device *dev)
+static int __maybe_unused ci_hdrc_imx_runtime_resume(struct device *dev)
 {
 	return imx_controller_resume(dev);
 }
 
-#endif /* CONFIG_PM */
-
 static const struct dev_pm_ops ci_hdrc_imx_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(ci_hdrc_imx_suspend, ci_hdrc_imx_resume)
 	SET_RUNTIME_PM_OPS(ci_hdrc_imx_runtime_suspend,
@@ -492,7 +487,7 @@ static struct platform_driver ci_hdrc_imx_driver = {
 module_platform_driver(ci_hdrc_imx_driver);
 
 MODULE_ALIAS("platform:imx-usb");
-MODULE_LICENSE("GPL v2");
+MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("CI HDRC i.MX USB binding");
 MODULE_AUTHOR("Marek Vasut <marex@denx.de>");
 MODULE_AUTHOR("Richard Zhao <richard.zhao@freescale.com>");
diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 85fc6db48e44..7bfcbb23c2a4 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -53,6 +53,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
 #include <linux/usb/otg.h>
@@ -723,6 +724,24 @@ static int ci_get_platdata(struct device *dev,
 		else
 			cable->connected = false;
 	}
+
+	platdata->pctl = devm_pinctrl_get(dev);
+	if (!IS_ERR(platdata->pctl)) {
+		struct pinctrl_state *p;
+
+		p = pinctrl_lookup_state(platdata->pctl, "default");
+		if (!IS_ERR(p))
+			platdata->pins_default = p;
+
+		p = pinctrl_lookup_state(platdata->pctl, "host");
+		if (!IS_ERR(p))
+			platdata->pins_host = p;
+
+		p = pinctrl_lookup_state(platdata->pctl, "device");
+		if (!IS_ERR(p))
+			platdata->pins_device = p;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c
index 4638d9b066be..d858a82c4f44 100644
--- a/drivers/usb/chipidea/host.c
+++ b/drivers/usb/chipidea/host.c
@@ -13,6 +13,7 @@
 #include <linux/usb/hcd.h>
 #include <linux/usb/chipidea.h>
 #include <linux/regulator/consumer.h>
+#include <linux/pinctrl/consumer.h>
 
 #include "../host/ehci.h"
 
@@ -153,6 +154,10 @@ static int host_start(struct ci_hdrc *ci)
 		}
 	}
 
+	if (ci->platdata->pins_host)
+		pinctrl_select_state(ci->platdata->pctl,
+				     ci->platdata->pins_host);
+
 	ret = usb_add_hcd(hcd, 0, 0);
 	if (ret) {
 		goto disable_reg;
@@ -197,6 +202,10 @@ static void host_stop(struct ci_hdrc *ci)
 	}
 	ci->hcd = NULL;
 	ci->otg.host = NULL;
+
+	if (ci->platdata->pins_host && ci->platdata->pins_default)
+		pinctrl_select_state(ci->platdata->pctl,
+				     ci->platdata->pins_default);
 }
 
 
diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c
index db4ceffcf2a6..f25d4827fd49 100644
--- a/drivers/usb/chipidea/otg.c
+++ b/drivers/usb/chipidea/otg.c
@@ -203,14 +203,17 @@ static void ci_otg_work(struct work_struct *work)
 	}
 
 	pm_runtime_get_sync(ci->dev);
+
 	if (ci->id_event) {
 		ci->id_event = false;
 		ci_handle_id_switch(ci);
-	} else if (ci->b_sess_valid_event) {
+	}
+
+	if (ci->b_sess_valid_event) {
 		ci->b_sess_valid_event = false;
 		ci_handle_vbus_change(ci);
-	} else
-		dev_err(ci->dev, "unexpected event occurs at %s\n", __func__);
+	}
+
 	pm_runtime_put_sync(ci->dev);
 
 	enable_irq(ci->irq);
diff --git a/drivers/usb/chipidea/otg.h b/drivers/usb/chipidea/otg.h
index 7e7428e48bfa..4f8b8179ec96 100644
--- a/drivers/usb/chipidea/otg.h
+++ b/drivers/usb/chipidea/otg.h
@@ -17,7 +17,8 @@ void ci_handle_vbus_change(struct ci_hdrc *ci);
 static inline void ci_otg_queue_work(struct ci_hdrc *ci)
 {
 	disable_irq_nosync(ci->irq);
-	queue_work(ci->wq, &ci->work);
+	if (queue_work(ci->wq, &ci->work) == false)
+		enable_irq(ci->irq);
 }
 
 #endif /* __DRIVERS_USB_CHIPIDEA_OTG_H */
diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index 9852ec5e6e01..829e947cabf5 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
 #include <linux/usb/otg-fsm.h>
@@ -1965,6 +1966,10 @@ void ci_hdrc_gadget_destroy(struct ci_hdrc *ci)
 
 static int udc_id_switch_for_device(struct ci_hdrc *ci)
 {
+	if (ci->platdata->pins_device)
+		pinctrl_select_state(ci->platdata->pctl,
+				     ci->platdata->pins_device);
+
 	if (ci->is_otg)
 		/* Clear and enable BSV irq */
 		hw_write_otgsc(ci, OTGSC_BSVIS | OTGSC_BSVIE,
@@ -1983,6 +1988,10 @@ static void udc_id_switch_for_host(struct ci_hdrc *ci)
 		hw_write_otgsc(ci, OTGSC_BSVIE | OTGSC_BSVIS, OTGSC_BSVIS);
 
 	ci->vbus_active = 0;
+
+	if (ci->platdata->pins_device && ci->platdata->pins_default)
+		pinctrl_select_state(ci->platdata->pctl,
+				     ci->platdata->pins_default);
 }
 
 /**
diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c
index 34ad5bf8acd8..def80ff547e4 100644
--- a/drivers/usb/chipidea/usbmisc_imx.c
+++ b/drivers/usb/chipidea/usbmisc_imx.c
@@ -343,6 +343,8 @@ static int usbmisc_imx6q_init(struct imx_usbmisc_data *data)
 	} else if (data->oc_polarity == 1) {
 		/* High active */
 		reg &= ~(MX6_BM_OVER_CUR_DIS | MX6_BM_OVER_CUR_POLARITY);
+	} else {
+		reg &= ~(MX6_BM_OVER_CUR_DIS);
 	}
 	writel(reg, usbmisc->base + data->index * 4);
 
@@ -633,6 +635,6 @@ static struct platform_driver usbmisc_imx_driver = {
 module_platform_driver(usbmisc_imx_driver);
 
 MODULE_ALIAS("platform:usbmisc-imx");
-MODULE_LICENSE("GPL v2");
+MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("driver for imx usb non-core registers");
 MODULE_AUTHOR("Richard Zhao <richard.zhao@freescale.com>");
diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 83ffa5a14c3d..4942122b2346 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2007 Stefan Kopp, Gechingen, Germany
  * Copyright (C) 2008 Novell, Inc.
  * Copyright (C) 2008 Greg Kroah-Hartman <gregkh@suse.de>
+ * Copyright (C) 2018 IVI Foundation, Inc.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -21,21 +22,24 @@
 #include <linux/compat.h>
 #include <linux/usb/tmc.h>
 
+/* Increment API VERSION when changing tmc.h with new flags or ioctls
+ * or when changing a significant behavior of the driver.
+ */
+#define USBTMC_API_VERSION (2)
 
 #define USBTMC_HEADER_SIZE	12
 #define USBTMC_MINOR_BASE	176
 
-/*
- * Size of driver internal IO buffer. Must be multiple of 4 and at least as
- * large as wMaxPacketSize (which is usually 512 bytes).
- */
-#define USBTMC_SIZE_IOBUFFER	2048
-
 /* Minimum USB timeout (in milliseconds) */
 #define USBTMC_MIN_TIMEOUT	100
 /* Default USB timeout (in milliseconds) */
 #define USBTMC_TIMEOUT		5000
 
+/* Max number of urbs used in write transfers */
+#define MAX_URBS_IN_FLIGHT	16
+/* I/O buffer size used in generic read/write functions */
+#define USBTMC_BUFSIZE		(4096)
+
 /*
  * Maximum number of read cycles to empty bulk in endpoint during CLEAR and
  * ABORT_BULK_IN requests. Ends the loop if (for whatever reason) a short
@@ -79,6 +83,9 @@ struct usbtmc_device_data {
 	u8 bTag_last_write;	/* needed for abort */
 	u8 bTag_last_read;	/* needed for abort */
 
+	/* packet size of IN bulk */
+	u16            wMaxPacketSize;
+
 	/* data for interrupt in endpoint handling */
 	u8             bNotify1;
 	u8             bNotify2;
@@ -95,11 +102,6 @@ struct usbtmc_device_data {
 	/* coalesced usb488_caps from usbtmc_dev_capabilities */
 	__u8 usb488_caps;
 
-	/* attributes from the USB TMC spec for this device */
-	u8 TermChar;
-	bool TermCharEnabled;
-	bool auto_abort;
-
 	bool zombie; /* fd of disconnected device */
 
 	struct usbtmc_dev_capabilities	capabilities;
@@ -121,13 +123,34 @@ struct usbtmc_file_data {
 	u32            timeout;
 	u8             srq_byte;
 	atomic_t       srq_asserted;
+	atomic_t       closing;
+	u8             bmTransferAttributes; /* member of DEV_DEP_MSG_IN */
+
 	u8             eom_val;
 	u8             term_char;
 	bool           term_char_enabled;
+	bool           auto_abort;
+
+	spinlock_t     err_lock; /* lock for errors */
+
+	struct usb_anchor submitted;
+
+	/* data for generic_write */
+	struct semaphore limit_write_sem;
+	u32 out_transfer_size;
+	int out_status;
+
+	/* data for generic_read */
+	u32 in_transfer_size;
+	int in_status;
+	int in_urbs_used;
+	struct usb_anchor in_anchor;
+	wait_queue_head_t wait_bulk_in;
 };
 
 /* Forward declarations */
 static struct usb_driver usbtmc_driver;
+static void usbtmc_draw_down(struct usbtmc_file_data *file_data);
 
 static void usbtmc_delete(struct kref *kref)
 {
@@ -153,6 +176,12 @@ static int usbtmc_open(struct inode *inode, struct file *filp)
 	if (!file_data)
 		return -ENOMEM;
 
+	spin_lock_init(&file_data->err_lock);
+	sema_init(&file_data->limit_write_sem, MAX_URBS_IN_FLIGHT);
+	init_usb_anchor(&file_data->submitted);
+	init_usb_anchor(&file_data->in_anchor);
+	init_waitqueue_head(&file_data->wait_bulk_in);
+
 	data = usb_get_intfdata(intf);
 	/* Protect reference to data from file structure until release */
 	kref_get(&data->kref);
@@ -160,10 +189,12 @@ static int usbtmc_open(struct inode *inode, struct file *filp)
 	mutex_lock(&data->io_mutex);
 	file_data->data = data;
 
-	/* copy default values from device settings */
+	atomic_set(&file_data->closing, 0);
+
 	file_data->timeout = USBTMC_TIMEOUT;
-	file_data->term_char = data->TermChar;
-	file_data->term_char_enabled = data->TermCharEnabled;
+	file_data->term_char = '\n';
+	file_data->term_char_enabled = 0;
+	file_data->auto_abort = 0;
 	file_data->eom_val = 1;
 
 	INIT_LIST_HEAD(&file_data->file_elem);
@@ -178,6 +209,40 @@ static int usbtmc_open(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+/*
+ * usbtmc_flush - called before file handle is closed
+ */
+static int usbtmc_flush(struct file *file, fl_owner_t id)
+{
+	struct usbtmc_file_data *file_data;
+	struct usbtmc_device_data *data;
+
+	file_data = file->private_data;
+	if (file_data == NULL)
+		return -ENODEV;
+
+	atomic_set(&file_data->closing, 1);
+	data = file_data->data;
+
+	/* wait for io to stop */
+	mutex_lock(&data->io_mutex);
+
+	usbtmc_draw_down(file_data);
+
+	spin_lock_irq(&file_data->err_lock);
+	file_data->in_status = 0;
+	file_data->in_transfer_size = 0;
+	file_data->in_urbs_used = 0;
+	file_data->out_status = 0;
+	file_data->out_transfer_size = 0;
+	spin_unlock_irq(&file_data->err_lock);
+
+	wake_up_interruptible_all(&data->waitq);
+	mutex_unlock(&data->io_mutex);
+
+	return 0;
+}
+
 static int usbtmc_release(struct inode *inode, struct file *file)
 {
 	struct usbtmc_file_data *file_data = file->private_data;
@@ -197,18 +262,17 @@ static int usbtmc_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int usbtmc_ioctl_abort_bulk_in(struct usbtmc_device_data *data)
+static int usbtmc_ioctl_abort_bulk_in_tag(struct usbtmc_device_data *data,
+					  u8 tag)
 {
 	u8 *buffer;
 	struct device *dev;
 	int rv;
 	int n;
 	int actual;
-	struct usb_host_interface *current_setting;
-	int max_size;
 
 	dev = &data->intf->dev;
-	buffer = kmalloc(USBTMC_SIZE_IOBUFFER, GFP_KERNEL);
+	buffer = kmalloc(USBTMC_BUFSIZE, GFP_KERNEL);
 	if (!buffer)
 		return -ENOMEM;
 
@@ -216,86 +280,88 @@ static int usbtmc_ioctl_abort_bulk_in(struct usbtmc_device_data *data)
 			     usb_rcvctrlpipe(data->usb_dev, 0),
 			     USBTMC_REQUEST_INITIATE_ABORT_BULK_IN,
 			     USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT,
-			     data->bTag_last_read, data->bulk_in,
-			     buffer, 2, USBTMC_TIMEOUT);
+			     tag, data->bulk_in,
+			     buffer, 2, USB_CTRL_GET_TIMEOUT);
 
 	if (rv < 0) {
 		dev_err(dev, "usb_control_msg returned %d\n", rv);
 		goto exit;
 	}
 
-	dev_dbg(dev, "INITIATE_ABORT_BULK_IN returned %x\n", buffer[0]);
+	dev_dbg(dev, "INITIATE_ABORT_BULK_IN returned %x with tag %02x\n",
+		buffer[0], buffer[1]);
 
 	if (buffer[0] == USBTMC_STATUS_FAILED) {
+		/* No transfer in progress and the Bulk-OUT FIFO is empty. */
 		rv = 0;
 		goto exit;
 	}
 
-	if (buffer[0] != USBTMC_STATUS_SUCCESS) {
-		dev_err(dev, "INITIATE_ABORT_BULK_IN returned %x\n",
-			buffer[0]);
-		rv = -EPERM;
+	if (buffer[0] == USBTMC_STATUS_TRANSFER_NOT_IN_PROGRESS) {
+		/* The device returns this status if either:
+		 * - There is a transfer in progress, but the specified bTag
+		 *   does not match.
+		 * - There is no transfer in progress, but the Bulk-OUT FIFO
+		 *   is not empty.
+		 */
+		rv = -ENOMSG;
 		goto exit;
 	}
 
-	max_size = 0;
-	current_setting = data->intf->cur_altsetting;
-	for (n = 0; n < current_setting->desc.bNumEndpoints; n++)
-		if (current_setting->endpoint[n].desc.bEndpointAddress ==
-			data->bulk_in)
-			max_size = usb_endpoint_maxp(&current_setting->endpoint[n].desc);
-
-	if (max_size == 0) {
-		dev_err(dev, "Couldn't get wMaxPacketSize\n");
+	if (buffer[0] != USBTMC_STATUS_SUCCESS) {
+		dev_err(dev, "INITIATE_ABORT_BULK_IN returned %x\n",
+			buffer[0]);
 		rv = -EPERM;
 		goto exit;
 	}
 
-	dev_dbg(&data->intf->dev, "wMaxPacketSize is %d\n", max_size);
-
 	n = 0;
 
-	do {
-		dev_dbg(dev, "Reading from bulk in EP\n");
+usbtmc_abort_bulk_in_status:
+	dev_dbg(dev, "Reading from bulk in EP\n");
 
-		rv = usb_bulk_msg(data->usb_dev,
-				  usb_rcvbulkpipe(data->usb_dev,
-						  data->bulk_in),
-				  buffer, USBTMC_SIZE_IOBUFFER,
-				  &actual, USBTMC_TIMEOUT);
+	/* Data must be present. So use low timeout 300 ms */
+	actual = 0;
+	rv = usb_bulk_msg(data->usb_dev,
+			  usb_rcvbulkpipe(data->usb_dev,
+					  data->bulk_in),
+			  buffer, USBTMC_BUFSIZE,
+			  &actual, 300);
 
-		n++;
+	print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1,
+			     buffer, actual, true);
 
-		if (rv < 0) {
-			dev_err(dev, "usb_bulk_msg returned %d\n", rv);
+	n++;
+
+	if (rv < 0) {
+		dev_err(dev, "usb_bulk_msg returned %d\n", rv);
+		if (rv != -ETIMEDOUT)
 			goto exit;
-		}
-	} while ((actual == max_size) &&
-		 (n < USBTMC_MAX_READS_TO_CLEAR_BULK_IN));
+	}
 
-	if (actual == max_size) {
+	if (actual == USBTMC_BUFSIZE)
+		goto usbtmc_abort_bulk_in_status;
+
+	if (n >= USBTMC_MAX_READS_TO_CLEAR_BULK_IN) {
 		dev_err(dev, "Couldn't clear device buffer within %d cycles\n",
 			USBTMC_MAX_READS_TO_CLEAR_BULK_IN);
 		rv = -EPERM;
 		goto exit;
 	}
 
-	n = 0;
-
-usbtmc_abort_bulk_in_status:
 	rv = usb_control_msg(data->usb_dev,
 			     usb_rcvctrlpipe(data->usb_dev, 0),
 			     USBTMC_REQUEST_CHECK_ABORT_BULK_IN_STATUS,
 			     USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT,
 			     0, data->bulk_in, buffer, 0x08,
-			     USBTMC_TIMEOUT);
+			     USB_CTRL_GET_TIMEOUT);
 
 	if (rv < 0) {
 		dev_err(dev, "usb_control_msg returned %d\n", rv);
 		goto exit;
 	}
 
-	dev_dbg(dev, "INITIATE_ABORT_BULK_IN returned %x\n", buffer[0]);
+	dev_dbg(dev, "CHECK_ABORT_BULK_IN returned %x\n", buffer[0]);
 
 	if (buffer[0] == USBTMC_STATUS_SUCCESS) {
 		rv = 0;
@@ -303,46 +369,30 @@ usbtmc_abort_bulk_in_status:
 	}
 
 	if (buffer[0] != USBTMC_STATUS_PENDING) {
-		dev_err(dev, "INITIATE_ABORT_BULK_IN returned %x\n", buffer[0]);
+		dev_err(dev, "CHECK_ABORT_BULK_IN returned %x\n", buffer[0]);
 		rv = -EPERM;
 		goto exit;
 	}
 
-	if (buffer[1] == 1)
-		do {
-			dev_dbg(dev, "Reading from bulk in EP\n");
-
-			rv = usb_bulk_msg(data->usb_dev,
-					  usb_rcvbulkpipe(data->usb_dev,
-							  data->bulk_in),
-					  buffer, USBTMC_SIZE_IOBUFFER,
-					  &actual, USBTMC_TIMEOUT);
-
-			n++;
-
-			if (rv < 0) {
-				dev_err(dev, "usb_bulk_msg returned %d\n", rv);
-				goto exit;
-			}
-		} while ((actual == max_size) &&
-			 (n < USBTMC_MAX_READS_TO_CLEAR_BULK_IN));
-
-	if (actual == max_size) {
-		dev_err(dev, "Couldn't clear device buffer within %d cycles\n",
-			USBTMC_MAX_READS_TO_CLEAR_BULK_IN);
-		rv = -EPERM;
-		goto exit;
+	if ((buffer[1] & 1) > 0) {
+		/* The device has 1 or more queued packets the Host can read */
+		goto usbtmc_abort_bulk_in_status;
 	}
 
-	goto usbtmc_abort_bulk_in_status;
-
+	/* The Host must send CHECK_ABORT_BULK_IN_STATUS at a later time. */
+	rv = -EAGAIN;
 exit:
 	kfree(buffer);
 	return rv;
+}
 
+static int usbtmc_ioctl_abort_bulk_in(struct usbtmc_device_data *data)
+{
+	return usbtmc_ioctl_abort_bulk_in_tag(data, data->bTag_last_read);
 }
 
-static int usbtmc_ioctl_abort_bulk_out(struct usbtmc_device_data *data)
+static int usbtmc_ioctl_abort_bulk_out_tag(struct usbtmc_device_data *data,
+					   u8 tag)
 {
 	struct device *dev;
 	u8 *buffer;
@@ -359,8 +409,8 @@ static int usbtmc_ioctl_abort_bulk_out(struct usbtmc_device_data *data)
 			     usb_rcvctrlpipe(data->usb_dev, 0),
 			     USBTMC_REQUEST_INITIATE_ABORT_BULK_OUT,
 			     USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT,
-			     data->bTag_last_write, data->bulk_out,
-			     buffer, 2, USBTMC_TIMEOUT);
+			     tag, data->bulk_out,
+			     buffer, 2, USB_CTRL_GET_TIMEOUT);
 
 	if (rv < 0) {
 		dev_err(dev, "usb_control_msg returned %d\n", rv);
@@ -379,12 +429,14 @@ static int usbtmc_ioctl_abort_bulk_out(struct usbtmc_device_data *data)
 	n = 0;
 
 usbtmc_abort_bulk_out_check_status:
+	/* do not stress device with subsequent requests */
+	msleep(50);
 	rv = usb_control_msg(data->usb_dev,
 			     usb_rcvctrlpipe(data->usb_dev, 0),
 			     USBTMC_REQUEST_CHECK_ABORT_BULK_OUT_STATUS,
 			     USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT,
 			     0, data->bulk_out, buffer, 0x08,
-			     USBTMC_TIMEOUT);
+			     USB_CTRL_GET_TIMEOUT);
 	n++;
 	if (rv < 0) {
 		dev_err(dev, "usb_control_msg returned %d\n", rv);
@@ -418,6 +470,11 @@ exit:
 	return rv;
 }
 
+static int usbtmc_ioctl_abort_bulk_out(struct usbtmc_device_data *data)
+{
+	return usbtmc_ioctl_abort_bulk_out_tag(data, data->bTag_last_write);
+}
+
 static int usbtmc488_ioctl_read_stb(struct usbtmc_file_data *file_data,
 				void __user *arg)
 {
@@ -457,7 +514,7 @@ static int usbtmc488_ioctl_read_stb(struct usbtmc_file_data *file_data,
 			USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
 			data->iin_bTag,
 			data->ifnum,
-			buffer, 0x03, USBTMC_TIMEOUT);
+			buffer, 0x03, USB_CTRL_GET_TIMEOUT);
 	if (rv < 0) {
 		dev_err(dev, "stb usb_control_msg returned %d\n", rv);
 		goto exit;
@@ -510,6 +567,54 @@ static int usbtmc488_ioctl_read_stb(struct usbtmc_file_data *file_data,
 	return rv;
 }
 
+static int usbtmc488_ioctl_wait_srq(struct usbtmc_file_data *file_data,
+				    __u32 __user *arg)
+{
+	struct usbtmc_device_data *data = file_data->data;
+	struct device *dev = &data->intf->dev;
+	int rv;
+	u32 timeout;
+	unsigned long expire;
+
+	if (!data->iin_ep_present) {
+		dev_dbg(dev, "no interrupt endpoint present\n");
+		return -EFAULT;
+	}
+
+	if (get_user(timeout, arg))
+		return -EFAULT;
+
+	expire = msecs_to_jiffies(timeout);
+
+	mutex_unlock(&data->io_mutex);
+
+	rv = wait_event_interruptible_timeout(
+			data->waitq,
+			atomic_read(&file_data->srq_asserted) != 0 ||
+			atomic_read(&file_data->closing),
+			expire);
+
+	mutex_lock(&data->io_mutex);
+
+	/* Note! disconnect or close could be called in the meantime */
+	if (atomic_read(&file_data->closing) || data->zombie)
+		rv = -ENODEV;
+
+	if (rv < 0) {
+		/* dev can be invalid now! */
+		pr_debug("%s - wait interrupted %d\n", __func__, rv);
+		return rv;
+	}
+
+	if (rv == 0) {
+		dev_dbg(dev, "%s - wait timed out\n", __func__);
+		return -ETIMEDOUT;
+	}
+
+	dev_dbg(dev, "%s - srq asserted\n", __func__);
+	return 0;
+}
+
 static int usbtmc488_ioctl_simple(struct usbtmc_device_data *data,
 				void __user *arg, unsigned int cmd)
 {
@@ -543,7 +648,7 @@ static int usbtmc488_ioctl_simple(struct usbtmc_device_data *data,
 			USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
 			wValue,
 			data->ifnum,
-			buffer, 0x01, USBTMC_TIMEOUT);
+			buffer, 0x01, USB_CTRL_GET_TIMEOUT);
 	if (rv < 0) {
 		dev_err(dev, "simple usb_control_msg failed %d\n", rv);
 		goto exit;
@@ -610,6 +715,559 @@ static int usbtmc488_ioctl_trigger(struct usbtmc_file_data *file_data)
 	return 0;
 }
 
+static struct urb *usbtmc_create_urb(void)
+{
+	const size_t bufsize = USBTMC_BUFSIZE;
+	u8 *dmabuf = NULL;
+	struct urb *urb = usb_alloc_urb(0, GFP_KERNEL);
+
+	if (!urb)
+		return NULL;
+
+	dmabuf = kmalloc(bufsize, GFP_KERNEL);
+	if (!dmabuf) {
+		usb_free_urb(urb);
+		return NULL;
+	}
+
+	urb->transfer_buffer = dmabuf;
+	urb->transfer_buffer_length = bufsize;
+	urb->transfer_flags |= URB_FREE_BUFFER;
+	return urb;
+}
+
+static void usbtmc_read_bulk_cb(struct urb *urb)
+{
+	struct usbtmc_file_data *file_data = urb->context;
+	int status = urb->status;
+	unsigned long flags;
+
+	/* sync/async unlink faults aren't errors */
+	if (status) {
+		if (!(/* status == -ENOENT || */
+			status == -ECONNRESET ||
+			status == -EREMOTEIO || /* Short packet */
+			status == -ESHUTDOWN))
+			dev_err(&file_data->data->intf->dev,
+			"%s - nonzero read bulk status received: %d\n",
+			__func__, status);
+
+		spin_lock_irqsave(&file_data->err_lock, flags);
+		if (!file_data->in_status)
+			file_data->in_status = status;
+		spin_unlock_irqrestore(&file_data->err_lock, flags);
+	}
+
+	spin_lock_irqsave(&file_data->err_lock, flags);
+	file_data->in_transfer_size += urb->actual_length;
+	dev_dbg(&file_data->data->intf->dev,
+		"%s - total size: %u current: %d status: %d\n",
+		__func__, file_data->in_transfer_size,
+		urb->actual_length, status);
+	spin_unlock_irqrestore(&file_data->err_lock, flags);
+	usb_anchor_urb(urb, &file_data->in_anchor);
+
+	wake_up_interruptible(&file_data->wait_bulk_in);
+	wake_up_interruptible(&file_data->data->waitq);
+}
+
+static inline bool usbtmc_do_transfer(struct usbtmc_file_data *file_data)
+{
+	bool data_or_error;
+
+	spin_lock_irq(&file_data->err_lock);
+	data_or_error = !usb_anchor_empty(&file_data->in_anchor)
+			|| file_data->in_status;
+	spin_unlock_irq(&file_data->err_lock);
+	dev_dbg(&file_data->data->intf->dev, "%s: returns %d\n", __func__,
+		data_or_error);
+	return data_or_error;
+}
+
+static ssize_t usbtmc_generic_read(struct usbtmc_file_data *file_data,
+				   void __user *user_buffer,
+				   u32 transfer_size,
+				   u32 *transferred,
+				   u32 flags)
+{
+	struct usbtmc_device_data *data = file_data->data;
+	struct device *dev = &data->intf->dev;
+	u32 done = 0;
+	u32 remaining;
+	const u32 bufsize = USBTMC_BUFSIZE;
+	int retval = 0;
+	u32 max_transfer_size;
+	unsigned long expire;
+	int bufcount = 1;
+	int again = 0;
+
+	/* mutex already locked */
+
+	*transferred = done;
+
+	max_transfer_size = transfer_size;
+
+	if (flags & USBTMC_FLAG_IGNORE_TRAILER) {
+		/* The device may send extra alignment bytes (up to
+		 * wMaxPacketSize – 1) to avoid sending a zero-length
+		 * packet
+		 */
+		remaining = transfer_size;
+		if ((max_transfer_size % data->wMaxPacketSize) == 0)
+			max_transfer_size += (data->wMaxPacketSize - 1);
+	} else {
+		/* round down to bufsize to avoid truncated data left */
+		if (max_transfer_size > bufsize) {
+			max_transfer_size =
+				roundup(max_transfer_size + 1 - bufsize,
+					bufsize);
+		}
+		remaining = max_transfer_size;
+	}
+
+	spin_lock_irq(&file_data->err_lock);
+
+	if (file_data->in_status) {
+		/* return the very first error */
+		retval = file_data->in_status;
+		spin_unlock_irq(&file_data->err_lock);
+		goto error;
+	}
+
+	if (flags & USBTMC_FLAG_ASYNC) {
+		if (usb_anchor_empty(&file_data->in_anchor))
+			again = 1;
+
+		if (file_data->in_urbs_used == 0) {
+			file_data->in_transfer_size = 0;
+			file_data->in_status = 0;
+		}
+	} else {
+		file_data->in_transfer_size = 0;
+		file_data->in_status = 0;
+	}
+
+	if (max_transfer_size == 0) {
+		bufcount = 0;
+	} else {
+		bufcount = roundup(max_transfer_size, bufsize) / bufsize;
+		if (bufcount > file_data->in_urbs_used)
+			bufcount -= file_data->in_urbs_used;
+		else
+			bufcount = 0;
+
+		if (bufcount + file_data->in_urbs_used > MAX_URBS_IN_FLIGHT) {
+			bufcount = MAX_URBS_IN_FLIGHT -
+					file_data->in_urbs_used;
+		}
+	}
+	spin_unlock_irq(&file_data->err_lock);
+
+	dev_dbg(dev, "%s: requested=%u flags=0x%X size=%u bufs=%d used=%d\n",
+		__func__, transfer_size, flags,
+		max_transfer_size, bufcount, file_data->in_urbs_used);
+
+	while (bufcount > 0) {
+		u8 *dmabuf = NULL;
+		struct urb *urb = usbtmc_create_urb();
+
+		if (!urb) {
+			retval = -ENOMEM;
+			goto error;
+		}
+
+		dmabuf = urb->transfer_buffer;
+
+		usb_fill_bulk_urb(urb, data->usb_dev,
+			usb_rcvbulkpipe(data->usb_dev, data->bulk_in),
+			dmabuf, bufsize,
+			usbtmc_read_bulk_cb, file_data);
+
+		usb_anchor_urb(urb, &file_data->submitted);
+		retval = usb_submit_urb(urb, GFP_KERNEL);
+		/* urb is anchored. We can release our reference. */
+		usb_free_urb(urb);
+		if (unlikely(retval)) {
+			usb_unanchor_urb(urb);
+			goto error;
+		}
+		file_data->in_urbs_used++;
+		bufcount--;
+	}
+
+	if (again) {
+		dev_dbg(dev, "%s: ret=again\n", __func__);
+		return -EAGAIN;
+	}
+
+	if (user_buffer == NULL)
+		return -EINVAL;
+
+	expire = msecs_to_jiffies(file_data->timeout);
+
+	while (max_transfer_size > 0) {
+		u32 this_part;
+		struct urb *urb = NULL;
+
+		if (!(flags & USBTMC_FLAG_ASYNC)) {
+			dev_dbg(dev, "%s: before wait time %lu\n",
+				__func__, expire);
+			retval = wait_event_interruptible_timeout(
+				file_data->wait_bulk_in,
+				usbtmc_do_transfer(file_data),
+				expire);
+
+			dev_dbg(dev, "%s: wait returned %d\n",
+				__func__, retval);
+
+			if (retval <= 0) {
+				if (retval == 0)
+					retval = -ETIMEDOUT;
+				goto error;
+			}
+		}
+
+		urb = usb_get_from_anchor(&file_data->in_anchor);
+		if (!urb) {
+			if (!(flags & USBTMC_FLAG_ASYNC)) {
+				/* synchronous case: must not happen */
+				retval = -EFAULT;
+				goto error;
+			}
+
+			/* asynchronous case: ready, do not block or wait */
+			*transferred = done;
+			dev_dbg(dev, "%s: (async) done=%u ret=0\n",
+				__func__, done);
+			return 0;
+		}
+
+		file_data->in_urbs_used--;
+
+		if (max_transfer_size > urb->actual_length)
+			max_transfer_size -= urb->actual_length;
+		else
+			max_transfer_size = 0;
+
+		if (remaining > urb->actual_length)
+			this_part = urb->actual_length;
+		else
+			this_part = remaining;
+
+		print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1,
+			urb->transfer_buffer, urb->actual_length, true);
+
+		if (copy_to_user(user_buffer + done,
+				 urb->transfer_buffer, this_part)) {
+			usb_free_urb(urb);
+			retval = -EFAULT;
+			goto error;
+		}
+
+		remaining -= this_part;
+		done += this_part;
+
+		spin_lock_irq(&file_data->err_lock);
+		if (urb->status) {
+			/* return the very first error */
+			retval = file_data->in_status;
+			spin_unlock_irq(&file_data->err_lock);
+			usb_free_urb(urb);
+			goto error;
+		}
+		spin_unlock_irq(&file_data->err_lock);
+
+		if (urb->actual_length < bufsize) {
+			/* short packet or ZLP received => ready */
+			usb_free_urb(urb);
+			retval = 1;
+			break;
+		}
+
+		if (!(flags & USBTMC_FLAG_ASYNC) &&
+		    max_transfer_size > (bufsize * file_data->in_urbs_used)) {
+			/* resubmit, since other buffers still not enough */
+			usb_anchor_urb(urb, &file_data->submitted);
+			retval = usb_submit_urb(urb, GFP_KERNEL);
+			if (unlikely(retval)) {
+				usb_unanchor_urb(urb);
+				usb_free_urb(urb);
+				goto error;
+			}
+			file_data->in_urbs_used++;
+		}
+		usb_free_urb(urb);
+		retval = 0;
+	}
+
+error:
+	*transferred = done;
+
+	dev_dbg(dev, "%s: before kill\n", __func__);
+	/* Attention: killing urbs can take long time (2 ms) */
+	usb_kill_anchored_urbs(&file_data->submitted);
+	dev_dbg(dev, "%s: after kill\n", __func__);
+	usb_scuttle_anchored_urbs(&file_data->in_anchor);
+	file_data->in_urbs_used = 0;
+	file_data->in_status = 0; /* no spinlock needed here */
+	dev_dbg(dev, "%s: done=%u ret=%d\n", __func__, done, retval);
+
+	return retval;
+}
+
+static ssize_t usbtmc_ioctl_generic_read(struct usbtmc_file_data *file_data,
+					 void __user *arg)
+{
+	struct usbtmc_message msg;
+	ssize_t retval = 0;
+
+	/* mutex already locked */
+
+	if (copy_from_user(&msg, arg, sizeof(struct usbtmc_message)))
+		return -EFAULT;
+
+	retval = usbtmc_generic_read(file_data, msg.message,
+				     msg.transfer_size, &msg.transferred,
+				     msg.flags);
+
+	if (put_user(msg.transferred,
+		     &((struct usbtmc_message __user *)arg)->transferred))
+		return -EFAULT;
+
+	return retval;
+}
+
+static void usbtmc_write_bulk_cb(struct urb *urb)
+{
+	struct usbtmc_file_data *file_data = urb->context;
+	int wakeup = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&file_data->err_lock, flags);
+	file_data->out_transfer_size += urb->actual_length;
+
+	/* sync/async unlink faults aren't errors */
+	if (urb->status) {
+		if (!(urb->status == -ENOENT ||
+			urb->status == -ECONNRESET ||
+			urb->status == -ESHUTDOWN))
+			dev_err(&file_data->data->intf->dev,
+				"%s - nonzero write bulk status received: %d\n",
+				__func__, urb->status);
+
+		if (!file_data->out_status) {
+			file_data->out_status = urb->status;
+			wakeup = 1;
+		}
+	}
+	spin_unlock_irqrestore(&file_data->err_lock, flags);
+
+	dev_dbg(&file_data->data->intf->dev,
+		"%s - write bulk total size: %u\n",
+		__func__, file_data->out_transfer_size);
+
+	up(&file_data->limit_write_sem);
+	if (usb_anchor_empty(&file_data->submitted) || wakeup)
+		wake_up_interruptible(&file_data->data->waitq);
+}
+
+static ssize_t usbtmc_generic_write(struct usbtmc_file_data *file_data,
+				    const void __user *user_buffer,
+				    u32 transfer_size,
+				    u32 *transferred,
+				    u32 flags)
+{
+	struct usbtmc_device_data *data = file_data->data;
+	struct device *dev;
+	u32 done = 0;
+	u32 remaining;
+	unsigned long expire;
+	const u32 bufsize = USBTMC_BUFSIZE;
+	struct urb *urb = NULL;
+	int retval = 0;
+	u32 timeout;
+
+	*transferred = 0;
+
+	/* Get pointer to private data structure */
+	dev = &data->intf->dev;
+
+	dev_dbg(dev, "%s: size=%u flags=0x%X sema=%u\n",
+		__func__, transfer_size, flags,
+		file_data->limit_write_sem.count);
+
+	if (flags & USBTMC_FLAG_APPEND) {
+		spin_lock_irq(&file_data->err_lock);
+		retval = file_data->out_status;
+		spin_unlock_irq(&file_data->err_lock);
+		if (retval < 0)
+			return retval;
+	} else {
+		spin_lock_irq(&file_data->err_lock);
+		file_data->out_transfer_size = 0;
+		file_data->out_status = 0;
+		spin_unlock_irq(&file_data->err_lock);
+	}
+
+	remaining = transfer_size;
+	if (remaining > INT_MAX)
+		remaining = INT_MAX;
+
+	timeout = file_data->timeout;
+	expire = msecs_to_jiffies(timeout);
+
+	while (remaining > 0) {
+		u32 this_part, aligned;
+		u8 *buffer = NULL;
+
+		if (flags & USBTMC_FLAG_ASYNC) {
+			if (down_trylock(&file_data->limit_write_sem)) {
+				retval = (done)?(0):(-EAGAIN);
+				goto exit;
+			}
+		} else {
+			retval = down_timeout(&file_data->limit_write_sem,
+					      expire);
+			if (retval < 0) {
+				retval = -ETIMEDOUT;
+				goto error;
+			}
+		}
+
+		spin_lock_irq(&file_data->err_lock);
+		retval = file_data->out_status;
+		spin_unlock_irq(&file_data->err_lock);
+		if (retval < 0) {
+			up(&file_data->limit_write_sem);
+			goto error;
+		}
+
+		/* prepare next urb to send */
+		urb = usbtmc_create_urb();
+		if (!urb) {
+			retval = -ENOMEM;
+			up(&file_data->limit_write_sem);
+			goto error;
+		}
+		buffer = urb->transfer_buffer;
+
+		if (remaining > bufsize)
+			this_part = bufsize;
+		else
+			this_part = remaining;
+
+		if (copy_from_user(buffer, user_buffer + done, this_part)) {
+			retval = -EFAULT;
+			up(&file_data->limit_write_sem);
+			goto error;
+		}
+
+		print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE,
+			16, 1, buffer, this_part, true);
+
+		/* fill bulk with 32 bit alignment to meet USBTMC specification
+		 * (size + 3 & ~3) rounds up and simplifies user code
+		 */
+		aligned = (this_part + 3) & ~3;
+		dev_dbg(dev, "write(size:%u align:%u done:%u)\n",
+			(unsigned int)this_part,
+			(unsigned int)aligned,
+			(unsigned int)done);
+
+		usb_fill_bulk_urb(urb, data->usb_dev,
+			usb_sndbulkpipe(data->usb_dev, data->bulk_out),
+			urb->transfer_buffer, aligned,
+			usbtmc_write_bulk_cb, file_data);
+
+		usb_anchor_urb(urb, &file_data->submitted);
+		retval = usb_submit_urb(urb, GFP_KERNEL);
+		if (unlikely(retval)) {
+			usb_unanchor_urb(urb);
+			up(&file_data->limit_write_sem);
+			goto error;
+		}
+
+		usb_free_urb(urb);
+		urb = NULL; /* urb will be finally released by usb driver */
+
+		remaining -= this_part;
+		done += this_part;
+	}
+
+	/* All urbs are on the fly */
+	if (!(flags & USBTMC_FLAG_ASYNC)) {
+		if (!usb_wait_anchor_empty_timeout(&file_data->submitted,
+						   timeout)) {
+			retval = -ETIMEDOUT;
+			goto error;
+		}
+	}
+
+	retval = 0;
+	goto exit;
+
+error:
+	usb_kill_anchored_urbs(&file_data->submitted);
+exit:
+	usb_free_urb(urb);
+
+	spin_lock_irq(&file_data->err_lock);
+	if (!(flags & USBTMC_FLAG_ASYNC))
+		done = file_data->out_transfer_size;
+	if (!retval && file_data->out_status)
+		retval = file_data->out_status;
+	spin_unlock_irq(&file_data->err_lock);
+
+	*transferred = done;
+
+	dev_dbg(dev, "%s: done=%u, retval=%d, urbstat=%d\n",
+		__func__, done, retval, file_data->out_status);
+
+	return retval;
+}
+
+static ssize_t usbtmc_ioctl_generic_write(struct usbtmc_file_data *file_data,
+					  void __user *arg)
+{
+	struct usbtmc_message msg;
+	ssize_t retval = 0;
+
+	/* mutex already locked */
+
+	if (copy_from_user(&msg, arg, sizeof(struct usbtmc_message)))
+		return -EFAULT;
+
+	retval = usbtmc_generic_write(file_data, msg.message,
+				      msg.transfer_size, &msg.transferred,
+				      msg.flags);
+
+	if (put_user(msg.transferred,
+		     &((struct usbtmc_message __user *)arg)->transferred))
+		return -EFAULT;
+
+	return retval;
+}
+
+/*
+ * Get the generic write result
+ */
+static ssize_t usbtmc_ioctl_write_result(struct usbtmc_file_data *file_data,
+				void __user *arg)
+{
+	u32 transferred;
+	int retval;
+
+	spin_lock_irq(&file_data->err_lock);
+	transferred = file_data->out_transfer_size;
+	retval = file_data->out_status;
+	spin_unlock_irq(&file_data->err_lock);
+
+	if (put_user(transferred, (__u32 __user *)arg))
+		return -EFAULT;
+
+	return retval;
+}
+
 /*
  * Sends a REQUEST_DEV_DEP_MSG_IN message on the Bulk-OUT endpoint.
  * @transfer_size: number of bytes to request from the device.
@@ -619,7 +1277,7 @@ static int usbtmc488_ioctl_trigger(struct usbtmc_file_data *file_data)
  * Also updates bTag_last_write.
  */
 static int send_request_dev_dep_msg_in(struct usbtmc_file_data *file_data,
-				       size_t transfer_size)
+				       u32 transfer_size)
 {
 	struct usbtmc_device_data *data = file_data->data;
 	int retval;
@@ -662,12 +1320,11 @@ static int send_request_dev_dep_msg_in(struct usbtmc_file_data *file_data,
 		data->bTag++;
 
 	kfree(buffer);
-	if (retval < 0) {
-		dev_err(&data->intf->dev, "usb_bulk_msg in send_request_dev_dep_msg_in() returned %d\n", retval);
-		return retval;
-	}
+	if (retval < 0)
+		dev_err(&data->intf->dev, "%s returned %d\n",
+			__func__, retval);
 
-	return 0;
+	return retval;
 }
 
 static ssize_t usbtmc_read(struct file *filp, char __user *buf,
@@ -676,20 +1333,20 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf,
 	struct usbtmc_file_data *file_data;
 	struct usbtmc_device_data *data;
 	struct device *dev;
+	const u32 bufsize = USBTMC_BUFSIZE;
 	u32 n_characters;
 	u8 *buffer;
 	int actual;
-	size_t done;
-	size_t remaining;
+	u32 done = 0;
+	u32 remaining;
 	int retval;
-	size_t this_part;
 
 	/* Get pointer to private data structure */
 	file_data = filp->private_data;
 	data = file_data->data;
 	dev = &data->intf->dev;
 
-	buffer = kmalloc(USBTMC_SIZE_IOBUFFER, GFP_KERNEL);
+	buffer = kmalloc(bufsize, GFP_KERNEL);
 	if (!buffer)
 		return -ENOMEM;
 
@@ -699,124 +1356,116 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf,
 		goto exit;
 	}
 
-	dev_dbg(dev, "usb_bulk_msg_in: count(%zu)\n", count);
+	if (count > INT_MAX)
+		count = INT_MAX;
+
+	dev_dbg(dev, "%s(count:%zu)\n", __func__, count);
 
 	retval = send_request_dev_dep_msg_in(file_data, count);
 
 	if (retval < 0) {
-		if (data->auto_abort)
+		if (file_data->auto_abort)
 			usbtmc_ioctl_abort_bulk_out(data);
 		goto exit;
 	}
 
 	/* Loop until we have fetched everything we requested */
 	remaining = count;
-	this_part = remaining;
-	done = 0;
+	actual = 0;
 
-	while (remaining > 0) {
-		/* Send bulk URB */
-		retval = usb_bulk_msg(data->usb_dev,
-				      usb_rcvbulkpipe(data->usb_dev,
-						      data->bulk_in),
-				      buffer, USBTMC_SIZE_IOBUFFER, &actual,
-				      file_data->timeout);
-
-		dev_dbg(dev, "usb_bulk_msg: retval(%u), done(%zu), remaining(%zu), actual(%d)\n", retval, done, remaining, actual);
+	/* Send bulk URB */
+	retval = usb_bulk_msg(data->usb_dev,
+			      usb_rcvbulkpipe(data->usb_dev,
+					      data->bulk_in),
+			      buffer, bufsize, &actual,
+			      file_data->timeout);
 
-		/* Store bTag (in case we need to abort) */
-		data->bTag_last_read = data->bTag;
+	dev_dbg(dev, "%s: bulk_msg retval(%u), actual(%d)\n",
+		__func__, retval, actual);
 
-		if (retval < 0) {
-			dev_dbg(dev, "Unable to read data, error %d\n", retval);
-			if (data->auto_abort)
-				usbtmc_ioctl_abort_bulk_in(data);
-			goto exit;
-		}
-
-		/* Parse header in first packet */
-		if (done == 0) {
-			/* Sanity checks for the header */
-			if (actual < USBTMC_HEADER_SIZE) {
-				dev_err(dev, "Device sent too small first packet: %u < %u\n", actual, USBTMC_HEADER_SIZE);
-				if (data->auto_abort)
-					usbtmc_ioctl_abort_bulk_in(data);
-				goto exit;
-			}
+	/* Store bTag (in case we need to abort) */
+	data->bTag_last_read = data->bTag;
 
-			if (buffer[0] != 2) {
-				dev_err(dev, "Device sent reply with wrong MsgID: %u != 2\n", buffer[0]);
-				if (data->auto_abort)
-					usbtmc_ioctl_abort_bulk_in(data);
-				goto exit;
-			}
+	if (retval < 0) {
+		if (file_data->auto_abort)
+			usbtmc_ioctl_abort_bulk_in(data);
+		goto exit;
+	}
 
-			if (buffer[1] != data->bTag_last_write) {
-				dev_err(dev, "Device sent reply with wrong bTag: %u != %u\n", buffer[1], data->bTag_last_write);
-				if (data->auto_abort)
-					usbtmc_ioctl_abort_bulk_in(data);
-				goto exit;
-			}
+	/* Sanity checks for the header */
+	if (actual < USBTMC_HEADER_SIZE) {
+		dev_err(dev, "Device sent too small first packet: %u < %u\n",
+			actual, USBTMC_HEADER_SIZE);
+		if (file_data->auto_abort)
+			usbtmc_ioctl_abort_bulk_in(data);
+		goto exit;
+	}
 
-			/* How many characters did the instrument send? */
-			n_characters = buffer[4] +
-				       (buffer[5] << 8) +
-				       (buffer[6] << 16) +
-				       (buffer[7] << 24);
+	if (buffer[0] != 2) {
+		dev_err(dev, "Device sent reply with wrong MsgID: %u != 2\n",
+			buffer[0]);
+		if (file_data->auto_abort)
+			usbtmc_ioctl_abort_bulk_in(data);
+		goto exit;
+	}
 
-			if (n_characters > this_part) {
-				dev_err(dev, "Device wants to return more data than requested: %u > %zu\n", n_characters, count);
-				if (data->auto_abort)
-					usbtmc_ioctl_abort_bulk_in(data);
-				goto exit;
-			}
+	if (buffer[1] != data->bTag_last_write) {
+		dev_err(dev, "Device sent reply with wrong bTag: %u != %u\n",
+		buffer[1], data->bTag_last_write);
+		if (file_data->auto_abort)
+			usbtmc_ioctl_abort_bulk_in(data);
+		goto exit;
+	}
 
-			/* Remove the USBTMC header */
-			actual -= USBTMC_HEADER_SIZE;
+	/* How many characters did the instrument send? */
+	n_characters = buffer[4] +
+		       (buffer[5] << 8) +
+		       (buffer[6] << 16) +
+		       (buffer[7] << 24);
 
-			/* Check if the message is smaller than requested */
-			if (remaining > n_characters)
-				remaining = n_characters;
-			/* Remove padding if it exists */
-			if (actual > remaining)
-				actual = remaining;
+	file_data->bmTransferAttributes = buffer[8];
 
-			dev_dbg(dev, "Bulk-IN header: N_characters(%u), bTransAttr(%u)\n", n_characters, buffer[8]);
+	dev_dbg(dev, "Bulk-IN header: N_characters(%u), bTransAttr(%u)\n",
+		n_characters, buffer[8]);
 
-			remaining -= actual;
+	if (n_characters > remaining) {
+		dev_err(dev, "Device wants to return more data than requested: %u > %zu\n",
+			n_characters, count);
+		if (file_data->auto_abort)
+			usbtmc_ioctl_abort_bulk_in(data);
+		goto exit;
+	}
 
-			/* Terminate if end-of-message bit received from device */
-			if ((buffer[8] & 0x01) && (actual >= n_characters))
-				remaining = 0;
+	print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE,
+			     16, 1, buffer, actual, true);
 
-			dev_dbg(dev, "Bulk-IN header: remaining(%zu), buf(%p), buffer(%p) done(%zu)\n", remaining,buf,buffer,done);
+	remaining = n_characters;
 
+	/* Remove the USBTMC header */
+	actual -= USBTMC_HEADER_SIZE;
 
-			/* Copy buffer to user space */
-			if (copy_to_user(buf + done, &buffer[USBTMC_HEADER_SIZE], actual)) {
-				/* There must have been an addressing problem */
-				retval = -EFAULT;
-				goto exit;
-			}
-			done += actual;
-		}
-		else  {
-			if (actual > remaining)
-				actual = remaining;
+	/* Remove padding if it exists */
+	if (actual > remaining)
+		actual = remaining;
 
-			remaining -= actual;
+	remaining -= actual;
 
-			dev_dbg(dev, "Bulk-IN header cont: actual(%u), done(%zu), remaining(%zu), buf(%p), buffer(%p)\n", actual, done, remaining,buf,buffer);
+	/* Copy buffer to user space */
+	if (copy_to_user(buf, &buffer[USBTMC_HEADER_SIZE], actual)) {
+		/* There must have been an addressing problem */
+		retval = -EFAULT;
+		goto exit;
+	}
 
-			/* Copy buffer to user space */
-			if (copy_to_user(buf + done, buffer, actual)) {
-				/* There must have been an addressing problem */
-				retval = -EFAULT;
-				goto exit;
-			}
-			done += actual;
-		}
+	if ((actual + USBTMC_HEADER_SIZE) == bufsize) {
+		retval = usbtmc_generic_read(file_data, buf + actual,
+					     remaining,
+					     &done,
+					     USBTMC_FLAG_IGNORE_TRAILER);
+		if (retval < 0)
+			goto exit;
 	}
+	done += actual;
 
 	/* Update file position value */
 	*f_pos = *f_pos + done;
@@ -833,113 +1482,152 @@ static ssize_t usbtmc_write(struct file *filp, const char __user *buf,
 {
 	struct usbtmc_file_data *file_data;
 	struct usbtmc_device_data *data;
+	struct urb *urb = NULL;
+	ssize_t retval = 0;
 	u8 *buffer;
-	int retval;
-	int actual;
-	unsigned long int n_bytes;
-	int remaining;
-	int done;
-	int this_part;
+	u32 remaining, done;
+	u32 transfersize, aligned, buflen;
 
 	file_data = filp->private_data;
 	data = file_data->data;
 
-	buffer = kmalloc(USBTMC_SIZE_IOBUFFER, GFP_KERNEL);
-	if (!buffer)
-		return -ENOMEM;
-
 	mutex_lock(&data->io_mutex);
+
 	if (data->zombie) {
 		retval = -ENODEV;
 		goto exit;
 	}
 
-	remaining = count;
 	done = 0;
 
-	while (remaining > 0) {
-		if (remaining > USBTMC_SIZE_IOBUFFER - USBTMC_HEADER_SIZE) {
-			this_part = USBTMC_SIZE_IOBUFFER - USBTMC_HEADER_SIZE;
-			buffer[8] = 0;
-		} else {
-			this_part = remaining;
-			buffer[8] = file_data->eom_val;
-		}
+	spin_lock_irq(&file_data->err_lock);
+	file_data->out_transfer_size = 0;
+	file_data->out_status = 0;
+	spin_unlock_irq(&file_data->err_lock);
 
-		/* Setup IO buffer for DEV_DEP_MSG_OUT message */
-		buffer[0] = 1;
-		buffer[1] = data->bTag;
-		buffer[2] = ~data->bTag;
-		buffer[3] = 0; /* Reserved */
-		buffer[4] = this_part >> 0;
-		buffer[5] = this_part >> 8;
-		buffer[6] = this_part >> 16;
-		buffer[7] = this_part >> 24;
-		/* buffer[8] is set above... */
-		buffer[9] = 0; /* Reserved */
-		buffer[10] = 0; /* Reserved */
-		buffer[11] = 0; /* Reserved */
-
-		if (copy_from_user(&buffer[USBTMC_HEADER_SIZE], buf + done, this_part)) {
-			retval = -EFAULT;
-			goto exit;
-		}
+	if (!count)
+		goto exit;
 
-		n_bytes = roundup(USBTMC_HEADER_SIZE + this_part, 4);
-		memset(buffer + USBTMC_HEADER_SIZE + this_part, 0, n_bytes - (USBTMC_HEADER_SIZE + this_part));
+	if (down_trylock(&file_data->limit_write_sem)) {
+		/* previous calls were async */
+		retval = -EBUSY;
+		goto exit;
+	}
 
-		do {
-			retval = usb_bulk_msg(data->usb_dev,
-					      usb_sndbulkpipe(data->usb_dev,
-							      data->bulk_out),
-					      buffer, n_bytes,
-					      &actual, file_data->timeout);
-			if (retval != 0)
-				break;
-			n_bytes -= actual;
-		} while (n_bytes);
-
-		data->bTag_last_write = data->bTag;
+	urb = usbtmc_create_urb();
+	if (!urb) {
+		retval = -ENOMEM;
+		up(&file_data->limit_write_sem);
+		goto exit;
+	}
+
+	buffer = urb->transfer_buffer;
+	buflen = urb->transfer_buffer_length;
+
+	if (count > INT_MAX) {
+		transfersize = INT_MAX;
+		buffer[8] = 0;
+	} else {
+		transfersize = count;
+		buffer[8] = file_data->eom_val;
+	}
+
+	/* Setup IO buffer for DEV_DEP_MSG_OUT message */
+	buffer[0] = 1;
+	buffer[1] = data->bTag;
+	buffer[2] = ~data->bTag;
+	buffer[3] = 0; /* Reserved */
+	buffer[4] = transfersize >> 0;
+	buffer[5] = transfersize >> 8;
+	buffer[6] = transfersize >> 16;
+	buffer[7] = transfersize >> 24;
+	/* buffer[8] is set above... */
+	buffer[9] = 0; /* Reserved */
+	buffer[10] = 0; /* Reserved */
+	buffer[11] = 0; /* Reserved */
+
+	remaining = transfersize;
+
+	if (transfersize + USBTMC_HEADER_SIZE > buflen) {
+		transfersize = buflen - USBTMC_HEADER_SIZE;
+		aligned = buflen;
+	} else {
+		aligned = (transfersize + (USBTMC_HEADER_SIZE + 3)) & ~3;
+	}
+
+	if (copy_from_user(&buffer[USBTMC_HEADER_SIZE], buf, transfersize)) {
+		retval = -EFAULT;
+		up(&file_data->limit_write_sem);
+		goto exit;
+	}
+
+	dev_dbg(&data->intf->dev, "%s(size:%u align:%u)\n", __func__,
+		(unsigned int)transfersize, (unsigned int)aligned);
+
+	print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE,
+			     16, 1, buffer, aligned, true);
+
+	usb_fill_bulk_urb(urb, data->usb_dev,
+		usb_sndbulkpipe(data->usb_dev, data->bulk_out),
+		urb->transfer_buffer, aligned,
+		usbtmc_write_bulk_cb, file_data);
+
+	usb_anchor_urb(urb, &file_data->submitted);
+	retval = usb_submit_urb(urb, GFP_KERNEL);
+	if (unlikely(retval)) {
+		usb_unanchor_urb(urb);
+		up(&file_data->limit_write_sem);
+		goto exit;
+	}
+
+	remaining -= transfersize;
+
+	data->bTag_last_write = data->bTag;
+	data->bTag++;
+
+	if (!data->bTag)
 		data->bTag++;
 
-		if (!data->bTag)
-			data->bTag++;
+	/* call generic_write even when remaining = 0 */
+	retval = usbtmc_generic_write(file_data, buf + transfersize, remaining,
+				      &done, USBTMC_FLAG_APPEND);
+	/* truncate alignment bytes */
+	if (done > remaining)
+		done = remaining;
 
-		if (retval < 0) {
-			dev_err(&data->intf->dev,
-				"Unable to send data, error %d\n", retval);
-			if (data->auto_abort)
-				usbtmc_ioctl_abort_bulk_out(data);
-			goto exit;
-		}
+	/*add size of first urb*/
+	done += transfersize;
 
-		remaining -= this_part;
-		done += this_part;
+	if (retval < 0) {
+		usb_kill_anchored_urbs(&file_data->submitted);
+
+		dev_err(&data->intf->dev,
+			"Unable to send data, error %d\n", (int)retval);
+		if (file_data->auto_abort)
+			usbtmc_ioctl_abort_bulk_out(data);
+		goto exit;
 	}
 
-	retval = count;
+	retval = done;
 exit:
+	usb_free_urb(urb);
 	mutex_unlock(&data->io_mutex);
-	kfree(buffer);
 	return retval;
 }
 
 static int usbtmc_ioctl_clear(struct usbtmc_device_data *data)
 {
-	struct usb_host_interface *current_setting;
-	struct usb_endpoint_descriptor *desc;
 	struct device *dev;
 	u8 *buffer;
 	int rv;
 	int n;
 	int actual = 0;
-	int max_size;
 
 	dev = &data->intf->dev;
 
 	dev_dbg(dev, "Sending INITIATE_CLEAR request\n");
 
-	buffer = kmalloc(USBTMC_SIZE_IOBUFFER, GFP_KERNEL);
+	buffer = kmalloc(USBTMC_BUFSIZE, GFP_KERNEL);
 	if (!buffer)
 		return -ENOMEM;
 
@@ -947,7 +1635,7 @@ static int usbtmc_ioctl_clear(struct usbtmc_device_data *data)
 			     usb_rcvctrlpipe(data->usb_dev, 0),
 			     USBTMC_REQUEST_INITIATE_CLEAR,
 			     USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			     0, 0, buffer, 1, USBTMC_TIMEOUT);
+			     0, 0, buffer, 1, USB_CTRL_GET_TIMEOUT);
 	if (rv < 0) {
 		dev_err(dev, "usb_control_msg returned %d\n", rv);
 		goto exit;
@@ -961,22 +1649,6 @@ static int usbtmc_ioctl_clear(struct usbtmc_device_data *data)
 		goto exit;
 	}
 
-	max_size = 0;
-	current_setting = data->intf->cur_altsetting;
-	for (n = 0; n < current_setting->desc.bNumEndpoints; n++) {
-		desc = &current_setting->endpoint[n].desc;
-		if (desc->bEndpointAddress == data->bulk_in)
-			max_size = usb_endpoint_maxp(desc);
-	}
-
-	if (max_size == 0) {
-		dev_err(dev, "Couldn't get wMaxPacketSize\n");
-		rv = -EPERM;
-		goto exit;
-	}
-
-	dev_dbg(dev, "wMaxPacketSize is %d\n", max_size);
-
 	n = 0;
 
 usbtmc_clear_check_status:
@@ -987,7 +1659,7 @@ usbtmc_clear_check_status:
 			     usb_rcvctrlpipe(data->usb_dev, 0),
 			     USBTMC_REQUEST_CHECK_CLEAR_STATUS,
 			     USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			     0, 0, buffer, 2, USBTMC_TIMEOUT);
+			     0, 0, buffer, 2, USB_CTRL_GET_TIMEOUT);
 	if (rv < 0) {
 		dev_err(dev, "usb_control_msg returned %d\n", rv);
 		goto exit;
@@ -1004,15 +1676,20 @@ usbtmc_clear_check_status:
 		goto exit;
 	}
 
-	if (buffer[1] == 1)
+	if ((buffer[1] & 1) != 0) {
 		do {
 			dev_dbg(dev, "Reading from bulk in EP\n");
 
+			actual = 0;
 			rv = usb_bulk_msg(data->usb_dev,
 					  usb_rcvbulkpipe(data->usb_dev,
 							  data->bulk_in),
-					  buffer, USBTMC_SIZE_IOBUFFER,
-					  &actual, USBTMC_TIMEOUT);
+					  buffer, USBTMC_BUFSIZE,
+					  &actual, USB_CTRL_GET_TIMEOUT);
+
+			print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE,
+					     16, 1, buffer, actual, true);
+
 			n++;
 
 			if (rv < 0) {
@@ -1020,10 +1697,15 @@ usbtmc_clear_check_status:
 					rv);
 				goto exit;
 			}
-		} while ((actual == max_size) &&
+		} while ((actual == USBTMC_BUFSIZE) &&
 			  (n < USBTMC_MAX_READS_TO_CLEAR_BULK_IN));
+	} else {
+		/* do not stress device with subsequent requests */
+		msleep(50);
+		n++;
+	}
 
-	if (actual == max_size) {
+	if (n >= USBTMC_MAX_READS_TO_CLEAR_BULK_IN) {
 		dev_err(dev, "Couldn't clear device buffer within %d cycles\n",
 			USBTMC_MAX_READS_TO_CLEAR_BULK_IN);
 		rv = -EPERM;
@@ -1037,7 +1719,7 @@ usbtmc_clear_bulk_out_halt:
 	rv = usb_clear_halt(data->usb_dev,
 			    usb_sndbulkpipe(data->usb_dev, data->bulk_out));
 	if (rv < 0) {
-		dev_err(dev, "usb_control_msg returned %d\n", rv);
+		dev_err(dev, "usb_clear_halt returned %d\n", rv);
 		goto exit;
 	}
 	rv = 0;
@@ -1054,12 +1736,9 @@ static int usbtmc_ioctl_clear_out_halt(struct usbtmc_device_data *data)
 	rv = usb_clear_halt(data->usb_dev,
 			    usb_sndbulkpipe(data->usb_dev, data->bulk_out));
 
-	if (rv < 0) {
-		dev_err(&data->usb_dev->dev, "usb_control_msg returned %d\n",
-			rv);
-		return rv;
-	}
-	return 0;
+	if (rv < 0)
+		dev_err(&data->usb_dev->dev, "%s returned %d\n", __func__, rv);
+	return rv;
 }
 
 static int usbtmc_ioctl_clear_in_halt(struct usbtmc_device_data *data)
@@ -1069,11 +1748,33 @@ static int usbtmc_ioctl_clear_in_halt(struct usbtmc_device_data *data)
 	rv = usb_clear_halt(data->usb_dev,
 			    usb_rcvbulkpipe(data->usb_dev, data->bulk_in));
 
-	if (rv < 0) {
-		dev_err(&data->usb_dev->dev, "usb_control_msg returned %d\n",
-			rv);
-		return rv;
-	}
+	if (rv < 0)
+		dev_err(&data->usb_dev->dev, "%s returned %d\n", __func__, rv);
+	return rv;
+}
+
+static int usbtmc_ioctl_cancel_io(struct usbtmc_file_data *file_data)
+{
+	spin_lock_irq(&file_data->err_lock);
+	file_data->in_status = -ECANCELED;
+	file_data->out_status = -ECANCELED;
+	spin_unlock_irq(&file_data->err_lock);
+	usb_kill_anchored_urbs(&file_data->submitted);
+	return 0;
+}
+
+static int usbtmc_ioctl_cleanup_io(struct usbtmc_file_data *file_data)
+{
+	usb_kill_anchored_urbs(&file_data->submitted);
+	usb_scuttle_anchored_urbs(&file_data->in_anchor);
+	spin_lock_irq(&file_data->err_lock);
+	file_data->in_status = 0;
+	file_data->in_transfer_size = 0;
+	file_data->out_status = 0;
+	file_data->out_transfer_size = 0;
+	spin_unlock_irq(&file_data->err_lock);
+
+	file_data->in_urbs_used = 0;
 	return 0;
 }
 
@@ -1090,7 +1791,7 @@ static int get_capabilities(struct usbtmc_device_data *data)
 	rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0),
 			     USBTMC_REQUEST_GET_CAPABILITIES,
 			     USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			     0, 0, buffer, 0x18, USBTMC_TIMEOUT);
+			     0, 0, buffer, 0x18, USB_CTRL_GET_TIMEOUT);
 	if (rv < 0) {
 		dev_err(dev, "usb_control_msg returned %d\n", rv);
 		goto err_out;
@@ -1147,72 +1848,6 @@ static const struct attribute_group capability_attr_grp = {
 	.attrs = capability_attrs,
 };
 
-static ssize_t TermChar_show(struct device *dev,
-			     struct device_attribute *attr, char *buf)
-{
-	struct usb_interface *intf = to_usb_interface(dev);
-	struct usbtmc_device_data *data = usb_get_intfdata(intf);
-
-	return sprintf(buf, "%c\n", data->TermChar);
-}
-
-static ssize_t TermChar_store(struct device *dev,
-			      struct device_attribute *attr,
-			      const char *buf, size_t count)
-{
-	struct usb_interface *intf = to_usb_interface(dev);
-	struct usbtmc_device_data *data = usb_get_intfdata(intf);
-
-	if (count < 1)
-		return -EINVAL;
-	data->TermChar = buf[0];
-	return count;
-}
-static DEVICE_ATTR_RW(TermChar);
-
-#define data_attribute(name)						\
-static ssize_t name##_show(struct device *dev,				\
-			   struct device_attribute *attr, char *buf)	\
-{									\
-	struct usb_interface *intf = to_usb_interface(dev);		\
-	struct usbtmc_device_data *data = usb_get_intfdata(intf);	\
-									\
-	return sprintf(buf, "%d\n", data->name);			\
-}									\
-static ssize_t name##_store(struct device *dev,				\
-			    struct device_attribute *attr,		\
-			    const char *buf, size_t count)		\
-{									\
-	struct usb_interface *intf = to_usb_interface(dev);		\
-	struct usbtmc_device_data *data = usb_get_intfdata(intf);	\
-	ssize_t result;							\
-	unsigned val;							\
-									\
-	result = sscanf(buf, "%u\n", &val);				\
-	if (result != 1)						\
-		result = -EINVAL;					\
-	data->name = val;						\
-	if (result < 0)							\
-		return result;						\
-	else								\
-		return count;						\
-}									\
-static DEVICE_ATTR_RW(name)
-
-data_attribute(TermCharEnabled);
-data_attribute(auto_abort);
-
-static struct attribute *data_attrs[] = {
-	&dev_attr_TermChar.attr,
-	&dev_attr_TermCharEnabled.attr,
-	&dev_attr_auto_abort.attr,
-	NULL,
-};
-
-static const struct attribute_group data_attr_grp = {
-	.attrs = data_attrs,
-};
-
 static int usbtmc_ioctl_indicator_pulse(struct usbtmc_device_data *data)
 {
 	struct device *dev;
@@ -1229,7 +1864,7 @@ static int usbtmc_ioctl_indicator_pulse(struct usbtmc_device_data *data)
 			     usb_rcvctrlpipe(data->usb_dev, 0),
 			     USBTMC_REQUEST_INDICATOR_PULSE,
 			     USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			     0, 0, buffer, 0x01, USBTMC_TIMEOUT);
+			     0, 0, buffer, 0x01, USB_CTRL_GET_TIMEOUT);
 
 	if (rv < 0) {
 		dev_err(dev, "usb_control_msg returned %d\n", rv);
@@ -1250,6 +1885,63 @@ exit:
 	return rv;
 }
 
+static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
+				void __user *arg)
+{
+	struct device *dev = &data->intf->dev;
+	struct usbtmc_ctrlrequest request;
+	u8 *buffer = NULL;
+	int rv;
+	unsigned long res;
+
+	res = copy_from_user(&request, arg, sizeof(struct usbtmc_ctrlrequest));
+	if (res)
+		return -EFAULT;
+
+	if (request.req.wLength > USBTMC_BUFSIZE)
+		return -EMSGSIZE;
+
+	if (request.req.wLength) {
+		buffer = kmalloc(request.req.wLength, GFP_KERNEL);
+		if (!buffer)
+			return -ENOMEM;
+
+		if ((request.req.bRequestType & USB_DIR_IN) == 0) {
+			/* Send control data to device */
+			res = copy_from_user(buffer, request.data,
+					     request.req.wLength);
+			if (res) {
+				rv = -EFAULT;
+				goto exit;
+			}
+		}
+	}
+
+	rv = usb_control_msg(data->usb_dev,
+			usb_rcvctrlpipe(data->usb_dev, 0),
+			request.req.bRequest,
+			request.req.bRequestType,
+			request.req.wValue,
+			request.req.wIndex,
+			buffer, request.req.wLength, USB_CTRL_GET_TIMEOUT);
+
+	if (rv < 0) {
+		dev_err(dev, "%s failed %d\n", __func__, rv);
+		goto exit;
+	}
+
+	if (rv && (request.req.bRequestType & USB_DIR_IN)) {
+		/* Read control data from device */
+		res = copy_to_user(request.data, buffer, rv);
+		if (res)
+			rv = -EFAULT;
+	}
+
+ exit:
+	kfree(buffer);
+	return rv;
+}
+
 /*
  * Get the usb timeout value
  */
@@ -1331,6 +2023,7 @@ static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	struct usbtmc_file_data *file_data;
 	struct usbtmc_device_data *data;
 	int retval = -EBADRQC;
+	__u8 tmp_byte;
 
 	file_data = file->private_data;
 	data = file_data->data;
@@ -1366,6 +2059,10 @@ static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		retval = usbtmc_ioctl_abort_bulk_in(data);
 		break;
 
+	case USBTMC_IOCTL_CTRL_REQUEST:
+		retval = usbtmc_ioctl_request(data, (void __user *)arg);
+		break;
+
 	case USBTMC_IOCTL_GET_TIMEOUT:
 		retval = usbtmc_ioctl_get_timeout(file_data,
 						  (void __user *)arg);
@@ -1386,12 +2083,29 @@ static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 						   (void __user *)arg);
 		break;
 
+	case USBTMC_IOCTL_WRITE:
+		retval = usbtmc_ioctl_generic_write(file_data,
+						    (void __user *)arg);
+		break;
+
+	case USBTMC_IOCTL_READ:
+		retval = usbtmc_ioctl_generic_read(file_data,
+						   (void __user *)arg);
+		break;
+
+	case USBTMC_IOCTL_WRITE_RESULT:
+		retval = usbtmc_ioctl_write_result(file_data,
+						   (void __user *)arg);
+		break;
+
+	case USBTMC_IOCTL_API_VERSION:
+		retval = put_user(USBTMC_API_VERSION,
+				  (__u32 __user *)arg);
+		break;
+
 	case USBTMC488_IOCTL_GET_CAPS:
-		retval = copy_to_user((void __user *)arg,
-				&data->usb488_caps,
-				sizeof(data->usb488_caps));
-		if (retval)
-			retval = -EFAULT;
+		retval = put_user(data->usb488_caps,
+				  (unsigned char __user *)arg);
 		break;
 
 	case USBTMC488_IOCTL_READ_STB:
@@ -1417,6 +2131,30 @@ static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case USBTMC488_IOCTL_TRIGGER:
 		retval = usbtmc488_ioctl_trigger(file_data);
 		break;
+
+	case USBTMC488_IOCTL_WAIT_SRQ:
+		retval = usbtmc488_ioctl_wait_srq(file_data,
+						  (__u32 __user *)arg);
+		break;
+
+	case USBTMC_IOCTL_MSG_IN_ATTR:
+		retval = put_user(file_data->bmTransferAttributes,
+				  (__u8 __user *)arg);
+		break;
+
+	case USBTMC_IOCTL_AUTO_ABORT:
+		retval = get_user(tmp_byte, (unsigned char __user *)arg);
+		if (retval == 0)
+			file_data->auto_abort = !!tmp_byte;
+		break;
+
+	case USBTMC_IOCTL_CANCEL_IO:
+		retval = usbtmc_ioctl_cancel_io(file_data);
+		break;
+
+	case USBTMC_IOCTL_CLEANUP_IO:
+		retval = usbtmc_ioctl_cleanup_io(file_data);
+		break;
 	}
 
 skip_io_on_zombie:
@@ -1446,7 +2184,28 @@ static __poll_t usbtmc_poll(struct file *file, poll_table *wait)
 
 	poll_wait(file, &data->waitq, wait);
 
-	mask = (atomic_read(&file_data->srq_asserted)) ? EPOLLPRI : 0;
+	/* Note that EPOLLPRI is now assigned to SRQ, and
+	 * EPOLLIN|EPOLLRDNORM to normal read data.
+	 */
+	mask = 0;
+	if (atomic_read(&file_data->srq_asserted))
+		mask |= EPOLLPRI;
+
+	/* Note that the anchor submitted includes all urbs for BULK IN
+	 * and OUT. So EPOLLOUT is signaled when BULK OUT is empty and
+	 * all BULK IN urbs are completed and moved to in_anchor.
+	 */
+	if (usb_anchor_empty(&file_data->submitted))
+		mask |= (EPOLLOUT | EPOLLWRNORM);
+	if (!usb_anchor_empty(&file_data->in_anchor))
+		mask |= (EPOLLIN | EPOLLRDNORM);
+
+	spin_lock_irq(&file_data->err_lock);
+	if (file_data->in_status || file_data->out_status)
+		mask |= EPOLLERR;
+	spin_unlock_irq(&file_data->err_lock);
+
+	dev_dbg(&data->intf->dev, "poll mask = %x\n", mask);
 
 no_poll:
 	mutex_unlock(&data->io_mutex);
@@ -1459,6 +2218,7 @@ static const struct file_operations fops = {
 	.write		= usbtmc_write,
 	.open		= usbtmc_open,
 	.release	= usbtmc_release,
+	.flush		= usbtmc_flush,
 	.unlocked_ioctl	= usbtmc_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= usbtmc_ioctl,
@@ -1552,7 +2312,9 @@ static void usbtmc_free_int(struct usbtmc_device_data *data)
 		return;
 	usb_kill_urb(data->iin_urb);
 	kfree(data->iin_buffer);
+	data->iin_buffer = NULL;
 	usb_free_urb(data->iin_urb);
+	data->iin_urb = NULL;
 	kref_put(&data->kref, usbtmc_delete);
 }
 
@@ -1585,8 +2347,6 @@ static int usbtmc_probe(struct usb_interface *intf,
 
 	/* Initialize USBTMC bTag and other fields */
 	data->bTag	= 1;
-	data->TermCharEnabled = 0;
-	data->TermChar = '\n';
 	/*  2 <= bTag <= 127   USBTMC-USB488 subclass specification 4.3.1 */
 	data->iin_bTag = 2;
 
@@ -1603,6 +2363,7 @@ static int usbtmc_probe(struct usb_interface *intf,
 	}
 
 	data->bulk_in = bulk_in->bEndpointAddress;
+	data->wMaxPacketSize = usb_endpoint_maxp(bulk_in);
 	dev_dbg(&intf->dev, "Found bulk in endpoint at %u\n", data->bulk_in);
 
 	data->bulk_out = bulk_out->bEndpointAddress;
@@ -1659,12 +2420,10 @@ static int usbtmc_probe(struct usb_interface *intf,
 		}
 	}
 
-	retcode = sysfs_create_group(&intf->dev.kobj, &data_attr_grp);
-
 	retcode = usb_register_dev(intf, &usbtmc_class);
 	if (retcode) {
-		dev_err(&intf->dev, "Not able to get a minor"
-			" (base %u, slice default): %d\n", USBTMC_MINOR_BASE,
+		dev_err(&intf->dev, "Not able to get a minor (base %u, slice default): %d\n",
+			USBTMC_MINOR_BASE,
 			retcode);
 		goto error_register;
 	}
@@ -1674,7 +2433,6 @@ static int usbtmc_probe(struct usb_interface *intf,
 
 error_register:
 	sysfs_remove_group(&intf->dev.kobj, &capability_attr_grp);
-	sysfs_remove_group(&intf->dev.kobj, &data_attr_grp);
 	usbtmc_free_int(data);
 err_put:
 	kref_put(&data->kref, usbtmc_delete);
@@ -1684,26 +2442,103 @@ err_put:
 static void usbtmc_disconnect(struct usb_interface *intf)
 {
 	struct usbtmc_device_data *data  = usb_get_intfdata(intf);
+	struct list_head *elem;
 
 	usb_deregister_dev(intf, &usbtmc_class);
 	sysfs_remove_group(&intf->dev.kobj, &capability_attr_grp);
-	sysfs_remove_group(&intf->dev.kobj, &data_attr_grp);
 	mutex_lock(&data->io_mutex);
 	data->zombie = 1;
 	wake_up_interruptible_all(&data->waitq);
+	list_for_each(elem, &data->file_list) {
+		struct usbtmc_file_data *file_data;
+
+		file_data = list_entry(elem,
+				       struct usbtmc_file_data,
+				       file_elem);
+		usb_kill_anchored_urbs(&file_data->submitted);
+		usb_scuttle_anchored_urbs(&file_data->in_anchor);
+	}
 	mutex_unlock(&data->io_mutex);
 	usbtmc_free_int(data);
 	kref_put(&data->kref, usbtmc_delete);
 }
 
+static void usbtmc_draw_down(struct usbtmc_file_data *file_data)
+{
+	int time;
+
+	time = usb_wait_anchor_empty_timeout(&file_data->submitted, 1000);
+	if (!time)
+		usb_kill_anchored_urbs(&file_data->submitted);
+	usb_scuttle_anchored_urbs(&file_data->in_anchor);
+}
+
 static int usbtmc_suspend(struct usb_interface *intf, pm_message_t message)
 {
-	/* this driver does not have pending URBs */
+	struct usbtmc_device_data *data = usb_get_intfdata(intf);
+	struct list_head *elem;
+
+	if (!data)
+		return 0;
+
+	mutex_lock(&data->io_mutex);
+	list_for_each(elem, &data->file_list) {
+		struct usbtmc_file_data *file_data;
+
+		file_data = list_entry(elem,
+				       struct usbtmc_file_data,
+				       file_elem);
+		usbtmc_draw_down(file_data);
+	}
+
+	if (data->iin_ep_present && data->iin_urb)
+		usb_kill_urb(data->iin_urb);
+
+	mutex_unlock(&data->io_mutex);
 	return 0;
 }
 
 static int usbtmc_resume(struct usb_interface *intf)
 {
+	struct usbtmc_device_data *data = usb_get_intfdata(intf);
+	int retcode = 0;
+
+	if (data->iin_ep_present && data->iin_urb)
+		retcode = usb_submit_urb(data->iin_urb, GFP_KERNEL);
+	if (retcode)
+		dev_err(&intf->dev, "Failed to submit iin_urb\n");
+
+	return retcode;
+}
+
+static int usbtmc_pre_reset(struct usb_interface *intf)
+{
+	struct usbtmc_device_data *data  = usb_get_intfdata(intf);
+	struct list_head *elem;
+
+	if (!data)
+		return 0;
+
+	mutex_lock(&data->io_mutex);
+
+	list_for_each(elem, &data->file_list) {
+		struct usbtmc_file_data *file_data;
+
+		file_data = list_entry(elem,
+				       struct usbtmc_file_data,
+				       file_elem);
+		usbtmc_ioctl_cancel_io(file_data);
+	}
+
+	return 0;
+}
+
+static int usbtmc_post_reset(struct usb_interface *intf)
+{
+	struct usbtmc_device_data *data  = usb_get_intfdata(intf);
+
+	mutex_unlock(&data->io_mutex);
+
 	return 0;
 }
 
@@ -1714,6 +2549,8 @@ static struct usb_driver usbtmc_driver = {
 	.disconnect	= usbtmc_disconnect,
 	.suspend	= usbtmc_suspend,
 	.resume		= usbtmc_resume,
+	.pre_reset	= usbtmc_pre_reset,
+	.post_reset	= usbtmc_post_reset,
 };
 
 module_usb_driver(usbtmc_driver);
diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c
index 77eef8acff94..f641342cdec0 100644
--- a/drivers/usb/core/buffer.c
+++ b/drivers/usb/core/buffer.c
@@ -101,12 +101,8 @@ void hcd_buffer_destroy(struct usb_hcd *hcd)
 		return;
 
 	for (i = 0; i < HCD_BUFFER_POOLS; i++) {
-		struct dma_pool *pool = hcd->pool[i];
-
-		if (pool) {
-			dma_pool_destroy(pool);
-			hcd->pool[i] = NULL;
-		}
+		dma_pool_destroy(hcd->pool[i]);
+		hcd->pool[i] = NULL;
 	}
 }
 
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index a1f225f077cd..53564386ed57 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -510,7 +510,6 @@ int usb_driver_claim_interface(struct usb_driver *driver,
 				struct usb_interface *iface, void *priv)
 {
 	struct device *dev;
-	struct usb_device *udev;
 	int retval = 0;
 
 	if (!iface)
@@ -524,8 +523,6 @@ int usb_driver_claim_interface(struct usb_driver *driver,
 	if (!iface->authorized)
 		return -ENODEV;
 
-	udev = interface_to_usbdev(iface);
-
 	dev->driver = &driver->drvwrap.driver;
 	usb_set_intfdata(iface, priv);
 	iface->needs_binding = 0;
diff --git a/drivers/usb/core/generic.c b/drivers/usb/core/generic.c
index bc8242bc4564..356b05c82dbc 100644
--- a/drivers/usb/core/generic.c
+++ b/drivers/usb/core/generic.c
@@ -21,6 +21,7 @@
 
 #include <linux/usb.h>
 #include <linux/usb/hcd.h>
+#include <uapi/linux/usb/audio.h>
 #include "usb.h"
 
 static inline const char *plural(int n)
@@ -42,6 +43,16 @@ static int is_activesync(struct usb_interface_descriptor *desc)
 		&& desc->bInterfaceProtocol == 1;
 }
 
+static bool is_audio(struct usb_interface_descriptor *desc)
+{
+	return desc->bInterfaceClass == USB_CLASS_AUDIO;
+}
+
+static bool is_uac3_config(struct usb_interface_descriptor *desc)
+{
+	return desc->bInterfaceProtocol == UAC_VERSION_3;
+}
+
 int usb_choose_configuration(struct usb_device *udev)
 {
 	int i;
@@ -121,6 +132,22 @@ int usb_choose_configuration(struct usb_device *udev)
 #endif
 		}
 
+		/*
+		 * Select first configuration as default for audio so that
+		 * devices that don't comply with UAC3 protocol are supported.
+		 * But, still iterate through other configurations and
+		 * select UAC3 compliant config if present.
+		 */
+		if (i == 0 && num_configs > 1 && desc && is_audio(desc)) {
+			best = c;
+			continue;
+		}
+
+		if (i > 0 && desc && is_audio(desc) && is_uac3_config(desc)) {
+			best = c;
+			break;
+		}
+
 		/* From the remaining configs, choose the first one whose
 		 * first interface is for a non-vendor-specific class.
 		 * Reason: Linux is more likely to have a class driver
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 1c21955fe7c0..487025d31d44 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1738,7 +1738,6 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
 	struct usb_hcd *hcd = bus_to_hcd(urb->dev->bus);
 	struct usb_anchor *anchor = urb->anchor;
 	int status = urb->unlinked;
-	unsigned long flags;
 
 	urb->hcpriv = NULL;
 	if (unlikely((urb->transfer_flags & URB_SHORT_NOT_OK) &&
@@ -1755,20 +1754,7 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
 
 	/* pass ownership to the completion handler */
 	urb->status = status;
-
-	/*
-	 * We disable local IRQs here avoid possible deadlock because
-	 * drivers may call spin_lock() to hold lock which might be
-	 * acquired in one hard interrupt handler.
-	 *
-	 * The local_irq_save()/local_irq_restore() around complete()
-	 * will be removed if current USB drivers have been cleaned up
-	 * and no one may trigger the above deadlock situation when
-	 * running complete() in tasklet.
-	 */
-	local_irq_save(flags);
 	urb->complete(urb);
-	local_irq_restore(flags);
 
 	usb_anchor_resume_wakeups(anchor);
 	atomic_dec(&urb->use_count);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 462ce49f683a..c6077d582d29 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -28,6 +28,7 @@
 #include <linux/mutex.h>
 #include <linux/random.h>
 #include <linux/pm_qos.h>
+#include <linux/kobject.h>
 
 #include <linux/uaccess.h>
 #include <asm/byteorder.h>
@@ -2660,11 +2661,13 @@ static bool use_new_scheme(struct usb_device *udev, int retry,
 {
 	int old_scheme_first_port =
 		port_dev->quirks & USB_PORT_QUIRK_OLD_SCHEME;
+	int quick_enumeration = (udev->speed == USB_SPEED_HIGH);
 
 	if (udev->speed >= USB_SPEED_SUPER)
 		return false;
 
-	return USE_NEW_SCHEME(retry, old_scheme_first_port || old_scheme_first);
+	return USE_NEW_SCHEME(retry, old_scheme_first_port || old_scheme_first
+			      || quick_enumeration);
 }
 
 /* Is a USB 3.0 port in the Inactive or Compliance Mode state?
@@ -5147,6 +5150,42 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
 	usb_lock_port(port_dev);
 }
 
+/* Handle notifying userspace about hub over-current events */
+static void port_over_current_notify(struct usb_port *port_dev)
+{
+	static char *envp[] = { NULL, NULL, NULL };
+	struct device *hub_dev;
+	char *port_dev_path;
+
+	sysfs_notify(&port_dev->dev.kobj, NULL, "over_current_count");
+
+	hub_dev = port_dev->dev.parent;
+
+	if (!hub_dev)
+		return;
+
+	port_dev_path = kobject_get_path(&port_dev->dev.kobj, GFP_KERNEL);
+	if (!port_dev_path)
+		return;
+
+	envp[0] = kasprintf(GFP_KERNEL, "OVER_CURRENT_PORT=%s", port_dev_path);
+	if (!envp[0])
+		goto exit_path;
+
+	envp[1] = kasprintf(GFP_KERNEL, "OVER_CURRENT_COUNT=%u",
+			port_dev->over_current_count);
+	if (!envp[1])
+		goto exit;
+
+	kobject_uevent_env(&hub_dev->kobj, KOBJ_CHANGE, envp);
+
+	kfree(envp[1]);
+exit:
+	kfree(envp[0]);
+exit_path:
+	kfree(port_dev_path);
+}
+
 static void port_event(struct usb_hub *hub, int port1)
 		__must_hold(&port_dev->status_lock)
 {
@@ -5189,6 +5228,7 @@ static void port_event(struct usb_hub *hub, int port1)
 	if (portchange & USB_PORT_STAT_C_OVERCURRENT) {
 		u16 status = 0, unused;
 		port_dev->over_current_count++;
+		port_over_current_notify(port_dev);
 
 		dev_dbg(&port_dev->dev, "over-current change #%u\n",
 			port_dev->over_current_count);
diff --git a/drivers/usb/core/phy.c b/drivers/usb/core/phy.c
index 9879767452a2..38b2c776c4b4 100644
--- a/drivers/usb/core/phy.c
+++ b/drivers/usb/core/phy.c
@@ -23,10 +23,11 @@ static int usb_phy_roothub_add_phy(struct device *dev, int index,
 				   struct list_head *list)
 {
 	struct usb_phy_roothub *roothub_entry;
-	struct phy *phy = devm_of_phy_get_by_index(dev, dev->of_node, index);
+	struct phy *phy;
 
-	if (IS_ERR_OR_NULL(phy)) {
-		if (!phy || PTR_ERR(phy) == -ENODEV)
+	phy = devm_of_phy_get_by_index(dev, dev->of_node, index);
+	if (IS_ERR(phy)) {
+		if (PTR_ERR(phy) == -ENODEV)
 			return 0;
 		else
 			return PTR_ERR(phy);
diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index 4a2143195395..1a06a4b5fbb1 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -16,6 +16,15 @@ static int usb_port_block_power_off;
 
 static const struct attribute_group *port_dev_group[];
 
+static ssize_t location_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct usb_port *port_dev = to_usb_port(dev);
+
+	return sprintf(buf, "0x%08x\n", port_dev->location);
+}
+static DEVICE_ATTR_RO(location);
+
 static ssize_t connect_type_show(struct device *dev,
 				 struct device_attribute *attr, char *buf)
 {
@@ -140,6 +149,7 @@ static DEVICE_ATTR_RW(usb3_lpm_permit);
 
 static struct attribute *port_dev_attrs[] = {
 	&dev_attr_connect_type.attr,
+	&dev_attr_location.attr,
 	&dev_attr_quirks.attr,
 	&dev_attr_over_current_count.attr,
 	NULL,
diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index cc9c93affa14..30bab8463c96 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -393,6 +393,20 @@ enum dwc2_ep0_state {
  *			0 - No
  *			1 - Yes
  * @hird_threshold:	Value of BESL or HIRD Threshold.
+ * @ref_clk_per:        Indicates in terms of pico seconds the period
+ *                      of ref_clk.
+ *			62500 - 16MHz
+ *                      58823 - 17MHz
+ *                      52083 - 19.2MHz
+ *			50000 - 20MHz
+ *			41666 - 24MHz
+ *			33333 - 30MHz (default)
+ *			25000 - 40MHz
+ * @sof_cnt_wkup_alert: Indicates in term of number of SOF's after which
+ *                      the controller should generate an interrupt if the
+ *                      device had been in L1 state until that period.
+ *                      This is used by SW to initiate Remote WakeUp in the
+ *                      controller so as to sync to the uF number from the host.
  * @activate_stm_fs_transceiver: Activate internal transceiver using GGPIO
  *			register.
  *			0 - Deactivate the transceiver (default)
@@ -416,6 +430,9 @@ enum dwc2_ep0_state {
  *                      back to DWC2_SPEED_PARAM_HIGH while device is gone.
  *			0 - No (default)
  *			1 - Yes
+ * @service_interval:   Enable service interval based scheduling.
+ *                      0 - No
+ *                      1 - Yes
  *
  * The following parameters may be specified when starting the module. These
  * parameters define how the DWC_otg controller should be configured. A
@@ -461,6 +478,7 @@ struct dwc2_core_params {
 	bool lpm_clock_gating;
 	bool besl;
 	bool hird_threshold_en;
+	bool service_interval;
 	u8 hird_threshold;
 	bool activate_stm_fs_transceiver;
 	bool ipg_isoc_en;
@@ -468,6 +486,10 @@ struct dwc2_core_params {
 	u32 max_transfer_size;
 	u32 ahbcfg;
 
+	/* GREFCLK parameters */
+	u32 ref_clk_per;
+	u16 sof_cnt_wkup_alert;
+
 	/* Host parameters */
 	bool host_dma;
 	bool dma_desc_enable;
@@ -605,6 +627,10 @@ struct dwc2_core_params {
  *			FIFO sizing is enabled 16 to 32768
  *			Actual maximum value is autodetected and also
  *			the default.
+ * @service_interval_mode: For enabling service interval based scheduling in the
+ *                         controller.
+ *                           0 - Disable
+ *                           1 - Enable
  */
 struct dwc2_hw_params {
 	unsigned op_mode:3;
@@ -635,6 +661,7 @@ struct dwc2_hw_params {
 	unsigned utmi_phy_data_width:2;
 	unsigned lpm_mode:1;
 	unsigned ipg_isoc_en:1;
+	unsigned service_interval_mode:1;
 	u32 snpsid;
 	u32 dev_ep_dirs;
 	u32 g_tx_fifo_size[MAX_EPS_CHANNELS];
@@ -1354,6 +1381,7 @@ int dwc2_hsotg_tx_fifo_count(struct dwc2_hsotg *hsotg);
 int dwc2_hsotg_tx_fifo_total_depth(struct dwc2_hsotg *hsotg);
 int dwc2_hsotg_tx_fifo_average_depth(struct dwc2_hsotg *hsotg);
 void dwc2_gadget_init_lpm(struct dwc2_hsotg *hsotg);
+void dwc2_gadget_program_ref_clk(struct dwc2_hsotg *hsotg);
 #else
 static inline int dwc2_hsotg_remove(struct dwc2_hsotg *dwc2)
 { return 0; }
@@ -1388,6 +1416,7 @@ static inline int dwc2_hsotg_tx_fifo_total_depth(struct dwc2_hsotg *hsotg)
 static inline int dwc2_hsotg_tx_fifo_average_depth(struct dwc2_hsotg *hsotg)
 { return 0; }
 static inline void dwc2_gadget_init_lpm(struct dwc2_hsotg *hsotg) {}
+static inline void dwc2_gadget_program_ref_clk(struct dwc2_hsotg *hsotg) {}
 #endif
 
 #if IS_ENABLED(CONFIG_USB_DWC2_HOST) || IS_ENABLED(CONFIG_USB_DWC2_DUAL_ROLE)
diff --git a/drivers/usb/dwc2/debugfs.c b/drivers/usb/dwc2/debugfs.c
index 22d015b0424f..7f62f4cdc265 100644
--- a/drivers/usb/dwc2/debugfs.c
+++ b/drivers/usb/dwc2/debugfs.c
@@ -701,6 +701,7 @@ static int params_show(struct seq_file *seq, void *v)
 	print_param(seq, p, besl);
 	print_param(seq, p, hird_threshold_en);
 	print_param(seq, p, hird_threshold);
+	print_param(seq, p, service_interval);
 	print_param(seq, p, host_dma);
 	print_param(seq, p, g_dma);
 	print_param(seq, p, g_dma_desc);
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 220c0f9b89b0..2d6d2c8244de 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -123,6 +123,24 @@ static inline void dwc2_gadget_incr_frame_num(struct dwc2_hsotg_ep *hs_ep)
 }
 
 /**
+ * dwc2_gadget_dec_frame_num_by_one - Decrements the targeted frame number
+ *                                    by one.
+ * @hs_ep: The endpoint.
+ *
+ * This function used in service interval based scheduling flow to calculate
+ * descriptor frame number filed value. For service interval mode frame
+ * number in descriptor should point to last (u)frame in the interval.
+ *
+ */
+static inline void dwc2_gadget_dec_frame_num_by_one(struct dwc2_hsotg_ep *hs_ep)
+{
+	if (hs_ep->target_frame)
+		hs_ep->target_frame -= 1;
+	else
+		hs_ep->target_frame = DSTS_SOFFN_LIMIT;
+}
+
+/**
  * dwc2_hsotg_en_gsint - enable one or more of the general interrupt
  * @hsotg: The device state
  * @ints: A bitmask of the interrupts to enable
@@ -228,6 +246,27 @@ int dwc2_hsotg_tx_fifo_total_depth(struct dwc2_hsotg *hsotg)
 }
 
 /**
+ * dwc2_gadget_wkup_alert_handler - Handler for WKUP_ALERT interrupt
+ *
+ * @hsotg: Programming view of the DWC_otg controller
+ *
+ */
+static void dwc2_gadget_wkup_alert_handler(struct dwc2_hsotg *hsotg)
+{
+	u32 gintsts2;
+	u32 gintmsk2;
+
+	gintsts2 = dwc2_readl(hsotg, GINTSTS2);
+	gintmsk2 = dwc2_readl(hsotg, GINTMSK2);
+
+	if (gintsts2 & GINTSTS2_WKUP_ALERT_INT) {
+		dev_dbg(hsotg->dev, "%s: Wkup_Alert_Int\n", __func__);
+		dwc2_clear_bit(hsotg, GINTSTS2, GINTSTS2_WKUP_ALERT_INT);
+		dwc2_set_bit(hsotg, DCFG, DCTL_RMTWKUPSIG);
+	}
+}
+
+/**
  * dwc2_hsotg_tx_fifo_average_depth - returns average depth of device mode
  * TX FIFOs
  *
@@ -2812,6 +2851,23 @@ static void dwc2_gadget_handle_nak(struct dwc2_hsotg_ep *hs_ep)
 		if (using_desc_dma(hsotg)) {
 			hs_ep->target_frame = hsotg->frame_number;
 			dwc2_gadget_incr_frame_num(hs_ep);
+
+			/* In service interval mode target_frame must
+			 * be set to last (u)frame of the service interval.
+			 */
+			if (hsotg->params.service_interval) {
+				/* Set target_frame to the first (u)frame of
+				 * the service interval
+				 */
+				hs_ep->target_frame &= ~hs_ep->interval + 1;
+
+				/* Set target_frame to the last (u)frame of
+				 * the service interval
+				 */
+				dwc2_gadget_incr_frame_num(hs_ep);
+				dwc2_gadget_dec_frame_num_by_one(hs_ep);
+			}
+
 			dwc2_gadget_start_isoc_ddma(hs_ep);
 			return;
 		}
@@ -3109,6 +3165,8 @@ static void kill_all_requests(struct dwc2_hsotg *hsotg,
 		dwc2_hsotg_txfifo_flush(hsotg, ep->fifo_index);
 }
 
+static int dwc2_hsotg_ep_disable(struct usb_ep *ep);
+
 /**
  * dwc2_hsotg_disconnect - disconnect service
  * @hsotg: The device state.
@@ -3127,13 +3185,12 @@ void dwc2_hsotg_disconnect(struct dwc2_hsotg *hsotg)
 	hsotg->connected = 0;
 	hsotg->test_mode = 0;
 
+	/* all endpoints should be shutdown */
 	for (ep = 0; ep < hsotg->num_of_eps; ep++) {
 		if (hsotg->eps_in[ep])
-			kill_all_requests(hsotg, hsotg->eps_in[ep],
-					  -ESHUTDOWN);
+			dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep);
 		if (hsotg->eps_out[ep])
-			kill_all_requests(hsotg, hsotg->eps_out[ep],
-					  -ESHUTDOWN);
+			dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep);
 	}
 
 	call_gadget(hsotg, disconnect);
@@ -3191,13 +3248,23 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg,
 	u32 val;
 	u32 usbcfg;
 	u32 dcfg = 0;
+	int ep;
 
 	/* Kill any ep0 requests as controller will be reinitialized */
 	kill_all_requests(hsotg, hsotg->eps_out[0], -ECONNRESET);
 
-	if (!is_usb_reset)
+	if (!is_usb_reset) {
 		if (dwc2_core_reset(hsotg, true))
 			return;
+	} else {
+		/* all endpoints should be shutdown */
+		for (ep = 1; ep < hsotg->num_of_eps; ep++) {
+			if (hsotg->eps_in[ep])
+				dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep);
+			if (hsotg->eps_out[ep])
+				dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep);
+		}
+	}
 
 	/*
 	 * we must now enable ep0 ready for host detection and then
@@ -3312,6 +3379,10 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg,
 		dwc2_set_bit(hsotg, DIEPMSK, DIEPMSK_BNAININTRMSK);
 	}
 
+	/* Enable Service Interval mode if supported */
+	if (using_desc_dma(hsotg) && hsotg->params.service_interval)
+		dwc2_set_bit(hsotg, DCTL, DCTL_SERVICE_INTERVAL_SUPPORTED);
+
 	dwc2_writel(hsotg, 0, DAINTMSK);
 
 	dev_dbg(hsotg->dev, "EP0: DIEPCTL0=0x%08x, DOEPCTL0=0x%08x\n",
@@ -3368,6 +3439,10 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg,
 	/* configure the core to support LPM */
 	dwc2_gadget_init_lpm(hsotg);
 
+	/* program GREFCLK register if needed */
+	if (using_desc_dma(hsotg) && hsotg->params.service_interval)
+		dwc2_gadget_program_ref_clk(hsotg);
+
 	/* must be at-least 3ms to allow bus to see disconnect */
 	mdelay(3);
 
@@ -3676,6 +3751,10 @@ irq_retry:
 	if (gintsts & IRQ_RETRY_MASK && --retry_count > 0)
 		goto irq_retry;
 
+	/* Check WKUP_ALERT interrupt*/
+	if (hsotg->params.service_interval)
+		dwc2_gadget_wkup_alert_handler(hsotg);
+
 	spin_unlock(&hsotg->lock);
 
 	return IRQ_HANDLED;
@@ -3993,6 +4072,7 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep)
 	unsigned long flags;
 	u32 epctrl_reg;
 	u32 ctrl;
+	int locked;
 
 	dev_dbg(hsotg->dev, "%s(ep %p)\n", __func__, ep);
 
@@ -4008,7 +4088,9 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep)
 
 	epctrl_reg = dir_in ? DIEPCTL(index) : DOEPCTL(index);
 
-	spin_lock_irqsave(&hsotg->lock, flags);
+	locked = spin_is_locked(&hsotg->lock);
+	if (!locked)
+		spin_lock_irqsave(&hsotg->lock, flags);
 
 	ctrl = dwc2_readl(hsotg, epctrl_reg);
 
@@ -4032,7 +4114,9 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep)
 	hs_ep->fifo_index = 0;
 	hs_ep->fifo_size = 0;
 
-	spin_unlock_irqrestore(&hsotg->lock, flags);
+	if (!locked)
+		spin_unlock_irqrestore(&hsotg->lock, flags);
+
 	return 0;
 }
 
@@ -4944,6 +5028,29 @@ void dwc2_gadget_init_lpm(struct dwc2_hsotg *hsotg)
 	val |= hsotg->params.besl ? GLPMCFG_ENBESL : 0;
 	dwc2_writel(hsotg, val, GLPMCFG);
 	dev_dbg(hsotg->dev, "GLPMCFG=0x%08x\n", dwc2_readl(hsotg, GLPMCFG));
+
+	/* Unmask WKUP_ALERT Interrupt */
+	if (hsotg->params.service_interval)
+		dwc2_set_bit(hsotg, GINTMSK2, GINTMSK2_WKUP_ALERT_INT_MSK);
+}
+
+/**
+ * dwc2_gadget_program_ref_clk - Program GREFCLK register in device mode
+ *
+ * @hsotg: Programming view of DWC_otg controller
+ *
+ */
+void dwc2_gadget_program_ref_clk(struct dwc2_hsotg *hsotg)
+{
+	u32 val = 0;
+
+	val |= GREFCLK_REF_CLK_MODE;
+	val |= hsotg->params.ref_clk_per << GREFCLK_REFCLKPER_SHIFT;
+	val |= hsotg->params.sof_cnt_wkup_alert <<
+	       GREFCLK_SOF_CNT_WKUP_ALERT_SHIFT;
+
+	dwc2_writel(hsotg, val, GREFCLK);
+	dev_dbg(hsotg->dev, "GREFCLK=0x%08x\n", dwc2_readl(hsotg, GREFCLK));
 }
 
 /**
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 2bd6e6bfc241..dd82fa516f3f 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -358,16 +358,10 @@ static void dwc2_gusbcfg_init(struct dwc2_hsotg *hsotg)
 
 static int dwc2_vbus_supply_init(struct dwc2_hsotg *hsotg)
 {
-	int ret;
-
-	hsotg->vbus_supply = devm_regulator_get_optional(hsotg->dev, "vbus");
-	if (IS_ERR(hsotg->vbus_supply)) {
-		ret = PTR_ERR(hsotg->vbus_supply);
-		hsotg->vbus_supply = NULL;
-		return ret == -ENODEV ? 0 : ret;
-	}
+	if (hsotg->vbus_supply)
+		return regulator_enable(hsotg->vbus_supply);
 
-	return regulator_enable(hsotg->vbus_supply);
+	return 0;
 }
 
 static int dwc2_vbus_supply_exit(struct dwc2_hsotg *hsotg)
@@ -1328,14 +1322,11 @@ static void dwc2_hc_write_packet(struct dwc2_hsotg *hsotg,
 	u32 remaining_count;
 	u32 byte_count;
 	u32 dword_count;
-	u32 __iomem *data_fifo;
 	u32 *data_buf = (u32 *)chan->xfer_buf;
 
 	if (dbg_hc(chan))
 		dev_vdbg(hsotg->dev, "%s()\n", __func__);
 
-	data_fifo = (u32 __iomem *)(hsotg->regs + HCFIFO(chan->hc_num));
-
 	remaining_count = chan->xfer_len - chan->xfer_count;
 	if (remaining_count > chan->max_packet)
 		byte_count = chan->max_packet;
@@ -3564,6 +3555,7 @@ static int dwc2_hcd_hub_control(struct dwc2_hsotg *hsotg, u16 typereq,
 	u32 port_status;
 	u32 speed;
 	u32 pcgctl;
+	u32 pwr;
 
 	switch (typereq) {
 	case ClearHubFeature:
@@ -3612,8 +3604,11 @@ static int dwc2_hcd_hub_control(struct dwc2_hsotg *hsotg, u16 typereq,
 			dev_dbg(hsotg->dev,
 				"ClearPortFeature USB_PORT_FEAT_POWER\n");
 			hprt0 = dwc2_read_hprt0(hsotg);
+			pwr = hprt0 & HPRT0_PWR;
 			hprt0 &= ~HPRT0_PWR;
 			dwc2_writel(hsotg, hprt0, HPRT0);
+			if (pwr)
+				dwc2_vbus_supply_exit(hsotg);
 			break;
 
 		case USB_PORT_FEAT_INDICATOR:
@@ -3823,8 +3818,11 @@ static int dwc2_hcd_hub_control(struct dwc2_hsotg *hsotg, u16 typereq,
 			dev_dbg(hsotg->dev,
 				"SetPortFeature - USB_PORT_FEAT_POWER\n");
 			hprt0 = dwc2_read_hprt0(hsotg);
+			pwr = hprt0 & HPRT0_PWR;
 			hprt0 |= HPRT0_PWR;
 			dwc2_writel(hsotg, hprt0, HPRT0);
+			if (!pwr)
+				dwc2_vbus_supply_init(hsotg);
 			break;
 
 		case USB_PORT_FEAT_RESET:
@@ -3841,6 +3839,7 @@ static int dwc2_hcd_hub_control(struct dwc2_hsotg *hsotg, u16 typereq,
 			dwc2_writel(hsotg, 0, PCGCTL);
 
 			hprt0 = dwc2_read_hprt0(hsotg);
+			pwr = hprt0 & HPRT0_PWR;
 			/* Clear suspend bit if resetting from suspend state */
 			hprt0 &= ~HPRT0_SUSP;
 
@@ -3854,6 +3853,8 @@ static int dwc2_hcd_hub_control(struct dwc2_hsotg *hsotg, u16 typereq,
 				dev_dbg(hsotg->dev,
 					"In host mode, hprt0=%08x\n", hprt0);
 				dwc2_writel(hsotg, hprt0, HPRT0);
+				if (!pwr)
+					dwc2_vbus_supply_init(hsotg);
 			}
 
 			/* Clear reset bit in 10ms (FS/LS) or 50ms (HS) */
@@ -4393,6 +4394,8 @@ static int _dwc2_hcd_start(struct usb_hcd *hcd)
 	struct dwc2_hsotg *hsotg = dwc2_hcd_to_hsotg(hcd);
 	struct usb_bus *bus = hcd_to_bus(hcd);
 	unsigned long flags;
+	u32 hprt0;
+	int ret;
 
 	dev_dbg(hsotg->dev, "DWC OTG HCD START\n");
 
@@ -4408,6 +4411,17 @@ static int _dwc2_hcd_start(struct usb_hcd *hcd)
 
 	dwc2_hcd_reinit(hsotg);
 
+	hprt0 = dwc2_read_hprt0(hsotg);
+	/* Has vbus power been turned on in dwc2_core_host_init ? */
+	if (hprt0 & HPRT0_PWR) {
+		/* Enable external vbus supply before resuming root hub */
+		spin_unlock_irqrestore(&hsotg->lock, flags);
+		ret = dwc2_vbus_supply_init(hsotg);
+		if (ret)
+			return ret;
+		spin_lock_irqsave(&hsotg->lock, flags);
+	}
+
 	/* Initialize and connect root hub if one is not already attached */
 	if (bus->root_hub) {
 		dev_dbg(hsotg->dev, "DWC OTG HCD Has Root Hub\n");
@@ -4417,7 +4431,7 @@ static int _dwc2_hcd_start(struct usb_hcd *hcd)
 
 	spin_unlock_irqrestore(&hsotg->lock, flags);
 
-	return dwc2_vbus_supply_init(hsotg);
+	return 0;
 }
 
 /*
@@ -4428,6 +4442,7 @@ static void _dwc2_hcd_stop(struct usb_hcd *hcd)
 {
 	struct dwc2_hsotg *hsotg = dwc2_hcd_to_hsotg(hcd);
 	unsigned long flags;
+	u32 hprt0;
 
 	/* Turn off all host-specific interrupts */
 	dwc2_disable_host_interrupts(hsotg);
@@ -4436,6 +4451,7 @@ static void _dwc2_hcd_stop(struct usb_hcd *hcd)
 	synchronize_irq(hcd->irq);
 
 	spin_lock_irqsave(&hsotg->lock, flags);
+	hprt0 = dwc2_read_hprt0(hsotg);
 	/* Ensure hcd is disconnected */
 	dwc2_hcd_disconnect(hsotg, true);
 	dwc2_hcd_stop(hsotg);
@@ -4444,7 +4460,9 @@ static void _dwc2_hcd_stop(struct usb_hcd *hcd)
 	clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
 	spin_unlock_irqrestore(&hsotg->lock, flags);
 
-	dwc2_vbus_supply_exit(hsotg);
+	/* keep balanced supply init/exit by checking HPRT0_PWR */
+	if (hprt0 & HPRT0_PWR)
+		dwc2_vbus_supply_exit(hsotg);
 
 	usleep_range(1000, 3000);
 }
@@ -4482,7 +4500,9 @@ static int _dwc2_hcd_suspend(struct usb_hcd *hcd)
 		hprt0 |= HPRT0_SUSP;
 		hprt0 &= ~HPRT0_PWR;
 		dwc2_writel(hsotg, hprt0, HPRT0);
+		spin_unlock_irqrestore(&hsotg->lock, flags);
 		dwc2_vbus_supply_exit(hsotg);
+		spin_lock_irqsave(&hsotg->lock, flags);
 	}
 
 	/* Enter partial_power_down */
diff --git a/drivers/usb/dwc2/hw.h b/drivers/usb/dwc2/hw.h
index 0ca8e7bc7aaf..2b1ea441b7d4 100644
--- a/drivers/usb/dwc2/hw.h
+++ b/drivers/usb/dwc2/hw.h
@@ -312,6 +312,7 @@
 #define GHWCFG4_UTMI_PHY_DATA_WIDTH_SHIFT	14
 #define GHWCFG4_ACG_SUPPORTED			BIT(12)
 #define GHWCFG4_IPG_ISOC_SUPPORTED		BIT(11)
+#define GHWCFG4_SERVICE_INTERVAL_SUPPORTED      BIT(10)
 #define GHWCFG4_UTMI_PHY_DATA_WIDTH_8		0
 #define GHWCFG4_UTMI_PHY_DATA_WIDTH_16		1
 #define GHWCFG4_UTMI_PHY_DATA_WIDTH_8_OR_16	2
@@ -404,6 +405,19 @@
 #define ADPCTL_PRB_DSCHRG_MASK		(0x3 << 0)
 #define ADPCTL_PRB_DSCHRG_SHIFT		0
 
+#define GREFCLK				    HSOTG_REG(0x0064)
+#define GREFCLK_REFCLKPER_MASK		    (0x1ffff << 15)
+#define GREFCLK_REFCLKPER_SHIFT		    15
+#define GREFCLK_REF_CLK_MODE		    BIT(14)
+#define GREFCLK_SOF_CNT_WKUP_ALERT_MASK	    (0x3ff)
+#define GREFCLK_SOF_CNT_WKUP_ALERT_SHIFT    0
+
+#define GINTMSK2			HSOTG_REG(0x0068)
+#define GINTMSK2_WKUP_ALERT_INT_MSK	BIT(0)
+
+#define GINTSTS2			HSOTG_REG(0x006c)
+#define GINTSTS2_WKUP_ALERT_INT		BIT(0)
+
 #define HPTXFSIZ			HSOTG_REG(0x100)
 /* Use FIFOSIZE_* constants to access this register */
 
@@ -443,6 +457,7 @@
 #define DCFG_DEVSPD_FS48		3
 
 #define DCTL				HSOTG_REG(0x804)
+#define DCTL_SERVICE_INTERVAL_SUPPORTED BIT(19)
 #define DCTL_PWRONPRGDONE		BIT(11)
 #define DCTL_CGOUTNAK			BIT(10)
 #define DCTL_SGOUTNAK			BIT(9)
diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c
index bf7052e037d6..7c1b6938f212 100644
--- a/drivers/usb/dwc2/params.c
+++ b/drivers/usb/dwc2/params.c
@@ -81,6 +81,7 @@ static void dwc2_set_rk_params(struct dwc2_hsotg *hsotg)
 	p->host_perio_tx_fifo_size = 256;
 	p->ahbcfg = GAHBCFG_HBSTLEN_INCR16 <<
 		GAHBCFG_HBSTLEN_SHIFT;
+	p->power_down = 0;
 }
 
 static void dwc2_set_ltq_params(struct dwc2_hsotg *hsotg)
@@ -299,9 +300,12 @@ static void dwc2_set_default_params(struct dwc2_hsotg *hsotg)
 	p->hird_threshold_en = true;
 	p->hird_threshold = 4;
 	p->ipg_isoc_en = false;
+	p->service_interval = false;
 	p->max_packet_count = hw->max_packet_count;
 	p->max_transfer_size = hw->max_transfer_size;
 	p->ahbcfg = GAHBCFG_HBSTLEN_INCR << GAHBCFG_HBSTLEN_SHIFT;
+	p->ref_clk_per = 33333;
+	p->sof_cnt_wkup_alert = 100;
 
 	if ((hsotg->dr_mode == USB_DR_MODE_HOST) ||
 	    (hsotg->dr_mode == USB_DR_MODE_OTG)) {
@@ -592,6 +596,7 @@ static void dwc2_check_params(struct dwc2_hsotg *hsotg)
 	CHECK_BOOL(besl, (hsotg->hw_params.snpsid >= DWC2_CORE_REV_3_00a));
 	CHECK_BOOL(hird_threshold_en, hsotg->params.lpm);
 	CHECK_RANGE(hird_threshold, 0, hsotg->params.besl ? 12 : 7, 0);
+	CHECK_BOOL(service_interval, hw->service_interval_mode);
 	CHECK_RANGE(max_packet_count,
 		    15, hw->max_packet_count,
 		    hw->max_packet_count);
@@ -780,6 +785,8 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
 				  GHWCFG4_UTMI_PHY_DATA_WIDTH_SHIFT;
 	hw->acg_enable = !!(hwcfg4 & GHWCFG4_ACG_SUPPORTED);
 	hw->ipg_isoc_en = !!(hwcfg4 & GHWCFG4_IPG_ISOC_SUPPORTED);
+	hw->service_interval_mode = !!(hwcfg4 &
+				       GHWCFG4_SERVICE_INTERVAL_SUPPORTED);
 
 	/* fifo sizes */
 	hw->rx_fifo_size = (grxfsiz & GRXFSIZ_DEPTH_MASK) >>
diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index 577642895b57..c0b64d483552 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -432,6 +432,14 @@ static int dwc2_driver_probe(struct platform_device *dev)
 	if (retval)
 		return retval;
 
+	hsotg->vbus_supply = devm_regulator_get_optional(hsotg->dev, "vbus");
+	if (IS_ERR(hsotg->vbus_supply)) {
+		retval = PTR_ERR(hsotg->vbus_supply);
+		hsotg->vbus_supply = NULL;
+		if (retval != -ENODEV)
+			return retval;
+	}
+
 	retval = dwc2_lowlevel_hw_enable(hsotg);
 	if (retval)
 		return retval;
diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig
index 518ead12458d..1a0404fda596 100644
--- a/drivers/usb/dwc3/Kconfig
+++ b/drivers/usb/dwc3/Kconfig
@@ -113,7 +113,7 @@ config USB_DWC3_ST
 
 config USB_DWC3_QCOM
 	tristate "Qualcomm Platform"
-	depends on ARCH_QCOM || COMPILE_TEST
+	depends on EXTCON && (ARCH_QCOM || COMPILE_TEST)
 	depends on OF
 	default USB_DWC3
 	help
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 88c80fcc39f5..becfbb87f791 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -756,7 +756,7 @@ static void dwc3_core_setup_global_control(struct dwc3 *dwc)
 
 	/* check if current dwc3 is on simulation board */
 	if (dwc->hwparams.hwparams6 & DWC3_GHWPARAMS6_EN_FPGA) {
-		dev_info(dwc->dev, "Running with FPGA optmizations\n");
+		dev_info(dwc->dev, "Running with FPGA optimizations\n");
 		dwc->is_fpga = true;
 	}
 
diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c
index a94fb1ba8f2c..cb7fcd7c0ad8 100644
--- a/drivers/usb/dwc3/dwc3-exynos.c
+++ b/drivers/usb/dwc3/dwc3-exynos.c
@@ -13,80 +13,30 @@
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
-#include <linux/usb/otg.h>
-#include <linux/usb/usb_phy_generic.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/regulator/consumer.h>
 
+#define DWC3_EXYNOS_MAX_CLOCKS	4
+
+struct dwc3_exynos_driverdata {
+	const char		*clk_names[DWC3_EXYNOS_MAX_CLOCKS];
+	int			num_clks;
+	int			suspend_clk_idx;
+};
+
 struct dwc3_exynos {
-	struct platform_device	*usb2_phy;
-	struct platform_device	*usb3_phy;
 	struct device		*dev;
 
-	struct clk		*clk;
-	struct clk		*susp_clk;
-	struct clk		*axius_clk;
+	const char		**clk_names;
+	struct clk		*clks[DWC3_EXYNOS_MAX_CLOCKS];
+	int			num_clks;
+	int			suspend_clk_idx;
 
 	struct regulator	*vdd33;
 	struct regulator	*vdd10;
 };
 
-static int dwc3_exynos_register_phys(struct dwc3_exynos *exynos)
-{
-	struct usb_phy_generic_platform_data pdata;
-	struct platform_device	*pdev;
-	int			ret;
-
-	memset(&pdata, 0x00, sizeof(pdata));
-
-	pdev = platform_device_alloc("usb_phy_generic", PLATFORM_DEVID_AUTO);
-	if (!pdev)
-		return -ENOMEM;
-
-	exynos->usb2_phy = pdev;
-	pdata.type = USB_PHY_TYPE_USB2;
-	pdata.gpio_reset = -1;
-
-	ret = platform_device_add_data(exynos->usb2_phy, &pdata, sizeof(pdata));
-	if (ret)
-		goto err1;
-
-	pdev = platform_device_alloc("usb_phy_generic", PLATFORM_DEVID_AUTO);
-	if (!pdev) {
-		ret = -ENOMEM;
-		goto err1;
-	}
-
-	exynos->usb3_phy = pdev;
-	pdata.type = USB_PHY_TYPE_USB3;
-
-	ret = platform_device_add_data(exynos->usb3_phy, &pdata, sizeof(pdata));
-	if (ret)
-		goto err2;
-
-	ret = platform_device_add(exynos->usb2_phy);
-	if (ret)
-		goto err2;
-
-	ret = platform_device_add(exynos->usb3_phy);
-	if (ret)
-		goto err3;
-
-	return 0;
-
-err3:
-	platform_device_del(exynos->usb2_phy);
-
-err2:
-	platform_device_put(exynos->usb3_phy);
-
-err1:
-	platform_device_put(exynos->usb2_phy);
-
-	return ret;
-}
-
 static int dwc3_exynos_remove_child(struct device *dev, void *unused)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -101,47 +51,42 @@ static int dwc3_exynos_probe(struct platform_device *pdev)
 	struct dwc3_exynos	*exynos;
 	struct device		*dev = &pdev->dev;
 	struct device_node	*node = dev->of_node;
-
-	int			ret;
+	const struct dwc3_exynos_driverdata *driver_data;
+	int			i, ret;
 
 	exynos = devm_kzalloc(dev, sizeof(*exynos), GFP_KERNEL);
 	if (!exynos)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, exynos);
+	driver_data = of_device_get_match_data(dev);
+	exynos->dev = dev;
+	exynos->num_clks = driver_data->num_clks;
+	exynos->clk_names = (const char **)driver_data->clk_names;
+	exynos->suspend_clk_idx = driver_data->suspend_clk_idx;
 
-	exynos->dev	= dev;
+	platform_set_drvdata(pdev, exynos);
 
-	exynos->clk = devm_clk_get(dev, "usbdrd30");
-	if (IS_ERR(exynos->clk)) {
-		dev_err(dev, "couldn't get clock\n");
-		return -EINVAL;
+	for (i = 0; i < exynos->num_clks; i++) {
+		exynos->clks[i] = devm_clk_get(dev, exynos->clk_names[i]);
+		if (IS_ERR(exynos->clks[i])) {
+			dev_err(dev, "failed to get clock: %s\n",
+				exynos->clk_names[i]);
+			return PTR_ERR(exynos->clks[i]);
+		}
 	}
-	ret = clk_prepare_enable(exynos->clk);
-	if (ret)
-		return ret;
 
-	exynos->susp_clk = devm_clk_get(dev, "usbdrd30_susp_clk");
-	if (IS_ERR(exynos->susp_clk))
-		exynos->susp_clk = NULL;
-	ret = clk_prepare_enable(exynos->susp_clk);
-	if (ret)
-		goto susp_clk_err;
-
-	if (of_device_is_compatible(node, "samsung,exynos7-dwusb3")) {
-		exynos->axius_clk = devm_clk_get(dev, "usbdrd30_axius_clk");
-		if (IS_ERR(exynos->axius_clk)) {
-			dev_err(dev, "no AXI UpScaler clk specified\n");
-			ret = -ENODEV;
-			goto axius_clk_err;
+	for (i = 0; i < exynos->num_clks; i++) {
+		ret = clk_prepare_enable(exynos->clks[i]);
+		if (ret) {
+			while (--i > 0)
+				clk_disable_unprepare(exynos->clks[i]);
+			return ret;
 		}
-		ret = clk_prepare_enable(exynos->axius_clk);
-		if (ret)
-			goto axius_clk_err;
-	} else {
-		exynos->axius_clk = NULL;
 	}
 
+	if (exynos->suspend_clk_idx >= 0)
+		clk_prepare_enable(exynos->clks[exynos->suspend_clk_idx]);
+
 	exynos->vdd33 = devm_regulator_get(dev, "vdd33");
 	if (IS_ERR(exynos->vdd33)) {
 		ret = PTR_ERR(exynos->vdd33);
@@ -164,12 +109,6 @@ static int dwc3_exynos_probe(struct platform_device *pdev)
 		goto vdd10_err;
 	}
 
-	ret = dwc3_exynos_register_phys(exynos);
-	if (ret) {
-		dev_err(dev, "couldn't register PHYs\n");
-		goto phys_err;
-	}
-
 	if (node) {
 		ret = of_platform_populate(node, NULL, NULL, dev);
 		if (ret) {
@@ -185,32 +124,31 @@ static int dwc3_exynos_probe(struct platform_device *pdev)
 	return 0;
 
 populate_err:
-	platform_device_unregister(exynos->usb2_phy);
-	platform_device_unregister(exynos->usb3_phy);
-phys_err:
 	regulator_disable(exynos->vdd10);
 vdd10_err:
 	regulator_disable(exynos->vdd33);
 vdd33_err:
-	clk_disable_unprepare(exynos->axius_clk);
-axius_clk_err:
-	clk_disable_unprepare(exynos->susp_clk);
-susp_clk_err:
-	clk_disable_unprepare(exynos->clk);
+	for (i = exynos->num_clks - 1; i >= 0; i--)
+		clk_disable_unprepare(exynos->clks[i]);
+
+	if (exynos->suspend_clk_idx >= 0)
+		clk_disable_unprepare(exynos->clks[exynos->suspend_clk_idx]);
+
 	return ret;
 }
 
 static int dwc3_exynos_remove(struct platform_device *pdev)
 {
 	struct dwc3_exynos	*exynos = platform_get_drvdata(pdev);
+	int i;
 
 	device_for_each_child(&pdev->dev, NULL, dwc3_exynos_remove_child);
-	platform_device_unregister(exynos->usb2_phy);
-	platform_device_unregister(exynos->usb3_phy);
 
-	clk_disable_unprepare(exynos->axius_clk);
-	clk_disable_unprepare(exynos->susp_clk);
-	clk_disable_unprepare(exynos->clk);
+	for (i = exynos->num_clks - 1; i >= 0; i--)
+		clk_disable_unprepare(exynos->clks[i]);
+
+	if (exynos->suspend_clk_idx >= 0)
+		clk_disable_unprepare(exynos->clks[exynos->suspend_clk_idx]);
 
 	regulator_disable(exynos->vdd33);
 	regulator_disable(exynos->vdd10);
@@ -218,10 +156,36 @@ static int dwc3_exynos_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct dwc3_exynos_driverdata exynos5250_drvdata = {
+	.clk_names = { "usbdrd30" },
+	.num_clks = 1,
+	.suspend_clk_idx = -1,
+};
+
+static const struct dwc3_exynos_driverdata exynos5433_drvdata = {
+	.clk_names = { "aclk", "susp_clk", "pipe_pclk", "phyclk" },
+	.num_clks = 4,
+	.suspend_clk_idx = 1,
+};
+
+static const struct dwc3_exynos_driverdata exynos7_drvdata = {
+	.clk_names = { "usbdrd30", "usbdrd30_susp_clk", "usbdrd30_axius_clk" },
+	.num_clks = 3,
+	.suspend_clk_idx = 1,
+};
+
 static const struct of_device_id exynos_dwc3_match[] = {
-	{ .compatible = "samsung,exynos5250-dwusb3" },
-	{ .compatible = "samsung,exynos7-dwusb3" },
-	{},
+	{
+		.compatible = "samsung,exynos5250-dwusb3",
+		.data = &exynos5250_drvdata,
+	}, {
+		.compatible = "samsung,exynos5433-dwusb3",
+		.data = &exynos5433_drvdata,
+	}, {
+		.compatible = "samsung,exynos7-dwusb3",
+		.data = &exynos7_drvdata,
+	}, {
+	}
 };
 MODULE_DEVICE_TABLE(of, exynos_dwc3_match);
 
@@ -229,9 +193,10 @@ MODULE_DEVICE_TABLE(of, exynos_dwc3_match);
 static int dwc3_exynos_suspend(struct device *dev)
 {
 	struct dwc3_exynos *exynos = dev_get_drvdata(dev);
+	int i;
 
-	clk_disable(exynos->axius_clk);
-	clk_disable(exynos->clk);
+	for (i = exynos->num_clks - 1; i >= 0; i--)
+		clk_disable_unprepare(exynos->clks[i]);
 
 	regulator_disable(exynos->vdd33);
 	regulator_disable(exynos->vdd10);
@@ -242,7 +207,7 @@ static int dwc3_exynos_suspend(struct device *dev)
 static int dwc3_exynos_resume(struct device *dev)
 {
 	struct dwc3_exynos *exynos = dev_get_drvdata(dev);
-	int ret;
+	int i, ret;
 
 	ret = regulator_enable(exynos->vdd33);
 	if (ret) {
@@ -255,13 +220,14 @@ static int dwc3_exynos_resume(struct device *dev)
 		return ret;
 	}
 
-	clk_enable(exynos->clk);
-	clk_enable(exynos->axius_clk);
-
-	/* runtime set active to reflect active state. */
-	pm_runtime_disable(dev);
-	pm_runtime_set_active(dev);
-	pm_runtime_enable(dev);
+	for (i = 0; i < exynos->num_clks; i++) {
+		ret = clk_prepare_enable(exynos->clks[i]);
+		if (ret) {
+			while (--i > 0)
+				clk_disable_unprepare(exynos->clks[i]);
+			return ret;
+		}
+	}
 
 	return 0;
 }
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 2b53194081ba..679c12e14522 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -270,27 +270,36 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned cmd,
 	const struct usb_endpoint_descriptor *desc = dep->endpoint.desc;
 	struct dwc3		*dwc = dep->dwc;
 	u32			timeout = 1000;
+	u32			saved_config = 0;
 	u32			reg;
 
 	int			cmd_status = 0;
-	int			susphy = false;
 	int			ret = -EINVAL;
 
 	/*
-	 * Synopsys Databook 2.60a states, on section 6.3.2.5.[1-8], that if
-	 * we're issuing an endpoint command, we must check if
-	 * GUSB2PHYCFG.SUSPHY bit is set. If it is, then we need to clear it.
+	 * When operating in USB 2.0 speeds (HS/FS), if GUSB2PHYCFG.ENBLSLPM or
+	 * GUSB2PHYCFG.SUSPHY is set, it must be cleared before issuing an
+	 * endpoint command.
 	 *
-	 * We will also set SUSPHY bit to what it was before returning as stated
-	 * by the same section on Synopsys databook.
+	 * Save and clear both GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY
+	 * settings. Restore them after the command is completed.
+	 *
+	 * DWC_usb3 3.30a and DWC_usb31 1.90a programming guide section 3.2.2
 	 */
 	if (dwc->gadget.speed <= USB_SPEED_HIGH) {
 		reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
 		if (unlikely(reg & DWC3_GUSB2PHYCFG_SUSPHY)) {
-			susphy = true;
+			saved_config |= DWC3_GUSB2PHYCFG_SUSPHY;
 			reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
-			dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
 		}
+
+		if (reg & DWC3_GUSB2PHYCFG_ENBLSLPM) {
+			saved_config |= DWC3_GUSB2PHYCFG_ENBLSLPM;
+			reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM;
+		}
+
+		if (saved_config)
+			dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
 	}
 
 	if (DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_STARTTRANSFER) {
@@ -389,9 +398,9 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned cmd,
 		}
 	}
 
-	if (unlikely(susphy)) {
+	if (saved_config) {
 		reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
-		reg |= DWC3_GUSB2PHYCFG_SUSPHY;
+		reg |= saved_config;
 		dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
 	}
 
diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c
index e15e896f356c..165653a5e45d 100644
--- a/drivers/usb/early/xhci-dbc.c
+++ b/drivers/usb/early/xhci-dbc.c
@@ -717,17 +717,14 @@ static void xdbc_handle_port_status(struct xdbc_trb *evt_trb)
 
 static void xdbc_handle_tx_event(struct xdbc_trb *evt_trb)
 {
-	size_t remain_length;
 	u32 comp_code;
 	int ep_id;
 
 	comp_code	= GET_COMP_CODE(le32_to_cpu(evt_trb->field[2]));
-	remain_length	= EVENT_TRB_LEN(le32_to_cpu(evt_trb->field[2]));
 	ep_id		= TRB_TO_EP_ID(le32_to_cpu(evt_trb->field[3]));
 
 	switch (comp_code) {
 	case COMP_SUCCESS:
-		remain_length = 0;
 	case COMP_SHORT_PACKET:
 		break;
 	case COMP_TRB_ERROR:
diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c
index d582921f7257..db2d4980cb35 100644
--- a/drivers/usb/gadget/function/f_uac2.c
+++ b/drivers/usb/gadget/function/f_uac2.c
@@ -22,12 +22,8 @@
  *  controlled by two clock sources :
  *    CLK_5 := c_srate, and CLK_6 := p_srate
  */
-#define USB_OUT_IT_ID	1
-#define IO_IN_IT_ID	2
-#define IO_OUT_OT_ID	3
-#define USB_IN_OT_ID	4
-#define USB_OUT_CLK_ID	5
-#define USB_IN_CLK_ID	6
+#define USB_OUT_CLK_ID	(out_clk_src_desc.bClockID)
+#define USB_IN_CLK_ID	(in_clk_src_desc.bClockID)
 
 #define CONTROL_ABSENT	0
 #define CONTROL_RDONLY	1
@@ -43,6 +39,9 @@
 #define UNFLW_CTRL	8
 #define OVFLW_CTRL	10
 
+#define EPIN_EN(_opts) ((_opts)->p_chmask != 0)
+#define EPOUT_EN(_opts) ((_opts)->c_chmask != 0)
+
 struct f_uac2 {
 	struct g_audio g_audio;
 	u8 ac_intf, as_in_intf, as_out_intf;
@@ -135,7 +134,7 @@ static struct uac_clock_source_descriptor in_clk_src_desc = {
 	.bDescriptorType = USB_DT_CS_INTERFACE,
 
 	.bDescriptorSubtype = UAC2_CLOCK_SOURCE,
-	.bClockID = USB_IN_CLK_ID,
+	/* .bClockID = DYNAMIC */
 	.bmAttributes = UAC_CLOCK_SOURCE_TYPE_INT_FIXED,
 	.bmControls = (CONTROL_RDONLY << CLK_FREQ_CTRL),
 	.bAssocTerminal = 0,
@@ -147,7 +146,7 @@ static struct uac_clock_source_descriptor out_clk_src_desc = {
 	.bDescriptorType = USB_DT_CS_INTERFACE,
 
 	.bDescriptorSubtype = UAC2_CLOCK_SOURCE,
-	.bClockID = USB_OUT_CLK_ID,
+	/* .bClockID = DYNAMIC */
 	.bmAttributes = UAC_CLOCK_SOURCE_TYPE_INT_FIXED,
 	.bmControls = (CONTROL_RDONLY << CLK_FREQ_CTRL),
 	.bAssocTerminal = 0,
@@ -159,10 +158,10 @@ static struct uac2_input_terminal_descriptor usb_out_it_desc = {
 	.bDescriptorType = USB_DT_CS_INTERFACE,
 
 	.bDescriptorSubtype = UAC_INPUT_TERMINAL,
-	.bTerminalID = USB_OUT_IT_ID,
+	/* .bTerminalID = DYNAMIC */
 	.wTerminalType = cpu_to_le16(UAC_TERMINAL_STREAMING),
 	.bAssocTerminal = 0,
-	.bCSourceID = USB_OUT_CLK_ID,
+	/* .bCSourceID = DYNAMIC */
 	.iChannelNames = 0,
 	.bmControls = cpu_to_le16(CONTROL_RDWR << COPY_CTRL),
 };
@@ -173,10 +172,10 @@ static struct uac2_input_terminal_descriptor io_in_it_desc = {
 	.bDescriptorType = USB_DT_CS_INTERFACE,
 
 	.bDescriptorSubtype = UAC_INPUT_TERMINAL,
-	.bTerminalID = IO_IN_IT_ID,
+	/* .bTerminalID = DYNAMIC */
 	.wTerminalType = cpu_to_le16(UAC_INPUT_TERMINAL_UNDEFINED),
 	.bAssocTerminal = 0,
-	.bCSourceID = USB_IN_CLK_ID,
+	/* .bCSourceID = DYNAMIC */
 	.iChannelNames = 0,
 	.bmControls = cpu_to_le16(CONTROL_RDWR << COPY_CTRL),
 };
@@ -187,11 +186,11 @@ static struct uac2_output_terminal_descriptor usb_in_ot_desc = {
 	.bDescriptorType = USB_DT_CS_INTERFACE,
 
 	.bDescriptorSubtype = UAC_OUTPUT_TERMINAL,
-	.bTerminalID = USB_IN_OT_ID,
+	/* .bTerminalID = DYNAMIC */
 	.wTerminalType = cpu_to_le16(UAC_TERMINAL_STREAMING),
 	.bAssocTerminal = 0,
-	.bSourceID = IO_IN_IT_ID,
-	.bCSourceID = USB_IN_CLK_ID,
+	/* .bSourceID = DYNAMIC */
+	/* .bCSourceID = DYNAMIC */
 	.bmControls = cpu_to_le16(CONTROL_RDWR << COPY_CTRL),
 };
 
@@ -201,11 +200,11 @@ static struct uac2_output_terminal_descriptor io_out_ot_desc = {
 	.bDescriptorType = USB_DT_CS_INTERFACE,
 
 	.bDescriptorSubtype = UAC_OUTPUT_TERMINAL,
-	.bTerminalID = IO_OUT_OT_ID,
+	/* .bTerminalID = DYNAMIC */
 	.wTerminalType = cpu_to_le16(UAC_OUTPUT_TERMINAL_UNDEFINED),
 	.bAssocTerminal = 0,
-	.bSourceID = USB_OUT_IT_ID,
-	.bCSourceID = USB_OUT_CLK_ID,
+	/* .bSourceID = DYNAMIC */
+	/* .bCSourceID = DYNAMIC */
 	.bmControls = cpu_to_le16(CONTROL_RDWR << COPY_CTRL),
 };
 
@@ -253,7 +252,7 @@ static struct uac2_as_header_descriptor as_out_hdr_desc = {
 	.bDescriptorType = USB_DT_CS_INTERFACE,
 
 	.bDescriptorSubtype = UAC_AS_GENERAL,
-	.bTerminalLink = USB_OUT_IT_ID,
+	/* .bTerminalLink = DYNAMIC */
 	.bmControls = 0,
 	.bFormatType = UAC_FORMAT_TYPE_I,
 	.bmFormats = cpu_to_le32(UAC_FORMAT_TYPE_I_PCM),
@@ -330,7 +329,7 @@ static struct uac2_as_header_descriptor as_in_hdr_desc = {
 	.bDescriptorType = USB_DT_CS_INTERFACE,
 
 	.bDescriptorSubtype = UAC_AS_GENERAL,
-	.bTerminalLink = USB_IN_OT_ID,
+	/* .bTerminalLink = DYNAMIC */
 	.bmControls = 0,
 	.bFormatType = UAC_FORMAT_TYPE_I,
 	.bmFormats = cpu_to_le32(UAC_FORMAT_TYPE_I_PCM),
@@ -471,6 +470,125 @@ static void set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts,
 				le16_to_cpu(ep_desc->wMaxPacketSize)));
 }
 
+/* Use macro to overcome line length limitation */
+#define USBDHDR(p) (struct usb_descriptor_header *)(p)
+
+static void setup_descriptor(struct f_uac2_opts *opts)
+{
+	/* patch descriptors */
+	int i = 1; /* ID's start with 1 */
+
+	if (EPOUT_EN(opts))
+		usb_out_it_desc.bTerminalID = i++;
+	if (EPIN_EN(opts))
+		io_in_it_desc.bTerminalID = i++;
+	if (EPOUT_EN(opts))
+		io_out_ot_desc.bTerminalID = i++;
+	if (EPIN_EN(opts))
+		usb_in_ot_desc.bTerminalID = i++;
+	if (EPOUT_EN(opts))
+		out_clk_src_desc.bClockID = i++;
+	if (EPIN_EN(opts))
+		in_clk_src_desc.bClockID = i++;
+
+	usb_out_it_desc.bCSourceID = out_clk_src_desc.bClockID;
+	usb_in_ot_desc.bSourceID = io_in_it_desc.bTerminalID;
+	usb_in_ot_desc.bCSourceID = in_clk_src_desc.bClockID;
+	io_in_it_desc.bCSourceID = in_clk_src_desc.bClockID;
+	io_out_ot_desc.bCSourceID = out_clk_src_desc.bClockID;
+	io_out_ot_desc.bSourceID = usb_out_it_desc.bTerminalID;
+	as_out_hdr_desc.bTerminalLink = usb_out_it_desc.bTerminalID;
+	as_in_hdr_desc.bTerminalLink = usb_in_ot_desc.bTerminalID;
+
+	iad_desc.bInterfaceCount = 1;
+	ac_hdr_desc.wTotalLength = 0;
+
+	if (EPIN_EN(opts)) {
+		u16 len = le16_to_cpu(ac_hdr_desc.wTotalLength);
+
+		len += sizeof(in_clk_src_desc);
+		len += sizeof(usb_in_ot_desc);
+		len += sizeof(io_in_it_desc);
+		ac_hdr_desc.wTotalLength = cpu_to_le16(len);
+		iad_desc.bInterfaceCount++;
+	}
+	if (EPOUT_EN(opts)) {
+		u16 len = le16_to_cpu(ac_hdr_desc.wTotalLength);
+
+		len += sizeof(out_clk_src_desc);
+		len += sizeof(usb_out_it_desc);
+		len += sizeof(io_out_ot_desc);
+		ac_hdr_desc.wTotalLength = cpu_to_le16(len);
+		iad_desc.bInterfaceCount++;
+	}
+
+	i = 0;
+	fs_audio_desc[i++] = USBDHDR(&iad_desc);
+	fs_audio_desc[i++] = USBDHDR(&std_ac_if_desc);
+	fs_audio_desc[i++] = USBDHDR(&ac_hdr_desc);
+	if (EPIN_EN(opts))
+		fs_audio_desc[i++] = USBDHDR(&in_clk_src_desc);
+	if (EPOUT_EN(opts)) {
+		fs_audio_desc[i++] = USBDHDR(&out_clk_src_desc);
+		fs_audio_desc[i++] = USBDHDR(&usb_out_it_desc);
+	}
+	if (EPIN_EN(opts)) {
+		fs_audio_desc[i++] = USBDHDR(&io_in_it_desc);
+		fs_audio_desc[i++] = USBDHDR(&usb_in_ot_desc);
+	}
+	if (EPOUT_EN(opts)) {
+		fs_audio_desc[i++] = USBDHDR(&io_out_ot_desc);
+		fs_audio_desc[i++] = USBDHDR(&std_as_out_if0_desc);
+		fs_audio_desc[i++] = USBDHDR(&std_as_out_if1_desc);
+		fs_audio_desc[i++] = USBDHDR(&as_out_hdr_desc);
+		fs_audio_desc[i++] = USBDHDR(&as_out_fmt1_desc);
+		fs_audio_desc[i++] = USBDHDR(&fs_epout_desc);
+		fs_audio_desc[i++] = USBDHDR(&as_iso_out_desc);
+	}
+	if (EPIN_EN(opts)) {
+		fs_audio_desc[i++] = USBDHDR(&std_as_in_if0_desc);
+		fs_audio_desc[i++] = USBDHDR(&std_as_in_if1_desc);
+		fs_audio_desc[i++] = USBDHDR(&as_in_hdr_desc);
+		fs_audio_desc[i++] = USBDHDR(&as_in_fmt1_desc);
+		fs_audio_desc[i++] = USBDHDR(&fs_epin_desc);
+		fs_audio_desc[i++] = USBDHDR(&as_iso_in_desc);
+	}
+	fs_audio_desc[i] = NULL;
+
+	i = 0;
+	hs_audio_desc[i++] = USBDHDR(&iad_desc);
+	hs_audio_desc[i++] = USBDHDR(&std_ac_if_desc);
+	hs_audio_desc[i++] = USBDHDR(&ac_hdr_desc);
+	if (EPIN_EN(opts))
+		hs_audio_desc[i++] = USBDHDR(&in_clk_src_desc);
+	if (EPOUT_EN(opts)) {
+		hs_audio_desc[i++] = USBDHDR(&out_clk_src_desc);
+		hs_audio_desc[i++] = USBDHDR(&usb_out_it_desc);
+	}
+	if (EPIN_EN(opts)) {
+		hs_audio_desc[i++] = USBDHDR(&io_in_it_desc);
+		hs_audio_desc[i++] = USBDHDR(&usb_in_ot_desc);
+	}
+	if (EPOUT_EN(opts)) {
+		hs_audio_desc[i++] = USBDHDR(&io_out_ot_desc);
+		hs_audio_desc[i++] = USBDHDR(&std_as_out_if0_desc);
+		hs_audio_desc[i++] = USBDHDR(&std_as_out_if1_desc);
+		hs_audio_desc[i++] = USBDHDR(&as_out_hdr_desc);
+		hs_audio_desc[i++] = USBDHDR(&as_out_fmt1_desc);
+		hs_audio_desc[i++] = USBDHDR(&hs_epout_desc);
+		hs_audio_desc[i++] = USBDHDR(&as_iso_out_desc);
+	}
+	if (EPIN_EN(opts)) {
+		hs_audio_desc[i++] = USBDHDR(&std_as_in_if0_desc);
+		hs_audio_desc[i++] = USBDHDR(&std_as_in_if1_desc);
+		hs_audio_desc[i++] = USBDHDR(&as_in_hdr_desc);
+		hs_audio_desc[i++] = USBDHDR(&as_in_fmt1_desc);
+		hs_audio_desc[i++] = USBDHDR(&hs_epin_desc);
+		hs_audio_desc[i++] = USBDHDR(&as_iso_in_desc);
+	}
+	hs_audio_desc[i] = NULL;
+}
+
 static int
 afunc_bind(struct usb_configuration *cfg, struct usb_function *fn)
 {
@@ -530,25 +648,29 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn)
 	uac2->ac_intf = ret;
 	uac2->ac_alt = 0;
 
-	ret = usb_interface_id(cfg, fn);
-	if (ret < 0) {
-		dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
-		return ret;
+	if (EPOUT_EN(uac2_opts)) {
+		ret = usb_interface_id(cfg, fn);
+		if (ret < 0) {
+			dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
+			return ret;
+		}
+		std_as_out_if0_desc.bInterfaceNumber = ret;
+		std_as_out_if1_desc.bInterfaceNumber = ret;
+		uac2->as_out_intf = ret;
+		uac2->as_out_alt = 0;
 	}
-	std_as_out_if0_desc.bInterfaceNumber = ret;
-	std_as_out_if1_desc.bInterfaceNumber = ret;
-	uac2->as_out_intf = ret;
-	uac2->as_out_alt = 0;
 
-	ret = usb_interface_id(cfg, fn);
-	if (ret < 0) {
-		dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
-		return ret;
+	if (EPIN_EN(uac2_opts)) {
+		ret = usb_interface_id(cfg, fn);
+		if (ret < 0) {
+			dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
+			return ret;
+		}
+		std_as_in_if0_desc.bInterfaceNumber = ret;
+		std_as_in_if1_desc.bInterfaceNumber = ret;
+		uac2->as_in_intf = ret;
+		uac2->as_in_alt = 0;
 	}
-	std_as_in_if0_desc.bInterfaceNumber = ret;
-	std_as_in_if1_desc.bInterfaceNumber = ret;
-	uac2->as_in_intf = ret;
-	uac2->as_in_alt = 0;
 
 	/* Calculate wMaxPacketSize according to audio bandwidth */
 	set_ep_max_packet_size(uac2_opts, &fs_epin_desc, 1000, true);
@@ -556,16 +678,20 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn)
 	set_ep_max_packet_size(uac2_opts, &hs_epin_desc, 8000, true);
 	set_ep_max_packet_size(uac2_opts, &hs_epout_desc, 8000, false);
 
-	agdev->out_ep = usb_ep_autoconfig(gadget, &fs_epout_desc);
-	if (!agdev->out_ep) {
-		dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
-		return -ENODEV;
+	if (EPOUT_EN(uac2_opts)) {
+		agdev->out_ep = usb_ep_autoconfig(gadget, &fs_epout_desc);
+		if (!agdev->out_ep) {
+			dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
+			return -ENODEV;
+		}
 	}
 
-	agdev->in_ep = usb_ep_autoconfig(gadget, &fs_epin_desc);
-	if (!agdev->in_ep) {
-		dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
-		return -ENODEV;
+	if (EPIN_EN(uac2_opts)) {
+		agdev->in_ep = usb_ep_autoconfig(gadget, &fs_epin_desc);
+		if (!agdev->in_ep) {
+			dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
+			return -ENODEV;
+		}
 	}
 
 	agdev->in_ep_maxpsize = max_t(u16,
@@ -578,6 +704,8 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn)
 	hs_epout_desc.bEndpointAddress = fs_epout_desc.bEndpointAddress;
 	hs_epin_desc.bEndpointAddress = fs_epin_desc.bEndpointAddress;
 
+	setup_descriptor(uac2_opts);
+
 	ret = usb_assign_descriptors(fn, fs_audio_desc, hs_audio_desc, NULL,
 				     NULL);
 	if (ret)
diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
index d8ce7868fe22..8c99392df593 100644
--- a/drivers/usb/gadget/function/f_uvc.c
+++ b/drivers/usb/gadget/function/f_uvc.c
@@ -197,12 +197,6 @@ static const struct usb_descriptor_header * const uvc_ss_streaming[] = {
 	NULL,
 };
 
-void uvc_set_trace_param(unsigned int trace)
-{
-	uvc_gadget_trace_param = trace;
-}
-EXPORT_SYMBOL(uvc_set_trace_param);
-
 /* --------------------------------------------------------------------------
  * Control requests
  */
@@ -232,13 +226,8 @@ uvc_function_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl)
 	struct v4l2_event v4l2_event;
 	struct uvc_event *uvc_event = (void *)&v4l2_event.u.data;
 
-	/* printk(KERN_INFO "setup request %02x %02x value %04x index %04x %04x\n",
-	 *	ctrl->bRequestType, ctrl->bRequest, le16_to_cpu(ctrl->wValue),
-	 *	le16_to_cpu(ctrl->wIndex), le16_to_cpu(ctrl->wLength));
-	 */
-
 	if ((ctrl->bRequestType & USB_TYPE_MASK) != USB_TYPE_CLASS) {
-		INFO(f->config->cdev, "invalid request type\n");
+		uvcg_info(f, "invalid request type\n");
 		return -EINVAL;
 	}
 
@@ -272,7 +261,7 @@ uvc_function_get_alt(struct usb_function *f, unsigned interface)
 {
 	struct uvc_device *uvc = to_uvc(f);
 
-	INFO(f->config->cdev, "uvc_function_get_alt(%u)\n", interface);
+	uvcg_info(f, "%s(%u)\n", __func__, interface);
 
 	if (interface == uvc->control_intf)
 		return 0;
@@ -291,13 +280,13 @@ uvc_function_set_alt(struct usb_function *f, unsigned interface, unsigned alt)
 	struct uvc_event *uvc_event = (void *)&v4l2_event.u.data;
 	int ret;
 
-	INFO(cdev, "uvc_function_set_alt(%u, %u)\n", interface, alt);
+	uvcg_info(f, "%s(%u, %u)\n", __func__, interface, alt);
 
 	if (interface == uvc->control_intf) {
 		if (alt)
 			return -EINVAL;
 
-		INFO(cdev, "reset UVC Control\n");
+		uvcg_info(f, "reset UVC Control\n");
 		usb_ep_disable(uvc->control_ep);
 
 		if (!uvc->control_ep->desc)
@@ -348,7 +337,7 @@ uvc_function_set_alt(struct usb_function *f, unsigned interface, unsigned alt)
 		if (!uvc->video.ep)
 			return -EINVAL;
 
-		INFO(cdev, "reset UVC\n");
+		uvcg_info(f, "reset UVC\n");
 		usb_ep_disable(uvc->video.ep);
 
 		ret = config_ep_by_speed(f->config->cdev->gadget,
@@ -373,7 +362,7 @@ uvc_function_disable(struct usb_function *f)
 	struct uvc_device *uvc = to_uvc(f);
 	struct v4l2_event v4l2_event;
 
-	INFO(f->config->cdev, "uvc_function_disable\n");
+	uvcg_info(f, "%s()\n", __func__);
 
 	memset(&v4l2_event, 0, sizeof(v4l2_event));
 	v4l2_event.type = UVC_EVENT_DISCONNECT;
@@ -392,21 +381,19 @@ uvc_function_disable(struct usb_function *f)
 void
 uvc_function_connect(struct uvc_device *uvc)
 {
-	struct usb_composite_dev *cdev = uvc->func.config->cdev;
 	int ret;
 
 	if ((ret = usb_function_activate(&uvc->func)) < 0)
-		INFO(cdev, "UVC connect failed with %d\n", ret);
+		uvcg_info(&uvc->func, "UVC connect failed with %d\n", ret);
 }
 
 void
 uvc_function_disconnect(struct uvc_device *uvc)
 {
-	struct usb_composite_dev *cdev = uvc->func.config->cdev;
 	int ret;
 
 	if ((ret = usb_function_deactivate(&uvc->func)) < 0)
-		INFO(cdev, "UVC disconnect failed with %d\n", ret);
+		uvcg_info(&uvc->func, "UVC disconnect failed with %d\n", ret);
 }
 
 /* --------------------------------------------------------------------------
@@ -605,7 +592,7 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f)
 	struct f_uvc_opts *opts;
 	int ret = -EINVAL;
 
-	INFO(cdev, "uvc_function_bind\n");
+	uvcg_info(f, "%s()\n", __func__);
 
 	opts = fi_to_f_uvc_opts(f->fi);
 	/* Sanity check the streaming endpoint module parameters.
@@ -618,8 +605,8 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f)
 	if (opts->streaming_maxburst &&
 	    (opts->streaming_maxpacket % 1024) != 0) {
 		opts->streaming_maxpacket = roundup(opts->streaming_maxpacket, 1024);
-		INFO(cdev, "overriding streaming_maxpacket to %d\n",
-		     opts->streaming_maxpacket);
+		uvcg_info(f, "overriding streaming_maxpacket to %d\n",
+			  opts->streaming_maxpacket);
 	}
 
 	/* Fill in the FS/HS/SS Video Streaming specific descriptors from the
@@ -658,7 +645,7 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f)
 	/* Allocate endpoints. */
 	ep = usb_ep_autoconfig(cdev->gadget, &uvc_control_ep);
 	if (!ep) {
-		INFO(cdev, "Unable to allocate control EP\n");
+		uvcg_info(f, "Unable to allocate control EP\n");
 		goto error;
 	}
 	uvc->control_ep = ep;
@@ -672,7 +659,7 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f)
 		ep = usb_ep_autoconfig(cdev->gadget, &uvc_fs_streaming_ep);
 
 	if (!ep) {
-		INFO(cdev, "Unable to allocate streaming EP\n");
+		uvcg_info(f, "Unable to allocate streaming EP\n");
 		goto error;
 	}
 	uvc->video.ep = ep;
@@ -699,12 +686,14 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f)
 	uvc_iad.bFirstInterface = ret;
 	uvc_control_intf.bInterfaceNumber = ret;
 	uvc->control_intf = ret;
+	opts->control_interface = ret;
 
 	if ((ret = usb_interface_id(c, f)) < 0)
 		goto error;
 	uvc_streaming_intf_alt0.bInterfaceNumber = ret;
 	uvc_streaming_intf_alt1.bInterfaceNumber = ret;
 	uvc->streaming_intf = ret;
+	opts->streaming_interface = ret;
 
 	/* Copy descriptors */
 	f->fs_descriptors = uvc_copy_descriptors(uvc, USB_SPEED_FULL);
@@ -743,19 +732,19 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f)
 	uvc->control_req->context = uvc;
 
 	if (v4l2_device_register(&cdev->gadget->dev, &uvc->v4l2_dev)) {
-		printk(KERN_INFO "v4l2_device_register failed\n");
+		uvcg_err(f, "failed to register V4L2 device\n");
 		goto error;
 	}
 
 	/* Initialise video. */
-	ret = uvcg_video_init(&uvc->video);
+	ret = uvcg_video_init(&uvc->video, uvc);
 	if (ret < 0)
 		goto error;
 
 	/* Register a V4L2 device. */
 	ret = uvc_register_video(uvc);
 	if (ret < 0) {
-		printk(KERN_INFO "Unable to register video device\n");
+		uvcg_err(f, "failed to register video device\n");
 		goto error;
 	}
 
@@ -792,6 +781,7 @@ static struct usb_function_instance *uvc_alloc_inst(void)
 	struct uvc_output_terminal_descriptor *od;
 	struct uvc_color_matching_descriptor *md;
 	struct uvc_descriptor_header **ctl_cls;
+	int ret;
 
 	opts = kzalloc(sizeof(*opts), GFP_KERNEL);
 	if (!opts)
@@ -868,7 +858,12 @@ static struct usb_function_instance *uvc_alloc_inst(void)
 	opts->streaming_interval = 1;
 	opts->streaming_maxpacket = 1024;
 
-	uvcg_attach_configfs(opts);
+	ret = uvcg_attach_configfs(opts);
+	if (ret < 0) {
+		kfree(opts);
+		return ERR_PTR(ret);
+	}
+
 	return &opts->func_inst;
 }
 
@@ -886,7 +881,7 @@ static void uvc_unbind(struct usb_configuration *c, struct usb_function *f)
 	struct usb_composite_dev *cdev = c->cdev;
 	struct uvc_device *uvc = to_uvc(f);
 
-	INFO(cdev, "%s\n", __func__);
+	uvcg_info(f, "%s\n", __func__);
 
 	device_remove_file(&uvc->vdev.dev, &dev_attr_function_name);
 	video_unregister_device(&uvc->vdev);
diff --git a/drivers/usb/gadget/function/u_uvc.h b/drivers/usb/gadget/function/u_uvc.h
index 2ed292e94fbc..5242d489e20a 100644
--- a/drivers/usb/gadget/function/u_uvc.h
+++ b/drivers/usb/gadget/function/u_uvc.h
@@ -25,6 +25,9 @@ struct f_uvc_opts {
 	unsigned int					streaming_maxpacket;
 	unsigned int					streaming_maxburst;
 
+	unsigned int					control_interface;
+	unsigned int					streaming_interface;
+
 	/*
 	 * Control descriptors array pointers for full-/high-speed and
 	 * super-speed. They point by default to the uvc_fs_control_cls and
diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index 93cf78b420fe..099d650082e5 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -24,6 +24,7 @@
 struct usb_ep;
 struct usb_request;
 struct uvc_descriptor_header;
+struct uvc_device;
 
 /* ------------------------------------------------------------------------
  * Debugging, printing and logging
@@ -51,14 +52,12 @@ extern unsigned int uvc_gadget_trace_param;
 			printk(KERN_DEBUG "uvcvideo: " msg); \
 	} while (0)
 
-#define uvc_warn_once(dev, warn, msg...) \
-	do { \
-		if (!test_and_set_bit(warn, &dev->warnings)) \
-			printk(KERN_INFO "uvcvideo: " msg); \
-	} while (0)
-
-#define uvc_printk(level, msg...) \
-	printk(level "uvcvideo: " msg)
+#define uvcg_dbg(f, fmt, args...) \
+	dev_dbg(&(f)->config->cdev->gadget->dev, "%s: " fmt, (f)->name, ##args)
+#define uvcg_info(f, fmt, args...) \
+	dev_info(&(f)->config->cdev->gadget->dev, "%s: " fmt, (f)->name, ##args)
+#define uvcg_err(f, fmt, args...) \
+	dev_err(&(f)->config->cdev->gadget->dev, "%s: " fmt, (f)->name, ##args)
 
 /* ------------------------------------------------------------------------
  * Driver specific constants
@@ -73,6 +72,7 @@ extern unsigned int uvc_gadget_trace_param;
  */
 
 struct uvc_video {
+	struct uvc_device *uvc;
 	struct usb_ep *ep;
 
 	/* Frame parameters */
diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c
index b51f0d278826..bc1e2af566c3 100644
--- a/drivers/usb/gadget/function/uvc_configfs.c
+++ b/drivers/usb/gadget/function/uvc_configfs.c
@@ -9,9 +9,16 @@
  *
  * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
  */
+
+#include <linux/sort.h>
+
 #include "u_uvc.h"
 #include "uvc_configfs.h"
 
+/* -----------------------------------------------------------------------------
+ * Global Utility Structures and Macros
+ */
+
 #define UVCG_STREAMING_CONTROL_SIZE	1
 
 #define UVC_ATTR(prefix, cname, aname) \
@@ -31,13 +38,93 @@ static struct configfs_attribute prefix##attr_##cname = { \
 	.show		= prefix##cname##_show,				\
 }
 
+#define le8_to_cpu(x)	(x)
+#define cpu_to_le8(x)	(x)
+
+static int uvcg_config_compare_u32(const void *l, const void *r)
+{
+	u32 li = *(const u32 *)l;
+	u32 ri = *(const u32 *)r;
+
+	return li < ri ? -1 : li == ri ? 0 : 1;
+}
+
 static inline struct f_uvc_opts *to_f_uvc_opts(struct config_item *item)
 {
 	return container_of(to_config_group(item), struct f_uvc_opts,
 			    func_inst.group);
 }
 
-/* control/header/<NAME> */
+struct uvcg_config_group_type {
+	struct config_item_type type;
+	const char *name;
+	const struct uvcg_config_group_type **children;
+	int (*create_children)(struct config_group *group);
+};
+
+static void uvcg_config_item_release(struct config_item *item)
+{
+	struct config_group *group = to_config_group(item);
+
+	kfree(group);
+}
+
+static struct configfs_item_operations uvcg_config_item_ops = {
+	.release	= uvcg_config_item_release,
+};
+
+static int uvcg_config_create_group(struct config_group *parent,
+				    const struct uvcg_config_group_type *type);
+
+static int uvcg_config_create_children(struct config_group *group,
+				const struct uvcg_config_group_type *type)
+{
+	const struct uvcg_config_group_type **child;
+	int ret;
+
+	if (type->create_children)
+		return type->create_children(group);
+
+	for (child = type->children; child && *child; ++child) {
+		ret = uvcg_config_create_group(group, *child);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int uvcg_config_create_group(struct config_group *parent,
+				    const struct uvcg_config_group_type *type)
+{
+	struct config_group *group;
+
+	group = kzalloc(sizeof(*group), GFP_KERNEL);
+	if (!group)
+		return -ENOMEM;
+
+	config_group_init_type_name(group, type->name, &type->type);
+	configfs_add_default_group(group, parent);
+
+	return uvcg_config_create_children(group, type);
+}
+
+static void uvcg_config_remove_children(struct config_group *group)
+{
+	struct config_group *child, *n;
+
+	list_for_each_entry_safe(child, n, &group->default_groups, group_entry) {
+		list_del(&child->group_entry);
+		uvcg_config_remove_children(child);
+		config_item_put(&child->cg_item);
+	}
+}
+
+/* -----------------------------------------------------------------------------
+ * control/header/<NAME>
+ * control/header
+ */
+
 DECLARE_UVC_HEADER_DESCRIPTOR(1);
 
 struct uvcg_control_header {
@@ -51,9 +138,9 @@ static struct uvcg_control_header *to_uvcg_control_header(struct config_item *it
 	return container_of(item, struct uvcg_control_header, item);
 }
 
-#define UVCG_CTRL_HDR_ATTR(cname, aname, conv, str2u, uxx, vnoc, limit)	\
+#define UVCG_CTRL_HDR_ATTR(cname, aname, bits, limit)			\
 static ssize_t uvcg_control_header_##cname##_show(			\
-	struct config_item *item, char *page)			\
+	struct config_item *item, char *page)				\
 {									\
 	struct uvcg_control_header *ch = to_uvcg_control_header(item);	\
 	struct f_uvc_opts *opts;					\
@@ -67,7 +154,7 @@ static ssize_t uvcg_control_header_##cname##_show(			\
 	opts = to_f_uvc_opts(opts_item);				\
 									\
 	mutex_lock(&opts->lock);					\
-	result = sprintf(page, "%d\n", conv(ch->desc.aname));		\
+	result = sprintf(page, "%u\n", le##bits##_to_cpu(ch->desc.aname));\
 	mutex_unlock(&opts->lock);					\
 									\
 	mutex_unlock(su_mutex);						\
@@ -83,7 +170,7 @@ uvcg_control_header_##cname##_store(struct config_item *item,		\
 	struct config_item *opts_item;					\
 	struct mutex *su_mutex = &ch->item.ci_group->cg_subsys->su_mutex;\
 	int ret;							\
-	uxx num;							\
+	u##bits num;							\
 									\
 	mutex_lock(su_mutex); /* for navigating configfs hierarchy */	\
 									\
@@ -96,7 +183,7 @@ uvcg_control_header_##cname##_store(struct config_item *item,		\
 		goto end;						\
 	}								\
 									\
-	ret = str2u(page, 0, &num);					\
+	ret = kstrtou##bits(page, 0, &num);				\
 	if (ret)							\
 		goto end;						\
 									\
@@ -104,7 +191,7 @@ uvcg_control_header_##cname##_store(struct config_item *item,		\
 		ret = -EINVAL;						\
 		goto end;						\
 	}								\
-	ch->desc.aname = vnoc(num);					\
+	ch->desc.aname = cpu_to_le##bits(num);				\
 	ret = len;							\
 end:									\
 	mutex_unlock(&opts->lock);					\
@@ -114,11 +201,9 @@ end:									\
 									\
 UVC_ATTR(uvcg_control_header_, cname, aname)
 
-UVCG_CTRL_HDR_ATTR(bcd_uvc, bcdUVC, le16_to_cpu, kstrtou16, u16, cpu_to_le16,
-		   0xffff);
+UVCG_CTRL_HDR_ATTR(bcd_uvc, bcdUVC, 16, 0xffff);
 
-UVCG_CTRL_HDR_ATTR(dw_clock_frequency, dwClockFrequency, le32_to_cpu, kstrtou32,
-		   u32, cpu_to_le32, 0x7fffffff);
+UVCG_CTRL_HDR_ATTR(dw_clock_frequency, dwClockFrequency, 32, 0x7fffffff);
 
 #undef UVCG_CTRL_HDR_ATTR
 
@@ -129,6 +214,7 @@ static struct configfs_attribute *uvcg_control_header_attrs[] = {
 };
 
 static const struct config_item_type uvcg_control_header_type = {
+	.ct_item_ops	= &uvcg_config_item_ops,
 	.ct_attrs	= uvcg_control_header_attrs,
 	.ct_owner	= THIS_MODULE,
 };
@@ -153,60 +239,42 @@ static struct config_item *uvcg_control_header_make(struct config_group *group,
 	return &h->item;
 }
 
-static void uvcg_control_header_drop(struct config_group *group,
-			      struct config_item *item)
-{
-	struct uvcg_control_header *h = to_uvcg_control_header(item);
-
-	kfree(h);
-}
-
-/* control/header */
-static struct uvcg_control_header_grp {
-	struct config_group	group;
-} uvcg_control_header_grp;
-
 static struct configfs_group_operations uvcg_control_header_grp_ops = {
 	.make_item		= uvcg_control_header_make,
-	.drop_item		= uvcg_control_header_drop,
 };
 
-static const struct config_item_type uvcg_control_header_grp_type = {
-	.ct_group_ops	= &uvcg_control_header_grp_ops,
-	.ct_owner	= THIS_MODULE,
+static const struct uvcg_config_group_type uvcg_control_header_grp_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_group_ops	= &uvcg_control_header_grp_ops,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "header",
 };
 
-/* control/processing/default */
-static struct uvcg_default_processing {
-	struct config_group	group;
-} uvcg_default_processing;
-
-static inline struct uvcg_default_processing
-*to_uvcg_default_processing(struct config_item *item)
-{
-	return container_of(to_config_group(item),
-			    struct uvcg_default_processing, group);
-}
+/* -----------------------------------------------------------------------------
+ * control/processing/default
+ */
 
-#define UVCG_DEFAULT_PROCESSING_ATTR(cname, aname, conv)		\
+#define UVCG_DEFAULT_PROCESSING_ATTR(cname, aname, bits)		\
 static ssize_t uvcg_default_processing_##cname##_show(			\
 	struct config_item *item, char *page)				\
 {									\
-	struct uvcg_default_processing *dp = to_uvcg_default_processing(item); \
+	struct config_group *group = to_config_group(item);		\
 	struct f_uvc_opts *opts;					\
 	struct config_item *opts_item;					\
-	struct mutex *su_mutex = &dp->group.cg_subsys->su_mutex;	\
+	struct mutex *su_mutex = &group->cg_subsys->su_mutex;		\
 	struct uvc_processing_unit_descriptor *pd;			\
 	int result;							\
 									\
 	mutex_lock(su_mutex); /* for navigating configfs hierarchy */	\
 									\
-	opts_item = dp->group.cg_item.ci_parent->ci_parent->ci_parent;	\
+	opts_item = group->cg_item.ci_parent->ci_parent->ci_parent;	\
 	opts = to_f_uvc_opts(opts_item);				\
 	pd = &opts->uvc_processing;					\
 									\
 	mutex_lock(&opts->lock);					\
-	result = sprintf(page, "%d\n", conv(pd->aname));		\
+	result = sprintf(page, "%u\n", le##bits##_to_cpu(pd->aname));	\
 	mutex_unlock(&opts->lock);					\
 									\
 	mutex_unlock(su_mutex);						\
@@ -215,37 +283,33 @@ static ssize_t uvcg_default_processing_##cname##_show(			\
 									\
 UVC_ATTR_RO(uvcg_default_processing_, cname, aname)
 
-#define identity_conv(x) (x)
-
-UVCG_DEFAULT_PROCESSING_ATTR(b_unit_id, bUnitID, identity_conv);
-UVCG_DEFAULT_PROCESSING_ATTR(b_source_id, bSourceID, identity_conv);
-UVCG_DEFAULT_PROCESSING_ATTR(w_max_multiplier, wMaxMultiplier, le16_to_cpu);
-UVCG_DEFAULT_PROCESSING_ATTR(i_processing, iProcessing, identity_conv);
-
-#undef identity_conv
+UVCG_DEFAULT_PROCESSING_ATTR(b_unit_id, bUnitID, 8);
+UVCG_DEFAULT_PROCESSING_ATTR(b_source_id, bSourceID, 8);
+UVCG_DEFAULT_PROCESSING_ATTR(w_max_multiplier, wMaxMultiplier, 16);
+UVCG_DEFAULT_PROCESSING_ATTR(i_processing, iProcessing, 8);
 
 #undef UVCG_DEFAULT_PROCESSING_ATTR
 
 static ssize_t uvcg_default_processing_bm_controls_show(
 	struct config_item *item, char *page)
 {
-	struct uvcg_default_processing *dp = to_uvcg_default_processing(item);
+	struct config_group *group = to_config_group(item);
 	struct f_uvc_opts *opts;
 	struct config_item *opts_item;
-	struct mutex *su_mutex = &dp->group.cg_subsys->su_mutex;
+	struct mutex *su_mutex = &group->cg_subsys->su_mutex;
 	struct uvc_processing_unit_descriptor *pd;
 	int result, i;
 	char *pg = page;
 
 	mutex_lock(su_mutex); /* for navigating configfs hierarchy */
 
-	opts_item = dp->group.cg_item.ci_parent->ci_parent->ci_parent;
+	opts_item = group->cg_item.ci_parent->ci_parent->ci_parent;
 	opts = to_f_uvc_opts(opts_item);
 	pd = &opts->uvc_processing;
 
 	mutex_lock(&opts->lock);
 	for (result = 0, i = 0; i < pd->bControlSize; ++i) {
-		result += sprintf(pg, "%d\n", pd->bmControls[i]);
+		result += sprintf(pg, "%u\n", pd->bmControls[i]);
 		pg = page + result;
 	}
 	mutex_unlock(&opts->lock);
@@ -266,54 +330,55 @@ static struct configfs_attribute *uvcg_default_processing_attrs[] = {
 	NULL,
 };
 
-static const struct config_item_type uvcg_default_processing_type = {
-	.ct_attrs	= uvcg_default_processing_attrs,
-	.ct_owner	= THIS_MODULE,
+static const struct uvcg_config_group_type uvcg_default_processing_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_attrs	= uvcg_default_processing_attrs,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "default",
 };
 
-/* struct uvcg_processing {}; */
-
-/* control/processing */
-static struct uvcg_processing_grp {
-	struct config_group	group;
-} uvcg_processing_grp;
+/* -----------------------------------------------------------------------------
+ * control/processing
+ */
 
-static const struct config_item_type uvcg_processing_grp_type = {
-	.ct_owner = THIS_MODULE,
+static const struct uvcg_config_group_type uvcg_processing_grp_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "processing",
+	.children = (const struct uvcg_config_group_type*[]) {
+		&uvcg_default_processing_type,
+		NULL,
+	},
 };
 
-/* control/terminal/camera/default */
-static struct uvcg_default_camera {
-	struct config_group	group;
-} uvcg_default_camera;
-
-static inline struct uvcg_default_camera
-*to_uvcg_default_camera(struct config_item *item)
-{
-	return container_of(to_config_group(item),
-			    struct uvcg_default_camera, group);
-}
+/* -----------------------------------------------------------------------------
+ * control/terminal/camera/default
+ */
 
-#define UVCG_DEFAULT_CAMERA_ATTR(cname, aname, conv)			\
+#define UVCG_DEFAULT_CAMERA_ATTR(cname, aname, bits)			\
 static ssize_t uvcg_default_camera_##cname##_show(			\
 	struct config_item *item, char *page)				\
 {									\
-	struct uvcg_default_camera *dc = to_uvcg_default_camera(item);	\
+	struct config_group *group = to_config_group(item);		\
 	struct f_uvc_opts *opts;					\
 	struct config_item *opts_item;					\
-	struct mutex *su_mutex = &dc->group.cg_subsys->su_mutex;	\
+	struct mutex *su_mutex = &group->cg_subsys->su_mutex;		\
 	struct uvc_camera_terminal_descriptor *cd;			\
 	int result;							\
 									\
 	mutex_lock(su_mutex); /* for navigating configfs hierarchy */	\
 									\
-	opts_item = dc->group.cg_item.ci_parent->ci_parent->ci_parent->	\
+	opts_item = group->cg_item.ci_parent->ci_parent->ci_parent->	\
 			ci_parent;					\
 	opts = to_f_uvc_opts(opts_item);				\
 	cd = &opts->uvc_camera_terminal;				\
 									\
 	mutex_lock(&opts->lock);					\
-	result = sprintf(page, "%d\n", conv(cd->aname));		\
+	result = sprintf(page, "%u\n", le##bits##_to_cpu(cd->aname));	\
 	mutex_unlock(&opts->lock);					\
 									\
 	mutex_unlock(su_mutex);						\
@@ -323,44 +388,40 @@ static ssize_t uvcg_default_camera_##cname##_show(			\
 									\
 UVC_ATTR_RO(uvcg_default_camera_, cname, aname)
 
-#define identity_conv(x) (x)
-
-UVCG_DEFAULT_CAMERA_ATTR(b_terminal_id, bTerminalID, identity_conv);
-UVCG_DEFAULT_CAMERA_ATTR(w_terminal_type, wTerminalType, le16_to_cpu);
-UVCG_DEFAULT_CAMERA_ATTR(b_assoc_terminal, bAssocTerminal, identity_conv);
-UVCG_DEFAULT_CAMERA_ATTR(i_terminal, iTerminal, identity_conv);
+UVCG_DEFAULT_CAMERA_ATTR(b_terminal_id, bTerminalID, 8);
+UVCG_DEFAULT_CAMERA_ATTR(w_terminal_type, wTerminalType, 16);
+UVCG_DEFAULT_CAMERA_ATTR(b_assoc_terminal, bAssocTerminal, 8);
+UVCG_DEFAULT_CAMERA_ATTR(i_terminal, iTerminal, 8);
 UVCG_DEFAULT_CAMERA_ATTR(w_objective_focal_length_min, wObjectiveFocalLengthMin,
-			 le16_to_cpu);
+			 16);
 UVCG_DEFAULT_CAMERA_ATTR(w_objective_focal_length_max, wObjectiveFocalLengthMax,
-			 le16_to_cpu);
+			 16);
 UVCG_DEFAULT_CAMERA_ATTR(w_ocular_focal_length, wOcularFocalLength,
-			 le16_to_cpu);
-
-#undef identity_conv
+			 16);
 
 #undef UVCG_DEFAULT_CAMERA_ATTR
 
 static ssize_t uvcg_default_camera_bm_controls_show(
 	struct config_item *item, char *page)
 {
-	struct uvcg_default_camera *dc = to_uvcg_default_camera(item);
+	struct config_group *group = to_config_group(item);
 	struct f_uvc_opts *opts;
 	struct config_item *opts_item;
-	struct mutex *su_mutex = &dc->group.cg_subsys->su_mutex;
+	struct mutex *su_mutex = &group->cg_subsys->su_mutex;
 	struct uvc_camera_terminal_descriptor *cd;
 	int result, i;
 	char *pg = page;
 
 	mutex_lock(su_mutex); /* for navigating configfs hierarchy */
 
-	opts_item = dc->group.cg_item.ci_parent->ci_parent->ci_parent->
+	opts_item = group->cg_item.ci_parent->ci_parent->ci_parent->
 			ci_parent;
 	opts = to_f_uvc_opts(opts_item);
 	cd = &opts->uvc_camera_terminal;
 
 	mutex_lock(&opts->lock);
 	for (result = 0, i = 0; i < cd->bControlSize; ++i) {
-		result += sprintf(pg, "%d\n", cd->bmControls[i]);
+		result += sprintf(pg, "%u\n", cd->bmControls[i]);
 		pg = page + result;
 	}
 	mutex_unlock(&opts->lock);
@@ -383,54 +444,55 @@ static struct configfs_attribute *uvcg_default_camera_attrs[] = {
 	NULL,
 };
 
-static const struct config_item_type uvcg_default_camera_type = {
-	.ct_attrs	= uvcg_default_camera_attrs,
-	.ct_owner	= THIS_MODULE,
+static const struct uvcg_config_group_type uvcg_default_camera_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_attrs	= uvcg_default_camera_attrs,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "default",
 };
 
-/* struct uvcg_camera {}; */
-
-/* control/terminal/camera */
-static struct uvcg_camera_grp {
-	struct config_group	group;
-} uvcg_camera_grp;
+/* -----------------------------------------------------------------------------
+ * control/terminal/camera
+ */
 
-static const struct config_item_type uvcg_camera_grp_type = {
-	.ct_owner = THIS_MODULE,
+static const struct uvcg_config_group_type uvcg_camera_grp_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "camera",
+	.children = (const struct uvcg_config_group_type*[]) {
+		&uvcg_default_camera_type,
+		NULL,
+	},
 };
 
-/* control/terminal/output/default */
-static struct uvcg_default_output {
-	struct config_group	group;
-} uvcg_default_output;
-
-static inline struct uvcg_default_output
-*to_uvcg_default_output(struct config_item *item)
-{
-	return container_of(to_config_group(item),
-			    struct uvcg_default_output, group);
-}
+/* -----------------------------------------------------------------------------
+ * control/terminal/output/default
+ */
 
-#define UVCG_DEFAULT_OUTPUT_ATTR(cname, aname, conv)			\
+#define UVCG_DEFAULT_OUTPUT_ATTR(cname, aname, bits)			\
 static ssize_t uvcg_default_output_##cname##_show(			\
-	struct config_item *item, char *page)			\
+	struct config_item *item, char *page)				\
 {									\
-	struct uvcg_default_output *dout = to_uvcg_default_output(item); \
+	struct config_group *group = to_config_group(item);		\
 	struct f_uvc_opts *opts;					\
 	struct config_item *opts_item;					\
-	struct mutex *su_mutex = &dout->group.cg_subsys->su_mutex;	\
+	struct mutex *su_mutex = &group->cg_subsys->su_mutex;		\
 	struct uvc_output_terminal_descriptor *cd;			\
 	int result;							\
 									\
 	mutex_lock(su_mutex); /* for navigating configfs hierarchy */	\
 									\
-	opts_item = dout->group.cg_item.ci_parent->ci_parent->		\
+	opts_item = group->cg_item.ci_parent->ci_parent->		\
 			ci_parent->ci_parent;				\
 	opts = to_f_uvc_opts(opts_item);				\
 	cd = &opts->uvc_output_terminal;				\
 									\
 	mutex_lock(&opts->lock);					\
-	result = sprintf(page, "%d\n", conv(cd->aname));		\
+	result = sprintf(page, "%u\n", le##bits##_to_cpu(cd->aname));	\
 	mutex_unlock(&opts->lock);					\
 									\
 	mutex_unlock(su_mutex);						\
@@ -440,15 +502,11 @@ static ssize_t uvcg_default_output_##cname##_show(			\
 									\
 UVC_ATTR_RO(uvcg_default_output_, cname, aname)
 
-#define identity_conv(x) (x)
-
-UVCG_DEFAULT_OUTPUT_ATTR(b_terminal_id, bTerminalID, identity_conv);
-UVCG_DEFAULT_OUTPUT_ATTR(w_terminal_type, wTerminalType, le16_to_cpu);
-UVCG_DEFAULT_OUTPUT_ATTR(b_assoc_terminal, bAssocTerminal, identity_conv);
-UVCG_DEFAULT_OUTPUT_ATTR(b_source_id, bSourceID, identity_conv);
-UVCG_DEFAULT_OUTPUT_ATTR(i_terminal, iTerminal, identity_conv);
-
-#undef identity_conv
+UVCG_DEFAULT_OUTPUT_ATTR(b_terminal_id, bTerminalID, 8);
+UVCG_DEFAULT_OUTPUT_ATTR(w_terminal_type, wTerminalType, 16);
+UVCG_DEFAULT_OUTPUT_ATTR(b_assoc_terminal, bAssocTerminal, 8);
+UVCG_DEFAULT_OUTPUT_ATTR(b_source_id, bSourceID, 8);
+UVCG_DEFAULT_OUTPUT_ATTR(i_terminal, iTerminal, 8);
 
 #undef UVCG_DEFAULT_OUTPUT_ATTR
 
@@ -461,47 +519,68 @@ static struct configfs_attribute *uvcg_default_output_attrs[] = {
 	NULL,
 };
 
-static const struct config_item_type uvcg_default_output_type = {
-	.ct_attrs	= uvcg_default_output_attrs,
-	.ct_owner	= THIS_MODULE,
+static const struct uvcg_config_group_type uvcg_default_output_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_attrs	= uvcg_default_output_attrs,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "default",
 };
 
-/* struct uvcg_output {}; */
-
-/* control/terminal/output */
-static struct uvcg_output_grp {
-	struct config_group	group;
-} uvcg_output_grp;
+/* -----------------------------------------------------------------------------
+ * control/terminal/output
+ */
 
-static const struct config_item_type uvcg_output_grp_type = {
-	.ct_owner = THIS_MODULE,
+static const struct uvcg_config_group_type uvcg_output_grp_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "output",
+	.children = (const struct uvcg_config_group_type*[]) {
+		&uvcg_default_output_type,
+		NULL,
+	},
 };
 
-/* control/terminal */
-static struct uvcg_terminal_grp {
-	struct config_group	group;
-} uvcg_terminal_grp;
+/* -----------------------------------------------------------------------------
+ * control/terminal
+ */
 
-static const struct config_item_type uvcg_terminal_grp_type = {
-	.ct_owner = THIS_MODULE,
+static const struct uvcg_config_group_type uvcg_terminal_grp_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "terminal",
+	.children = (const struct uvcg_config_group_type*[]) {
+		&uvcg_camera_grp_type,
+		&uvcg_output_grp_type,
+		NULL,
+	},
 };
 
-/* control/class/{fs} */
-static struct uvcg_control_class {
-	struct config_group	group;
-} uvcg_control_class_fs, uvcg_control_class_ss;
+/* -----------------------------------------------------------------------------
+ * control/class/{fs|ss}
+ */
 
+struct uvcg_control_class_group {
+	struct config_group group;
+	const char *name;
+};
 
 static inline struct uvc_descriptor_header
 **uvcg_get_ctl_class_arr(struct config_item *i, struct f_uvc_opts *o)
 {
-	struct uvcg_control_class *cl = container_of(to_config_group(i),
-		struct uvcg_control_class, group);
+	struct uvcg_control_class_group *group =
+		container_of(i, struct uvcg_control_class_group,
+			     group.cg_item);
 
-	if (cl == &uvcg_control_class_fs)
+	if (!strcmp(group->name, "fs"))
 		return o->uvc_fs_control_cls;
 
-	if (cl == &uvcg_control_class_ss)
+	if (!strcmp(group->name, "ss"))
 		return o->uvc_ss_control_cls;
 
 	return NULL;
@@ -544,6 +623,7 @@ static int uvcg_control_class_allow_link(struct config_item *src,
 unlock:
 	mutex_unlock(&opts->lock);
 out:
+	config_item_put(header);
 	mutex_unlock(su_mutex);
 	return ret;
 }
@@ -579,10 +659,12 @@ static void uvcg_control_class_drop_link(struct config_item *src,
 unlock:
 	mutex_unlock(&opts->lock);
 out:
+	config_item_put(header);
 	mutex_unlock(su_mutex);
 }
 
 static struct configfs_item_operations uvcg_control_class_item_ops = {
+	.release	= uvcg_config_item_release,
 	.allow_link	= uvcg_control_class_allow_link,
 	.drop_link	= uvcg_control_class_drop_link,
 };
@@ -592,37 +674,99 @@ static const struct config_item_type uvcg_control_class_type = {
 	.ct_owner	= THIS_MODULE,
 };
 
-/* control/class */
-static struct uvcg_control_class_grp {
-	struct config_group	group;
-} uvcg_control_class_grp;
+/* -----------------------------------------------------------------------------
+ * control/class
+ */
+
+static int uvcg_control_class_create_children(struct config_group *parent)
+{
+	static const char * const names[] = { "fs", "ss" };
+	unsigned int i;
 
-static const struct config_item_type uvcg_control_class_grp_type = {
-	.ct_owner = THIS_MODULE,
+	for (i = 0; i < ARRAY_SIZE(names); ++i) {
+		struct uvcg_control_class_group *group;
+
+		group = kzalloc(sizeof(*group), GFP_KERNEL);
+		if (!group)
+			return -ENOMEM;
+
+		group->name = names[i];
+
+		config_group_init_type_name(&group->group, group->name,
+					    &uvcg_control_class_type);
+		configfs_add_default_group(&group->group, parent);
+	}
+
+	return 0;
+}
+
+static const struct uvcg_config_group_type uvcg_control_class_grp_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "class",
+	.create_children = uvcg_control_class_create_children,
 };
 
-/* control */
-static struct uvcg_control_grp {
-	struct config_group	group;
-} uvcg_control_grp;
+/* -----------------------------------------------------------------------------
+ * control
+ */
+
+static ssize_t uvcg_default_control_b_interface_number_show(
+	struct config_item *item, char *page)
+{
+	struct config_group *group = to_config_group(item);
+	struct mutex *su_mutex = &group->cg_subsys->su_mutex;
+	struct config_item *opts_item;
+	struct f_uvc_opts *opts;
+	int result = 0;
+
+	mutex_lock(su_mutex); /* for navigating configfs hierarchy */
+
+	opts_item = item->ci_parent;
+	opts = to_f_uvc_opts(opts_item);
+
+	mutex_lock(&opts->lock);
+	result += sprintf(page, "%u\n", opts->control_interface);
+	mutex_unlock(&opts->lock);
+
+	mutex_unlock(su_mutex);
+
+	return result;
+}
+
+UVC_ATTR_RO(uvcg_default_control_, b_interface_number, bInterfaceNumber);
 
-static const struct config_item_type uvcg_control_grp_type = {
-	.ct_owner = THIS_MODULE,
+static struct configfs_attribute *uvcg_default_control_attrs[] = {
+	&uvcg_default_control_attr_b_interface_number,
+	NULL,
 };
 
-/* streaming/uncompressed */
-static struct uvcg_uncompressed_grp {
-	struct config_group	group;
-} uvcg_uncompressed_grp;
+static const struct uvcg_config_group_type uvcg_control_grp_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_attrs	= uvcg_default_control_attrs,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "control",
+	.children = (const struct uvcg_config_group_type*[]) {
+		&uvcg_control_header_grp_type,
+		&uvcg_processing_grp_type,
+		&uvcg_terminal_grp_type,
+		&uvcg_control_class_grp_type,
+		NULL,
+	},
+};
 
-/* streaming/mjpeg */
-static struct uvcg_mjpeg_grp {
-	struct config_group	group;
-} uvcg_mjpeg_grp;
+/* -----------------------------------------------------------------------------
+ * streaming/uncompressed
+ * streaming/mjpeg
+ */
 
-static struct config_item *fmt_parent[] = {
-	&uvcg_uncompressed_grp.group.cg_item,
-	&uvcg_mjpeg_grp.group.cg_item,
+static const char * const uvcg_format_names[] = {
+	"uncompressed",
+	"mjpeg",
 };
 
 enum uvcg_format_type {
@@ -706,7 +850,11 @@ struct uvcg_format_ptr {
 	struct list_head	entry;
 };
 
-/* streaming/header/<NAME> */
+/* -----------------------------------------------------------------------------
+ * streaming/header/<NAME>
+ * streaming/header
+ */
+
 struct uvcg_streaming_header {
 	struct config_item				item;
 	struct uvc_input_header_descriptor		desc;
@@ -720,6 +868,8 @@ static struct uvcg_streaming_header *to_uvcg_streaming_header(struct config_item
 	return container_of(item, struct uvcg_streaming_header, item);
 }
 
+static void uvcg_format_set_indices(struct config_group *fmt);
+
 static int uvcg_streaming_header_allow_link(struct config_item *src,
 					    struct config_item *target)
 {
@@ -744,10 +894,22 @@ static int uvcg_streaming_header_allow_link(struct config_item *src,
 		goto out;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(fmt_parent); ++i)
-		if (target->ci_parent == fmt_parent[i])
+	/*
+	 * Linking is only allowed to direct children of the format nodes
+	 * (streaming/uncompressed or streaming/mjpeg nodes). First check that
+	 * the grand-parent of the target matches the grand-parent of the source
+	 * (the streaming node), and then verify that the target parent is a
+	 * format node.
+	 */
+	if (src->ci_parent->ci_parent != target->ci_parent->ci_parent)
+		goto out;
+
+	for (i = 0; i < ARRAY_SIZE(uvcg_format_names); ++i) {
+		if (!strcmp(target->ci_parent->ci_name, uvcg_format_names[i]))
 			break;
-	if (i == ARRAY_SIZE(fmt_parent))
+	}
+
+	if (i == ARRAY_SIZE(uvcg_format_names))
 		goto out;
 
 	target_fmt = container_of(to_config_group(target), struct uvcg_format,
@@ -755,6 +917,8 @@ static int uvcg_streaming_header_allow_link(struct config_item *src,
 	if (!target_fmt)
 		goto out;
 
+	uvcg_format_set_indices(to_config_group(target));
+
 	format_ptr = kzalloc(sizeof(*format_ptr), GFP_KERNEL);
 	if (!format_ptr) {
 		ret = -ENOMEM;
@@ -764,6 +928,7 @@ static int uvcg_streaming_header_allow_link(struct config_item *src,
 	format_ptr->fmt = target_fmt;
 	list_add_tail(&format_ptr->entry, &src_hdr->formats);
 	++src_hdr->num_fmt;
+	++target_fmt->linked;
 
 out:
 	mutex_unlock(&opts->lock);
@@ -801,19 +966,22 @@ static void uvcg_streaming_header_drop_link(struct config_item *src,
 			break;
 		}
 
+	--target_fmt->linked;
+
 out:
 	mutex_unlock(&opts->lock);
 	mutex_unlock(su_mutex);
 }
 
 static struct configfs_item_operations uvcg_streaming_header_item_ops = {
-	.allow_link		= uvcg_streaming_header_allow_link,
-	.drop_link		= uvcg_streaming_header_drop_link,
+	.release	= uvcg_config_item_release,
+	.allow_link	= uvcg_streaming_header_allow_link,
+	.drop_link	= uvcg_streaming_header_drop_link,
 };
 
-#define UVCG_STREAMING_HEADER_ATTR(cname, aname, conv)			\
+#define UVCG_STREAMING_HEADER_ATTR(cname, aname, bits)			\
 static ssize_t uvcg_streaming_header_##cname##_show(			\
-	struct config_item *item, char *page)			\
+	struct config_item *item, char *page)				\
 {									\
 	struct uvcg_streaming_header *sh = to_uvcg_streaming_header(item); \
 	struct f_uvc_opts *opts;					\
@@ -827,7 +995,7 @@ static ssize_t uvcg_streaming_header_##cname##_show(			\
 	opts = to_f_uvc_opts(opts_item);				\
 									\
 	mutex_lock(&opts->lock);					\
-	result = sprintf(page, "%d\n", conv(sh->desc.aname));		\
+	result = sprintf(page, "%u\n", le##bits##_to_cpu(sh->desc.aname));\
 	mutex_unlock(&opts->lock);					\
 									\
 	mutex_unlock(su_mutex);						\
@@ -836,16 +1004,11 @@ static ssize_t uvcg_streaming_header_##cname##_show(			\
 									\
 UVC_ATTR_RO(uvcg_streaming_header_, cname, aname)
 
-#define identity_conv(x) (x)
-
-UVCG_STREAMING_HEADER_ATTR(bm_info, bmInfo, identity_conv);
-UVCG_STREAMING_HEADER_ATTR(b_terminal_link, bTerminalLink, identity_conv);
-UVCG_STREAMING_HEADER_ATTR(b_still_capture_method, bStillCaptureMethod,
-			   identity_conv);
-UVCG_STREAMING_HEADER_ATTR(b_trigger_support, bTriggerSupport, identity_conv);
-UVCG_STREAMING_HEADER_ATTR(b_trigger_usage, bTriggerUsage, identity_conv);
-
-#undef identity_conv
+UVCG_STREAMING_HEADER_ATTR(bm_info, bmInfo, 8);
+UVCG_STREAMING_HEADER_ATTR(b_terminal_link, bTerminalLink, 8);
+UVCG_STREAMING_HEADER_ATTR(b_still_capture_method, bStillCaptureMethod, 8);
+UVCG_STREAMING_HEADER_ATTR(b_trigger_support, bTriggerSupport, 8);
+UVCG_STREAMING_HEADER_ATTR(b_trigger_usage, bTriggerUsage, 8);
 
 #undef UVCG_STREAMING_HEADER_ATTR
 
@@ -884,31 +1047,26 @@ static struct config_item
 	return &h->item;
 }
 
-static void uvcg_streaming_header_drop(struct config_group *group,
-			      struct config_item *item)
-{
-	struct uvcg_streaming_header *h = to_uvcg_streaming_header(item);
-
-	kfree(h);
-}
-
-/* streaming/header */
-static struct uvcg_streaming_header_grp {
-	struct config_group	group;
-} uvcg_streaming_header_grp;
-
 static struct configfs_group_operations uvcg_streaming_header_grp_ops = {
 	.make_item		= uvcg_streaming_header_make,
-	.drop_item		= uvcg_streaming_header_drop,
 };
 
-static const struct config_item_type uvcg_streaming_header_grp_type = {
-	.ct_group_ops	= &uvcg_streaming_header_grp_ops,
-	.ct_owner	= THIS_MODULE,
+static const struct uvcg_config_group_type uvcg_streaming_header_grp_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_group_ops	= &uvcg_streaming_header_grp_ops,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "header",
 };
 
-/* streaming/<mode>/<format>/<NAME> */
+/* -----------------------------------------------------------------------------
+ * streaming/<mode>/<format>/<NAME>
+ */
+
 struct uvcg_frame {
+	struct config_item	item;
+	enum uvcg_format_type	fmt_type;
 	struct {
 		u8	b_length;
 		u8	b_descriptor_type;
@@ -924,8 +1082,6 @@ struct uvcg_frame {
 		u8	b_frame_interval_type;
 	} __attribute__((packed)) frame;
 	u32 *dw_frame_interval;
-	enum uvcg_format_type	fmt_type;
-	struct config_item	item;
 };
 
 static struct uvcg_frame *to_uvcg_frame(struct config_item *item)
@@ -933,7 +1089,7 @@ static struct uvcg_frame *to_uvcg_frame(struct config_item *item)
 	return container_of(item, struct uvcg_frame, item);
 }
 
-#define UVCG_FRAME_ATTR(cname, aname, to_cpu_endian, to_little_endian, bits) \
+#define UVCG_FRAME_ATTR(cname, aname, bits) \
 static ssize_t uvcg_frame_##cname##_show(struct config_item *item, char *page)\
 {									\
 	struct uvcg_frame *f = to_uvcg_frame(item);			\
@@ -948,7 +1104,7 @@ static ssize_t uvcg_frame_##cname##_show(struct config_item *item, char *page)\
 	opts = to_f_uvc_opts(opts_item);				\
 									\
 	mutex_lock(&opts->lock);					\
-	result = sprintf(page, "%d\n", to_cpu_endian(f->frame.cname));	\
+	result = sprintf(page, "%u\n", f->frame.cname);			\
 	mutex_unlock(&opts->lock);					\
 									\
 	mutex_unlock(su_mutex);						\
@@ -963,8 +1119,8 @@ static ssize_t  uvcg_frame_##cname##_store(struct config_item *item,	\
 	struct config_item *opts_item;					\
 	struct uvcg_format *fmt;					\
 	struct mutex *su_mutex = &f->item.ci_group->cg_subsys->su_mutex;\
+	typeof(f->frame.cname) num;					\
 	int ret;							\
-	u##bits num;							\
 									\
 	ret = kstrtou##bits(page, 0, &num);				\
 	if (ret)							\
@@ -982,7 +1138,7 @@ static ssize_t  uvcg_frame_##cname##_store(struct config_item *item,	\
 		goto end;						\
 	}								\
 									\
-	f->frame.cname = to_little_endian(num);				\
+	f->frame.cname = num;						\
 	ret = len;							\
 end:									\
 	mutex_unlock(&opts->lock);					\
@@ -992,20 +1148,48 @@ end:									\
 									\
 UVC_ATTR(uvcg_frame_, cname, aname);
 
-#define noop_conversion(x) (x)
+static ssize_t uvcg_frame_b_frame_index_show(struct config_item *item,
+					     char *page)
+{
+	struct uvcg_frame *f = to_uvcg_frame(item);
+	struct uvcg_format *fmt;
+	struct f_uvc_opts *opts;
+	struct config_item *opts_item;
+	struct config_item *fmt_item;
+	struct mutex *su_mutex = &f->item.ci_group->cg_subsys->su_mutex;
+	int result;
+
+	mutex_lock(su_mutex); /* for navigating configfs hierarchy */
+
+	fmt_item = f->item.ci_parent;
+	fmt = to_uvcg_format(fmt_item);
 
-UVCG_FRAME_ATTR(bm_capabilities, bmCapabilities, noop_conversion,
-		noop_conversion, 8);
-UVCG_FRAME_ATTR(w_width, wWidth, le16_to_cpu, cpu_to_le16, 16);
-UVCG_FRAME_ATTR(w_height, wHeight, le16_to_cpu, cpu_to_le16, 16);
-UVCG_FRAME_ATTR(dw_min_bit_rate, dwMinBitRate, le32_to_cpu, cpu_to_le32, 32);
-UVCG_FRAME_ATTR(dw_max_bit_rate, dwMaxBitRate, le32_to_cpu, cpu_to_le32, 32);
-UVCG_FRAME_ATTR(dw_max_video_frame_buffer_size, dwMaxVideoFrameBufferSize,
-		le32_to_cpu, cpu_to_le32, 32);
-UVCG_FRAME_ATTR(dw_default_frame_interval, dwDefaultFrameInterval,
-		le32_to_cpu, cpu_to_le32, 32);
+	if (!fmt->linked) {
+		result = -EBUSY;
+		goto out;
+	}
 
-#undef noop_conversion
+	opts_item = fmt_item->ci_parent->ci_parent->ci_parent;
+	opts = to_f_uvc_opts(opts_item);
+
+	mutex_lock(&opts->lock);
+	result = sprintf(page, "%u\n", f->frame.b_frame_index);
+	mutex_unlock(&opts->lock);
+
+out:
+	mutex_unlock(su_mutex);
+	return result;
+}
+
+UVC_ATTR_RO(uvcg_frame_, b_frame_index, bFrameIndex);
+
+UVCG_FRAME_ATTR(bm_capabilities, bmCapabilities, 8);
+UVCG_FRAME_ATTR(w_width, wWidth, 16);
+UVCG_FRAME_ATTR(w_height, wHeight, 16);
+UVCG_FRAME_ATTR(dw_min_bit_rate, dwMinBitRate, 32);
+UVCG_FRAME_ATTR(dw_max_bit_rate, dwMaxBitRate, 32);
+UVCG_FRAME_ATTR(dw_max_video_frame_buffer_size, dwMaxVideoFrameBufferSize, 32);
+UVCG_FRAME_ATTR(dw_default_frame_interval, dwDefaultFrameInterval, 32);
 
 #undef UVCG_FRAME_ATTR
 
@@ -1026,8 +1210,7 @@ static ssize_t uvcg_frame_dw_frame_interval_show(struct config_item *item,
 
 	mutex_lock(&opts->lock);
 	for (result = 0, i = 0; i < frm->frame.b_frame_interval_type; ++i) {
-		result += sprintf(pg, "%d\n",
-				  le32_to_cpu(frm->dw_frame_interval[i]));
+		result += sprintf(pg, "%u\n", frm->dw_frame_interval[i]);
 		pg = page + result;
 	}
 	mutex_unlock(&opts->lock);
@@ -1052,7 +1235,7 @@ static inline int __uvcg_fill_frm_intrv(char *buf, void *priv)
 		return ret;
 
 	interv = priv;
-	**interv = cpu_to_le32(num);
+	**interv = num;
 	++*interv;
 
 	return 0;
@@ -1129,6 +1312,8 @@ static ssize_t uvcg_frame_dw_frame_interval_store(struct config_item *item,
 	kfree(ch->dw_frame_interval);
 	ch->dw_frame_interval = frm_intrv;
 	ch->frame.b_frame_interval_type = n;
+	sort(ch->dw_frame_interval, n, sizeof(*ch->dw_frame_interval),
+	     uvcg_config_compare_u32, NULL);
 	ret = len;
 
 end:
@@ -1140,6 +1325,7 @@ end:
 UVC_ATTR(uvcg_frame_, dw_frame_interval, dwFrameInterval);
 
 static struct configfs_attribute *uvcg_frame_attrs[] = {
+	&uvcg_frame_attr_b_frame_index,
 	&uvcg_frame_attr_bm_capabilities,
 	&uvcg_frame_attr_w_width,
 	&uvcg_frame_attr_w_height,
@@ -1152,6 +1338,7 @@ static struct configfs_attribute *uvcg_frame_attrs[] = {
 };
 
 static const struct config_item_type uvcg_frame_type = {
+	.ct_item_ops	= &uvcg_config_item_ops,
 	.ct_attrs	= uvcg_frame_attrs,
 	.ct_owner	= THIS_MODULE,
 };
@@ -1170,12 +1357,12 @@ static struct config_item *uvcg_frame_make(struct config_group *group,
 
 	h->frame.b_descriptor_type		= USB_DT_CS_INTERFACE;
 	h->frame.b_frame_index			= 1;
-	h->frame.w_width			= cpu_to_le16(640);
-	h->frame.w_height			= cpu_to_le16(360);
-	h->frame.dw_min_bit_rate		= cpu_to_le32(18432000);
-	h->frame.dw_max_bit_rate		= cpu_to_le32(55296000);
-	h->frame.dw_max_video_frame_buffer_size	= cpu_to_le32(460800);
-	h->frame.dw_default_frame_interval	= cpu_to_le32(666666);
+	h->frame.w_width			= 640;
+	h->frame.w_height			= 360;
+	h->frame.dw_min_bit_rate		= 18432000;
+	h->frame.dw_max_bit_rate		= 55296000;
+	h->frame.dw_max_video_frame_buffer_size	= 460800;
+	h->frame.dw_default_frame_interval	= 666666;
 
 	opts_item = group->cg_item.ci_parent->ci_parent->ci_parent;
 	opts = to_f_uvc_opts(opts_item);
@@ -1203,7 +1390,6 @@ static struct config_item *uvcg_frame_make(struct config_group *group,
 
 static void uvcg_frame_drop(struct config_group *group, struct config_item *item)
 {
-	struct uvcg_frame *h = to_uvcg_frame(item);
 	struct uvcg_format *fmt;
 	struct f_uvc_opts *opts;
 	struct config_item *opts_item;
@@ -1214,11 +1400,31 @@ static void uvcg_frame_drop(struct config_group *group, struct config_item *item
 	mutex_lock(&opts->lock);
 	fmt = to_uvcg_format(&group->cg_item);
 	--fmt->num_frames;
-	kfree(h);
 	mutex_unlock(&opts->lock);
+
+	config_item_put(item);
+}
+
+static void uvcg_format_set_indices(struct config_group *fmt)
+{
+	struct config_item *ci;
+	unsigned int i = 1;
+
+	list_for_each_entry(ci, &fmt->cg_children, ci_entry) {
+		struct uvcg_frame *frm;
+
+		if (ci->ci_type != &uvcg_frame_type)
+			continue;
+
+		frm = to_uvcg_frame(ci);
+		frm->frame.b_frame_index = i++;
+	}
 }
 
-/* streaming/uncompressed/<NAME> */
+/* -----------------------------------------------------------------------------
+ * streaming/uncompressed/<NAME>
+ */
+
 struct uvcg_uncompressed {
 	struct uvcg_format		fmt;
 	struct uvc_format_uncompressed	desc;
@@ -1290,7 +1496,7 @@ end:
 
 UVC_ATTR(uvcg_uncompressed_, guid_format, guidFormat);
 
-#define UVCG_UNCOMPRESSED_ATTR_RO(cname, aname, conv)			\
+#define UVCG_UNCOMPRESSED_ATTR_RO(cname, aname, bits)			\
 static ssize_t uvcg_uncompressed_##cname##_show(			\
 	struct config_item *item, char *page)				\
 {									\
@@ -1306,7 +1512,7 @@ static ssize_t uvcg_uncompressed_##cname##_show(			\
 	opts = to_f_uvc_opts(opts_item);				\
 									\
 	mutex_lock(&opts->lock);					\
-	result = sprintf(page, "%d\n", conv(u->desc.aname));		\
+	result = sprintf(page, "%u\n", le##bits##_to_cpu(u->desc.aname));\
 	mutex_unlock(&opts->lock);					\
 									\
 	mutex_unlock(su_mutex);						\
@@ -1315,7 +1521,7 @@ static ssize_t uvcg_uncompressed_##cname##_show(			\
 									\
 UVC_ATTR_RO(uvcg_uncompressed_, cname, aname);
 
-#define UVCG_UNCOMPRESSED_ATTR(cname, aname, conv)			\
+#define UVCG_UNCOMPRESSED_ATTR(cname, aname, bits)			\
 static ssize_t uvcg_uncompressed_##cname##_show(			\
 	struct config_item *item, char *page)				\
 {									\
@@ -1331,7 +1537,7 @@ static ssize_t uvcg_uncompressed_##cname##_show(			\
 	opts = to_f_uvc_opts(opts_item);				\
 									\
 	mutex_lock(&opts->lock);					\
-	result = sprintf(page, "%d\n", conv(u->desc.aname));		\
+	result = sprintf(page, "%u\n", le##bits##_to_cpu(u->desc.aname));\
 	mutex_unlock(&opts->lock);					\
 									\
 	mutex_unlock(su_mutex);						\
@@ -1378,16 +1584,12 @@ end:									\
 									\
 UVC_ATTR(uvcg_uncompressed_, cname, aname);
 
-#define identity_conv(x) (x)
-
-UVCG_UNCOMPRESSED_ATTR(b_bits_per_pixel, bBitsPerPixel, identity_conv);
-UVCG_UNCOMPRESSED_ATTR(b_default_frame_index, bDefaultFrameIndex,
-		       identity_conv);
-UVCG_UNCOMPRESSED_ATTR_RO(b_aspect_ratio_x, bAspectRatioX, identity_conv);
-UVCG_UNCOMPRESSED_ATTR_RO(b_aspect_ratio_y, bAspectRatioY, identity_conv);
-UVCG_UNCOMPRESSED_ATTR_RO(bm_interface_flags, bmInterfaceFlags, identity_conv);
-
-#undef identity_conv
+UVCG_UNCOMPRESSED_ATTR_RO(b_format_index, bFormatIndex, 8);
+UVCG_UNCOMPRESSED_ATTR(b_bits_per_pixel, bBitsPerPixel, 8);
+UVCG_UNCOMPRESSED_ATTR(b_default_frame_index, bDefaultFrameIndex, 8);
+UVCG_UNCOMPRESSED_ATTR_RO(b_aspect_ratio_x, bAspectRatioX, 8);
+UVCG_UNCOMPRESSED_ATTR_RO(b_aspect_ratio_y, bAspectRatioY, 8);
+UVCG_UNCOMPRESSED_ATTR_RO(bm_interface_flags, bmInterfaceFlags, 8);
 
 #undef UVCG_UNCOMPRESSED_ATTR
 #undef UVCG_UNCOMPRESSED_ATTR_RO
@@ -1410,6 +1612,7 @@ uvcg_uncompressed_bma_controls_store(struct config_item *item,
 UVC_ATTR(uvcg_uncompressed_, bma_controls, bmaControls);
 
 static struct configfs_attribute *uvcg_uncompressed_attrs[] = {
+	&uvcg_uncompressed_attr_b_format_index,
 	&uvcg_uncompressed_attr_guid_format,
 	&uvcg_uncompressed_attr_b_bits_per_pixel,
 	&uvcg_uncompressed_attr_b_default_frame_index,
@@ -1421,6 +1624,7 @@ static struct configfs_attribute *uvcg_uncompressed_attrs[] = {
 };
 
 static const struct config_item_type uvcg_uncompressed_type = {
+	.ct_item_ops	= &uvcg_config_item_ops,
 	.ct_group_ops	= &uvcg_uncompressed_group_ops,
 	.ct_attrs	= uvcg_uncompressed_attrs,
 	.ct_owner	= THIS_MODULE,
@@ -1457,25 +1661,23 @@ static struct config_group *uvcg_uncompressed_make(struct config_group *group,
 	return &h->fmt.group;
 }
 
-static void uvcg_uncompressed_drop(struct config_group *group,
-			    struct config_item *item)
-{
-	struct uvcg_uncompressed *h = to_uvcg_uncompressed(item);
-
-	kfree(h);
-}
-
 static struct configfs_group_operations uvcg_uncompressed_grp_ops = {
 	.make_group		= uvcg_uncompressed_make,
-	.drop_item		= uvcg_uncompressed_drop,
 };
 
-static const struct config_item_type uvcg_uncompressed_grp_type = {
-	.ct_group_ops	= &uvcg_uncompressed_grp_ops,
-	.ct_owner	= THIS_MODULE,
+static const struct uvcg_config_group_type uvcg_uncompressed_grp_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_group_ops	= &uvcg_uncompressed_grp_ops,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "uncompressed",
 };
 
-/* streaming/mjpeg/<NAME> */
+/* -----------------------------------------------------------------------------
+ * streaming/mjpeg/<NAME>
+ */
+
 struct uvcg_mjpeg {
 	struct uvcg_format		fmt;
 	struct uvc_format_mjpeg		desc;
@@ -1493,7 +1695,7 @@ static struct configfs_group_operations uvcg_mjpeg_group_ops = {
 	.drop_item		= uvcg_frame_drop,
 };
 
-#define UVCG_MJPEG_ATTR_RO(cname, aname, conv)				\
+#define UVCG_MJPEG_ATTR_RO(cname, aname, bits)				\
 static ssize_t uvcg_mjpeg_##cname##_show(struct config_item *item, char *page)\
 {									\
 	struct uvcg_mjpeg *u = to_uvcg_mjpeg(item);			\
@@ -1508,7 +1710,7 @@ static ssize_t uvcg_mjpeg_##cname##_show(struct config_item *item, char *page)\
 	opts = to_f_uvc_opts(opts_item);				\
 									\
 	mutex_lock(&opts->lock);					\
-	result = sprintf(page, "%d\n", conv(u->desc.aname));		\
+	result = sprintf(page, "%u\n", le##bits##_to_cpu(u->desc.aname));\
 	mutex_unlock(&opts->lock);					\
 									\
 	mutex_unlock(su_mutex);						\
@@ -1517,7 +1719,7 @@ static ssize_t uvcg_mjpeg_##cname##_show(struct config_item *item, char *page)\
 									\
 UVC_ATTR_RO(uvcg_mjpeg_, cname, aname)
 
-#define UVCG_MJPEG_ATTR(cname, aname, conv)				\
+#define UVCG_MJPEG_ATTR(cname, aname, bits)				\
 static ssize_t uvcg_mjpeg_##cname##_show(struct config_item *item, char *page)\
 {									\
 	struct uvcg_mjpeg *u = to_uvcg_mjpeg(item);			\
@@ -1532,7 +1734,7 @@ static ssize_t uvcg_mjpeg_##cname##_show(struct config_item *item, char *page)\
 	opts = to_f_uvc_opts(opts_item);				\
 									\
 	mutex_lock(&opts->lock);					\
-	result = sprintf(page, "%d\n", conv(u->desc.aname));		\
+	result = sprintf(page, "%u\n", le##bits##_to_cpu(u->desc.aname));\
 	mutex_unlock(&opts->lock);					\
 									\
 	mutex_unlock(su_mutex);						\
@@ -1579,16 +1781,12 @@ end:									\
 									\
 UVC_ATTR(uvcg_mjpeg_, cname, aname)
 
-#define identity_conv(x) (x)
-
-UVCG_MJPEG_ATTR(b_default_frame_index, bDefaultFrameIndex,
-		       identity_conv);
-UVCG_MJPEG_ATTR_RO(bm_flags, bmFlags, identity_conv);
-UVCG_MJPEG_ATTR_RO(b_aspect_ratio_x, bAspectRatioX, identity_conv);
-UVCG_MJPEG_ATTR_RO(b_aspect_ratio_y, bAspectRatioY, identity_conv);
-UVCG_MJPEG_ATTR_RO(bm_interface_flags, bmInterfaceFlags, identity_conv);
-
-#undef identity_conv
+UVCG_MJPEG_ATTR_RO(b_format_index, bFormatIndex, 8);
+UVCG_MJPEG_ATTR(b_default_frame_index, bDefaultFrameIndex, 8);
+UVCG_MJPEG_ATTR_RO(bm_flags, bmFlags, 8);
+UVCG_MJPEG_ATTR_RO(b_aspect_ratio_x, bAspectRatioX, 8);
+UVCG_MJPEG_ATTR_RO(b_aspect_ratio_y, bAspectRatioY, 8);
+UVCG_MJPEG_ATTR_RO(bm_interface_flags, bmInterfaceFlags, 8);
 
 #undef UVCG_MJPEG_ATTR
 #undef UVCG_MJPEG_ATTR_RO
@@ -1611,6 +1809,7 @@ uvcg_mjpeg_bma_controls_store(struct config_item *item,
 UVC_ATTR(uvcg_mjpeg_, bma_controls, bmaControls);
 
 static struct configfs_attribute *uvcg_mjpeg_attrs[] = {
+	&uvcg_mjpeg_attr_b_format_index,
 	&uvcg_mjpeg_attr_b_default_frame_index,
 	&uvcg_mjpeg_attr_bm_flags,
 	&uvcg_mjpeg_attr_b_aspect_ratio_x,
@@ -1621,6 +1820,7 @@ static struct configfs_attribute *uvcg_mjpeg_attrs[] = {
 };
 
 static const struct config_item_type uvcg_mjpeg_type = {
+	.ct_item_ops	= &uvcg_config_item_ops,
 	.ct_group_ops	= &uvcg_mjpeg_group_ops,
 	.ct_attrs	= uvcg_mjpeg_attrs,
 	.ct_owner	= THIS_MODULE,
@@ -1651,56 +1851,42 @@ static struct config_group *uvcg_mjpeg_make(struct config_group *group,
 	return &h->fmt.group;
 }
 
-static void uvcg_mjpeg_drop(struct config_group *group,
-			    struct config_item *item)
-{
-	struct uvcg_mjpeg *h = to_uvcg_mjpeg(item);
-
-	kfree(h);
-}
-
 static struct configfs_group_operations uvcg_mjpeg_grp_ops = {
 	.make_group		= uvcg_mjpeg_make,
-	.drop_item		= uvcg_mjpeg_drop,
 };
 
-static const struct config_item_type uvcg_mjpeg_grp_type = {
-	.ct_group_ops	= &uvcg_mjpeg_grp_ops,
-	.ct_owner	= THIS_MODULE,
+static const struct uvcg_config_group_type uvcg_mjpeg_grp_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_group_ops	= &uvcg_mjpeg_grp_ops,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "mjpeg",
 };
 
-/* streaming/color_matching/default */
-static struct uvcg_default_color_matching {
-	struct config_group	group;
-} uvcg_default_color_matching;
-
-static inline struct uvcg_default_color_matching
-*to_uvcg_default_color_matching(struct config_item *item)
-{
-	return container_of(to_config_group(item),
-			    struct uvcg_default_color_matching, group);
-}
+/* -----------------------------------------------------------------------------
+ * streaming/color_matching/default
+ */
 
-#define UVCG_DEFAULT_COLOR_MATCHING_ATTR(cname, aname, conv)		\
+#define UVCG_DEFAULT_COLOR_MATCHING_ATTR(cname, aname, bits)		\
 static ssize_t uvcg_default_color_matching_##cname##_show(		\
-	struct config_item *item, char *page)		\
+	struct config_item *item, char *page)				\
 {									\
-	struct uvcg_default_color_matching *dc =			\
-		to_uvcg_default_color_matching(item);			\
+	struct config_group *group = to_config_group(item);		\
 	struct f_uvc_opts *opts;					\
 	struct config_item *opts_item;					\
-	struct mutex *su_mutex = &dc->group.cg_subsys->su_mutex;	\
+	struct mutex *su_mutex = &group->cg_subsys->su_mutex;		\
 	struct uvc_color_matching_descriptor *cd;			\
 	int result;							\
 									\
 	mutex_lock(su_mutex); /* for navigating configfs hierarchy */	\
 									\
-	opts_item = dc->group.cg_item.ci_parent->ci_parent->ci_parent;	\
+	opts_item = group->cg_item.ci_parent->ci_parent->ci_parent;	\
 	opts = to_f_uvc_opts(opts_item);				\
 	cd = &opts->uvc_color_matching;					\
 									\
 	mutex_lock(&opts->lock);					\
-	result = sprintf(page, "%d\n", conv(cd->aname));		\
+	result = sprintf(page, "%u\n", le##bits##_to_cpu(cd->aname));	\
 	mutex_unlock(&opts->lock);					\
 									\
 	mutex_unlock(su_mutex);						\
@@ -1709,16 +1895,10 @@ static ssize_t uvcg_default_color_matching_##cname##_show(		\
 									\
 UVC_ATTR_RO(uvcg_default_color_matching_, cname, aname)
 
-#define identity_conv(x) (x)
-
-UVCG_DEFAULT_COLOR_MATCHING_ATTR(b_color_primaries, bColorPrimaries,
-				 identity_conv);
+UVCG_DEFAULT_COLOR_MATCHING_ATTR(b_color_primaries, bColorPrimaries, 8);
 UVCG_DEFAULT_COLOR_MATCHING_ATTR(b_transfer_characteristics,
-				 bTransferCharacteristics, identity_conv);
-UVCG_DEFAULT_COLOR_MATCHING_ATTR(b_matrix_coefficients, bMatrixCoefficients,
-				 identity_conv);
-
-#undef identity_conv
+				 bTransferCharacteristics, 8);
+UVCG_DEFAULT_COLOR_MATCHING_ATTR(b_matrix_coefficients, bMatrixCoefficients, 8);
 
 #undef UVCG_DEFAULT_COLOR_MATCHING_ATTR
 
@@ -1729,41 +1909,54 @@ static struct configfs_attribute *uvcg_default_color_matching_attrs[] = {
 	NULL,
 };
 
-static const struct config_item_type uvcg_default_color_matching_type = {
-	.ct_attrs	= uvcg_default_color_matching_attrs,
-	.ct_owner	= THIS_MODULE,
+static const struct uvcg_config_group_type uvcg_default_color_matching_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_attrs	= uvcg_default_color_matching_attrs,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "default",
 };
 
-/* struct uvcg_color_matching {}; */
-
-/* streaming/color_matching */
-static struct uvcg_color_matching_grp {
-	struct config_group	group;
-} uvcg_color_matching_grp;
+/* -----------------------------------------------------------------------------
+ * streaming/color_matching
+ */
 
-static const struct config_item_type uvcg_color_matching_grp_type = {
-	.ct_owner = THIS_MODULE,
+static const struct uvcg_config_group_type uvcg_color_matching_grp_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "color_matching",
+	.children = (const struct uvcg_config_group_type*[]) {
+		&uvcg_default_color_matching_type,
+		NULL,
+	},
 };
 
-/* streaming/class/{fs|hs|ss} */
-static struct uvcg_streaming_class {
-	struct config_group	group;
-} uvcg_streaming_class_fs, uvcg_streaming_class_hs, uvcg_streaming_class_ss;
+/* -----------------------------------------------------------------------------
+ * streaming/class/{fs|hs|ss}
+ */
 
+struct uvcg_streaming_class_group {
+	struct config_group group;
+	const char *name;
+};
 
 static inline struct uvc_descriptor_header
 ***__uvcg_get_stream_class_arr(struct config_item *i, struct f_uvc_opts *o)
 {
-	struct uvcg_streaming_class *cl = container_of(to_config_group(i),
-		struct uvcg_streaming_class, group);
+	struct uvcg_streaming_class_group *group =
+		container_of(i, struct uvcg_streaming_class_group,
+			     group.cg_item);
 
-	if (cl == &uvcg_streaming_class_fs)
+	if (!strcmp(group->name, "fs"))
 		return &o->uvc_fs_streaming_cls;
 
-	if (cl == &uvcg_streaming_class_hs)
+	if (!strcmp(group->name, "hs"))
 		return &o->uvc_hs_streaming_cls;
 
-	if (cl == &uvcg_streaming_class_ss)
+	if (!strcmp(group->name, "ss"))
 		return &o->uvc_ss_streaming_cls;
 
 	return NULL;
@@ -1922,24 +2115,22 @@ static int __uvcg_fill_strm(void *priv1, void *priv2, void *priv3, int n,
 		struct uvcg_format *fmt = priv1;
 
 		if (fmt->type == UVCG_UNCOMPRESSED) {
-			struct uvc_format_uncompressed *unc = *dest;
 			struct uvcg_uncompressed *u =
 				container_of(fmt, struct uvcg_uncompressed,
 					     fmt);
 
+			u->desc.bFormatIndex = n + 1;
+			u->desc.bNumFrameDescriptors = fmt->num_frames;
 			memcpy(*dest, &u->desc, sizeof(u->desc));
 			*dest += sizeof(u->desc);
-			unc->bNumFrameDescriptors = fmt->num_frames;
-			unc->bFormatIndex = n + 1;
 		} else if (fmt->type == UVCG_MJPEG) {
-			struct uvc_format_mjpeg *mjp = *dest;
 			struct uvcg_mjpeg *m =
 				container_of(fmt, struct uvcg_mjpeg, fmt);
 
+			m->desc.bFormatIndex = n + 1;
+			m->desc.bNumFrameDescriptors = fmt->num_frames;
 			memcpy(*dest, &m->desc, sizeof(m->desc));
 			*dest += sizeof(m->desc);
-			mjp->bNumFrameDescriptors = fmt->num_frames;
-			mjp->bFormatIndex = n + 1;
 		} else {
 			return -EINVAL;
 		}
@@ -2038,6 +2229,7 @@ static int uvcg_streaming_class_allow_link(struct config_item *src,
 unlock:
 	mutex_unlock(&opts->lock);
 out:
+	config_item_put(header);
 	mutex_unlock(su_mutex);
 	return ret;
 }
@@ -2078,10 +2270,12 @@ static void uvcg_streaming_class_drop_link(struct config_item *src,
 unlock:
 	mutex_unlock(&opts->lock);
 out:
+	config_item_put(header);
 	mutex_unlock(su_mutex);
 }
 
 static struct configfs_item_operations uvcg_streaming_class_item_ops = {
+	.release	= uvcg_config_item_release,
 	.allow_link	= uvcg_streaming_class_allow_link,
 	.drop_link	= uvcg_streaming_class_drop_link,
 };
@@ -2091,36 +2285,109 @@ static const struct config_item_type uvcg_streaming_class_type = {
 	.ct_owner	= THIS_MODULE,
 };
 
-/* streaming/class */
-static struct uvcg_streaming_class_grp {
-	struct config_group	group;
-} uvcg_streaming_class_grp;
+/* -----------------------------------------------------------------------------
+ * streaming/class
+ */
+
+static int uvcg_streaming_class_create_children(struct config_group *parent)
+{
+	static const char * const names[] = { "fs", "hs", "ss" };
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(names); ++i) {
+		struct uvcg_streaming_class_group *group;
+
+		group = kzalloc(sizeof(*group), GFP_KERNEL);
+		if (!group)
+			return -ENOMEM;
 
-static const struct config_item_type uvcg_streaming_class_grp_type = {
-	.ct_owner = THIS_MODULE,
+		group->name = names[i];
+
+		config_group_init_type_name(&group->group, group->name,
+					    &uvcg_streaming_class_type);
+		configfs_add_default_group(&group->group, parent);
+	}
+
+	return 0;
+}
+
+static const struct uvcg_config_group_type uvcg_streaming_class_grp_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "class",
+	.create_children = uvcg_streaming_class_create_children,
 };
 
-/* streaming */
-static struct uvcg_streaming_grp {
-	struct config_group	group;
-} uvcg_streaming_grp;
+/* -----------------------------------------------------------------------------
+ * streaming
+ */
 
-static const struct config_item_type uvcg_streaming_grp_type = {
-	.ct_owner = THIS_MODULE,
+static ssize_t uvcg_default_streaming_b_interface_number_show(
+	struct config_item *item, char *page)
+{
+	struct config_group *group = to_config_group(item);
+	struct mutex *su_mutex = &group->cg_subsys->su_mutex;
+	struct config_item *opts_item;
+	struct f_uvc_opts *opts;
+	int result = 0;
+
+	mutex_lock(su_mutex); /* for navigating configfs hierarchy */
+
+	opts_item = item->ci_parent;
+	opts = to_f_uvc_opts(opts_item);
+
+	mutex_lock(&opts->lock);
+	result += sprintf(page, "%u\n", opts->streaming_interface);
+	mutex_unlock(&opts->lock);
+
+	mutex_unlock(su_mutex);
+
+	return result;
+}
+
+UVC_ATTR_RO(uvcg_default_streaming_, b_interface_number, bInterfaceNumber);
+
+static struct configfs_attribute *uvcg_default_streaming_attrs[] = {
+	&uvcg_default_streaming_attr_b_interface_number,
+	NULL,
+};
+
+static const struct uvcg_config_group_type uvcg_streaming_grp_type = {
+	.type = {
+		.ct_item_ops	= &uvcg_config_item_ops,
+		.ct_attrs	= uvcg_default_streaming_attrs,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "streaming",
+	.children = (const struct uvcg_config_group_type*[]) {
+		&uvcg_streaming_header_grp_type,
+		&uvcg_uncompressed_grp_type,
+		&uvcg_mjpeg_grp_type,
+		&uvcg_color_matching_grp_type,
+		&uvcg_streaming_class_grp_type,
+		NULL,
+	},
 };
 
-static void uvc_attr_release(struct config_item *item)
+/* -----------------------------------------------------------------------------
+ * UVC function
+ */
+
+static void uvc_func_item_release(struct config_item *item)
 {
 	struct f_uvc_opts *opts = to_f_uvc_opts(item);
 
+	uvcg_config_remove_children(to_config_group(item));
 	usb_put_function_instance(&opts->func_inst);
 }
 
-static struct configfs_item_operations uvc_item_ops = {
-	.release		= uvc_attr_release,
+static struct configfs_item_operations uvc_func_item_ops = {
+	.release	= uvc_func_item_release,
 };
 
-#define UVCG_OPTS_ATTR(cname, aname, conv, str2u, uxx, vnoc, limit)	\
+#define UVCG_OPTS_ATTR(cname, aname, limit)				\
 static ssize_t f_uvc_opts_##cname##_show(				\
 	struct config_item *item, char *page)				\
 {									\
@@ -2128,7 +2395,7 @@ static ssize_t f_uvc_opts_##cname##_show(				\
 	int result;							\
 									\
 	mutex_lock(&opts->lock);					\
-	result = sprintf(page, "%d\n", conv(opts->cname));		\
+	result = sprintf(page, "%u\n", opts->cname);			\
 	mutex_unlock(&opts->lock);					\
 									\
 	return result;							\
@@ -2139,8 +2406,8 @@ f_uvc_opts_##cname##_store(struct config_item *item,			\
 			   const char *page, size_t len)		\
 {									\
 	struct f_uvc_opts *opts = to_f_uvc_opts(item);			\
+	unsigned int num;						\
 	int ret;							\
-	uxx num;							\
 									\
 	mutex_lock(&opts->lock);					\
 	if (opts->refcnt) {						\
@@ -2148,7 +2415,7 @@ f_uvc_opts_##cname##_store(struct config_item *item,			\
 		goto end;						\
 	}								\
 									\
-	ret = str2u(page, 0, &num);					\
+	ret = kstrtouint(page, 0, &num);				\
 	if (ret)							\
 		goto end;						\
 									\
@@ -2156,7 +2423,7 @@ f_uvc_opts_##cname##_store(struct config_item *item,			\
 		ret = -EINVAL;						\
 		goto end;						\
 	}								\
-	opts->cname = vnoc(num);					\
+	opts->cname = num;						\
 	ret = len;							\
 end:									\
 	mutex_unlock(&opts->lock);					\
@@ -2165,16 +2432,9 @@ end:									\
 									\
 UVC_ATTR(f_uvc_opts_, cname, cname)
 
-#define identity_conv(x) (x)
-
-UVCG_OPTS_ATTR(streaming_interval, streaming_interval, identity_conv,
-	       kstrtou8, u8, identity_conv, 16);
-UVCG_OPTS_ATTR(streaming_maxpacket, streaming_maxpacket, le16_to_cpu,
-	       kstrtou16, u16, le16_to_cpu, 3072);
-UVCG_OPTS_ATTR(streaming_maxburst, streaming_maxburst, identity_conv,
-	       kstrtou8, u8, identity_conv, 15);
-
-#undef identity_conv
+UVCG_OPTS_ATTR(streaming_interval, streaming_interval, 16);
+UVCG_OPTS_ATTR(streaming_maxpacket, streaming_maxpacket, 3072);
+UVCG_OPTS_ATTR(streaming_maxburst, streaming_maxburst, 15);
 
 #undef UVCG_OPTS_ATTR
 
@@ -2185,123 +2445,31 @@ static struct configfs_attribute *uvc_attrs[] = {
 	NULL,
 };
 
-static const struct config_item_type uvc_func_type = {
-	.ct_item_ops	= &uvc_item_ops,
-	.ct_attrs	= uvc_attrs,
-	.ct_owner	= THIS_MODULE,
+static const struct uvcg_config_group_type uvc_func_type = {
+	.type = {
+		.ct_item_ops	= &uvc_func_item_ops,
+		.ct_attrs	= uvc_attrs,
+		.ct_owner	= THIS_MODULE,
+	},
+	.name = "",
+	.children = (const struct uvcg_config_group_type*[]) {
+		&uvcg_control_grp_type,
+		&uvcg_streaming_grp_type,
+		NULL,
+	},
 };
 
 int uvcg_attach_configfs(struct f_uvc_opts *opts)
 {
-	config_group_init_type_name(&uvcg_control_header_grp.group,
-				    "header",
-				    &uvcg_control_header_grp_type);
-
-	config_group_init_type_name(&uvcg_default_processing.group,
-			"default", &uvcg_default_processing_type);
-	config_group_init_type_name(&uvcg_processing_grp.group,
-			"processing", &uvcg_processing_grp_type);
-	configfs_add_default_group(&uvcg_default_processing.group,
-			&uvcg_processing_grp.group);
-
-	config_group_init_type_name(&uvcg_default_camera.group,
-			"default", &uvcg_default_camera_type);
-	config_group_init_type_name(&uvcg_camera_grp.group,
-			"camera", &uvcg_camera_grp_type);
-	configfs_add_default_group(&uvcg_default_camera.group,
-			&uvcg_camera_grp.group);
-
-	config_group_init_type_name(&uvcg_default_output.group,
-			"default", &uvcg_default_output_type);
-	config_group_init_type_name(&uvcg_output_grp.group,
-			"output", &uvcg_output_grp_type);
-	configfs_add_default_group(&uvcg_default_output.group,
-			&uvcg_output_grp.group);
-
-	config_group_init_type_name(&uvcg_terminal_grp.group,
-			"terminal", &uvcg_terminal_grp_type);
-	configfs_add_default_group(&uvcg_camera_grp.group,
-			&uvcg_terminal_grp.group);
-	configfs_add_default_group(&uvcg_output_grp.group,
-			&uvcg_terminal_grp.group);
-
-	config_group_init_type_name(&uvcg_control_class_fs.group,
-			"fs", &uvcg_control_class_type);
-	config_group_init_type_name(&uvcg_control_class_ss.group,
-			"ss", &uvcg_control_class_type);
-	config_group_init_type_name(&uvcg_control_class_grp.group,
-			"class",
-			&uvcg_control_class_grp_type);
-	configfs_add_default_group(&uvcg_control_class_fs.group,
-			&uvcg_control_class_grp.group);
-	configfs_add_default_group(&uvcg_control_class_ss.group,
-			&uvcg_control_class_grp.group);
-
-	config_group_init_type_name(&uvcg_control_grp.group,
-			"control",
-			&uvcg_control_grp_type);
-	configfs_add_default_group(&uvcg_control_header_grp.group,
-			&uvcg_control_grp.group);
-	configfs_add_default_group(&uvcg_processing_grp.group,
-			&uvcg_control_grp.group);
-	configfs_add_default_group(&uvcg_terminal_grp.group,
-			&uvcg_control_grp.group);
-	configfs_add_default_group(&uvcg_control_class_grp.group,
-			&uvcg_control_grp.group);
-
-	config_group_init_type_name(&uvcg_streaming_header_grp.group,
-				    "header",
-				    &uvcg_streaming_header_grp_type);
-	config_group_init_type_name(&uvcg_uncompressed_grp.group,
-				    "uncompressed",
-				    &uvcg_uncompressed_grp_type);
-	config_group_init_type_name(&uvcg_mjpeg_grp.group,
-				    "mjpeg",
-				    &uvcg_mjpeg_grp_type);
-	config_group_init_type_name(&uvcg_default_color_matching.group,
-				    "default",
-				    &uvcg_default_color_matching_type);
-	config_group_init_type_name(&uvcg_color_matching_grp.group,
-			"color_matching",
-			&uvcg_color_matching_grp_type);
-	configfs_add_default_group(&uvcg_default_color_matching.group,
-			&uvcg_color_matching_grp.group);
-
-	config_group_init_type_name(&uvcg_streaming_class_fs.group,
-			"fs", &uvcg_streaming_class_type);
-	config_group_init_type_name(&uvcg_streaming_class_hs.group,
-			"hs", &uvcg_streaming_class_type);
-	config_group_init_type_name(&uvcg_streaming_class_ss.group,
-			"ss", &uvcg_streaming_class_type);
-	config_group_init_type_name(&uvcg_streaming_class_grp.group,
-			"class", &uvcg_streaming_class_grp_type);
-	configfs_add_default_group(&uvcg_streaming_class_fs.group,
-			&uvcg_streaming_class_grp.group);
-	configfs_add_default_group(&uvcg_streaming_class_hs.group,
-			&uvcg_streaming_class_grp.group);
-	configfs_add_default_group(&uvcg_streaming_class_ss.group,
-			&uvcg_streaming_class_grp.group);
-
-	config_group_init_type_name(&uvcg_streaming_grp.group,
-			"streaming", &uvcg_streaming_grp_type);
-	configfs_add_default_group(&uvcg_streaming_header_grp.group,
-			&uvcg_streaming_grp.group);
-	configfs_add_default_group(&uvcg_uncompressed_grp.group,
-			&uvcg_streaming_grp.group);
-	configfs_add_default_group(&uvcg_mjpeg_grp.group,
-			&uvcg_streaming_grp.group);
-	configfs_add_default_group(&uvcg_color_matching_grp.group,
-			&uvcg_streaming_grp.group);
-	configfs_add_default_group(&uvcg_streaming_class_grp.group,
-			&uvcg_streaming_grp.group);
-
-	config_group_init_type_name(&opts->func_inst.group,
-			"",
-			&uvc_func_type);
-	configfs_add_default_group(&uvcg_control_grp.group,
-			&opts->func_inst.group);
-	configfs_add_default_group(&uvcg_streaming_grp.group,
-			&opts->func_inst.group);
+	int ret;
 
-	return 0;
+	config_group_init_type_name(&opts->func_inst.group, uvc_func_type.name,
+				    &uvc_func_type.type);
+
+	ret = uvcg_config_create_children(&opts->func_inst.group,
+					  &uvc_func_type);
+	if (ret < 0)
+		config_group_put(&opts->func_inst.group);
+
+	return ret;
 }
diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c
index 7f1ca3b57823..a1183eccee22 100644
--- a/drivers/usb/gadget/function/uvc_v4l2.c
+++ b/drivers/usb/gadget/function/uvc_v4l2.c
@@ -115,8 +115,8 @@ uvc_v4l2_set_format(struct file *file, void *fh, struct v4l2_format *fmt)
 	}
 
 	if (i == ARRAY_SIZE(uvc_formats)) {
-		printk(KERN_INFO "Unsupported format 0x%08x.\n",
-			fmt->fmt.pix.pixelformat);
+		uvcg_info(&uvc->func, "Unsupported format 0x%08x.\n",
+			  fmt->fmt.pix.pixelformat);
 		return -EINVAL;
 	}
 
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index d3567b90343a..5c042f380708 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -125,6 +125,23 @@ uvc_video_encode_isoc(struct usb_request *req, struct uvc_video *video,
  * Request handling
  */
 
+static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
+{
+	int ret;
+
+	ret = usb_ep_queue(video->ep, req, GFP_ATOMIC);
+	if (ret < 0) {
+		uvcg_err(&video->uvc->func, "Failed to queue request (%d).\n",
+			 ret);
+
+		/* Isochronous endpoints can't be halted. */
+		if (usb_endpoint_xfer_bulk(video->ep->desc))
+			usb_ep_set_halt(video->ep);
+	}
+
+	return ret;
+}
+
 /*
  * I somehow feel that synchronisation won't be easy to achieve here. We have
  * three events that control USB requests submission:
@@ -169,13 +186,14 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 		break;
 
 	case -ESHUTDOWN:	/* disconnect from host. */
-		printk(KERN_DEBUG "VS request cancelled.\n");
+		uvcg_dbg(&video->uvc->func, "VS request cancelled.\n");
 		uvcg_queue_cancel(queue, 1);
 		goto requeue;
 
 	default:
-		printk(KERN_INFO "VS request completed with status %d.\n",
-			req->status);
+		uvcg_info(&video->uvc->func,
+			  "VS request completed with status %d.\n",
+			  req->status);
 		uvcg_queue_cancel(queue, 0);
 		goto requeue;
 	}
@@ -189,14 +207,13 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 
 	video->encode(req, video, buf);
 
-	if ((ret = usb_ep_queue(ep, req, GFP_ATOMIC)) < 0) {
-		printk(KERN_INFO "Failed to queue request (%d).\n", ret);
-		usb_ep_set_halt(ep);
-		spin_unlock_irqrestore(&video->queue.irqlock, flags);
+	ret = uvcg_video_ep_queue(video, req);
+	spin_unlock_irqrestore(&video->queue.irqlock, flags);
+
+	if (ret < 0) {
 		uvcg_queue_cancel(queue, 0);
 		goto requeue;
 	}
-	spin_unlock_irqrestore(&video->queue.irqlock, flags);
 
 	return;
 
@@ -316,15 +333,13 @@ int uvcg_video_pump(struct uvc_video *video)
 		video->encode(req, video, buf);
 
 		/* Queue the USB request */
-		ret = usb_ep_queue(video->ep, req, GFP_ATOMIC);
+		ret = uvcg_video_ep_queue(video, req);
+		spin_unlock_irqrestore(&queue->irqlock, flags);
+
 		if (ret < 0) {
-			printk(KERN_INFO "Failed to queue request (%d)\n", ret);
-			usb_ep_set_halt(video->ep);
-			spin_unlock_irqrestore(&queue->irqlock, flags);
 			uvcg_queue_cancel(queue, 0);
 			break;
 		}
-		spin_unlock_irqrestore(&queue->irqlock, flags);
 	}
 
 	spin_lock_irqsave(&video->req_lock, flags);
@@ -342,8 +357,8 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
 	int ret;
 
 	if (video->ep == NULL) {
-		printk(KERN_INFO "Video enable failed, device is "
-			"uninitialized.\n");
+		uvcg_info(&video->uvc->func,
+			  "Video enable failed, device is uninitialized.\n");
 		return -ENODEV;
 	}
 
@@ -375,11 +390,12 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
 /*
  * Initialize the UVC video stream.
  */
-int uvcg_video_init(struct uvc_video *video)
+int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
 {
 	INIT_LIST_HEAD(&video->req_free);
 	spin_lock_init(&video->req_lock);
 
+	video->uvc = uvc;
 	video->fcc = V4L2_PIX_FMT_YUYV;
 	video->bpp = 16;
 	video->width = 320;
diff --git a/drivers/usb/gadget/function/uvc_video.h b/drivers/usb/gadget/function/uvc_video.h
index 7d77122b0ff9..278dc52c7604 100644
--- a/drivers/usb/gadget/function/uvc_video.h
+++ b/drivers/usb/gadget/function/uvc_video.h
@@ -18,6 +18,6 @@ int uvcg_video_pump(struct uvc_video *video);
 
 int uvcg_video_enable(struct uvc_video *video, int enable);
 
-int uvcg_video_init(struct uvc_video *video);
+int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc);
 
 #endif /* __UVC_VIDEO_H__ */
diff --git a/drivers/usb/gadget/udc/aspeed-vhub/epn.c b/drivers/usb/gadget/udc/aspeed-vhub/epn.c
index 5939eb1e97f2..4a28e3fbeb0b 100644
--- a/drivers/usb/gadget/udc/aspeed-vhub/epn.c
+++ b/drivers/usb/gadget/udc/aspeed-vhub/epn.c
@@ -353,7 +353,7 @@ static int ast_vhub_epn_queue(struct usb_ep* u_ep, struct usb_request *u_req,
 	/* Endpoint enabled ? */
 	if (!ep->epn.enabled || !u_ep->desc || !ep->dev || !ep->d_idx ||
 	    !ep->dev->enabled || ep->dev->suspended) {
-		EPDBG(ep,"Enqueing request on wrong or disabled EP\n");
+		EPDBG(ep, "Enqueuing request on wrong or disabled EP\n");
 		return -ESHUTDOWN;
 	}
 
diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
index 17147b8c771e..11247322d587 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
@@ -2004,7 +2004,6 @@ static struct usba_ep * atmel_udc_of_init(struct platform_device *pdev,
 						    struct usba_udc *udc)
 {
 	u32 val;
-	const char *name;
 	struct device_node *np = pdev->dev.of_node;
 	const struct of_device_id *match;
 	struct device_node *pp;
@@ -2018,6 +2017,8 @@ static struct usba_ep * atmel_udc_of_init(struct platform_device *pdev,
 	udc->errata = match->data;
 	udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9g45-pmc");
 	if (IS_ERR(udc->pmc))
+		udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9rl-pmc");
+	if (IS_ERR(udc->pmc))
 		udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9x5-pmc");
 	if (udc->errata && IS_ERR(udc->pmc))
 		return ERR_CAST(udc->pmc);
@@ -2094,11 +2095,6 @@ static struct usba_ep * atmel_udc_of_init(struct platform_device *pdev,
 		ep->can_dma = of_property_read_bool(pp, "atmel,can-dma");
 		ep->can_isoc = of_property_read_bool(pp, "atmel,can-isoc");
 
-		ret = of_property_read_string(pp, "name", &name);
-		if (ret) {
-			dev_err(&pdev->dev, "of_probe: name error(%d)\n", ret);
-			goto err;
-		}
 		sprintf(ep->name, "ep%d", ep->index);
 		ep->ep.name = ep->name;
 
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index af88b48c1cea..87d6b12779f2 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -690,6 +690,9 @@ EXPORT_SYMBOL_GPL(usb_gadget_connect);
  * as a disconnect (when a VBUS session is active).  Not all systems
  * support software pullup controls.
  *
+ * Following a successful disconnect, invoke the ->disconnect() callback
+ * for the current gadget driver so that UDC drivers don't need to.
+ *
  * Returns zero on success, else negative errno.
  */
 int usb_gadget_disconnect(struct usb_gadget *gadget)
@@ -711,8 +714,10 @@ int usb_gadget_disconnect(struct usb_gadget *gadget)
 	}
 
 	ret = gadget->ops->pullup(gadget, 0);
-	if (!ret)
+	if (!ret) {
 		gadget->connected = 0;
+		gadget->udc->driver->disconnect(gadget);
+	}
 
 out:
 	trace_usb_gadget_disconnect(gadget, ret);
@@ -1281,7 +1286,6 @@ static void usb_gadget_remove_driver(struct usb_udc *udc)
 	kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE);
 
 	usb_gadget_disconnect(udc->gadget);
-	udc->driver->disconnect(udc->gadget);
 	udc->driver->unbind(udc->gadget);
 	usb_gadget_udc_stop(udc);
 
@@ -1471,7 +1475,6 @@ static ssize_t soft_connect_store(struct device *dev,
 		usb_gadget_connect(udc->gadget);
 	} else if (sysfs_streq(buf, "disconnect")) {
 		usb_gadget_disconnect(udc->gadget);
-		udc->driver->disconnect(udc->gadget);
 		usb_gadget_udc_stop(udc);
 	} else {
 		dev_err(dev, "unsupported command '%s'\n", buf);
diff --git a/drivers/usb/gadget/udc/fotg210-udc.c b/drivers/usb/gadget/udc/fotg210-udc.c
index 587c5037ff07..bc6abaea907d 100644
--- a/drivers/usb/gadget/udc/fotg210-udc.c
+++ b/drivers/usb/gadget/udc/fotg210-udc.c
@@ -741,7 +741,7 @@ static void fotg210_get_status(struct fotg210_udc *fotg210,
 	fotg210->ep0_req->length = 2;
 
 	spin_unlock(&fotg210->lock);
-	fotg210_ep_queue(fotg210->gadget.ep0, fotg210->ep0_req, GFP_KERNEL);
+	fotg210_ep_queue(fotg210->gadget.ep0, fotg210->ep0_req, GFP_ATOMIC);
 	spin_lock(&fotg210->lock);
 }
 
diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index be59309e848c..20141c3096f6 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -2234,8 +2234,10 @@ static void fsl_udc_release(struct device *dev)
 	Internal structure setup functions
 *******************************************************************/
 /*------------------------------------------------------------------
- * init resource for globle controller
- * Return the udc handle on success or NULL on failure
+ * init resource for global controller called by fsl_udc_probe()
+ * On success the udc handle is initialized, on failure it is
+ * unchanged (reset).
+ * Return 0 on success and -1 on allocation failure
  ------------------------------------------------------------------*/
 static int struct_udc_setup(struct fsl_udc *udc,
 		struct platform_device *pdev)
@@ -2247,8 +2249,10 @@ static int struct_udc_setup(struct fsl_udc *udc,
 	udc->phy_mode = pdata->phy_mode;
 
 	udc->eps = kcalloc(udc->max_ep, sizeof(struct fsl_ep), GFP_KERNEL);
-	if (!udc->eps)
-		return -1;
+	if (!udc->eps) {
+		ERR("kmalloc udc endpoint status failed\n");
+		goto eps_alloc_failed;
+	}
 
 	/* initialized QHs, take care of alignment */
 	size = udc->max_ep * sizeof(struct ep_queue_head);
@@ -2262,8 +2266,7 @@ static int struct_udc_setup(struct fsl_udc *udc,
 					&udc->ep_qh_dma, GFP_KERNEL);
 	if (!udc->ep_qh) {
 		ERR("malloc QHs for udc failed\n");
-		kfree(udc->eps);
-		return -1;
+		goto ep_queue_alloc_failed;
 	}
 
 	udc->ep_qh_size = size;
@@ -2272,8 +2275,17 @@ static int struct_udc_setup(struct fsl_udc *udc,
 	/* FIXME: fsl_alloc_request() ignores ep argument */
 	udc->status_req = container_of(fsl_alloc_request(NULL, GFP_KERNEL),
 			struct fsl_req, req);
+	if (!udc->status_req) {
+		ERR("kzalloc for udc status request failed\n");
+		goto udc_status_alloc_failed;
+	}
+
 	/* allocate a small amount of memory to get valid address */
 	udc->status_req->req.buf = kmalloc(8, GFP_KERNEL);
+	if (!udc->status_req->req.buf) {
+		ERR("kzalloc for udc request buffer failed\n");
+		goto udc_req_buf_alloc_failed;
+	}
 
 	udc->resume_state = USB_STATE_NOTATTACHED;
 	udc->usb_state = USB_STATE_POWERED;
@@ -2281,6 +2293,18 @@ static int struct_udc_setup(struct fsl_udc *udc,
 	udc->remote_wakeup = 0;	/* default to 0 on reset */
 
 	return 0;
+
+udc_req_buf_alloc_failed:
+	kfree(udc->status_req);
+udc_status_alloc_failed:
+	kfree(udc->ep_qh);
+	udc->ep_qh_size = 0;
+ep_queue_alloc_failed:
+	kfree(udc->eps);
+eps_alloc_failed:
+	udc->phy_mode = 0;
+	return -1;
+
 }
 
 /*----------------------------------------------------------------
diff --git a/drivers/usb/gadget/udc/mv_udc_core.c b/drivers/usb/gadget/udc/mv_udc_core.c
index 95f52232493b..cafde053788b 100644
--- a/drivers/usb/gadget/udc/mv_udc_core.c
+++ b/drivers/usb/gadget/udc/mv_udc_core.c
@@ -185,7 +185,7 @@ static int process_ep_req(struct mv_udc *udc, int index,
 	else
 		bit_pos = 1 << (16 + curr_req->ep->ep_num);
 
-	while ((curr_dqh->curr_dtd_ptr == curr_dtd->td_dma)) {
+	while (curr_dqh->curr_dtd_ptr == curr_dtd->td_dma) {
 		if (curr_dtd->dtd_next == EP_QUEUE_HEAD_NEXT_TERMINATE) {
 			while (readl(&udc->op_regs->epstatus) & bit_pos)
 				udelay(1);
diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c
index b02ab2a8d927..e7dae5379e04 100644
--- a/drivers/usb/gadget/udc/net2280.c
+++ b/drivers/usb/gadget/udc/net2280.c
@@ -1550,9 +1550,6 @@ static int net2280_pullup(struct usb_gadget *_gadget, int is_on)
 
 	spin_unlock_irqrestore(&dev->lock, flags);
 
-	if (!is_on && dev->driver)
-		dev->driver->disconnect(&dev->gadget);
-
 	return 0;
 }
 
diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index e1656f361e08..cdffbd1e0316 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -2437,6 +2437,9 @@ static ssize_t renesas_usb3_b_device_write(struct file *file,
 	else
 		usb3->forced_b_device = false;
 
+	if (usb3->workaround_for_vbus)
+		usb3_disconnect(usb3);
+
 	/* Let this driver call usb3_connect() anyway */
 	usb3_check_id(usb3);
 
@@ -2600,6 +2603,13 @@ static const struct renesas_usb3_priv renesas_usb3_priv_gen3 = {
 	.ramsize_per_pipe = SZ_4K,
 };
 
+static const struct renesas_usb3_priv renesas_usb3_priv_r8a77990 = {
+	.ramsize_per_ramif = SZ_16K,
+	.num_ramif = 4,
+	.ramsize_per_pipe = SZ_4K,
+	.workaround_for_vbus = true,
+};
+
 static const struct of_device_id usb3_of_match[] = {
 	{
 		.compatible = "renesas,r8a7795-usb3-peri",
@@ -2618,6 +2628,10 @@ static const struct soc_device_attribute renesas_usb3_quirks_match[] = {
 		.soc_id = "r8a7795", .revision = "ES1.*",
 		.data = &renesas_usb3_priv_r8a7795_es1,
 	},
+	{
+		.soc_id = "r8a77990",
+		.data = &renesas_usb3_priv_r8a77990,
+	},
 	{ /* sentinel */ },
 };
 
diff --git a/drivers/usb/gadget/udc/udc-xilinx.c b/drivers/usb/gadget/udc/udc-xilinx.c
index 6407e433bc78..b1f4104d1283 100644
--- a/drivers/usb/gadget/udc/udc-xilinx.c
+++ b/drivers/usb/gadget/udc/udc-xilinx.c
@@ -1078,7 +1078,7 @@ static int xudc_ep_queue(struct usb_ep *_ep, struct usb_request *_req,
 	unsigned long flags;
 
 	if (!ep->desc) {
-		dev_dbg(udc->dev, "%s:queing request to disabled %s\n",
+		dev_dbg(udc->dev, "%s: queuing request to disabled %s\n",
 			__func__, ep->name);
 		return -ESHUTDOWN;
 	}
diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 1a4ea98cac2a..16758b12a5e9 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -276,7 +276,7 @@ config USB_EHCI_EXYNOS
 	Enable support for the Samsung Exynos SOC's on-chip EHCI controller.
 
 config USB_EHCI_MV
-	bool "EHCI support for Marvell PXA/MMP USB controller"
+	tristate "EHCI support for Marvell PXA/MMP USB controller"
 	depends on (ARCH_PXA || ARCH_MMP)
 	select USB_EHCI_ROOT_HUB_TT
 	---help---
diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile
index e6235269c151..84514f71ae44 100644
--- a/drivers/usb/host/Makefile
+++ b/drivers/usb/host/Makefile
@@ -87,6 +87,7 @@ obj-$(CONFIG_USB_IMX21_HCD)	+= imx21-hcd.o
 obj-$(CONFIG_USB_FSL_USB2)	+= fsl-mph-dr-of.o
 obj-$(CONFIG_USB_EHCI_FSL)	+= fsl-mph-dr-of.o
 obj-$(CONFIG_USB_EHCI_FSL)	+= ehci-fsl.o
+obj-$(CONFIG_USB_EHCI_MV)	+= ehci-mv.o
 obj-$(CONFIG_USB_HCD_BCMA)	+= bcma-hcd.o
 obj-$(CONFIG_USB_HCD_SSB)	+= ssb-hcd.o
 obj-$(CONFIG_USB_FOTG210_HCD)	+= fotg210-hcd.o
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 8608ac513fb7..cdafa97f632d 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -730,9 +730,9 @@ static irqreturn_t ehci_irq (struct usb_hcd *hcd)
 	/* normal [4.15.1.2] or error [4.15.1.1] completion */
 	if (likely ((status & (STS_INT|STS_ERR)) != 0)) {
 		if (likely ((status & STS_ERR) == 0))
-			COUNT (ehci->stats.normal);
+			INCR(ehci->stats.normal);
 		else
-			COUNT (ehci->stats.error);
+			INCR(ehci->stats.error);
 		bh = 1;
 	}
 
@@ -756,7 +756,7 @@ static irqreturn_t ehci_irq (struct usb_hcd *hcd)
 		if (cmd & CMD_IAAD)
 			ehci_dbg(ehci, "IAA with IAAD still set?\n");
 		if (ehci->iaa_in_progress)
-			COUNT(ehci->stats.iaa);
+			INCR(ehci->stats.iaa);
 		end_iaa_cycle(ehci);
 	}
 
@@ -1286,11 +1286,6 @@ MODULE_LICENSE ("GPL");
 #define PLATFORM_DRIVER		ehci_grlib_driver
 #endif
 
-#ifdef CONFIG_USB_EHCI_MV
-#include "ehci-mv.c"
-#define        PLATFORM_DRIVER         ehci_mv_driver
-#endif
-
 static int __init ehci_hcd_init(void)
 {
 	int retval = 0;
diff --git a/drivers/usb/host/ehci-mv.c b/drivers/usb/host/ehci-mv.c
index de764459e05a..f26109eafdbf 100644
--- a/drivers/usb/host/ehci-mv.c
+++ b/drivers/usb/host/ehci-mv.c
@@ -12,24 +12,33 @@
 #include <linux/err.h>
 #include <linux/usb/otg.h>
 #include <linux/platform_data/mv_usb.h>
+#include <linux/io.h>
+
+#include <linux/usb/hcd.h>
+
+#include "ehci.h"
+
+/* registers */
+#define U2x_CAPREGS_OFFSET       0x100
 
 #define CAPLENGTH_MASK         (0xff)
 
-struct ehci_hcd_mv {
-	struct usb_hcd *hcd;
+#define hcd_to_ehci_hcd_mv(h) ((struct ehci_hcd_mv *)hcd_to_ehci(h)->priv)
 
+struct ehci_hcd_mv {
 	/* Which mode does this ehci running OTG/Host ? */
 	int mode;
 
-	void __iomem *phy_regs;
+	void __iomem *base;
 	void __iomem *cap_regs;
 	void __iomem *op_regs;
 
 	struct usb_phy *otg;
+	struct clk *clk;
 
-	struct mv_usb_platform_data *pdata;
+	struct phy *phy;
 
-	struct clk *clk;
+	int (*set_vbus)(unsigned int vbus);
 };
 
 static void ehci_clock_enable(struct ehci_hcd_mv *ehci_mv)
@@ -44,29 +53,20 @@ static void ehci_clock_disable(struct ehci_hcd_mv *ehci_mv)
 
 static int mv_ehci_enable(struct ehci_hcd_mv *ehci_mv)
 {
-	int retval;
-
 	ehci_clock_enable(ehci_mv);
-	if (ehci_mv->pdata->phy_init) {
-		retval = ehci_mv->pdata->phy_init(ehci_mv->phy_regs);
-		if (retval)
-			return retval;
-	}
-
-	return 0;
+	return phy_init(ehci_mv->phy);
 }
 
 static void mv_ehci_disable(struct ehci_hcd_mv *ehci_mv)
 {
-	if (ehci_mv->pdata->phy_deinit)
-		ehci_mv->pdata->phy_deinit(ehci_mv->phy_regs);
+	phy_exit(ehci_mv->phy);
 	ehci_clock_disable(ehci_mv);
 }
 
 static int mv_ehci_reset(struct usb_hcd *hcd)
 {
 	struct device *dev = hcd->self.controller;
-	struct ehci_hcd_mv *ehci_mv = dev_get_drvdata(dev);
+	struct ehci_hcd_mv *ehci_mv = hcd_to_ehci_hcd_mv(hcd);
 	int retval;
 
 	if (ehci_mv == NULL) {
@@ -83,46 +83,11 @@ static int mv_ehci_reset(struct usb_hcd *hcd)
 	return retval;
 }
 
-static const struct hc_driver mv_ehci_hc_driver = {
-	.description = hcd_name,
-	.product_desc = "Marvell EHCI",
-	.hcd_priv_size = sizeof(struct ehci_hcd),
-
-	/*
-	 * generic hardware linkage
-	 */
-	.irq = ehci_irq,
-	.flags = HCD_MEMORY | HCD_USB2 | HCD_BH,
-
-	/*
-	 * basic lifecycle operations
-	 */
-	.reset = mv_ehci_reset,
-	.start = ehci_run,
-	.stop = ehci_stop,
-	.shutdown = ehci_shutdown,
-
-	/*
-	 * managing i/o requests and associated device resources
-	 */
-	.urb_enqueue = ehci_urb_enqueue,
-	.urb_dequeue = ehci_urb_dequeue,
-	.endpoint_disable = ehci_endpoint_disable,
-	.endpoint_reset = ehci_endpoint_reset,
-	.clear_tt_buffer_complete = ehci_clear_tt_buffer_complete,
-
-	/*
-	 * scheduling support
-	 */
-	.get_frame_number = ehci_get_frame,
-
-	/*
-	 * root hub support
-	 */
-	.hub_status_data = ehci_hub_status_data,
-	.hub_control = ehci_hub_control,
-	.bus_suspend = ehci_bus_suspend,
-	.bus_resume = ehci_bus_resume,
+static struct hc_driver __read_mostly ehci_platform_hc_driver;
+
+static const struct ehci_driver_overrides platform_overrides __initconst = {
+	.reset =		mv_ehci_reset,
+	.extra_priv_size =	sizeof(struct ehci_hcd_mv),
 };
 
 static int mv_ehci_probe(struct platform_device *pdev)
@@ -135,27 +100,29 @@ static int mv_ehci_probe(struct platform_device *pdev)
 	int retval = -ENODEV;
 	u32 offset;
 
-	if (!pdata) {
-		dev_err(&pdev->dev, "missing platform_data\n");
-		return -ENODEV;
-	}
-
 	if (usb_disabled())
 		return -ENODEV;
 
-	hcd = usb_create_hcd(&mv_ehci_hc_driver, &pdev->dev, "mv ehci");
+	hcd = usb_create_hcd(&ehci_platform_hc_driver, &pdev->dev, "mv ehci");
 	if (!hcd)
 		return -ENOMEM;
 
-	ehci_mv = devm_kzalloc(&pdev->dev, sizeof(*ehci_mv), GFP_KERNEL);
-	if (ehci_mv == NULL) {
-		retval = -ENOMEM;
-		goto err_put_hcd;
+	platform_set_drvdata(pdev, hcd);
+	ehci_mv = hcd_to_ehci_hcd_mv(hcd);
+
+	ehci_mv->mode = MV_USB_MODE_HOST;
+	if (pdata) {
+		ehci_mv->mode = pdata->mode;
+		ehci_mv->set_vbus = pdata->set_vbus;
 	}
 
-	platform_set_drvdata(pdev, ehci_mv);
-	ehci_mv->pdata = pdata;
-	ehci_mv->hcd = hcd;
+	ehci_mv->phy = devm_phy_get(&pdev->dev, "usb");
+	if (IS_ERR(ehci_mv->phy)) {
+		retval = PTR_ERR(ehci_mv->phy);
+		if (retval != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "Failed to get phy.\n");
+		goto err_put_hcd;
+	}
 
 	ehci_mv->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(ehci_mv->clk)) {
@@ -164,17 +131,12 @@ static int mv_ehci_probe(struct platform_device *pdev)
 		goto err_put_hcd;
 	}
 
-	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "phyregs");
-	ehci_mv->phy_regs = devm_ioremap_resource(&pdev->dev, r);
-	if (IS_ERR(ehci_mv->phy_regs)) {
-		retval = PTR_ERR(ehci_mv->phy_regs);
-		goto err_put_hcd;
-	}
 
-	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "capregs");
-	ehci_mv->cap_regs = devm_ioremap_resource(&pdev->dev, r);
-	if (IS_ERR(ehci_mv->cap_regs)) {
-		retval = PTR_ERR(ehci_mv->cap_regs);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	ehci_mv->base = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(ehci_mv->base)) {
+		retval = PTR_ERR(ehci_mv->base);
 		goto err_put_hcd;
 	}
 
@@ -184,6 +146,8 @@ static int mv_ehci_probe(struct platform_device *pdev)
 		goto err_put_hcd;
 	}
 
+	ehci_mv->cap_regs =
+		(void __iomem *) ((unsigned long) ehci_mv->base + U2x_CAPREGS_OFFSET);
 	offset = readl(ehci_mv->cap_regs) & CAPLENGTH_MASK;
 	ehci_mv->op_regs =
 		(void __iomem *) ((unsigned long) ehci_mv->cap_regs + offset);
@@ -202,7 +166,6 @@ static int mv_ehci_probe(struct platform_device *pdev)
 	ehci = hcd_to_ehci(hcd);
 	ehci->caps = (struct ehci_caps *) ehci_mv->cap_regs;
 
-	ehci_mv->mode = pdata->mode;
 	if (ehci_mv->mode == MV_USB_MODE_OTG) {
 		ehci_mv->otg = devm_usb_get_phy(&pdev->dev, USB_PHY_TYPE_USB2);
 		if (IS_ERR(ehci_mv->otg)) {
@@ -227,8 +190,8 @@ static int mv_ehci_probe(struct platform_device *pdev)
 		/* otg will enable clock before use as host */
 		mv_ehci_disable(ehci_mv);
 	} else {
-		if (pdata->set_vbus)
-			pdata->set_vbus(1);
+		if (ehci_mv->set_vbus)
+			ehci_mv->set_vbus(1);
 
 		retval = usb_add_hcd(hcd, hcd->irq, IRQF_SHARED);
 		if (retval) {
@@ -239,9 +202,6 @@ static int mv_ehci_probe(struct platform_device *pdev)
 		device_wakeup_enable(hcd->self.controller);
 	}
 
-	if (pdata->private_init)
-		pdata->private_init(ehci_mv->op_regs, ehci_mv->phy_regs);
-
 	dev_info(&pdev->dev,
 		 "successful find EHCI device with regs 0x%p irq %d"
 		 " working in %s mode\n", hcd->regs, hcd->irq,
@@ -250,8 +210,8 @@ static int mv_ehci_probe(struct platform_device *pdev)
 	return 0;
 
 err_set_vbus:
-	if (pdata->set_vbus)
-		pdata->set_vbus(0);
+	if (ehci_mv->set_vbus)
+		ehci_mv->set_vbus(0);
 err_disable_clk:
 	mv_ehci_disable(ehci_mv);
 err_put_hcd:
@@ -262,8 +222,8 @@ err_put_hcd:
 
 static int mv_ehci_remove(struct platform_device *pdev)
 {
-	struct ehci_hcd_mv *ehci_mv = platform_get_drvdata(pdev);
-	struct usb_hcd *hcd = ehci_mv->hcd;
+	struct usb_hcd *hcd = platform_get_drvdata(pdev);
+	struct ehci_hcd_mv *ehci_mv = hcd_to_ehci_hcd_mv(hcd);
 
 	if (hcd->rh_registered)
 		usb_remove_hcd(hcd);
@@ -272,8 +232,8 @@ static int mv_ehci_remove(struct platform_device *pdev)
 		otg_set_host(ehci_mv->otg->otg, NULL);
 
 	if (ehci_mv->mode == MV_USB_MODE_HOST) {
-		if (ehci_mv->pdata->set_vbus)
-			ehci_mv->pdata->set_vbus(0);
+		if (ehci_mv->set_vbus)
+			ehci_mv->set_vbus(0);
 
 		mv_ehci_disable(ehci_mv);
 	}
@@ -295,8 +255,7 @@ static const struct platform_device_id ehci_id_table[] = {
 
 static void mv_ehci_shutdown(struct platform_device *pdev)
 {
-	struct ehci_hcd_mv *ehci_mv = platform_get_drvdata(pdev);
-	struct usb_hcd *hcd = ehci_mv->hcd;
+	struct usb_hcd *hcd = platform_get_drvdata(pdev);
 
 	if (!hcd->rh_registered)
 		return;
@@ -305,13 +264,41 @@ static void mv_ehci_shutdown(struct platform_device *pdev)
 		hcd->driver->shutdown(hcd);
 }
 
+static const struct of_device_id ehci_mv_dt_ids[] = {
+	{ .compatible = "marvell,pxau2o-ehci", },
+	{},
+};
+
 static struct platform_driver ehci_mv_driver = {
 	.probe = mv_ehci_probe,
 	.remove = mv_ehci_remove,
 	.shutdown = mv_ehci_shutdown,
 	.driver = {
-		   .name = "mv-ehci",
-		   .bus = &platform_bus_type,
-		   },
+		.name = "mv-ehci",
+		.bus = &platform_bus_type,
+		.of_match_table = ehci_mv_dt_ids,
+	},
 	.id_table = ehci_id_table,
 };
+
+static int __init ehci_platform_init(void)
+{
+	if (usb_disabled())
+		return -ENODEV;
+
+	ehci_init_driver(&ehci_platform_hc_driver, &platform_overrides);
+	return platform_driver_register(&ehci_mv_driver);
+}
+module_init(ehci_platform_init);
+
+static void __exit ehci_platform_cleanup(void)
+{
+	platform_driver_unregister(&ehci_mv_driver);
+}
+module_exit(ehci_platform_cleanup);
+
+MODULE_DESCRIPTION("Marvell EHCI driver");
+MODULE_AUTHOR("Chao Xie <chao.xie@marvell.com>");
+MODULE_AUTHOR("Neil Zhang <zhangwm@marvell.com>");
+MODULE_ALIAS("mv-ehci");
+MODULE_LICENSE("GPL");
diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
index 327630405695..aa2f77f1506d 100644
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -245,12 +245,12 @@ ehci_urb_done(struct ehci_hcd *ehci, struct urb *urb, int status)
 	}
 
 	if (unlikely(urb->unlinked)) {
-		COUNT(ehci->stats.unlink);
+		INCR(ehci->stats.unlink);
 	} else {
 		/* report non-error and short read status as zero */
 		if (status == -EINPROGRESS || status == -EREMOTEIO)
 			status = 0;
-		COUNT(ehci->stats.complete);
+		INCR(ehci->stats.complete);
 	}
 
 #ifdef EHCI_URB_TRACE
diff --git a/drivers/usb/host/ehci-timer.c b/drivers/usb/host/ehci-timer.c
index 4fcebda4b79d..a79c8ac0a55f 100644
--- a/drivers/usb/host/ehci-timer.c
+++ b/drivers/usb/host/ehci-timer.c
@@ -347,7 +347,7 @@ static void ehci_iaa_watchdog(struct ehci_hcd *ehci)
 	 */
 	status = ehci_readl(ehci, &ehci->regs->status);
 	if ((status & STS_IAA) || !(cmd & CMD_IAAD)) {
-		COUNT(ehci->stats.lost_iaa);
+		INCR(ehci->stats.lost_iaa);
 		ehci_writel(ehci, STS_IAA, &ehci->regs->status);
 	}
 
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index c8e9a48e1d51..ac5e967907d1 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -235,9 +235,9 @@ struct ehci_hcd {			/* one per controller */
 	/* irq statistics */
 #ifdef EHCI_STATS
 	struct ehci_stats	stats;
-#	define COUNT(x) ((x)++)
+#	define INCR(x) ((x)++)
 #else
-#	define COUNT(x)
+#	define INCR(x) do {} while (0)
 #endif
 
 	/* debug files */
diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c
index e64eb47770c8..0da68df259c8 100644
--- a/drivers/usb/host/fotg210-hcd.c
+++ b/drivers/usb/host/fotg210-hcd.c
@@ -31,6 +31,7 @@
 #include <linux/uaccess.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
+#include <linux/clk.h>
 
 #include <asm/byteorder.h>
 #include <asm/irq.h>
@@ -1285,7 +1286,7 @@ static void fotg210_iaa_watchdog(struct fotg210_hcd *fotg210)
 		 */
 		status = fotg210_readl(fotg210, &fotg210->regs->status);
 		if ((status & STS_IAA) || !(cmd & CMD_IAAD)) {
-			COUNT(fotg210->stats.lost_iaa);
+			INCR(fotg210->stats.lost_iaa);
 			fotg210_writel(fotg210, STS_IAA,
 					&fotg210->regs->status);
 		}
@@ -2204,12 +2205,12 @@ __acquires(fotg210->lock)
 	}
 
 	if (unlikely(urb->unlinked)) {
-		COUNT(fotg210->stats.unlink);
+		INCR(fotg210->stats.unlink);
 	} else {
 		/* report non-error and short read status as zero */
 		if (status == -EINPROGRESS || status == -EREMOTEIO)
 			status = 0;
-		COUNT(fotg210->stats.complete);
+		INCR(fotg210->stats.complete);
 	}
 
 #ifdef FOTG210_URB_TRACE
@@ -5153,9 +5154,9 @@ static irqreturn_t fotg210_irq(struct usb_hcd *hcd)
 	/* normal [4.15.1.2] or error [4.15.1.1] completion */
 	if (likely((status & (STS_INT|STS_ERR)) != 0)) {
 		if (likely((status & STS_ERR) == 0))
-			COUNT(fotg210->stats.normal);
+			INCR(fotg210->stats.normal);
 		else
-			COUNT(fotg210->stats.error);
+			INCR(fotg210->stats.error);
 		bh = 1;
 	}
 
@@ -5180,7 +5181,7 @@ static irqreturn_t fotg210_irq(struct usb_hcd *hcd)
 		if (cmd & CMD_IAAD)
 			fotg210_dbg(fotg210, "IAA with IAAD still set?\n");
 		if (fotg210->async_iaa) {
-			COUNT(fotg210->stats.iaa);
+			INCR(fotg210->stats.iaa);
 			end_unlink_async(fotg210);
 		} else
 			fotg210_dbg(fotg210, "IAA with nothing unlinked?\n");
@@ -5596,7 +5597,7 @@ static int fotg210_hcd_probe(struct platform_device *pdev)
 	hcd->regs = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(hcd->regs)) {
 		retval = PTR_ERR(hcd->regs);
-		goto failed;
+		goto failed_put_hcd;
 	}
 
 	hcd->rsrc_start = res->start;
@@ -5606,22 +5607,43 @@ static int fotg210_hcd_probe(struct platform_device *pdev)
 
 	fotg210->caps = hcd->regs;
 
+	/* It's OK not to supply this clock */
+	fotg210->pclk = clk_get(dev, "PCLK");
+	if (!IS_ERR(fotg210->pclk)) {
+		retval = clk_prepare_enable(fotg210->pclk);
+		if (retval) {
+			dev_err(dev, "failed to enable PCLK\n");
+			goto failed_put_hcd;
+		}
+	} else if (PTR_ERR(fotg210->pclk) == -EPROBE_DEFER) {
+		/*
+		 * Percolate deferrals, for anything else,
+		 * just live without the clocking.
+		 */
+		retval = PTR_ERR(fotg210->pclk);
+		goto failed_dis_clk;
+	}
+
 	retval = fotg210_setup(hcd);
 	if (retval)
-		goto failed;
+		goto failed_dis_clk;
 
 	fotg210_init(fotg210);
 
 	retval = usb_add_hcd(hcd, irq, IRQF_SHARED);
 	if (retval) {
 		dev_err(dev, "failed to add hcd with err %d\n", retval);
-		goto failed;
+		goto failed_dis_clk;
 	}
 	device_wakeup_enable(hcd->self.controller);
+	platform_set_drvdata(pdev, hcd);
 
 	return retval;
 
-failed:
+failed_dis_clk:
+	if (!IS_ERR(fotg210->pclk))
+		clk_disable_unprepare(fotg210->pclk);
+failed_put_hcd:
 	usb_put_hcd(hcd);
 fail_create_hcd:
 	dev_err(dev, "init %s fail, %d\n", dev_name(dev), retval);
@@ -5635,11 +5657,11 @@ fail_create_hcd:
  */
 static int fotg210_hcd_remove(struct platform_device *pdev)
 {
-	struct device *dev = &pdev->dev;
-	struct usb_hcd *hcd = dev_get_drvdata(dev);
+	struct usb_hcd *hcd = platform_get_drvdata(pdev);
+	struct fotg210_hcd *fotg210 = hcd_to_fotg210(hcd);
 
-	if (!hcd)
-		return 0;
+	if (!IS_ERR(fotg210->pclk))
+		clk_disable_unprepare(fotg210->pclk);
 
 	usb_remove_hcd(hcd);
 	usb_put_hcd(hcd);
diff --git a/drivers/usb/host/fotg210.h b/drivers/usb/host/fotg210.h
index 7fcd785c7bc8..1b4db95e5c43 100644
--- a/drivers/usb/host/fotg210.h
+++ b/drivers/usb/host/fotg210.h
@@ -177,11 +177,14 @@ struct fotg210_hcd {			/* one per controller */
 	/* irq statistics */
 #ifdef FOTG210_STATS
 	struct fotg210_stats	stats;
-#	define COUNT(x) ((x)++)
+#	define INCR(x) ((x)++)
 #else
-#	define COUNT(x)
+#	define INCR(x) do {} while (0)
 #endif
 
+	/* silicon clock */
+	struct clk		*pclk;
+
 	/* debug files */
 	struct dentry		*debug_dir;
 };
diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c
index e98673954020..ec6739ef3129 100644
--- a/drivers/usb/host/ohci-at91.c
+++ b/drivers/usb/host/ohci-at91.c
@@ -551,6 +551,8 @@ static int ohci_hcd_at91_drv_probe(struct platform_device *pdev)
 		pdata->overcurrent_pin[i] =
 			devm_gpiod_get_index_optional(&pdev->dev, "atmel,oc",
 						      i, GPIOD_IN);
+		if (!pdata->overcurrent_pin[i])
+			continue;
 		if (IS_ERR(pdata->overcurrent_pin[i])) {
 			err = PTR_ERR(pdata->overcurrent_pin[i]);
 			dev_err(&pdev->dev, "unable to claim gpio \"overcurrent\": %d\n", err);
diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index 3625a5c1a41b..3ce71cbfbb58 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -783,15 +783,9 @@ static void quirk_usb_handoff_ohci(struct pci_dev *pdev)
 	/* disable interrupts */
 	writel((u32) ~0, base + OHCI_INTRDISABLE);
 
-	/* Reset the USB bus, if the controller isn't already in RESET */
-	if (control & OHCI_HCFS) {
-		/* Go into RESET, preserving RWC (and possibly IR) */
-		writel(control & OHCI_CTRL_MASK, base + OHCI_CONTROL);
-		readl(base + OHCI_CONTROL);
-
-		/* drive bus reset for at least 50 ms (7.1.7.5) */
-		msleep(50);
-	}
+	/* Go into the USB_RESET state, preserving RWC (and possibly IR) */
+	writel(control & OHCI_CTRL_MASK, base + OHCI_CONTROL);
+	readl(base + OHCI_CONTROL);
 
 	/* software reset of the controller, preserving HcFmInterval */
 	if (!no_fminterval)
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 7e2a531ba321..12eea73d9f20 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -900,6 +900,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
 				set_bit(wIndex, &bus_state->resuming_ports);
 				bus_state->resume_done[wIndex] = timeout;
 				mod_timer(&hcd->rh_timer, timeout);
+				usb_hcd_start_port_resume(&hcd->self, wIndex);
 			}
 		/* Has resume been signalled for USB_RESUME_TIME yet? */
 		} else if (time_after_eq(jiffies,
@@ -940,6 +941,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
 				clear_bit(wIndex, &bus_state->rexit_ports);
 			}
 
+			usb_hcd_end_port_resume(&hcd->self, wIndex);
 			bus_state->port_c_suspend |= 1 << wIndex;
 			bus_state->suspended_ports &= ~(1 << wIndex);
 		} else {
@@ -962,6 +964,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
 	    (raw_port_status & PORT_PLS_MASK) != XDEV_RESUME) {
 		bus_state->resume_done[wIndex] = 0;
 		clear_bit(wIndex, &bus_state->resuming_ports);
+		usb_hcd_end_port_resume(&hcd->self, wIndex);
 	}
 
 
@@ -1337,6 +1340,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
 					goto error;
 
 				set_bit(wIndex, &bus_state->resuming_ports);
+				usb_hcd_start_port_resume(&hcd->self, wIndex);
 				xhci_set_link_state(xhci, ports[wIndex],
 						    XDEV_RESUME);
 				spin_unlock_irqrestore(&xhci->lock, flags);
@@ -1345,6 +1349,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
 				xhci_set_link_state(xhci, ports[wIndex],
 							XDEV_U0);
 				clear_bit(wIndex, &bus_state->resuming_ports);
+				usb_hcd_end_port_resume(&hcd->self, wIndex);
 			}
 			bus_state->port_c_suspend |= 1 << wIndex;
 
diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c
index fa33d6e5b1cb..fea555570ad4 100644
--- a/drivers/usb/host/xhci-mtk-sch.c
+++ b/drivers/usb/host/xhci-mtk-sch.c
@@ -13,14 +13,20 @@
 #include "xhci.h"
 #include "xhci-mtk.h"
 
+#define SSP_BW_BOUNDARY	130000
 #define SS_BW_BOUNDARY	51000
 /* table 5-5. High-speed Isoc Transaction Limits in usb_20 spec */
 #define HS_BW_BOUNDARY	6144
 /* usb2 spec section11.18.1: at most 188 FS bytes per microframe */
 #define FS_PAYLOAD_MAX 188
+/*
+ * max number of microframes for split transfer,
+ * for fs isoc in : 1 ss + 1 idle + 7 cs
+ */
+#define TT_MICROFRAMES_MAX 9
 
 /* mtk scheduler bitmasks */
-#define EP_BPKTS(p)	((p) & 0x3f)
+#define EP_BPKTS(p)	((p) & 0x7f)
 #define EP_BCSCOUNT(p)	(((p) & 0x7) << 8)
 #define EP_BBM(p)	((p) << 11)
 #define EP_BOFFSET(p)	((p) & 0x3fff)
@@ -51,7 +57,7 @@ static int get_bw_index(struct xhci_hcd *xhci, struct usb_device *udev,
 
 	virt_dev = xhci->devs[udev->slot_id];
 
-	if (udev->speed == USB_SPEED_SUPER) {
+	if (udev->speed >= USB_SPEED_SUPER) {
 		if (usb_endpoint_dir_out(&ep->desc))
 			bw_index = (virt_dev->real_port - 1) * 2;
 		else
@@ -64,25 +70,167 @@ static int get_bw_index(struct xhci_hcd *xhci, struct usb_device *udev,
 	return bw_index;
 }
 
+static u32 get_esit(struct xhci_ep_ctx *ep_ctx)
+{
+	u32 esit;
+
+	esit = 1 << CTX_TO_EP_INTERVAL(le32_to_cpu(ep_ctx->ep_info));
+	if (esit > XHCI_MTK_MAX_ESIT)
+		esit = XHCI_MTK_MAX_ESIT;
+
+	return esit;
+}
+
+static struct mu3h_sch_tt *find_tt(struct usb_device *udev)
+{
+	struct usb_tt *utt = udev->tt;
+	struct mu3h_sch_tt *tt, **tt_index, **ptt;
+	unsigned int port;
+	bool allocated_index = false;
+
+	if (!utt)
+		return NULL;	/* Not below a TT */
+
+	/*
+	 * Find/create our data structure.
+	 * For hubs with a single TT, we get it directly.
+	 * For hubs with multiple TTs, there's an extra level of pointers.
+	 */
+	tt_index = NULL;
+	if (utt->multi) {
+		tt_index = utt->hcpriv;
+		if (!tt_index) {	/* Create the index array */
+			tt_index = kcalloc(utt->hub->maxchild,
+					sizeof(*tt_index), GFP_KERNEL);
+			if (!tt_index)
+				return ERR_PTR(-ENOMEM);
+			utt->hcpriv = tt_index;
+			allocated_index = true;
+		}
+		port = udev->ttport - 1;
+		ptt = &tt_index[port];
+	} else {
+		port = 0;
+		ptt = (struct mu3h_sch_tt **) &utt->hcpriv;
+	}
+
+	tt = *ptt;
+	if (!tt) {	/* Create the mu3h_sch_tt */
+		tt = kzalloc(sizeof(*tt), GFP_KERNEL);
+		if (!tt) {
+			if (allocated_index) {
+				utt->hcpriv = NULL;
+				kfree(tt_index);
+			}
+			return ERR_PTR(-ENOMEM);
+		}
+		INIT_LIST_HEAD(&tt->ep_list);
+		tt->usb_tt = utt;
+		tt->tt_port = port;
+		*ptt = tt;
+	}
+
+	return tt;
+}
+
+/* Release the TT above udev, if it's not in use */
+static void drop_tt(struct usb_device *udev)
+{
+	struct usb_tt *utt = udev->tt;
+	struct mu3h_sch_tt *tt, **tt_index, **ptt;
+	int i, cnt;
+
+	if (!utt || !utt->hcpriv)
+		return;		/* Not below a TT, or never allocated */
+
+	cnt = 0;
+	if (utt->multi) {
+		tt_index = utt->hcpriv;
+		ptt = &tt_index[udev->ttport - 1];
+		/*  How many entries are left in tt_index? */
+		for (i = 0; i < utt->hub->maxchild; ++i)
+			cnt += !!tt_index[i];
+	} else {
+		tt_index = NULL;
+		ptt = (struct mu3h_sch_tt **)&utt->hcpriv;
+	}
+
+	tt = *ptt;
+	if (!tt || !list_empty(&tt->ep_list))
+		return;		/* never allocated , or still in use*/
+
+	*ptt = NULL;
+	kfree(tt);
+
+	if (cnt == 1) {
+		utt->hcpriv = NULL;
+		kfree(tt_index);
+	}
+}
+
+static struct mu3h_sch_ep_info *create_sch_ep(struct usb_device *udev,
+	struct usb_host_endpoint *ep, struct xhci_ep_ctx *ep_ctx)
+{
+	struct mu3h_sch_ep_info *sch_ep;
+	struct mu3h_sch_tt *tt = NULL;
+	u32 len_bw_budget_table;
+	size_t mem_size;
+
+	if (is_fs_or_ls(udev->speed))
+		len_bw_budget_table = TT_MICROFRAMES_MAX;
+	else if ((udev->speed >= USB_SPEED_SUPER)
+			&& usb_endpoint_xfer_isoc(&ep->desc))
+		len_bw_budget_table = get_esit(ep_ctx);
+	else
+		len_bw_budget_table = 1;
+
+	mem_size = sizeof(struct mu3h_sch_ep_info) +
+			len_bw_budget_table * sizeof(u32);
+	sch_ep = kzalloc(mem_size, GFP_KERNEL);
+	if (!sch_ep)
+		return ERR_PTR(-ENOMEM);
+
+	if (is_fs_or_ls(udev->speed)) {
+		tt = find_tt(udev);
+		if (IS_ERR(tt)) {
+			kfree(sch_ep);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
+
+	sch_ep->sch_tt = tt;
+	sch_ep->ep = ep;
+
+	return sch_ep;
+}
+
 static void setup_sch_info(struct usb_device *udev,
 		struct xhci_ep_ctx *ep_ctx, struct mu3h_sch_ep_info *sch_ep)
 {
 	u32 ep_type;
-	u32 ep_interval;
-	u32 max_packet_size;
+	u32 maxpkt;
 	u32 max_burst;
 	u32 mult;
 	u32 esit_pkts;
+	u32 max_esit_payload;
+	u32 *bwb_table = sch_ep->bw_budget_table;
+	int i;
 
 	ep_type = CTX_TO_EP_TYPE(le32_to_cpu(ep_ctx->ep_info2));
-	ep_interval = CTX_TO_EP_INTERVAL(le32_to_cpu(ep_ctx->ep_info));
-	max_packet_size = MAX_PACKET_DECODED(le32_to_cpu(ep_ctx->ep_info2));
+	maxpkt = MAX_PACKET_DECODED(le32_to_cpu(ep_ctx->ep_info2));
 	max_burst = CTX_TO_MAX_BURST(le32_to_cpu(ep_ctx->ep_info2));
 	mult = CTX_TO_EP_MULT(le32_to_cpu(ep_ctx->ep_info));
-
-	sch_ep->esit = 1 << ep_interval;
+	max_esit_payload =
+		(CTX_TO_MAX_ESIT_PAYLOAD_HI(
+			le32_to_cpu(ep_ctx->ep_info)) << 16) |
+		 CTX_TO_MAX_ESIT_PAYLOAD(le32_to_cpu(ep_ctx->tx_info));
+
+	sch_ep->esit = get_esit(ep_ctx);
+	sch_ep->ep_type = ep_type;
+	sch_ep->maxpkt = maxpkt;
 	sch_ep->offset = 0;
 	sch_ep->burst_mode = 0;
+	sch_ep->repeat = 0;
 
 	if (udev->speed == USB_SPEED_HIGH) {
 		sch_ep->cs_count = 0;
@@ -93,7 +241,6 @@ static void setup_sch_info(struct usb_device *udev,
 		 * in a interval
 		 */
 		sch_ep->num_budget_microframes = 1;
-		sch_ep->repeat = 0;
 
 		/*
 		 * xHCI spec section6.2.3.4
@@ -101,19 +248,33 @@ static void setup_sch_info(struct usb_device *udev,
 		 * opportunities per microframe
 		 */
 		sch_ep->pkts = max_burst + 1;
-		sch_ep->bw_cost_per_microframe = max_packet_size * sch_ep->pkts;
-	} else if (udev->speed == USB_SPEED_SUPER) {
+		sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts;
+		bwb_table[0] = sch_ep->bw_cost_per_microframe;
+	} else if (udev->speed >= USB_SPEED_SUPER) {
 		/* usb3_r1 spec section4.4.7 & 4.4.8 */
 		sch_ep->cs_count = 0;
-		esit_pkts = (mult + 1) * (max_burst + 1);
+		sch_ep->burst_mode = 1;
+		/*
+		 * some device's (d)wBytesPerInterval is set as 0,
+		 * then max_esit_payload is 0, so evaluate esit_pkts from
+		 * mult and burst
+		 */
+		esit_pkts = DIV_ROUND_UP(max_esit_payload, maxpkt);
+		if (esit_pkts == 0)
+			esit_pkts = (mult + 1) * (max_burst + 1);
+
 		if (ep_type == INT_IN_EP || ep_type == INT_OUT_EP) {
 			sch_ep->pkts = esit_pkts;
 			sch_ep->num_budget_microframes = 1;
-			sch_ep->repeat = 0;
+			bwb_table[0] = maxpkt * sch_ep->pkts;
 		}
 
 		if (ep_type == ISOC_IN_EP || ep_type == ISOC_OUT_EP) {
-			if (esit_pkts <= sch_ep->esit)
+			u32 remainder;
+
+			if (sch_ep->esit == 1)
+				sch_ep->pkts = esit_pkts;
+			else if (esit_pkts <= sch_ep->esit)
 				sch_ep->pkts = 1;
 			else
 				sch_ep->pkts = roundup_pow_of_two(esit_pkts)
@@ -122,43 +283,48 @@ static void setup_sch_info(struct usb_device *udev,
 			sch_ep->num_budget_microframes =
 				DIV_ROUND_UP(esit_pkts, sch_ep->pkts);
 
-			if (sch_ep->num_budget_microframes > 1)
-				sch_ep->repeat = 1;
-			else
-				sch_ep->repeat = 0;
+			sch_ep->repeat = !!(sch_ep->num_budget_microframes > 1);
+			sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts;
+
+			remainder = sch_ep->bw_cost_per_microframe;
+			remainder *= sch_ep->num_budget_microframes;
+			remainder -= (maxpkt * esit_pkts);
+			for (i = 0; i < sch_ep->num_budget_microframes - 1; i++)
+				bwb_table[i] = sch_ep->bw_cost_per_microframe;
+
+			/* last one <= bw_cost_per_microframe */
+			bwb_table[i] = remainder;
 		}
-		sch_ep->bw_cost_per_microframe = max_packet_size * sch_ep->pkts;
 	} else if (is_fs_or_ls(udev->speed)) {
+		sch_ep->pkts = 1; /* at most one packet for each microframe */
 
 		/*
-		 * usb_20 spec section11.18.4
-		 * assume worst cases
+		 * num_budget_microframes and cs_count will be updated when
+		 * check TT for INT_OUT_EP, ISOC/INT_IN_EP type
 		 */
-		sch_ep->repeat = 0;
-		sch_ep->pkts = 1; /* at most one packet for each microframe */
-		if (ep_type == INT_IN_EP || ep_type == INT_OUT_EP) {
-			sch_ep->cs_count = 3; /* at most need 3 CS*/
-			/* one for SS and one for budgeted transaction */
-			sch_ep->num_budget_microframes = sch_ep->cs_count + 2;
-			sch_ep->bw_cost_per_microframe = max_packet_size;
-		}
-		if (ep_type == ISOC_OUT_EP) {
+		sch_ep->cs_count = DIV_ROUND_UP(maxpkt, FS_PAYLOAD_MAX);
+		sch_ep->num_budget_microframes = sch_ep->cs_count;
+		sch_ep->bw_cost_per_microframe =
+			(maxpkt < FS_PAYLOAD_MAX) ? maxpkt : FS_PAYLOAD_MAX;
 
+		/* init budget table */
+		if (ep_type == ISOC_OUT_EP) {
+			for (i = 0; i < sch_ep->num_budget_microframes; i++)
+				bwb_table[i] =	sch_ep->bw_cost_per_microframe;
+		} else if (ep_type == INT_OUT_EP) {
+			/* only first one consumes bandwidth, others as zero */
+			bwb_table[0] = sch_ep->bw_cost_per_microframe;
+		} else { /* INT_IN_EP or ISOC_IN_EP */
+			bwb_table[0] = 0; /* start split */
+			bwb_table[1] = 0; /* idle */
 			/*
-			 * the best case FS budget assumes that 188 FS bytes
-			 * occur in each microframe
+			 * due to cs_count will be updated according to cs
+			 * position, assign all remainder budget array
+			 * elements as @bw_cost_per_microframe, but only first
+			 * @num_budget_microframes elements will be used later
 			 */
-			sch_ep->num_budget_microframes = DIV_ROUND_UP(
-				max_packet_size, FS_PAYLOAD_MAX);
-			sch_ep->bw_cost_per_microframe = FS_PAYLOAD_MAX;
-			sch_ep->cs_count = sch_ep->num_budget_microframes;
-		}
-		if (ep_type == ISOC_IN_EP) {
-			/* at most need additional two CS. */
-			sch_ep->cs_count = DIV_ROUND_UP(
-				max_packet_size, FS_PAYLOAD_MAX) + 2;
-			sch_ep->num_budget_microframes = sch_ep->cs_count + 2;
-			sch_ep->bw_cost_per_microframe = FS_PAYLOAD_MAX;
+			for (i = 2; i < TT_MICROFRAMES_MAX; i++)
+				bwb_table[i] =	sch_ep->bw_cost_per_microframe;
 		}
 	}
 }
@@ -169,6 +335,7 @@ static u32 get_max_bw(struct mu3h_sch_bw_info *sch_bw,
 {
 	u32 num_esit;
 	u32 max_bw = 0;
+	u32 bw;
 	int i;
 	int j;
 
@@ -177,15 +344,17 @@ static u32 get_max_bw(struct mu3h_sch_bw_info *sch_bw,
 		u32 base = offset + i * sch_ep->esit;
 
 		for (j = 0; j < sch_ep->num_budget_microframes; j++) {
-			if (sch_bw->bus_bw[base + j] > max_bw)
-				max_bw = sch_bw->bus_bw[base + j];
+			bw = sch_bw->bus_bw[base + j] +
+					sch_ep->bw_budget_table[j];
+			if (bw > max_bw)
+				max_bw = bw;
 		}
 	}
 	return max_bw;
 }
 
 static void update_bus_bw(struct mu3h_sch_bw_info *sch_bw,
-	struct mu3h_sch_ep_info *sch_ep, int bw_cost)
+	struct mu3h_sch_ep_info *sch_ep, bool used)
 {
 	u32 num_esit;
 	u32 base;
@@ -195,9 +364,105 @@ static void update_bus_bw(struct mu3h_sch_bw_info *sch_bw,
 	num_esit = XHCI_MTK_MAX_ESIT / sch_ep->esit;
 	for (i = 0; i < num_esit; i++) {
 		base = sch_ep->offset + i * sch_ep->esit;
+		for (j = 0; j < sch_ep->num_budget_microframes; j++) {
+			if (used)
+				sch_bw->bus_bw[base + j] +=
+					sch_ep->bw_budget_table[j];
+			else
+				sch_bw->bus_bw[base + j] -=
+					sch_ep->bw_budget_table[j];
+		}
+	}
+}
+
+static int check_sch_tt(struct usb_device *udev,
+	struct mu3h_sch_ep_info *sch_ep, u32 offset)
+{
+	struct mu3h_sch_tt *tt = sch_ep->sch_tt;
+	u32 extra_cs_count;
+	u32 fs_budget_start;
+	u32 start_ss, last_ss;
+	u32 start_cs, last_cs;
+	int i;
+
+	start_ss = offset % 8;
+	fs_budget_start = (start_ss + 1) % 8;
+
+	if (sch_ep->ep_type == ISOC_OUT_EP) {
+		last_ss = start_ss + sch_ep->cs_count - 1;
+
+		/*
+		 * usb_20 spec section11.18:
+		 * must never schedule Start-Split in Y6
+		 */
+		if (!(start_ss == 7 || last_ss < 6))
+			return -ERANGE;
+
+		for (i = 0; i < sch_ep->cs_count; i++)
+			if (test_bit(offset + i, tt->split_bit_map))
+				return -ERANGE;
+
+	} else {
+		u32 cs_count = DIV_ROUND_UP(sch_ep->maxpkt, FS_PAYLOAD_MAX);
+
+		/*
+		 * usb_20 spec section11.18:
+		 * must never schedule Start-Split in Y6
+		 */
+		if (start_ss == 6)
+			return -ERANGE;
+
+		/* one uframe for ss + one uframe for idle */
+		start_cs = (start_ss + 2) % 8;
+		last_cs = start_cs + cs_count - 1;
+
+		if (last_cs > 7)
+			return -ERANGE;
+
+		if (sch_ep->ep_type == ISOC_IN_EP)
+			extra_cs_count = (last_cs == 7) ? 1 : 2;
+		else /*  ep_type : INTR IN / INTR OUT */
+			extra_cs_count = (fs_budget_start == 6) ? 1 : 2;
+
+		cs_count += extra_cs_count;
+		if (cs_count > 7)
+			cs_count = 7; /* HW limit */
+
+		for (i = 0; i < cs_count + 2; i++) {
+			if (test_bit(offset + i, tt->split_bit_map))
+				return -ERANGE;
+		}
+
+		sch_ep->cs_count = cs_count;
+		/* one for ss, the other for idle */
+		sch_ep->num_budget_microframes = cs_count + 2;
+
+		/*
+		 * if interval=1, maxp >752, num_budge_micoframe is larger
+		 * than sch_ep->esit, will overstep boundary
+		 */
+		if (sch_ep->num_budget_microframes > sch_ep->esit)
+			sch_ep->num_budget_microframes = sch_ep->esit;
+	}
+
+	return 0;
+}
+
+static void update_sch_tt(struct usb_device *udev,
+	struct mu3h_sch_ep_info *sch_ep)
+{
+	struct mu3h_sch_tt *tt = sch_ep->sch_tt;
+	u32 base, num_esit;
+	int i, j;
+
+	num_esit = XHCI_MTK_MAX_ESIT / sch_ep->esit;
+	for (i = 0; i < num_esit; i++) {
+		base = sch_ep->offset + i * sch_ep->esit;
 		for (j = 0; j < sch_ep->num_budget_microframes; j++)
-			sch_bw->bus_bw[base + j] += bw_cost;
+			set_bit(base + j, tt->split_bit_map);
 	}
+
+	list_add_tail(&sch_ep->tt_endpoint, &tt->ep_list);
 }
 
 static int check_sch_bw(struct usb_device *udev,
@@ -205,17 +470,16 @@ static int check_sch_bw(struct usb_device *udev,
 {
 	u32 offset;
 	u32 esit;
-	u32 num_budget_microframes;
 	u32 min_bw;
 	u32 min_index;
 	u32 worst_bw;
 	u32 bw_boundary;
-
-	if (sch_ep->esit > XHCI_MTK_MAX_ESIT)
-		sch_ep->esit = XHCI_MTK_MAX_ESIT;
+	u32 min_num_budget;
+	u32 min_cs_count;
+	bool tt_offset_ok = false;
+	int ret;
 
 	esit = sch_ep->esit;
-	num_budget_microframes = sch_ep->num_budget_microframes;
 
 	/*
 	 * Search through all possible schedule microframes.
@@ -223,36 +487,56 @@ static int check_sch_bw(struct usb_device *udev,
 	 */
 	min_bw = ~0;
 	min_index = 0;
+	min_cs_count = sch_ep->cs_count;
+	min_num_budget = sch_ep->num_budget_microframes;
 	for (offset = 0; offset < esit; offset++) {
-		if ((offset + num_budget_microframes) > sch_ep->esit)
-			break;
+		if (is_fs_or_ls(udev->speed)) {
+			ret = check_sch_tt(udev, sch_ep, offset);
+			if (ret)
+				continue;
+			else
+				tt_offset_ok = true;
+		}
 
-		/*
-		 * usb_20 spec section11.18:
-		 * must never schedule Start-Split in Y6
-		 */
-		if (is_fs_or_ls(udev->speed) && (offset % 8 == 6))
-			continue;
+		if ((offset + sch_ep->num_budget_microframes) > sch_ep->esit)
+			break;
 
 		worst_bw = get_max_bw(sch_bw, sch_ep, offset);
 		if (min_bw > worst_bw) {
 			min_bw = worst_bw;
 			min_index = offset;
+			min_cs_count = sch_ep->cs_count;
+			min_num_budget = sch_ep->num_budget_microframes;
 		}
 		if (min_bw == 0)
 			break;
 	}
-	sch_ep->offset = min_index;
 
-	bw_boundary = (udev->speed == USB_SPEED_SUPER)
-				? SS_BW_BOUNDARY : HS_BW_BOUNDARY;
+	if (udev->speed == USB_SPEED_SUPER_PLUS)
+		bw_boundary = SSP_BW_BOUNDARY;
+	else if (udev->speed == USB_SPEED_SUPER)
+		bw_boundary = SS_BW_BOUNDARY;
+	else
+		bw_boundary = HS_BW_BOUNDARY;
 
 	/* check bandwidth */
-	if (min_bw + sch_ep->bw_cost_per_microframe > bw_boundary)
+	if (min_bw > bw_boundary)
 		return -ERANGE;
 
+	sch_ep->offset = min_index;
+	sch_ep->cs_count = min_cs_count;
+	sch_ep->num_budget_microframes = min_num_budget;
+
+	if (is_fs_or_ls(udev->speed)) {
+		/* all offset for tt is not ok*/
+		if (!tt_offset_ok)
+			return -ERANGE;
+
+		update_sch_tt(udev, sch_ep);
+	}
+
 	/* update bus bandwidth info */
-	update_bus_bw(sch_bw, sch_ep, sch_ep->bw_cost_per_microframe);
+	update_bus_bw(sch_bw, sch_ep, 1);
 
 	return 0;
 }
@@ -347,8 +631,8 @@ int xhci_mtk_add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev,
 	bw_index = get_bw_index(xhci, udev, ep);
 	sch_bw = &sch_array[bw_index];
 
-	sch_ep = kzalloc(sizeof(struct mu3h_sch_ep_info), GFP_NOIO);
-	if (!sch_ep)
+	sch_ep = create_sch_ep(udev, ep, ep_ctx);
+	if (IS_ERR_OR_NULL(sch_ep))
 		return -ENOMEM;
 
 	setup_sch_info(udev, ep_ctx, sch_ep);
@@ -356,12 +640,14 @@ int xhci_mtk_add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev,
 	ret = check_sch_bw(udev, sch_bw, sch_ep);
 	if (ret) {
 		xhci_err(xhci, "Not enough bandwidth!\n");
+		if (is_fs_or_ls(udev->speed))
+			drop_tt(udev);
+
 		kfree(sch_ep);
 		return -ENOSPC;
 	}
 
 	list_add_tail(&sch_ep->endpoint, &sch_bw->bw_ep_list);
-	sch_ep->ep = ep;
 
 	ep_ctx->reserved[0] |= cpu_to_le32(EP_BPKTS(sch_ep->pkts)
 		| EP_BCSCOUNT(sch_ep->cs_count) | EP_BBM(sch_ep->burst_mode));
@@ -406,9 +692,12 @@ void xhci_mtk_drop_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev,
 
 	list_for_each_entry(sch_ep, &sch_bw->bw_ep_list, endpoint) {
 		if (sch_ep->ep == ep) {
-			update_bus_bw(sch_bw, sch_ep,
-				-sch_ep->bw_cost_per_microframe);
+			update_bus_bw(sch_bw, sch_ep, 0);
 			list_del(&sch_ep->endpoint);
+			if (is_fs_or_ls(udev->speed)) {
+				list_del(&sch_ep->tt_endpoint);
+				drop_tt(udev);
+			}
 			kfree(sch_ep);
 			break;
 		}
diff --git a/drivers/usb/host/xhci-mtk.h b/drivers/usb/host/xhci-mtk.h
index cc59d80b663b..8be8c5f7ff62 100644
--- a/drivers/usb/host/xhci-mtk.h
+++ b/drivers/usb/host/xhci-mtk.h
@@ -20,6 +20,19 @@
 #define XHCI_MTK_MAX_ESIT	64
 
 /**
+ * @split_bit_map: used to avoid split microframes overlay
+ * @ep_list: Endpoints using this TT
+ * @usb_tt: usb TT related
+ * @tt_port: TT port number
+ */
+struct mu3h_sch_tt {
+	DECLARE_BITMAP(split_bit_map, XHCI_MTK_MAX_ESIT);
+	struct list_head ep_list;
+	struct usb_tt *usb_tt;
+	int tt_port;
+};
+
+/**
  * struct mu3h_sch_bw_info: schedule information for bandwidth domain
  *
  * @bus_bw: array to keep track of bandwidth already used at each uframes
@@ -41,6 +54,10 @@ struct mu3h_sch_bw_info {
  *		(@repeat==1) scheduled within the interval
  * @bw_cost_per_microframe: bandwidth cost per microframe
  * @endpoint: linked into bandwidth domain which it belongs to
+ * @tt_endpoint: linked into mu3h_sch_tt's list which it belongs to
+ * @sch_tt: mu3h_sch_tt linked into
+ * @ep_type: endpoint type
+ * @maxpkt: max packet size of endpoint
  * @ep: address of usb_host_endpoint struct
  * @offset: which uframe of the interval that transfer should be
  *		scheduled first time within the interval
@@ -57,12 +74,17 @@ struct mu3h_sch_bw_info {
  *		times; 1: distribute the (bMaxBurst+1)*(Mult+1) packets
  *		according to @pkts and @repeat. normal mode is used by
  *		default
+ * @bw_budget_table: table to record bandwidth budget per microframe
  */
 struct mu3h_sch_ep_info {
 	u32 esit;
 	u32 num_budget_microframes;
 	u32 bw_cost_per_microframe;
 	struct list_head endpoint;
+	struct list_head tt_endpoint;
+	struct mu3h_sch_tt *sch_tt;
+	u32 ep_type;
+	u32 maxpkt;
 	void *ep;
 	/*
 	 * mtk xHCI scheduling information put into reserved DWs
@@ -73,6 +95,7 @@ struct mu3h_sch_ep_info {
 	u32 pkts;
 	u32 cs_count;
 	u32 burst_mode;
+	u32 bw_budget_table[0];
 };
 
 #define MU3C_U3_PORT_MAX 4
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 51dd8e00c4f8..01c57055c0c5 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -41,6 +41,13 @@
 #define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI		0x1aa8
 #define PCI_DEVICE_ID_INTEL_APL_XHCI			0x5aa8
 #define PCI_DEVICE_ID_INTEL_DNV_XHCI			0x19d0
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_XHCI	0x15b5
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_XHCI	0x15b6
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_2C_XHCI	0x15db
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_4C_XHCI	0x15d4
+#define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_2C_XHCI		0x15e9
+#define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_XHCI		0x15ec
+#define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI		0x15f0
 
 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4			0x43b9
 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3			0x43ba
@@ -193,6 +200,16 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 	     pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI))
 		xhci->quirks |= XHCI_MISSING_CAS;
 
+	if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
+	    (pdev->device == PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_XHCI ||
+	     pdev->device == PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_XHCI ||
+	     pdev->device == PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_2C_XHCI ||
+	     pdev->device == PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_4C_XHCI ||
+	     pdev->device == PCI_DEVICE_ID_INTEL_TITAN_RIDGE_2C_XHCI ||
+	     pdev->device == PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_XHCI ||
+	     pdev->device == PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI))
+		xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW;
+
 	if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
 			pdev->device == PCI_DEVICE_ID_EJ168) {
 		xhci->quirks |= XHCI_RESET_ON_RESUME;
@@ -336,6 +353,9 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	/* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */
 	pm_runtime_put_noidle(&dev->dev);
 
+	if (xhci->quirks & XHCI_DEFAULT_PM_RUNTIME_ALLOW)
+		pm_runtime_allow(&dev->dev);
+
 	return 0;
 
 put_usb3_hcd:
@@ -353,6 +373,10 @@ static void xhci_pci_remove(struct pci_dev *dev)
 
 	xhci = hcd_to_xhci(pci_get_drvdata(dev));
 	xhci->xhc_state |= XHCI_STATE_REMOVING;
+
+	if (xhci->quirks & XHCI_DEFAULT_PM_RUNTIME_ALLOW)
+		pm_runtime_forbid(&dev->dev);
+
 	if (xhci->shared_hcd) {
 		usb_remove_hcd(xhci->shared_hcd);
 		usb_put_hcd(xhci->shared_hcd);
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index 94e939249b2b..32b5574ad5c5 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -18,6 +18,7 @@
 #include <linux/usb/phy.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
+#include <linux/usb/of.h>
 
 #include "xhci.h"
 #include "xhci-plat.h"
@@ -305,6 +306,8 @@ static int xhci_plat_probe(struct platform_device *pdev)
 		hcd->skip_phy_initialization = 1;
 	}
 
+	hcd->tpl_support = of_usb_host_tpl_support(sysdev->of_node);
+	xhci->shared_hcd->tpl_support = hcd->tpl_support;
 	ret = usb_add_hcd(hcd, irq, IRQF_SHARED);
 	if (ret)
 		goto disable_usb_phy;
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index f0a99aa0ac58..a8d92c90fb58 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1155,6 +1155,10 @@ static void xhci_handle_cmd_reset_ep(struct xhci_hcd *xhci, int slot_id,
 		/* Clear our internal halted state */
 		xhci->devs[slot_id]->eps[ep_index].ep_state &= ~EP_HALTED;
 	}
+
+	/* if this was a soft reset, then restart */
+	if ((le32_to_cpu(trb->generic.field[3])) & TRB_TSP)
+		ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
 }
 
 static void xhci_handle_cmd_enable_slot(struct xhci_hcd *xhci, int slot_id,
@@ -1602,6 +1606,7 @@ static void handle_port_status(struct xhci_hcd *xhci,
 			set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
 			mod_timer(&hcd->rh_timer,
 				  bus_state->resume_done[hcd_portnum]);
+			usb_hcd_start_port_resume(&hcd->self, hcd_portnum);
 			bogus_port_status = true;
 		}
 	}
@@ -2132,10 +2137,16 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
 	union xhci_trb *ep_trb, struct xhci_transfer_event *event,
 	struct xhci_virt_ep *ep, int *status)
 {
+	struct xhci_slot_ctx *slot_ctx;
 	struct xhci_ring *ep_ring;
 	u32 trb_comp_code;
 	u32 remaining, requested, ep_trb_len;
+	unsigned int slot_id;
+	int ep_index;
 
+	slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags));
+	slot_ctx = xhci_get_slot_ctx(xhci, xhci->devs[slot_id]->out_ctx);
+	ep_index = TRB_TO_EP_ID(le32_to_cpu(event->flags)) - 1;
 	ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer));
 	trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
 	remaining = EVENT_TRB_LEN(le32_to_cpu(event->transfer_len));
@@ -2144,6 +2155,7 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
 
 	switch (trb_comp_code) {
 	case COMP_SUCCESS:
+		ep_ring->err_count = 0;
 		/* handle success with untransferred data as short packet */
 		if (ep_trb != td->last_trb || remaining) {
 			xhci_warn(xhci, "WARN Successful completion on short TX\n");
@@ -2167,6 +2179,14 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
 		ep_trb_len	= 0;
 		remaining	= 0;
 		break;
+	case COMP_USB_TRANSACTION_ERROR:
+		if ((ep_ring->err_count++ > MAX_SOFT_RETRY) ||
+		    le32_to_cpu(slot_ctx->tt_info) & TT_SLOT)
+			break;
+		*status = 0;
+		xhci_cleanup_halted_endpoint(xhci, slot_id, ep_index,
+					ep_ring->stream_id, td, EP_SOFT_RESET);
+		return 0;
 	default:
 		/* do nothing */
 		break;
diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c
index 4b463e5202a4..6b5db344de30 100644
--- a/drivers/usb/host/xhci-tegra.c
+++ b/drivers/usb/host/xhci-tegra.c
@@ -18,6 +18,7 @@
 #include <linux/phy/tegra/xusb.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
+#include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
@@ -107,35 +108,35 @@
 #define IMEM_BLOCK_SIZE				256
 
 struct tegra_xusb_fw_header {
-	u32 boot_loadaddr_in_imem;
-	u32 boot_codedfi_offset;
-	u32 boot_codetag;
-	u32 boot_codesize;
-	u32 phys_memaddr;
-	u16 reqphys_memsize;
-	u16 alloc_phys_memsize;
-	u32 rodata_img_offset;
-	u32 rodata_section_start;
-	u32 rodata_section_end;
-	u32 main_fnaddr;
-	u32 fwimg_cksum;
-	u32 fwimg_created_time;
-	u32 imem_resident_start;
-	u32 imem_resident_end;
-	u32 idirect_start;
-	u32 idirect_end;
-	u32 l2_imem_start;
-	u32 l2_imem_end;
-	u32 version_id;
+	__le32 boot_loadaddr_in_imem;
+	__le32 boot_codedfi_offset;
+	__le32 boot_codetag;
+	__le32 boot_codesize;
+	__le32 phys_memaddr;
+	__le16 reqphys_memsize;
+	__le16 alloc_phys_memsize;
+	__le32 rodata_img_offset;
+	__le32 rodata_section_start;
+	__le32 rodata_section_end;
+	__le32 main_fnaddr;
+	__le32 fwimg_cksum;
+	__le32 fwimg_created_time;
+	__le32 imem_resident_start;
+	__le32 imem_resident_end;
+	__le32 idirect_start;
+	__le32 idirect_end;
+	__le32 l2_imem_start;
+	__le32 l2_imem_end;
+	__le32 version_id;
 	u8 init_ddirect;
 	u8 reserved[3];
-	u32 phys_addr_log_buffer;
-	u32 total_log_entries;
-	u32 dequeue_ptr;
-	u32 dummy_var[2];
-	u32 fwimg_len;
+	__le32 phys_addr_log_buffer;
+	__le32 total_log_entries;
+	__le32 dequeue_ptr;
+	__le32 dummy_var[2];
+	__le32 fwimg_len;
 	u8 magic[8];
-	u32 ss_low_power_entry_timeout;
+	__le32 ss_low_power_entry_timeout;
 	u8 num_hsic_port;
 	u8 padding[139]; /* Pad to 256 bytes */
 };
@@ -194,6 +195,11 @@ struct tegra_xusb {
 	struct reset_control *host_rst;
 	struct reset_control *ss_rst;
 
+	struct device *genpd_dev_host;
+	struct device *genpd_dev_ss;
+	struct device_link *genpd_dl_host;
+	struct device_link *genpd_dl_ss;
+
 	struct phy **phys;
 	unsigned int num_phys;
 
@@ -928,6 +934,57 @@ static int tegra_xusb_load_firmware(struct tegra_xusb *tegra)
 	return 0;
 }
 
+static void tegra_xusb_powerdomain_remove(struct device *dev,
+					  struct tegra_xusb *tegra)
+{
+	if (tegra->genpd_dl_ss)
+		device_link_del(tegra->genpd_dl_ss);
+	if (tegra->genpd_dl_host)
+		device_link_del(tegra->genpd_dl_host);
+	if (tegra->genpd_dev_ss)
+		dev_pm_domain_detach(tegra->genpd_dev_ss, true);
+	if (tegra->genpd_dev_host)
+		dev_pm_domain_detach(tegra->genpd_dev_host, true);
+}
+
+static int tegra_xusb_powerdomain_init(struct device *dev,
+				       struct tegra_xusb *tegra)
+{
+	int err;
+
+	tegra->genpd_dev_host = dev_pm_domain_attach_by_name(dev, "xusb_host");
+	if (IS_ERR(tegra->genpd_dev_host)) {
+		err = PTR_ERR(tegra->genpd_dev_host);
+		dev_err(dev, "failed to get host pm-domain: %d\n", err);
+		return err;
+	}
+
+	tegra->genpd_dev_ss = dev_pm_domain_attach_by_name(dev, "xusb_ss");
+	if (IS_ERR(tegra->genpd_dev_ss)) {
+		err = PTR_ERR(tegra->genpd_dev_ss);
+		dev_err(dev, "failed to get superspeed pm-domain: %d\n", err);
+		return err;
+	}
+
+	tegra->genpd_dl_host = device_link_add(dev, tegra->genpd_dev_host,
+					       DL_FLAG_PM_RUNTIME |
+					       DL_FLAG_STATELESS);
+	if (!tegra->genpd_dl_host) {
+		dev_err(dev, "adding host device link failed!\n");
+		return -ENODEV;
+	}
+
+	tegra->genpd_dl_ss = device_link_add(dev, tegra->genpd_dev_ss,
+					     DL_FLAG_PM_RUNTIME |
+					     DL_FLAG_STATELESS);
+	if (!tegra->genpd_dl_ss) {
+		dev_err(dev, "adding superspeed device link failed!\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
 static int tegra_xusb_probe(struct platform_device *pdev)
 {
 	struct tegra_xusb_mbox_msg msg;
@@ -1038,7 +1095,7 @@ static int tegra_xusb_probe(struct platform_device *pdev)
 		goto put_padctl;
 	}
 
-	if (!pdev->dev.pm_domain) {
+	if (!of_property_read_bool(pdev->dev.of_node, "power-domains")) {
 		tegra->host_rst = devm_reset_control_get(&pdev->dev,
 							 "xusb_host");
 		if (IS_ERR(tegra->host_rst)) {
@@ -1069,17 +1126,22 @@ static int tegra_xusb_probe(struct platform_device *pdev)
 							tegra->host_clk,
 							tegra->host_rst);
 		if (err) {
+			tegra_powergate_power_off(TEGRA_POWERGATE_XUSBA);
 			dev_err(&pdev->dev,
 				"failed to enable XUSBC domain: %d\n", err);
-			goto disable_xusba;
+			goto put_padctl;
 		}
+	} else {
+		err = tegra_xusb_powerdomain_init(&pdev->dev, tegra);
+		if (err)
+			goto put_powerdomains;
 	}
 
 	tegra->supplies = devm_kcalloc(&pdev->dev, tegra->soc->num_supplies,
 				       sizeof(*tegra->supplies), GFP_KERNEL);
 	if (!tegra->supplies) {
 		err = -ENOMEM;
-		goto disable_xusbc;
+		goto put_powerdomains;
 	}
 
 	for (i = 0; i < tegra->soc->num_supplies; i++)
@@ -1089,7 +1151,7 @@ static int tegra_xusb_probe(struct platform_device *pdev)
 				      tegra->supplies);
 	if (err) {
 		dev_err(&pdev->dev, "failed to get regulators: %d\n", err);
-		goto disable_xusbc;
+		goto put_powerdomains;
 	}
 
 	for (i = 0; i < tegra->soc->num_types; i++)
@@ -1099,7 +1161,7 @@ static int tegra_xusb_probe(struct platform_device *pdev)
 				   sizeof(*tegra->phys), GFP_KERNEL);
 	if (!tegra->phys) {
 		err = -ENOMEM;
-		goto disable_xusbc;
+		goto put_powerdomains;
 	}
 
 	for (i = 0, k = 0; i < tegra->soc->num_types; i++) {
@@ -1115,7 +1177,7 @@ static int tegra_xusb_probe(struct platform_device *pdev)
 					"failed to get PHY %s: %ld\n", prop,
 					PTR_ERR(phy));
 				err = PTR_ERR(phy);
-				goto disable_xusbc;
+				goto put_powerdomains;
 			}
 
 			tegra->phys[k++] = phy;
@@ -1126,7 +1188,7 @@ static int tegra_xusb_probe(struct platform_device *pdev)
 				    dev_name(&pdev->dev));
 	if (!tegra->hcd) {
 		err = -ENOMEM;
-		goto disable_xusbc;
+		goto put_powerdomains;
 	}
 
 	/*
@@ -1222,12 +1284,13 @@ put_rpm:
 disable_rpm:
 	pm_runtime_disable(&pdev->dev);
 	usb_put_hcd(tegra->hcd);
-disable_xusbc:
-	if (!pdev->dev.pm_domain)
+put_powerdomains:
+	if (!of_property_read_bool(pdev->dev.of_node, "power-domains")) {
 		tegra_powergate_power_off(TEGRA_POWERGATE_XUSBC);
-disable_xusba:
-	if (!pdev->dev.pm_domain)
 		tegra_powergate_power_off(TEGRA_POWERGATE_XUSBA);
+	} else {
+		tegra_xusb_powerdomain_remove(&pdev->dev, tegra);
+	}
 put_padctl:
 	tegra_xusb_padctl_put(tegra->padctl);
 	return err;
@@ -1249,6 +1312,13 @@ static int tegra_xusb_remove(struct platform_device *pdev)
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
+	if (!of_property_read_bool(pdev->dev.of_node, "power-domains")) {
+		tegra_powergate_power_off(TEGRA_POWERGATE_XUSBC);
+		tegra_powergate_power_off(TEGRA_POWERGATE_XUSBA);
+	} else {
+		tegra_xusb_powerdomain_remove(&pdev->dev, tegra);
+	}
+
 	tegra_xusb_padctl_put(tegra->padctl);
 
 	return 0;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 6230a578324c..bf0b3692dc9a 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1496,6 +1496,7 @@ static inline const char *xhci_trb_type_string(u8 type)
 /* How much data is left before the 64KB boundary? */
 #define TRB_BUFF_LEN_UP_TO_BOUNDARY(addr)	(TRB_MAX_BUFF_SIZE - \
 					(addr & (TRB_MAX_BUFF_SIZE - 1)))
+#define MAX_SOFT_RETRY		3
 
 struct xhci_segment {
 	union xhci_trb		*trbs;
@@ -1583,6 +1584,7 @@ struct xhci_ring {
 	 * if we own the TRB (if we are the consumer).  See section 4.9.1.
 	 */
 	u32			cycle_state;
+	unsigned int            err_count;
 	unsigned int		stream_id;
 	unsigned int		num_segs;
 	unsigned int		num_trbs_free;
@@ -1846,6 +1848,7 @@ struct xhci_hcd {
 #define XHCI_SUSPEND_DELAY	BIT_ULL(30)
 #define XHCI_INTEL_USB_ROLE_SW	BIT_ULL(31)
 #define XHCI_ZERO_64B_REGS	BIT_ULL(32)
+#define XHCI_DEFAULT_PM_RUNTIME_ALLOW	BIT_ULL(33)
 
 	unsigned int		num_active_eps;
 	unsigned int		limit_active_eps;
diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c
index d746c26a8055..bd539f3058bc 100644
--- a/drivers/usb/misc/appledisplay.c
+++ b/drivers/usb/misc/appledisplay.c
@@ -146,8 +146,11 @@ static int appledisplay_bl_update_status(struct backlight_device *bd)
 		pdata->msgdata, 2,
 		ACD_USB_TIMEOUT);
 	mutex_unlock(&pdata->sysfslock);
-	
-	return retval;
+
+	if (retval < 0)
+		return retval;
+	else
+		return 0;
 }
 
 static int appledisplay_bl_get_brightness(struct backlight_device *bd)
diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index c2991b8a65ce..ba05dd80a020 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -808,8 +808,8 @@ static int iowarrior_probe(struct usb_interface *interface,
 			 dev->int_in_endpoint->bInterval);
 	/* create an internal buffer for interrupt data from the device */
 	dev->read_queue =
-	    kmalloc(((dev->report_size + 1) * MAX_INTERRUPT_BUFFER),
-		    GFP_KERNEL);
+	    kmalloc_array(dev->report_size + 1, MAX_INTERRUPT_BUFFER,
+			  GFP_KERNEL);
 	if (!dev->read_queue)
 		goto error;
 	/* Get the serial-number of the chip */
diff --git a/drivers/usb/misc/trancevibrator.c b/drivers/usb/misc/trancevibrator.c
index b3e1f553954a..ac357ce2d1a6 100644
--- a/drivers/usb/misc/trancevibrator.c
+++ b/drivers/usb/misc/trancevibrator.c
@@ -46,7 +46,9 @@ static ssize_t speed_store(struct device *dev, struct device_attribute *attr,
 	struct trancevibrator *tv = usb_get_intfdata(intf);
 	int temp, retval, old;
 
-	temp = simple_strtoul(buf, NULL, 10);
+	retval = kstrtoint(buf, 10, &temp);
+	if (retval)
+		return retval;
 	if (temp > 255)
 		temp = 255;
 	else if (temp < 0)
diff --git a/drivers/usb/mtu3/mtu3_core.c b/drivers/usb/mtu3/mtu3_core.c
index d045d8458f81..ae70b9bfd797 100644
--- a/drivers/usb/mtu3/mtu3_core.c
+++ b/drivers/usb/mtu3/mtu3_core.c
@@ -185,8 +185,8 @@ static void mtu3_intr_enable(struct mtu3 *mtu)
 
 	if (mtu->is_u3_ip) {
 		/* Enable U3 LTSSM interrupts */
-		value = HOT_RST_INTR | WARM_RST_INTR | VBUS_RISE_INTR |
-		    VBUS_FALL_INTR | ENTER_U3_INTR | EXIT_U3_INTR;
+		value = HOT_RST_INTR | WARM_RST_INTR |
+			ENTER_U3_INTR | EXIT_U3_INTR;
 		mtu3_writel(mbase, U3D_LTSSM_INTR_ENABLE, value);
 	}
 
diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c
index 5c60a8c5a0b5..bbcd3332471d 100644
--- a/drivers/usb/mtu3/mtu3_gadget.c
+++ b/drivers/usb/mtu3/mtu3_gadget.c
@@ -585,6 +585,17 @@ static const struct usb_gadget_ops mtu3_gadget_ops = {
 	.udc_stop = mtu3_gadget_stop,
 };
 
+static void mtu3_state_reset(struct mtu3 *mtu)
+{
+	mtu->address = 0;
+	mtu->ep0_state = MU3D_EP0_STATE_SETUP;
+	mtu->may_wakeup = 0;
+	mtu->u1_enable = 0;
+	mtu->u2_enable = 0;
+	mtu->delayed_status = false;
+	mtu->test_mode = false;
+}
+
 static void init_hw_ep(struct mtu3 *mtu, struct mtu3_ep *mep,
 		u32 epnum, u32 is_in)
 {
@@ -702,6 +713,7 @@ void mtu3_gadget_disconnect(struct mtu3 *mtu)
 		spin_lock(&mtu->lock);
 	}
 
+	mtu3_state_reset(mtu);
 	usb_gadget_set_state(&mtu->g, USB_STATE_NOTATTACHED);
 }
 
@@ -712,12 +724,6 @@ void mtu3_gadget_reset(struct mtu3 *mtu)
 	/* report disconnect, if we didn't flush EP state */
 	if (mtu->g.speed != USB_SPEED_UNKNOWN)
 		mtu3_gadget_disconnect(mtu);
-
-	mtu->address = 0;
-	mtu->ep0_state = MU3D_EP0_STATE_SETUP;
-	mtu->may_wakeup = 0;
-	mtu->u1_enable = 0;
-	mtu->u2_enable = 0;
-	mtu->delayed_status = false;
-	mtu->test_mode = false;
+	else
+		mtu3_state_reset(mtu);
 }
diff --git a/drivers/usb/phy/phy-ab8500-usb.c b/drivers/usb/phy/phy-ab8500-usb.c
index 66143ab8c043..aaf363f19714 100644
--- a/drivers/usb/phy/phy-ab8500-usb.c
+++ b/drivers/usb/phy/phy-ab8500-usb.c
@@ -505,15 +505,19 @@ static int abx500_usb_link_status_update(struct ab8500_usb *ab)
 	if (is_ab8500(ab->ab8500)) {
 		enum ab8500_usb_link_status lsts;
 
-		abx500_get_register_interruptible(ab->dev,
+		ret = abx500_get_register_interruptible(ab->dev,
 				AB8500_USB, AB8500_USB_LINE_STAT_REG, &reg);
+		if (ret < 0)
+			return ret;
 		lsts = (reg >> 3) & 0x0F;
 		ret = ab8500_usb_link_status_update(ab, lsts);
 	} else if (is_ab8505(ab->ab8500)) {
 		enum ab8505_usb_link_status lsts;
 
-		abx500_get_register_interruptible(ab->dev,
+		ret = abx500_get_register_interruptible(ab->dev,
 				AB8500_USB, AB8505_USB_LINE_STAT_REG, &reg);
+		if (ret < 0)
+			return ret;
 		lsts = (reg >> 3) & 0x1F;
 		ret = ab8505_usb_link_status_update(ab, lsts);
 	}
diff --git a/drivers/usb/phy/phy-mxs-usb.c b/drivers/usb/phy/phy-mxs-usb.c
index e5aa24c1e4fd..1b1bb0ad40c3 100644
--- a/drivers/usb/phy/phy-mxs-usb.c
+++ b/drivers/usb/phy/phy-mxs-usb.c
@@ -563,7 +563,7 @@ static enum usb_charger_type mxs_charger_primary_detection(struct mxs_phy *x)
 	regmap_read(regmap, ANADIG_USB1_CHRG_DET_STAT, &val);
 	if (!(val & ANADIG_USB1_CHRG_DET_STAT_CHRG_DETECTED)) {
 		chgr_type = SDP_TYPE;
-		dev_dbg(x->phy.dev, "It is a stardard downstream port\n");
+		dev_dbg(x->phy.dev, "It is a standard downstream port\n");
 	}
 
 	/* Disable charger detector */
diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c
index 4310df46639d..a3e1290d682d 100644
--- a/drivers/usb/renesas_usbhs/common.c
+++ b/drivers/usb/renesas_usbhs/common.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2011 Renesas Solutions Corp.
  * Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
  */
+#include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/gpio.h>
 #include <linux/io.h>
@@ -12,6 +13,7 @@
 #include <linux/of_device.h>
 #include <linux/of_gpio.h>
 #include <linux/pm_runtime.h>
+#include <linux/reset.h>
 #include <linux/slab.h>
 #include <linux/sysfs.h>
 #include "common.h"
@@ -290,6 +292,79 @@ static void usbhsc_set_buswait(struct usbhs_priv *priv)
 		usbhs_bset(priv, BUSWAIT, 0x000F, wait);
 }
 
+static bool usbhsc_is_multi_clks(struct usbhs_priv *priv)
+{
+	if (priv->dparam.type == USBHS_TYPE_RCAR_GEN3 ||
+	    priv->dparam.type == USBHS_TYPE_RCAR_GEN3_WITH_PLL)
+		return true;
+
+	return false;
+}
+
+static int usbhsc_clk_get(struct device *dev, struct usbhs_priv *priv)
+{
+	if (!usbhsc_is_multi_clks(priv))
+		return 0;
+
+	/* The first clock should exist */
+	priv->clks[0] = of_clk_get(dev->of_node, 0);
+	if (IS_ERR(priv->clks[0]))
+		return PTR_ERR(priv->clks[0]);
+
+	/*
+	 * To backward compatibility with old DT, this driver checks the return
+	 * value if it's -ENOENT or not.
+	 */
+	priv->clks[1] = of_clk_get(dev->of_node, 1);
+	if (PTR_ERR(priv->clks[1]) == -ENOENT)
+		priv->clks[1] = NULL;
+	else if (IS_ERR(priv->clks[1]))
+		return PTR_ERR(priv->clks[1]);
+
+	return 0;
+}
+
+static void usbhsc_clk_put(struct usbhs_priv *priv)
+{
+	int i;
+
+	if (!usbhsc_is_multi_clks(priv))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(priv->clks); i++)
+		clk_put(priv->clks[i]);
+}
+
+static int usbhsc_clk_prepare_enable(struct usbhs_priv *priv)
+{
+	int i, ret;
+
+	if (!usbhsc_is_multi_clks(priv))
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(priv->clks); i++) {
+		ret = clk_prepare_enable(priv->clks[i]);
+		if (ret) {
+			while (--i >= 0)
+				clk_disable_unprepare(priv->clks[i]);
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+static void usbhsc_clk_disable_unprepare(struct usbhs_priv *priv)
+{
+	int i;
+
+	if (!usbhsc_is_multi_clks(priv))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(priv->clks); i++)
+		clk_disable_unprepare(priv->clks[i]);
+}
+
 /*
  *		platform default param
  */
@@ -340,6 +415,10 @@ static void usbhsc_power_ctrl(struct usbhs_priv *priv, int enable)
 		/* enable PM */
 		pm_runtime_get_sync(dev);
 
+		/* enable clks */
+		if (usbhsc_clk_prepare_enable(priv))
+			return;
+
 		/* enable platform power */
 		usbhs_platform_call(priv, power_ctrl, pdev, priv->base, enable);
 
@@ -352,6 +431,9 @@ static void usbhsc_power_ctrl(struct usbhs_priv *priv, int enable)
 		/* disable platform power */
 		usbhs_platform_call(priv, power_ctrl, pdev, priv->base, enable);
 
+		/* disable clks */
+		usbhsc_clk_disable_unprepare(priv);
+
 		/* disable PM */
 		pm_runtime_put_sync(dev);
 	}
@@ -478,6 +560,10 @@ static const struct of_device_id usbhs_of_match[] = {
 		.data = (void *)USBHS_TYPE_RCAR_GEN3,
 	},
 	{
+		.compatible = "renesas,usbhs-r8a77990",
+		.data = (void *)USBHS_TYPE_RCAR_GEN3_WITH_PLL,
+	},
+	{
 		.compatible = "renesas,usbhs-r8a77995",
 		.data = (void *)USBHS_TYPE_RCAR_GEN3_WITH_PLL,
 	},
@@ -574,6 +660,10 @@ static int usbhs_probe(struct platform_device *pdev)
 			return PTR_ERR(priv->edev);
 	}
 
+	priv->rsts = devm_reset_control_array_get_optional_shared(&pdev->dev);
+	if (IS_ERR(priv->rsts))
+		return PTR_ERR(priv->rsts);
+
 	/*
 	 * care platform info
 	 */
@@ -591,15 +681,6 @@ static int usbhs_probe(struct platform_device *pdev)
 		break;
 	case USBHS_TYPE_RCAR_GEN3_WITH_PLL:
 		priv->pfunc = usbhs_rcar3_with_pll_ops;
-		if (!IS_ERR_OR_NULL(priv->edev)) {
-			priv->nb.notifier_call = priv->pfunc.notifier;
-			ret = devm_extcon_register_notifier(&pdev->dev,
-							    priv->edev,
-							    EXTCON_USB_HOST,
-							    &priv->nb);
-			if (ret < 0)
-				dev_err(&pdev->dev, "no notifier registered\n");
-		}
 		break;
 	case USBHS_TYPE_RZA1:
 		priv->pfunc = usbhs_rza1_ops;
@@ -658,6 +739,14 @@ static int usbhs_probe(struct platform_device *pdev)
 	/* dev_set_drvdata should be called after usbhs_mod_init */
 	platform_set_drvdata(pdev, priv);
 
+	ret = reset_control_deassert(priv->rsts);
+	if (ret)
+		goto probe_fail_rst;
+
+	ret = usbhsc_clk_get(&pdev->dev, priv);
+	if (ret)
+		goto probe_fail_clks;
+
 	/*
 	 * deviece reset here because
 	 * USB device might be used in boot loader.
@@ -711,6 +800,10 @@ static int usbhs_probe(struct platform_device *pdev)
 	return ret;
 
 probe_end_mod_exit:
+	usbhsc_clk_put(priv);
+probe_fail_clks:
+	reset_control_assert(priv->rsts);
+probe_fail_rst:
 	usbhs_mod_remove(priv);
 probe_end_fifo_exit:
 	usbhs_fifo_remove(priv);
@@ -739,6 +832,8 @@ static int usbhs_remove(struct platform_device *pdev)
 	pm_runtime_disable(&pdev->dev);
 
 	usbhs_platform_call(priv, hardware_exit, pdev);
+	usbhsc_clk_put(priv);
+	reset_control_assert(priv->rsts);
 	usbhs_mod_remove(priv);
 	usbhs_fifo_remove(priv);
 	usbhs_pipe_remove(priv);
diff --git a/drivers/usb/renesas_usbhs/common.h b/drivers/usb/renesas_usbhs/common.h
index 6137f7942c05..3777af848a35 100644
--- a/drivers/usb/renesas_usbhs/common.h
+++ b/drivers/usb/renesas_usbhs/common.h
@@ -8,8 +8,10 @@
 #ifndef RENESAS_USB_DRIVER_H
 #define RENESAS_USB_DRIVER_H
 
+#include <linux/clk.h>
 #include <linux/extcon.h>
 #include <linux/platform_device.h>
+#include <linux/reset.h>
 #include <linux/usb/renesas_usbhs.h>
 
 struct usbhs_priv;
@@ -255,7 +257,6 @@ struct usbhs_priv {
 	struct platform_device *pdev;
 
 	struct extcon_dev *edev;
-	struct notifier_block nb;
 
 	spinlock_t		lock;
 
@@ -277,6 +278,8 @@ struct usbhs_priv {
 	struct usbhs_fifo_info fifo_info;
 
 	struct phy *phy;
+	struct reset_control *rsts;
+	struct clk *clks[2];
 };
 
 /*
diff --git a/drivers/usb/renesas_usbhs/rcar3.c b/drivers/usb/renesas_usbhs/rcar3.c
index d0ea4ff89622..aa3820448286 100644
--- a/drivers/usb/renesas_usbhs/rcar3.c
+++ b/drivers/usb/renesas_usbhs/rcar3.c
@@ -27,7 +27,6 @@
  * Remarks: bit[31:11] and bit[9:6] should be 0
  */
 #define UGCTRL2_RESERVED_3	0x00000001	/* bit[3:0] should be B'0001 */
-#define UGCTRL2_USB0SEL_EHCI	0x00000010
 #define UGCTRL2_USB0SEL_HSUSB	0x00000020
 #define UGCTRL2_USB0SEL_OTG	0x00000030
 #define UGCTRL2_VBUSSEL		0x00000400
@@ -50,14 +49,6 @@ static void usbhs_rcar3_set_ugctrl2(struct usbhs_priv *priv, u32 val)
 	usbhs_write32(priv, UGCTRL2, val | UGCTRL2_RESERVED_3);
 }
 
-static void usbhs_rcar3_set_usbsel(struct usbhs_priv *priv, bool ehci)
-{
-	if (ehci)
-		usbhs_rcar3_set_ugctrl2(priv, UGCTRL2_USB0SEL_EHCI);
-	else
-		usbhs_rcar3_set_ugctrl2(priv, UGCTRL2_USB0SEL_HSUSB);
-}
-
 static int usbhs_rcar3_power_ctrl(struct platform_device *pdev,
 				void __iomem *base, int enable)
 {
@@ -83,14 +74,11 @@ static int usbhs_rcar3_power_and_pll_ctrl(struct platform_device *pdev,
 	struct usbhs_priv *priv = usbhs_pdev_to_priv(pdev);
 	u32 val;
 	int timeout = 1000;
-	bool is_host = false;
 
 	if (enable) {
 		usbhs_write32(priv, UGCTRL, 0);	/* release PLLRESET */
-		if (priv->edev)
-			is_host = extcon_get_state(priv->edev, EXTCON_USB_HOST);
-
-		usbhs_rcar3_set_usbsel(priv, is_host);
+		usbhs_rcar3_set_ugctrl2(priv,
+					UGCTRL2_USB0SEL_OTG | UGCTRL2_VBUSSEL);
 
 		usbhs_bset(priv, LPSTS, LPSTS_SUSPM, LPSTS_SUSPM);
 		do {
@@ -112,16 +100,6 @@ static int usbhs_rcar3_get_id(struct platform_device *pdev)
 	return USBHS_GADGET;
 }
 
-static int usbhs_rcar3_notifier(struct notifier_block *nb, unsigned long event,
-				void *data)
-{
-	struct usbhs_priv *priv = container_of(nb, struct usbhs_priv, nb);
-
-	usbhs_rcar3_set_usbsel(priv, !!event);
-
-	return NOTIFY_DONE;
-}
-
 const struct renesas_usbhs_platform_callback usbhs_rcar3_ops = {
 	.power_ctrl = usbhs_rcar3_power_ctrl,
 	.get_id = usbhs_rcar3_get_id,
@@ -130,5 +108,4 @@ const struct renesas_usbhs_platform_callback usbhs_rcar3_ops = {
 const struct renesas_usbhs_platform_callback usbhs_rcar3_with_pll_ops = {
 	.power_ctrl = usbhs_rcar3_power_and_pll_ctrl,
 	.get_id = usbhs_rcar3_get_id,
-	.notifier = usbhs_rcar3_notifier,
 };
diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index e0035c023120..ed51bc48eea6 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -378,7 +378,7 @@ static int cypress_serial_control(struct tty_struct *tty,
 			retval = -ENOTTY;
 			goto out;
 		}
-		dev_dbg(dev, "%s - retreiving serial line settings\n", __func__);
+		dev_dbg(dev, "%s - retrieving serial line settings\n", __func__);
 		do {
 			retval = usb_control_msg(port->serial->dev,
 					usb_rcvctrlpipe(port->serial->dev, 0),
@@ -769,7 +769,7 @@ send:
 
 	usb_fill_int_urb(port->interrupt_out_urb, port->serial->dev,
 		usb_sndintpipe(port->serial->dev, port->interrupt_out_endpointAddress),
-		port->interrupt_out_buffer, port->interrupt_out_size,
+		port->interrupt_out_buffer, actual_size,
 		cypress_write_int_callback, port, priv->write_urb_interval);
 	result = usb_submit_urb(port->interrupt_out_urb, GFP_ATOMIC);
 	if (result) {
@@ -863,7 +863,7 @@ static void cypress_set_termios(struct tty_struct *tty,
 	struct cypress_private *priv = usb_get_serial_port_data(port);
 	struct device *dev = &port->dev;
 	int data_bits, stop_bits, parity_type, parity_enable;
-	unsigned cflag, iflag;
+	unsigned int cflag;
 	unsigned long flags;
 	__u8 oldlines;
 	int linechange = 0;
@@ -899,7 +899,6 @@ static void cypress_set_termios(struct tty_struct *tty,
 	tty->termios.c_cflag &= ~(CMSPAR|CRTSCTS);
 
 	cflag = tty->termios.c_cflag;
-	iflag = tty->termios.c_iflag;
 
 	/* check if there are new settings */
 	if (old_termios) {
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 758ba789e997..609198d9594c 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -39,6 +39,7 @@
 #include <linux/uaccess.h>
 #include <linux/usb.h>
 #include <linux/serial.h>
+#include <linux/gpio/driver.h>
 #include <linux/usb/serial.h>
 #include "ftdi_sio.h"
 #include "ftdi_sio_ids.h"
@@ -72,6 +73,15 @@ struct ftdi_private {
 	unsigned int latency;		/* latency setting in use */
 	unsigned short max_packet_size;
 	struct mutex cfg_lock; /* Avoid mess by parallel calls of config ioctl() and change_speed() */
+#ifdef CONFIG_GPIOLIB
+	struct gpio_chip gc;
+	struct mutex gpio_lock;	/* protects GPIO state */
+	bool gpio_registered;	/* is the gpiochip in kernel registered */
+	bool gpio_used;		/* true if the user requested a gpio */
+	u8 gpio_altfunc;	/* which pins are in gpio mode */
+	u8 gpio_output;		/* pin directions cache */
+	u8 gpio_value;		/* pin value for outputs */
+#endif
 };
 
 /* struct ftdi_sio_quirk is used by devices requiring special attention. */
@@ -1764,6 +1774,375 @@ static void remove_sysfs_attrs(struct usb_serial_port *port)
 
 }
 
+#ifdef CONFIG_GPIOLIB
+
+static int ftdi_set_bitmode(struct usb_serial_port *port, u8 mode)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	struct usb_serial *serial = port->serial;
+	int result;
+	u16 val;
+
+	val = (mode << 8) | (priv->gpio_output << 4) | priv->gpio_value;
+	result = usb_control_msg(serial->dev,
+				 usb_sndctrlpipe(serial->dev, 0),
+				 FTDI_SIO_SET_BITMODE_REQUEST,
+				 FTDI_SIO_SET_BITMODE_REQUEST_TYPE, val,
+				 priv->interface, NULL, 0, WDR_TIMEOUT);
+	if (result < 0) {
+		dev_err(&serial->interface->dev,
+			"bitmode request failed for value 0x%04x: %d\n",
+			val, result);
+	}
+
+	return result;
+}
+
+static int ftdi_set_cbus_pins(struct usb_serial_port *port)
+{
+	return ftdi_set_bitmode(port, FTDI_SIO_BITMODE_CBUS);
+}
+
+static int ftdi_exit_cbus_mode(struct usb_serial_port *port)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+
+	priv->gpio_output = 0;
+	priv->gpio_value = 0;
+	return ftdi_set_bitmode(port, FTDI_SIO_BITMODE_RESET);
+}
+
+static int ftdi_gpio_request(struct gpio_chip *gc, unsigned int offset)
+{
+	struct usb_serial_port *port = gpiochip_get_data(gc);
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	int result;
+
+	if (priv->gpio_altfunc & BIT(offset))
+		return -ENODEV;
+
+	mutex_lock(&priv->gpio_lock);
+	if (!priv->gpio_used) {
+		/* Set default pin states, as we cannot get them from device */
+		priv->gpio_output = 0x00;
+		priv->gpio_value = 0x00;
+		result = ftdi_set_cbus_pins(port);
+		if (result) {
+			mutex_unlock(&priv->gpio_lock);
+			return result;
+		}
+
+		priv->gpio_used = true;
+	}
+	mutex_unlock(&priv->gpio_lock);
+
+	return 0;
+}
+
+static int ftdi_read_cbus_pins(struct usb_serial_port *port)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	struct usb_serial *serial = port->serial;
+	unsigned char *buf;
+	int result;
+
+	buf = kmalloc(1, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	result = usb_control_msg(serial->dev,
+				 usb_rcvctrlpipe(serial->dev, 0),
+				 FTDI_SIO_READ_PINS_REQUEST,
+				 FTDI_SIO_READ_PINS_REQUEST_TYPE, 0,
+				 priv->interface, buf, 1, WDR_TIMEOUT);
+	if (result < 1) {
+		if (result >= 0)
+			result = -EIO;
+	} else {
+		result = buf[0];
+	}
+
+	kfree(buf);
+
+	return result;
+}
+
+static int ftdi_gpio_get(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct usb_serial_port *port = gpiochip_get_data(gc);
+	int result;
+
+	result = ftdi_read_cbus_pins(port);
+	if (result < 0)
+		return result;
+
+	return !!(result & BIT(gpio));
+}
+
+static void ftdi_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value)
+{
+	struct usb_serial_port *port = gpiochip_get_data(gc);
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+
+	mutex_lock(&priv->gpio_lock);
+
+	if (value)
+		priv->gpio_value |= BIT(gpio);
+	else
+		priv->gpio_value &= ~BIT(gpio);
+
+	ftdi_set_cbus_pins(port);
+
+	mutex_unlock(&priv->gpio_lock);
+}
+
+static int ftdi_gpio_get_multiple(struct gpio_chip *gc, unsigned long *mask,
+					unsigned long *bits)
+{
+	struct usb_serial_port *port = gpiochip_get_data(gc);
+	int result;
+
+	result = ftdi_read_cbus_pins(port);
+	if (result < 0)
+		return result;
+
+	*bits = result & *mask;
+
+	return 0;
+}
+
+static void ftdi_gpio_set_multiple(struct gpio_chip *gc, unsigned long *mask,
+					unsigned long *bits)
+{
+	struct usb_serial_port *port = gpiochip_get_data(gc);
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+
+	mutex_lock(&priv->gpio_lock);
+
+	priv->gpio_value &= ~(*mask);
+	priv->gpio_value |= *bits & *mask;
+	ftdi_set_cbus_pins(port);
+
+	mutex_unlock(&priv->gpio_lock);
+}
+
+static int ftdi_gpio_direction_get(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct usb_serial_port *port = gpiochip_get_data(gc);
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+
+	return !(priv->gpio_output & BIT(gpio));
+}
+
+static int ftdi_gpio_direction_input(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct usb_serial_port *port = gpiochip_get_data(gc);
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	int result;
+
+	mutex_lock(&priv->gpio_lock);
+
+	priv->gpio_output &= ~BIT(gpio);
+	result = ftdi_set_cbus_pins(port);
+
+	mutex_unlock(&priv->gpio_lock);
+
+	return result;
+}
+
+static int ftdi_gpio_direction_output(struct gpio_chip *gc, unsigned int gpio,
+					int value)
+{
+	struct usb_serial_port *port = gpiochip_get_data(gc);
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	int result;
+
+	mutex_lock(&priv->gpio_lock);
+
+	priv->gpio_output |= BIT(gpio);
+	if (value)
+		priv->gpio_value |= BIT(gpio);
+	else
+		priv->gpio_value &= ~BIT(gpio);
+
+	result = ftdi_set_cbus_pins(port);
+
+	mutex_unlock(&priv->gpio_lock);
+
+	return result;
+}
+
+static int ftdi_read_eeprom(struct usb_serial *serial, void *dst, u16 addr,
+				u16 nbytes)
+{
+	int read = 0;
+
+	if (addr % 2 != 0)
+		return -EINVAL;
+	if (nbytes % 2 != 0)
+		return -EINVAL;
+
+	/* Read EEPROM two bytes at a time */
+	while (read < nbytes) {
+		int rv;
+
+		rv = usb_control_msg(serial->dev,
+				     usb_rcvctrlpipe(serial->dev, 0),
+				     FTDI_SIO_READ_EEPROM_REQUEST,
+				     FTDI_SIO_READ_EEPROM_REQUEST_TYPE,
+				     0, (addr + read) / 2, dst + read, 2,
+				     WDR_TIMEOUT);
+		if (rv < 2) {
+			if (rv >= 0)
+				return -EIO;
+			else
+				return rv;
+		}
+
+		read += rv;
+	}
+
+	return 0;
+}
+
+static int ftdi_gpio_init_ft232r(struct usb_serial_port *port)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	u16 cbus_config;
+	u8 *buf;
+	int ret;
+	int i;
+
+	buf = kmalloc(2, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = ftdi_read_eeprom(port->serial, buf, 0x14, 2);
+	if (ret < 0)
+		goto out_free;
+
+	cbus_config = le16_to_cpup((__le16 *)buf);
+	dev_dbg(&port->dev, "cbus_config = 0x%04x\n", cbus_config);
+
+	priv->gc.ngpio = 4;
+
+	priv->gpio_altfunc = 0xff;
+	for (i = 0; i < priv->gc.ngpio; ++i) {
+		if ((cbus_config & 0xf) == FTDI_FT232R_CBUS_MUX_GPIO)
+			priv->gpio_altfunc &= ~BIT(i);
+		cbus_config >>= 4;
+	}
+out_free:
+	kfree(buf);
+
+	return ret;
+}
+
+static int ftdi_gpio_init_ftx(struct usb_serial_port *port)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	struct usb_serial *serial = port->serial;
+	const u16 cbus_cfg_addr = 0x1a;
+	const u16 cbus_cfg_size = 4;
+	u8 *cbus_cfg_buf;
+	int result;
+	u8 i;
+
+	cbus_cfg_buf = kmalloc(cbus_cfg_size, GFP_KERNEL);
+	if (!cbus_cfg_buf)
+		return -ENOMEM;
+
+	result = ftdi_read_eeprom(serial, cbus_cfg_buf,
+				  cbus_cfg_addr, cbus_cfg_size);
+	if (result < 0)
+		goto out_free;
+
+	/* FIXME: FT234XD alone has 1 GPIO, but how to recognize this IC? */
+	priv->gc.ngpio = 4;
+
+	/* Determine which pins are configured for CBUS bitbanging */
+	priv->gpio_altfunc = 0xff;
+	for (i = 0; i < priv->gc.ngpio; ++i) {
+		if (cbus_cfg_buf[i] == FTDI_FTX_CBUS_MUX_GPIO)
+			priv->gpio_altfunc &= ~BIT(i);
+	}
+
+out_free:
+	kfree(cbus_cfg_buf);
+
+	return result;
+}
+
+static int ftdi_gpio_init(struct usb_serial_port *port)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	struct usb_serial *serial = port->serial;
+	int result;
+
+	switch (priv->chip_type) {
+	case FT232RL:
+		result = ftdi_gpio_init_ft232r(port);
+		break;
+	case FTX:
+		result = ftdi_gpio_init_ftx(port);
+		break;
+	default:
+		return 0;
+	}
+
+	if (result < 0)
+		return result;
+
+	mutex_init(&priv->gpio_lock);
+
+	priv->gc.label = "ftdi-cbus";
+	priv->gc.request = ftdi_gpio_request;
+	priv->gc.get_direction = ftdi_gpio_direction_get;
+	priv->gc.direction_input = ftdi_gpio_direction_input;
+	priv->gc.direction_output = ftdi_gpio_direction_output;
+	priv->gc.get = ftdi_gpio_get;
+	priv->gc.set = ftdi_gpio_set;
+	priv->gc.get_multiple = ftdi_gpio_get_multiple;
+	priv->gc.set_multiple = ftdi_gpio_set_multiple;
+	priv->gc.owner = THIS_MODULE;
+	priv->gc.parent = &serial->interface->dev;
+	priv->gc.base = -1;
+	priv->gc.can_sleep = true;
+
+	result = gpiochip_add_data(&priv->gc, port);
+	if (!result)
+		priv->gpio_registered = true;
+
+	return result;
+}
+
+static void ftdi_gpio_remove(struct usb_serial_port *port)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+
+	if (priv->gpio_registered) {
+		gpiochip_remove(&priv->gc);
+		priv->gpio_registered = false;
+	}
+
+	if (priv->gpio_used) {
+		/* Exiting CBUS-mode does not reset pin states. */
+		ftdi_exit_cbus_mode(port);
+		priv->gpio_used = false;
+	}
+}
+
+#else
+
+static int ftdi_gpio_init(struct usb_serial_port *port)
+{
+	return 0;
+}
+
+static void ftdi_gpio_remove(struct usb_serial_port *port) { }
+
+#endif	/* CONFIG_GPIOLIB */
+
 /*
  * ***************************************************************************
  * FTDI driver specific functions
@@ -1792,7 +2171,7 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port)
 {
 	struct ftdi_private *priv;
 	const struct ftdi_sio_quirk *quirk = usb_get_serial_data(port->serial);
-
+	int result;
 
 	priv = kzalloc(sizeof(struct ftdi_private), GFP_KERNEL);
 	if (!priv)
@@ -1811,6 +2190,14 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port)
 		priv->latency = 16;
 	write_latency_timer(port);
 	create_sysfs_attrs(port);
+
+	result = ftdi_gpio_init(port);
+	if (result < 0) {
+		dev_err(&port->serial->interface->dev,
+			"GPIO initialisation failed: %d\n",
+			result);
+	}
+
 	return 0;
 }
 
@@ -1928,6 +2315,8 @@ static int ftdi_sio_port_remove(struct usb_serial_port *port)
 {
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
 
+	ftdi_gpio_remove(port);
+
 	remove_sysfs_attrs(port);
 
 	kfree(priv);
diff --git a/drivers/usb/serial/ftdi_sio.h b/drivers/usb/serial/ftdi_sio.h
index dcd0b6e05baf..a79a1325b4d9 100644
--- a/drivers/usb/serial/ftdi_sio.h
+++ b/drivers/usb/serial/ftdi_sio.h
@@ -35,7 +35,10 @@
 #define FTDI_SIO_SET_EVENT_CHAR		6 /* Set the event character */
 #define FTDI_SIO_SET_ERROR_CHAR		7 /* Set the error character */
 #define FTDI_SIO_SET_LATENCY_TIMER	9 /* Set the latency timer */
-#define FTDI_SIO_GET_LATENCY_TIMER	10 /* Get the latency timer */
+#define FTDI_SIO_GET_LATENCY_TIMER	0x0a /* Get the latency timer */
+#define FTDI_SIO_SET_BITMODE		0x0b /* Set bitbang mode */
+#define FTDI_SIO_READ_PINS		0x0c /* Read immediate value of pins */
+#define FTDI_SIO_READ_EEPROM		0x90 /* Read EEPROM */
 
 /* Interface indices for FT2232, FT2232H and FT4232H devices */
 #define INTERFACE_A		1
@@ -433,6 +436,29 @@ enum ftdi_sio_baudrate {
  *         1 = active
  */
 
+/* FTDI_SIO_SET_BITMODE */
+#define FTDI_SIO_SET_BITMODE_REQUEST_TYPE 0x40
+#define FTDI_SIO_SET_BITMODE_REQUEST FTDI_SIO_SET_BITMODE
+
+/* Possible bitmodes for FTDI_SIO_SET_BITMODE_REQUEST */
+#define FTDI_SIO_BITMODE_RESET		0x00
+#define FTDI_SIO_BITMODE_CBUS		0x20
+
+/* FTDI_SIO_READ_PINS */
+#define FTDI_SIO_READ_PINS_REQUEST_TYPE 0xc0
+#define FTDI_SIO_READ_PINS_REQUEST FTDI_SIO_READ_PINS
+
+/*
+ * FTDI_SIO_READ_EEPROM
+ *
+ * EEPROM format found in FTDI AN_201, "FT-X MTP memory Configuration",
+ * http://www.ftdichip.com/Support/Documents/AppNotes/AN_201_FT-X%20MTP%20Memory%20Configuration.pdf
+ */
+#define FTDI_SIO_READ_EEPROM_REQUEST_TYPE 0xc0
+#define FTDI_SIO_READ_EEPROM_REQUEST FTDI_SIO_READ_EEPROM
+
+#define FTDI_FTX_CBUS_MUX_GPIO		0x8
+#define FTDI_FT232R_CBUS_MUX_GPIO	0xa
 
 
 /* Descriptors returned by the device
diff --git a/drivers/usb/storage/Kconfig b/drivers/usb/storage/Kconfig
index ec84758f0e23..6fd427284b12 100644
--- a/drivers/usb/storage/Kconfig
+++ b/drivers/usb/storage/Kconfig
@@ -23,16 +23,16 @@ config USB_STORAGE
 	  To compile this driver as a module, choose M here: the
 	  module will be called usb-storage.
 
+if USB_STORAGE
+
 config USB_STORAGE_DEBUG
 	bool "USB Mass Storage verbose debug"
-	depends on USB_STORAGE
 	help
 	  Say Y here in order to have the USB Mass Storage code generate
 	  verbose debugging messages.
 
 config USB_STORAGE_REALTEK
 	tristate "Realtek Card Reader support"
-	depends on USB_STORAGE
 	help
 	  Say Y here to include additional code to support the power-saving function
 	  for Realtek RTS51xx USB card readers.
@@ -46,7 +46,6 @@ config REALTEK_AUTOPM
 
 config USB_STORAGE_DATAFAB
 	tristate "Datafab Compact Flash Reader support"
-	depends on USB_STORAGE
 	help
 	  Support for certain Datafab CompactFlash readers.
 	  Datafab has a web page at <http://www.datafab.com/>.
@@ -55,7 +54,6 @@ config USB_STORAGE_DATAFAB
 
 config USB_STORAGE_FREECOM
 	tristate "Freecom USB/ATAPI Bridge support"
-	depends on USB_STORAGE
 	help
 	  Support for the Freecom USB to IDE/ATAPI adaptor.
 	  Freecom has a web page at <http://www.freecom.de/>.
@@ -64,7 +62,6 @@ config USB_STORAGE_FREECOM
 
 config USB_STORAGE_ISD200
 	tristate "ISD-200 USB/ATA Bridge support"
-	depends on USB_STORAGE
 	---help---
 	  Say Y here if you want to use USB Mass Store devices based
 	  on the In-Systems Design ISD-200 USB/ATA bridge.
@@ -82,7 +79,6 @@ config USB_STORAGE_ISD200
 
 config USB_STORAGE_USBAT
 	tristate "USBAT/USBAT02-based storage support"
-	depends on USB_STORAGE
 	help
 	  Say Y here to include additional code to support storage devices
 	  based on the SCM/Shuttle USBAT/USBAT02 processors.
@@ -105,7 +101,6 @@ config USB_STORAGE_USBAT
 
 config USB_STORAGE_SDDR09
 	tristate "SanDisk SDDR-09 (and other SmartMedia, including DPCM) support"
-	depends on USB_STORAGE
 	help
 	  Say Y here to include additional code to support the Sandisk SDDR-09
 	  SmartMedia reader in the USB Mass Storage driver.
@@ -115,7 +110,6 @@ config USB_STORAGE_SDDR09
 
 config USB_STORAGE_SDDR55
 	tristate "SanDisk SDDR-55 SmartMedia support"
-	depends on USB_STORAGE
 	help
 	  Say Y here to include additional code to support the Sandisk SDDR-55
 	  SmartMedia reader in the USB Mass Storage driver.
@@ -124,7 +118,6 @@ config USB_STORAGE_SDDR55
 
 config USB_STORAGE_JUMPSHOT
 	tristate "Lexar Jumpshot Compact Flash Reader"
-	depends on USB_STORAGE
 	help
 	  Say Y here to include additional code to support the Lexar Jumpshot
 	  USB CompactFlash reader.
@@ -133,7 +126,6 @@ config USB_STORAGE_JUMPSHOT
 
 config USB_STORAGE_ALAUDA
 	tristate "Olympus MAUSB-10/Fuji DPC-R1 support"
-	depends on USB_STORAGE
 	help
 	  Say Y here to include additional code to support the Olympus MAUSB-10
 	  and Fujifilm DPC-R1 USB Card reader/writer devices.
@@ -145,7 +137,6 @@ config USB_STORAGE_ALAUDA
 
 config USB_STORAGE_ONETOUCH
 	tristate "Support OneTouch Button on Maxtor Hard Drives"
-	depends on USB_STORAGE
 	depends on INPUT=y || INPUT=USB_STORAGE
 	help
 	  Say Y here to include additional code to support the Maxtor OneTouch
@@ -160,7 +151,6 @@ config USB_STORAGE_ONETOUCH
 
 config USB_STORAGE_KARMA
 	tristate "Support for Rio Karma music player"
-	depends on USB_STORAGE
 	help
 	  Say Y here to include additional code to support the Rio Karma
 	  USB interface.
@@ -174,7 +164,6 @@ config USB_STORAGE_KARMA
 
 config USB_STORAGE_CYPRESS_ATACB
 	tristate "SAT emulation on Cypress USB/ATA Bridge with ATACB"
-	depends on USB_STORAGE
 	---help---
 	  Say Y here if you want to use SAT (ata pass through) on devices based
 	  on the Cypress USB/ATA bridge supporting ATACB. This will allow you
@@ -187,19 +176,15 @@ config USB_STORAGE_CYPRESS_ATACB
 
 config USB_STORAGE_ENE_UB6250
 	tristate "USB ENE card reader support"
-	depends on SCSI
-	depends on USB_STORAGE
 	---help---
 	  Say Y here if you wish to control a ENE SD/MS Card reader.
 	  Note that this driver does not support SM cards.
 
-	  This option depends on 'SCSI' support being enabled, but you
-	  probably also need 'SCSI device support: SCSI disk support'
-	  (BLK_DEV_SD) for most USB storage devices.
-
 	  To compile this driver as a module, choose M here: the
 	  module will be called ums-eneub6250.
 
+endif # USB_STORAGE
+
 config USB_UAS
 	tristate "USB Attached SCSI"
 	depends on SCSI && USB_STORAGE
diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c
index f5e4500d9970..2b474d60b4db 100644
--- a/drivers/usb/storage/isd200.c
+++ b/drivers/usb/storage/isd200.c
@@ -1153,7 +1153,7 @@ static int isd200_get_inquiry_data( struct us_data *us )
 				/* Fill in vendor identification fields */
 				src = (__be16 *)&id[ATA_ID_PROD];
 				dest = (__u16*)info->InquiryData.VendorId;
-				for (i=0;i<4;i++)
+				for (i = 0; i < 4; i++)
 					dest[i] = be16_to_cpu(src[i]);
 
 				src = (__be16 *)&id[ATA_ID_PROD + 8/2];
diff --git a/drivers/usb/typec/Kconfig b/drivers/usb/typec/Kconfig
index 00878c386dd0..30a847c2089d 100644
--- a/drivers/usb/typec/Kconfig
+++ b/drivers/usb/typec/Kconfig
@@ -45,50 +45,7 @@ menuconfig TYPEC
 
 if TYPEC
 
-config TYPEC_TCPM
-	tristate "USB Type-C Port Controller Manager"
-	depends on USB
-	select USB_ROLE_SWITCH
-	select POWER_SUPPLY
-	help
-	  The Type-C Port Controller Manager provides a USB PD and USB Type-C
-	  state machine for use with Type-C Port Controllers.
-
-if TYPEC_TCPM
-
-config TYPEC_TCPCI
-	tristate "Type-C Port Controller Interface driver"
-	depends on I2C
-	select REGMAP_I2C
-	help
-	  Type-C Port Controller driver for TCPCI-compliant controller.
-
-config TYPEC_RT1711H
-	tristate "Richtek RT1711H Type-C chip driver"
-	depends on I2C
-	select TYPEC_TCPCI
-	help
-	  Richtek RT1711H Type-C chip driver that works with
-	  Type-C Port Controller Manager to provide USB PD and USB
-	  Type-C functionalities.
-
-source "drivers/usb/typec/fusb302/Kconfig"
-
-config TYPEC_WCOVE
-	tristate "Intel WhiskeyCove PMIC USB Type-C PHY driver"
-	depends on ACPI
-	depends on INTEL_SOC_PMIC
-	depends on INTEL_PMC_IPC
-	depends on BXT_WC_PMIC_OPREGION
-	help
-	  This driver adds support for USB Type-C detection on Intel Broxton
-	  platforms that have Intel Whiskey Cove PMIC. The driver can detect the
-	  role and cable orientation.
-
-	  To compile this driver as module, choose M here: the module will be
-	  called typec_wcove
-
-endif # TYPEC_TCPM
+source "drivers/usb/typec/tcpm/Kconfig"
 
 source "drivers/usb/typec/ucsi/Kconfig"
 
diff --git a/drivers/usb/typec/Makefile b/drivers/usb/typec/Makefile
index 45b0aef428a8..6696b7263d61 100644
--- a/drivers/usb/typec/Makefile
+++ b/drivers/usb/typec/Makefile
@@ -2,11 +2,7 @@
 obj-$(CONFIG_TYPEC)		+= typec.o
 typec-y				:= class.o mux.o bus.o
 obj-$(CONFIG_TYPEC)		+= altmodes/
-obj-$(CONFIG_TYPEC_TCPM)	+= tcpm.o
-obj-y				+= fusb302/
-obj-$(CONFIG_TYPEC_WCOVE)	+= typec_wcove.o
+obj-$(CONFIG_TYPEC_TCPM)	+= tcpm/
 obj-$(CONFIG_TYPEC_UCSI)	+= ucsi/
 obj-$(CONFIG_TYPEC_TPS6598X)	+= tps6598x.o
 obj-$(CONFIG_TYPEC)		+= mux/
-obj-$(CONFIG_TYPEC_TCPCI)	+= tcpci.o
-obj-$(CONFIG_TYPEC_RT1711H)	+= tcpci_rt1711h.o
diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index e61dffb27a0c..5db0593ca0bd 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -1322,7 +1322,7 @@ void typec_set_pwr_role(struct typec_port *port, enum typec_role role)
 EXPORT_SYMBOL_GPL(typec_set_pwr_role);
 
 /**
- * typec_set_pwr_role - Report VCONN source change
+ * typec_set_vconn_role - Report VCONN source change
  * @port: The USB Type-C Port which VCONN role changed
  * @role: Source when @port is sourcing VCONN, or Sink when it's not
  *
@@ -1500,7 +1500,7 @@ typec_port_register_altmode(struct typec_port *port,
 
 	sprintf(id, "id%04xm%02x", desc->svid, desc->mode);
 
-	mux = typec_mux_get(port->dev.parent, id);
+	mux = typec_mux_get(&port->dev, id);
 	if (IS_ERR(mux))
 		return ERR_CAST(mux);
 
@@ -1540,18 +1540,6 @@ struct typec_port *typec_register_port(struct device *parent,
 		return ERR_PTR(id);
 	}
 
-	port->sw = typec_switch_get(cap->fwnode ? &port->dev : parent);
-	if (IS_ERR(port->sw)) {
-		ret = PTR_ERR(port->sw);
-		goto err_switch;
-	}
-
-	port->mux = typec_mux_get(parent, "typec-mux");
-	if (IS_ERR(port->mux)) {
-		ret = PTR_ERR(port->mux);
-		goto err_mux;
-	}
-
 	switch (cap->type) {
 	case TYPEC_PORT_SRC:
 		port->pwr_role = TYPEC_SOURCE;
@@ -1592,13 +1580,26 @@ struct typec_port *typec_register_port(struct device *parent,
 	port->port_type = cap->type;
 	port->prefer_role = cap->prefer_role;
 
+	device_initialize(&port->dev);
 	port->dev.class = typec_class;
 	port->dev.parent = parent;
 	port->dev.fwnode = cap->fwnode;
 	port->dev.type = &typec_port_dev_type;
 	dev_set_name(&port->dev, "port%d", id);
 
-	ret = device_register(&port->dev);
+	port->sw = typec_switch_get(&port->dev);
+	if (IS_ERR(port->sw)) {
+		put_device(&port->dev);
+		return ERR_CAST(port->sw);
+	}
+
+	port->mux = typec_mux_get(&port->dev, "typec-mux");
+	if (IS_ERR(port->mux)) {
+		put_device(&port->dev);
+		return ERR_CAST(port->mux);
+	}
+
+	ret = device_add(&port->dev);
 	if (ret) {
 		dev_err(parent, "failed to register port (%d)\n", ret);
 		put_device(&port->dev);
@@ -1606,15 +1607,6 @@ struct typec_port *typec_register_port(struct device *parent,
 	}
 
 	return port;
-
-err_mux:
-	typec_switch_put(port->sw);
-
-err_switch:
-	ida_simple_remove(&typec_index_ida, port->id);
-	kfree(port);
-
-	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(typec_register_port);
 
diff --git a/drivers/usb/typec/fusb302/Kconfig b/drivers/usb/typec/fusb302/Kconfig
deleted file mode 100644
index fce099ff39fe..000000000000
--- a/drivers/usb/typec/fusb302/Kconfig
+++ /dev/null
@@ -1,7 +0,0 @@
-config TYPEC_FUSB302
-	tristate "Fairchild FUSB302 Type-C chip driver"
-	depends on I2C
-	help
-	  The Fairchild FUSB302 Type-C chip driver that works with
-	  Type-C Port Controller Manager to provide USB PD and USB
-	  Type-C functionalities.
diff --git a/drivers/usb/typec/fusb302/Makefile b/drivers/usb/typec/fusb302/Makefile
deleted file mode 100644
index 3b51b33631a0..000000000000
--- a/drivers/usb/typec/fusb302/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_TYPEC_FUSB302)	+= fusb302.o
diff --git a/drivers/usb/typec/tcpm/Kconfig b/drivers/usb/typec/tcpm/Kconfig
new file mode 100644
index 000000000000..f03ea8a61768
--- /dev/null
+++ b/drivers/usb/typec/tcpm/Kconfig
@@ -0,0 +1,52 @@
+config TYPEC_TCPM
+	tristate "USB Type-C Port Controller Manager"
+	depends on USB
+	select USB_ROLE_SWITCH
+	select POWER_SUPPLY
+	help
+	  The Type-C Port Controller Manager provides a USB PD and USB Type-C
+	  state machine for use with Type-C Port Controllers.
+
+if TYPEC_TCPM
+
+config TYPEC_TCPCI
+	tristate "Type-C Port Controller Interface driver"
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  Type-C Port Controller driver for TCPCI-compliant controller.
+
+if TYPEC_TCPCI
+
+config TYPEC_RT1711H
+	tristate "Richtek RT1711H Type-C chip driver"
+	help
+	  Richtek RT1711H Type-C chip driver that works with
+	  Type-C Port Controller Manager to provide USB PD and USB
+	  Type-C functionalities.
+
+endif # TYPEC_TCPCI
+
+config TYPEC_FUSB302
+	tristate "Fairchild FUSB302 Type-C chip driver"
+	depends on I2C
+	help
+	  The Fairchild FUSB302 Type-C chip driver that works with
+	  Type-C Port Controller Manager to provide USB PD and USB
+	  Type-C functionalities.
+
+config TYPEC_WCOVE
+	tristate "Intel WhiskeyCove PMIC USB Type-C PHY driver"
+	depends on ACPI
+	depends on INTEL_SOC_PMIC
+	depends on INTEL_PMC_IPC
+	depends on BXT_WC_PMIC_OPREGION
+	help
+	  This driver adds support for USB Type-C on Intel Broxton platforms
+	  that have Intel Whiskey Cove PMIC. The driver works with USB Type-C
+	  Port Controller Manager to provide USB PD and Type-C functionalities.
+
+	  To compile this driver as module, choose M here: the module will be
+	  called typec_wcove.ko
+
+endif # TYPEC_TCPM
diff --git a/drivers/usb/typec/tcpm/Makefile b/drivers/usb/typec/tcpm/Makefile
new file mode 100644
index 000000000000..a5ff6c8eb892
--- /dev/null
+++ b/drivers/usb/typec/tcpm/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_TYPEC_TCPM)	+= tcpm.o
+obj-$(CONFIG_TYPEC_FUSB302)	+= fusb302.o
+obj-$(CONFIG_TYPEC_WCOVE)	+= typec_wcove.o
+typec_wcove-y			:= wcove.o
+obj-$(CONFIG_TYPEC_TCPCI)	+= tcpci.o
+obj-$(CONFIG_TYPEC_RT1711H)	+= tcpci_rt1711h.o
diff --git a/drivers/usb/typec/fusb302/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c
index 82bed9810be6..43b64d9309d0 100644
--- a/drivers/usb/typec/fusb302/fusb302.c
+++ b/drivers/usb/typec/tcpm/fusb302.c
@@ -42,19 +42,12 @@
 #define T_BC_LVL_DEBOUNCE_DELAY_MS 30
 
 enum toggling_mode {
-	TOGGLINE_MODE_OFF,
+	TOGGLING_MODE_OFF,
 	TOGGLING_MODE_DRP,
 	TOGGLING_MODE_SNK,
 	TOGGLING_MODE_SRC,
 };
 
-static const char * const toggling_mode_name[] = {
-	[TOGGLINE_MODE_OFF]	= "toggling_OFF",
-	[TOGGLING_MODE_DRP]	= "toggling_DRP",
-	[TOGGLING_MODE_SNK]	= "toggling_SNK",
-	[TOGGLING_MODE_SRC]	= "toggling_SRC",
-};
-
 enum src_current_status {
 	SRC_CURRENT_DEFAULT,
 	SRC_CURRENT_MEDIUM,
@@ -601,7 +594,7 @@ static int fusb302_set_toggling(struct fusb302_chip *chip,
 	chip->intr_comp_chng = false;
 	/* configure toggling mode: none/snk/src/drp */
 	switch (mode) {
-	case TOGGLINE_MODE_OFF:
+	case TOGGLING_MODE_OFF:
 		ret = fusb302_i2c_mask_write(chip, FUSB_REG_CONTROL2,
 					     FUSB_REG_CONTROL2_MODE_MASK,
 					     FUSB_REG_CONTROL2_MODE_NONE);
@@ -633,7 +626,7 @@ static int fusb302_set_toggling(struct fusb302_chip *chip,
 		break;
 	}
 
-	if (mode == TOGGLINE_MODE_OFF) {
+	if (mode == TOGGLING_MODE_OFF) {
 		/* mask TOGDONE interrupt */
 		ret = fusb302_i2c_set_bits(chip, FUSB_REG_MASKA,
 					   FUSB_REG_MASKA_TOGDONE);
@@ -686,6 +679,7 @@ static int tcpm_set_cc(struct tcpc_dev *dev, enum typec_cc_status cc)
 	int ret = 0;
 	bool pull_up, pull_down;
 	u8 rd_mda;
+	enum toggling_mode mode;
 
 	mutex_lock(&chip->lock);
 	switch (cc) {
@@ -709,7 +703,7 @@ static int tcpm_set_cc(struct tcpc_dev *dev, enum typec_cc_status cc)
 		ret = -EINVAL;
 		goto done;
 	}
-	ret = fusb302_set_toggling(chip, TOGGLINE_MODE_OFF);
+	ret = fusb302_set_toggling(chip, TOGGLING_MODE_OFF);
 	if (ret < 0) {
 		fusb302_log(chip, "cannot stop toggling, ret=%d", ret);
 		goto done;
@@ -771,6 +765,29 @@ static int tcpm_set_cc(struct tcpc_dev *dev, enum typec_cc_status cc)
 		chip->intr_comp_chng = false;
 	}
 	fusb302_log(chip, "cc := %s", typec_cc_status_name[cc]);
+
+	/* Enable detection for fixed SNK or SRC only roles */
+	switch (cc) {
+	case TYPEC_CC_RD:
+		mode = TOGGLING_MODE_SNK;
+		break;
+	case TYPEC_CC_RP_DEF:
+	case TYPEC_CC_RP_1_5:
+	case TYPEC_CC_RP_3_0:
+		mode = TOGGLING_MODE_SRC;
+		break;
+	default:
+		mode = TOGGLING_MODE_OFF;
+		break;
+	}
+
+	if (mode != TOGGLING_MODE_OFF) {
+		ret = fusb302_set_toggling(chip, mode);
+		if (ret < 0)
+			fusb302_log(chip,
+				    "cannot set fixed role toggling mode, ret=%d",
+				    ret);
+	}
 done:
 	mutex_unlock(&chip->lock);
 
@@ -1178,10 +1195,6 @@ static const u32 src_pdo[] = {
 	PDO_FIXED(5000, 400, PDO_FIXED_FLAGS),
 };
 
-static const u32 snk_pdo[] = {
-	PDO_FIXED(5000, 400, PDO_FIXED_FLAGS),
-};
-
 static const struct tcpc_config fusb302_tcpc_config = {
 	.src_pdo = src_pdo,
 	.nr_src_pdo = ARRAY_SIZE(src_pdo),
@@ -1303,7 +1316,7 @@ static int fusb302_handle_togdone_snk(struct fusb302_chip *chip,
 		tcpm_cc_change(chip->tcpm_port);
 	}
 	/* turn off toggling */
-	ret = fusb302_set_toggling(chip, TOGGLINE_MODE_OFF);
+	ret = fusb302_set_toggling(chip, TOGGLING_MODE_OFF);
 	if (ret < 0) {
 		fusb302_log(chip,
 			    "cannot set toggling mode off, ret=%d", ret);
@@ -1399,7 +1412,7 @@ static int fusb302_handle_togdone_src(struct fusb302_chip *chip,
 		tcpm_cc_change(chip->tcpm_port);
 	}
 	/* turn off toggling */
-	ret = fusb302_set_toggling(chip, TOGGLINE_MODE_OFF);
+	ret = fusb302_set_toggling(chip, TOGGLING_MODE_OFF);
 	if (ret < 0) {
 		fusb302_log(chip,
 			    "cannot set toggling mode off, ret=%d", ret);
@@ -1730,12 +1743,14 @@ static int fusb302_probe(struct i2c_client *client,
 		return -ENOMEM;
 
 	chip->i2c_client = client;
-	i2c_set_clientdata(client, chip);
 	chip->dev = &client->dev;
 	chip->tcpc_config = fusb302_tcpc_config;
 	chip->tcpc_dev.config = &chip->tcpc_config;
 	mutex_init(&chip->lock);
 
+	chip->tcpc_dev.fwnode =
+		device_get_named_child_node(dev, "connector");
+
 	if (!device_property_read_u32(dev, "fcs,operating-sink-microwatt", &v))
 		chip->tcpc_config.operating_snk_mw = v / 1000;
 
@@ -1756,22 +1771,17 @@ static int fusb302_probe(struct i2c_client *client,
 			return -EPROBE_DEFER;
 	}
 
-	fusb302_debugfs_init(chip);
+	chip->vbus = devm_regulator_get(chip->dev, "vbus");
+	if (IS_ERR(chip->vbus))
+		return PTR_ERR(chip->vbus);
 
 	chip->wq = create_singlethread_workqueue(dev_name(chip->dev));
-	if (!chip->wq) {
-		ret = -ENOMEM;
-		goto clear_client_data;
-	}
+	if (!chip->wq)
+		return -ENOMEM;
+
 	INIT_DELAYED_WORK(&chip->bc_lvl_handler, fusb302_bc_lvl_handler_work);
 	init_tcpc_dev(&chip->tcpc_dev);
 
-	chip->vbus = devm_regulator_get(chip->dev, "vbus");
-	if (IS_ERR(chip->vbus)) {
-		ret = PTR_ERR(chip->vbus);
-		goto destroy_workqueue;
-	}
-
 	if (client->irq) {
 		chip->gpio_int_n_irq = client->irq;
 	} else {
@@ -1797,15 +1807,15 @@ static int fusb302_probe(struct i2c_client *client,
 		goto tcpm_unregister_port;
 	}
 	enable_irq_wake(chip->gpio_int_n_irq);
+	fusb302_debugfs_init(chip);
+	i2c_set_clientdata(client, chip);
+
 	return ret;
 
 tcpm_unregister_port:
 	tcpm_unregister_port(chip->tcpm_port);
 destroy_workqueue:
 	destroy_workqueue(chip->wq);
-clear_client_data:
-	i2c_set_clientdata(client, NULL);
-	fusb302_debugfs_exit(chip);
 
 	return ret;
 }
@@ -1816,7 +1826,6 @@ static int fusb302_remove(struct i2c_client *client)
 
 	tcpm_unregister_port(chip->tcpm_port);
 	destroy_workqueue(chip->wq);
-	i2c_set_clientdata(client, NULL);
 	fusb302_debugfs_exit(chip);
 
 	return 0;
diff --git a/drivers/usb/typec/fusb302/fusb302_reg.h b/drivers/usb/typec/tcpm/fusb302_reg.h
index 00b39d365478..00b39d365478 100644
--- a/drivers/usb/typec/fusb302/fusb302_reg.h
+++ b/drivers/usb/typec/tcpm/fusb302_reg.h
diff --git a/drivers/usb/typec/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c
index ac6b418b15f1..ac6b418b15f1 100644
--- a/drivers/usb/typec/tcpci.c
+++ b/drivers/usb/typec/tcpm/tcpci.c
diff --git a/drivers/usb/typec/tcpci.h b/drivers/usb/typec/tcpm/tcpci.h
index 303ebde26546..303ebde26546 100644
--- a/drivers/usb/typec/tcpci.h
+++ b/drivers/usb/typec/tcpm/tcpci.h
diff --git a/drivers/usb/typec/tcpci_rt1711h.c b/drivers/usb/typec/tcpm/tcpci_rt1711h.c
index 017389021b96..017389021b96 100644
--- a/drivers/usb/typec/tcpci_rt1711h.c
+++ b/drivers/usb/typec/tcpm/tcpci_rt1711h.c
diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 4f1f4215f3d6..dbbd71f754d0 100644
--- a/drivers/usb/typec/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -1430,8 +1430,8 @@ static enum pdo_err tcpm_caps_err(struct tcpm_port *port, const u32 *pdo,
 				if (pdo_apdo_type(pdo[i]) != APDO_TYPE_PPS)
 					break;
 
-				if (pdo_pps_apdo_max_current(pdo[i]) <
-				    pdo_pps_apdo_max_current(pdo[i - 1]))
+				if (pdo_pps_apdo_max_voltage(pdo[i]) <
+				    pdo_pps_apdo_max_voltage(pdo[i - 1]))
 					return PDO_ERR_PPS_APDO_NOT_SORTED;
 				else if (pdo_pps_apdo_min_voltage(pdo[i]) ==
 					  pdo_pps_apdo_min_voltage(pdo[i - 1]) &&
@@ -2209,7 +2209,7 @@ static unsigned int tcpm_pd_select_pps_apdo(struct tcpm_port *port)
 {
 	unsigned int i, j, max_mw = 0, max_mv = 0;
 	unsigned int min_src_mv, max_src_mv, src_ma, src_mw;
-	unsigned int min_snk_mv, max_snk_mv, snk_ma;
+	unsigned int min_snk_mv, max_snk_mv;
 	u32 pdo;
 	unsigned int src_pdo = 0, snk_pdo = 0;
 
@@ -2253,8 +2253,6 @@ static unsigned int tcpm_pd_select_pps_apdo(struct tcpm_port *port)
 						pdo_pps_apdo_min_voltage(pdo);
 					max_snk_mv =
 						pdo_pps_apdo_max_voltage(pdo);
-					snk_ma =
-						pdo_pps_apdo_max_current(pdo);
 					break;
 				default:
 					tcpm_log(port,
@@ -2402,7 +2400,7 @@ static int tcpm_pd_send_request(struct tcpm_port *port)
 
 static int tcpm_pd_build_pps_request(struct tcpm_port *port, u32 *rdo)
 {
-	unsigned int out_mv, op_ma, op_mw, min_mv, max_mv, max_ma, flags;
+	unsigned int out_mv, op_ma, op_mw, max_mv, max_ma, flags;
 	enum pd_pdo_type type;
 	unsigned int src_pdo_index;
 	u32 pdo;
@@ -2420,7 +2418,6 @@ static int tcpm_pd_build_pps_request(struct tcpm_port *port, u32 *rdo)
 			tcpm_log(port, "Invalid APDO selected!");
 			return -EINVAL;
 		}
-		min_mv = port->pps_data.min_volt;
 		max_mv = port->pps_data.max_volt;
 		max_ma = port->pps_data.max_curr;
 		out_mv = port->pps_data.out_volt;
@@ -4116,6 +4113,9 @@ static int tcpm_pps_set_op_curr(struct tcpm_port *port, u16 op_curr)
 		goto port_unlock;
 	}
 
+	/* Round down operating current to align with PPS valid steps */
+	op_curr = op_curr - (op_curr % RDO_PROG_CURR_MA_STEP);
+
 	reinit_completion(&port->pps_complete);
 	port->pps_data.op_curr = op_curr;
 	port->pps_status = 0;
@@ -4169,6 +4169,9 @@ static int tcpm_pps_set_out_volt(struct tcpm_port *port, u16 out_volt)
 		goto port_unlock;
 	}
 
+	/* Round down output voltage to align with PPS valid steps */
+	out_volt = out_volt - (out_volt % RDO_PROG_VOLT_MV_STEP);
+
 	reinit_completion(&port->pps_complete);
 	port->pps_data.out_volt = out_volt;
 	port->pps_status = 0;
diff --git a/drivers/usb/typec/typec_wcove.c b/drivers/usb/typec/tcpm/wcove.c
index 423208e19383..423208e19383 100644
--- a/drivers/usb/typec/typec_wcove.c
+++ b/drivers/usb/typec/tcpm/wcove.c
diff --git a/drivers/usb/usbip/vudc_main.c b/drivers/usb/usbip/vudc_main.c
index 3fc22037a82f..390733e6937e 100644
--- a/drivers/usb/usbip/vudc_main.c
+++ b/drivers/usb/usbip/vudc_main.c
@@ -73,6 +73,10 @@ static int __init init(void)
 cleanup:
 	list_for_each_entry_safe(udc_dev, udc_dev2, &vudc_devices, dev_entry) {
 		list_del(&udc_dev->dev_entry);
+		/*
+		 * Just do platform_device_del() here, put_vudc_device()
+		 * calls the platform_device_put()
+		 */
 		platform_device_del(udc_dev->pdev);
 		put_vudc_device(udc_dev);
 	}
@@ -89,7 +93,11 @@ static void __exit cleanup(void)
 
 	list_for_each_entry_safe(udc_dev, udc_dev2, &vudc_devices, dev_entry) {
 		list_del(&udc_dev->dev_entry);
-		platform_device_unregister(udc_dev->pdev);
+		/*
+		 * Just do platform_device_del() here, put_vudc_device()
+		 * calls the platform_device_put()
+		 */
+		platform_device_del(udc_dev->pdev);
 		put_vudc_device(udc_dev);
 	}
 	platform_driver_unregister(&vudc_driver);
diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c
index aff50eb09ca9..68ddee86a886 100644
--- a/drivers/usb/wusbcore/crypto.c
+++ b/drivers/usb/wusbcore/crypto.c
@@ -189,7 +189,7 @@ struct wusb_mac_scratch {
  * NOTE: blen is not aligned to a block size, we'll pad zeros, that's
  *       what sg[4] is for. Maybe there is a smarter way to do this.
  */
-static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc,
+static int wusb_ccm_mac(struct crypto_sync_skcipher *tfm_cbc,
 			struct crypto_cipher *tfm_aes,
 			struct wusb_mac_scratch *scratch,
 			void *mic,
@@ -198,7 +198,7 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc,
 			size_t blen)
 {
 	int result = 0;
-	SKCIPHER_REQUEST_ON_STACK(req, tfm_cbc);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm_cbc);
 	struct scatterlist sg[4], sg_dst;
 	void *dst_buf;
 	size_t dst_size;
@@ -224,7 +224,7 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc,
 	if (!dst_buf)
 		goto error_dst_buf;
 
-	iv = kzalloc(crypto_skcipher_ivsize(tfm_cbc), GFP_KERNEL);
+	iv = kzalloc(crypto_sync_skcipher_ivsize(tfm_cbc), GFP_KERNEL);
 	if (!iv)
 		goto error_iv;
 
@@ -251,7 +251,7 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc,
 	sg_set_page(&sg[3], ZERO_PAGE(0), zero_padding, 0);
 	sg_init_one(&sg_dst, dst_buf, dst_size);
 
-	skcipher_request_set_tfm(req, tfm_cbc);
+	skcipher_request_set_sync_tfm(req, tfm_cbc);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, &sg_dst, dst_size, iv);
 	result = crypto_skcipher_encrypt(req);
@@ -298,19 +298,19 @@ ssize_t wusb_prf(void *out, size_t out_size,
 {
 	ssize_t result, bytes = 0, bitr;
 	struct aes_ccm_nonce n = *_n;
-	struct crypto_skcipher *tfm_cbc;
+	struct crypto_sync_skcipher *tfm_cbc;
 	struct crypto_cipher *tfm_aes;
 	struct wusb_mac_scratch *scratch;
 	u64 sfn = 0;
 	__le64 sfn_le;
 
-	tfm_cbc = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
+	tfm_cbc = crypto_alloc_sync_skcipher("cbc(aes)", 0, 0);
 	if (IS_ERR(tfm_cbc)) {
 		result = PTR_ERR(tfm_cbc);
 		printk(KERN_ERR "E: can't load CBC(AES): %d\n", (int)result);
 		goto error_alloc_cbc;
 	}
-	result = crypto_skcipher_setkey(tfm_cbc, key, 16);
+	result = crypto_sync_skcipher_setkey(tfm_cbc, key, 16);
 	if (result < 0) {
 		printk(KERN_ERR "E: can't set CBC key: %d\n", (int)result);
 		goto error_setkey_cbc;
@@ -351,7 +351,7 @@ error_setkey_aes:
 	crypto_free_cipher(tfm_aes);
 error_alloc_aes:
 error_setkey_cbc:
-	crypto_free_skcipher(tfm_cbc);
+	crypto_free_sync_skcipher(tfm_cbc);
 error_alloc_cbc:
 	return result;
 }
diff --git a/drivers/usb/wusbcore/wa-rpipe.c b/drivers/usb/wusbcore/wa-rpipe.c
index 38884aac862b..a5734cbcd5ad 100644
--- a/drivers/usb/wusbcore/wa-rpipe.c
+++ b/drivers/usb/wusbcore/wa-rpipe.c
@@ -470,9 +470,7 @@ error:
 int wa_rpipes_create(struct wahc *wa)
 {
 	wa->rpipes = le16_to_cpu(wa->wa_descr->wNumRPipes);
-	wa->rpipe_bm = kcalloc(BITS_TO_LONGS(wa->rpipes),
-			       sizeof(unsigned long),
-			       GFP_KERNEL);
+	wa->rpipe_bm = bitmap_zalloc(wa->rpipes, GFP_KERNEL);
 	if (wa->rpipe_bm == NULL)
 		return -ENOMEM;
 	return 0;
@@ -487,7 +485,7 @@ void wa_rpipes_destroy(struct wahc *wa)
 		dev_err(dev, "BUG: pipes not released on exit: %*pb\n",
 			wa->rpipes, wa->rpipe_bm);
 	}
-	kfree(wa->rpipe_bm);
+	bitmap_free(wa->rpipe_bm);
 }
 
 /*
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 96721b154454..b30926e11d87 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -444,7 +444,7 @@ static void tce_iommu_unuse_page_v2(struct tce_container *container,
 	struct mm_iommu_table_group_mem_t *mem = NULL;
 	int ret;
 	unsigned long hpa = 0;
-	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
 
 	if (!pua)
 		return;
@@ -467,8 +467,27 @@ static int tce_iommu_clear(struct tce_container *container,
 	unsigned long oldhpa;
 	long ret;
 	enum dma_data_direction direction;
+	unsigned long lastentry = entry + pages;
+
+	for ( ; entry < lastentry; ++entry) {
+		if (tbl->it_indirect_levels && tbl->it_userspace) {
+			/*
+			 * For multilevel tables, we can take a shortcut here
+			 * and skip some TCEs as we know that the userspace
+			 * addresses cache is a mirror of the real TCE table
+			 * and if it is missing some indirect levels, then
+			 * the hardware table does not have them allocated
+			 * either and therefore does not require updating.
+			 */
+			__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl,
+					entry);
+			if (!pua) {
+				/* align to level_size which is power of two */
+				entry |= tbl->it_level_size - 1;
+				continue;
+			}
+		}
 
-	for ( ; pages; --pages, ++entry) {
 		cond_resched();
 
 		direction = DMA_NONE;
diff --git a/drivers/video/fbdev/fsl-diu-fb.c b/drivers/video/fbdev/fsl-diu-fb.c
index bc9eb8afc313..332a56b6811f 100644
--- a/drivers/video/fbdev/fsl-diu-fb.c
+++ b/drivers/video/fbdev/fsl-diu-fb.c
@@ -1925,7 +1925,7 @@ static int __init fsl_diu_init(void)
 	pr_info("Freescale Display Interface Unit (DIU) framebuffer driver\n");
 
 #ifdef CONFIG_NOT_COHERENT_CACHE
-	np = of_find_node_by_type(NULL, "cpu");
+	np = of_get_cpu_node(0, NULL);
 	if (!np) {
 		pr_err("fsl-diu-fb: can't find 'cpu' device node\n");
 		return -ENODEV;
diff --git a/drivers/vme/vme.c b/drivers/vme/vme.c
index 92500f6bdad1..520a5f9c27de 100644
--- a/drivers/vme/vme.c
+++ b/drivers/vme/vme.c
@@ -1890,7 +1890,6 @@ static int __vme_register_driver_bus(struct vme_driver *drv,
 
 err_reg:
 	put_device(&vdev->dev);
-	kfree(vdev);
 err_devalloc:
 	list_for_each_entry_safe(vdev, tmp, &drv->devices, drv_list) {
 		list_del(&vdev->drv_list);
diff --git a/drivers/w1/masters/omap_hdq.c b/drivers/w1/masters/omap_hdq.c
index 83fc9aab34e8..3099052e1243 100644
--- a/drivers/w1/masters/omap_hdq.c
+++ b/drivers/w1/masters/omap_hdq.c
@@ -763,6 +763,8 @@ static int omap_hdq_remove(struct platform_device *pdev)
 	/* remove module dependency */
 	pm_runtime_disable(&pdev->dev);
 
+	w1_remove_master_device(&omap_w1_master);
+
 	return 0;
 }
 
diff --git a/drivers/w1/slaves/w1_ds2438.c b/drivers/w1/slaves/w1_ds2438.c
index bf641a191d07..7c4e33dbee4d 100644
--- a/drivers/w1/slaves/w1_ds2438.c
+++ b/drivers/w1/slaves/w1_ds2438.c
@@ -186,8 +186,8 @@ static int w1_ds2438_change_config_bit(struct w1_slave *sl, u8 mask, u8 value)
 	return -1;
 }
 
-static uint16_t w1_ds2438_get_voltage(struct w1_slave *sl,
-				      int adc_input, uint16_t *voltage)
+static int w1_ds2438_get_voltage(struct w1_slave *sl,
+				 int adc_input, uint16_t *voltage)
 {
 	unsigned int retries = W1_DS2438_RETRIES;
 	u8 w1_buf[DS2438_PAGE_SIZE + 1 /*for CRC*/];
@@ -235,6 +235,25 @@ post_unlock:
 	return ret;
 }
 
+static int w1_ds2438_get_current(struct w1_slave *sl, int16_t *voltage)
+{
+	u8 w1_buf[DS2438_PAGE_SIZE + 1 /*for CRC*/];
+	int ret;
+
+	mutex_lock(&sl->master->bus_mutex);
+
+	if (w1_ds2438_get_page(sl, 0, w1_buf) == 0) {
+		/* The voltage measured across current sense resistor RSENS. */
+		*voltage = (((int16_t) w1_buf[DS2438_CURRENT_MSB]) << 8) | ((int16_t) w1_buf[DS2438_CURRENT_LSB]);
+		ret = 0;
+	} else
+		ret = -1;
+
+	mutex_unlock(&sl->master->bus_mutex);
+
+	return ret;
+}
+
 static ssize_t iad_write(struct file *filp, struct kobject *kobj,
 			 struct bin_attribute *bin_attr, char *buf,
 			 loff_t off, size_t count)
@@ -257,6 +276,27 @@ static ssize_t iad_write(struct file *filp, struct kobject *kobj,
 	return ret;
 }
 
+static ssize_t iad_read(struct file *filp, struct kobject *kobj,
+			struct bin_attribute *bin_attr, char *buf,
+			loff_t off, size_t count)
+{
+	struct w1_slave *sl = kobj_to_w1_slave(kobj);
+	int ret;
+	int16_t voltage;
+
+	if (off != 0)
+		return 0;
+	if (!buf)
+		return -EINVAL;
+
+	if (w1_ds2438_get_current(sl, &voltage) == 0) {
+		ret = snprintf(buf, count, "%i\n", voltage);
+	} else
+		ret = -EIO;
+
+	return ret;
+}
+
 static ssize_t page0_read(struct file *filp, struct kobject *kobj,
 			  struct bin_attribute *bin_attr, char *buf,
 			  loff_t off, size_t count)
@@ -272,9 +312,13 @@ static ssize_t page0_read(struct file *filp, struct kobject *kobj,
 
 	mutex_lock(&sl->master->bus_mutex);
 
+	/* Read no more than page0 size */
+	if (count > DS2438_PAGE_SIZE)
+		count = DS2438_PAGE_SIZE;
+
 	if (w1_ds2438_get_page(sl, 0, w1_buf) == 0) {
-		memcpy(buf, &w1_buf, DS2438_PAGE_SIZE);
-		ret = DS2438_PAGE_SIZE;
+		memcpy(buf, &w1_buf, count);
+		ret = count;
 	} else
 		ret = -EIO;
 
@@ -289,7 +333,6 @@ static ssize_t temperature_read(struct file *filp, struct kobject *kobj,
 {
 	struct w1_slave *sl = kobj_to_w1_slave(kobj);
 	int ret;
-	ssize_t c = PAGE_SIZE;
 	int16_t temp;
 
 	if (off != 0)
@@ -298,8 +341,7 @@ static ssize_t temperature_read(struct file *filp, struct kobject *kobj,
 		return -EINVAL;
 
 	if (w1_ds2438_get_temperature(sl, &temp) == 0) {
-		c -= snprintf(buf + PAGE_SIZE - c, c, "%d\n", temp);
-		ret = PAGE_SIZE - c;
+		ret = snprintf(buf, count, "%i\n", temp);
 	} else
 		ret = -EIO;
 
@@ -312,7 +354,6 @@ static ssize_t vad_read(struct file *filp, struct kobject *kobj,
 {
 	struct w1_slave *sl = kobj_to_w1_slave(kobj);
 	int ret;
-	ssize_t c = PAGE_SIZE;
 	uint16_t voltage;
 
 	if (off != 0)
@@ -321,8 +362,7 @@ static ssize_t vad_read(struct file *filp, struct kobject *kobj,
 		return -EINVAL;
 
 	if (w1_ds2438_get_voltage(sl, DS2438_ADC_INPUT_VAD, &voltage) == 0) {
-		c -= snprintf(buf + PAGE_SIZE - c, c, "%d\n", voltage);
-		ret = PAGE_SIZE - c;
+		ret = snprintf(buf, count, "%u\n", voltage);
 	} else
 		ret = -EIO;
 
@@ -335,7 +375,6 @@ static ssize_t vdd_read(struct file *filp, struct kobject *kobj,
 {
 	struct w1_slave *sl = kobj_to_w1_slave(kobj);
 	int ret;
-	ssize_t c = PAGE_SIZE;
 	uint16_t voltage;
 
 	if (off != 0)
@@ -344,15 +383,14 @@ static ssize_t vdd_read(struct file *filp, struct kobject *kobj,
 		return -EINVAL;
 
 	if (w1_ds2438_get_voltage(sl, DS2438_ADC_INPUT_VDD, &voltage) == 0) {
-		c -= snprintf(buf + PAGE_SIZE - c, c, "%d\n", voltage);
-		ret = PAGE_SIZE - c;
+		ret = snprintf(buf, count, "%u\n", voltage);
 	} else
 		ret = -EIO;
 
 	return ret;
 }
 
-static BIN_ATTR(iad, S_IRUGO | S_IWUSR | S_IWGRP, NULL, iad_write, 1);
+static BIN_ATTR(iad, S_IRUGO | S_IWUSR | S_IWGRP, iad_read, iad_write, 0);
 static BIN_ATTR_RO(page0, DS2438_PAGE_SIZE);
 static BIN_ATTR_RO(temperature, 0/* real length varies */);
 static BIN_ATTR_RO(vad, 0/* real length varies */);
diff --git a/fs/aio.c b/fs/aio.c
index b9350f3360c6..301e6314183b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2135,12 +2135,12 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
 		       compat_long_t, min_nr,
 		       compat_long_t, nr,
 		       struct io_event __user *, events,
-		       struct compat_timespec __user *, timeout)
+		       struct old_timespec32 __user *, timeout)
 {
 	struct timespec64 t;
 	int ret;
 
-	if (timeout && compat_get_timespec64(&t, timeout))
+	if (timeout && get_old_timespec32(&t, timeout))
 		return -EFAULT;
 
 	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
@@ -2160,7 +2160,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
 		compat_long_t, min_nr,
 		compat_long_t, nr,
 		struct io_event __user *, events,
-		struct compat_timespec __user *, timeout,
+		struct old_timespec32 __user *, timeout,
 		const struct __compat_aio_sigset __user *, usig)
 {
 	struct __compat_aio_sigset ksig = { NULL, };
@@ -2168,7 +2168,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
 	struct timespec64 t;
 	int ret;
 
-	if (timeout && compat_get_timespec64(&t, timeout))
+	if (timeout && get_old_timespec32(&t, timeout))
 		return -EFAULT;
 
 	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f22f77172c5f..181c58b23110 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5764,16 +5764,10 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 				   unsigned int flags)
 {
-	struct inode *inode;
-
-	inode = btrfs_lookup_dentry(dir, dentry);
-	if (IS_ERR(inode)) {
-		if (PTR_ERR(inode) == -ENOENT)
-			inode = NULL;
-		else
-			return ERR_CAST(inode);
-	}
+	struct inode *inode = btrfs_lookup_dentry(dir, dentry);
 
+	if (inode == ERR_PTR(-ENOENT))
+		inode = NULL;
 	return d_splice_alias(inode, dentry);
 }
 
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index 504b3c3539dc..15f6e96b3bd9 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -52,7 +52,7 @@
 #define elf_prpsinfo	compat_elf_prpsinfo
 
 #undef ns_to_timeval
-#define ns_to_timeval ns_to_compat_timeval
+#define ns_to_timeval ns_to_old_timeval32
 
 /*
  * To use this file, asm/elf.h must define compat_elf_check_arch.
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 0c445a03e682..ce2cc2169040 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -22,37 +22,21 @@
 #include <linux/smp.h>
 #include <linux/ioctl.h>
 #include <linux/if.h>
-#include <linux/if_bridge.h>
 #include <linux/raid/md_u.h>
-#include <linux/kd.h>
-#include <linux/route.h>
-#include <linux/in6.h>
-#include <linux/ipv6_route.h>
-#include <linux/skbuff.h>
-#include <linux/netlink.h>
-#include <linux/vt.h>
 #include <linux/falloc.h>
-#include <linux/fs.h>
 #include <linux/file.h>
-#include <linux/ppp_defs.h>
 #include <linux/ppp-ioctl.h>
 #include <linux/if_pppox.h>
 #include <linux/mtio.h>
 #include <linux/tty.h>
 #include <linux/vt_kern.h>
-#include <linux/fb.h>
-#include <linux/videodev2.h>
-#include <linux/netdevice.h>
 #include <linux/raw.h>
 #include <linux/blkdev.h>
-#include <linux/elevator.h>
 #include <linux/rtc.h>
 #include <linux/pci.h>
 #include <linux/serial.h>
-#include <linux/if_tun.h>
 #include <linux/ctype.h>
 #include <linux/syscalls.h>
-#include <linux/atalk.h>
 #include <linux/gfp.h>
 #include <linux/cec.h>
 
@@ -74,32 +58,9 @@
 #endif
 
 #include <linux/uaccess.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_bonding.h>
 #include <linux/watchdog.h>
 
 #include <linux/soundcard.h>
-#include <linux/lp.h>
-#include <linux/ppdev.h>
-
-#include <linux/atm.h>
-#include <linux/atmarp.h>
-#include <linux/atmclip.h>
-#include <linux/atmdev.h>
-#include <linux/atmioc.h>
-#include <linux/atmlec.h>
-#include <linux/atmmpc.h>
-#include <linux/atmsvc.h>
-#include <linux/atm_tcp.h>
-#include <linux/sonet.h>
-#include <linux/atm_suni.h>
-
-#include <linux/usb.h>
-#include <linux/usbdevice_fs.h>
-#include <linux/nbd.h>
-#include <linux/random.h>
-#include <linux/filter.h>
 
 #include <linux/hiddev.h>
 
@@ -112,6 +73,7 @@
 #include <linux/sort.h>
 
 #ifdef CONFIG_SPARC
+#include <linux/fb.h>
 #include <asm/fbio.h>
 #endif
 
@@ -544,22 +506,6 @@ static int mt_ioctl_trans(struct file *file,
 #define HCIUARTSETFLAGS		_IOW('U', 203, int)
 #define HCIUARTGETFLAGS		_IOR('U', 204, int)
 
-#define BNEPCONNADD	_IOW('B', 200, int)
-#define BNEPCONNDEL	_IOW('B', 201, int)
-#define BNEPGETCONNLIST	_IOR('B', 210, int)
-#define BNEPGETCONNINFO	_IOR('B', 211, int)
-#define BNEPGETSUPPFEAT	_IOR('B', 212, int)
-
-#define CMTPCONNADD	_IOW('C', 200, int)
-#define CMTPCONNDEL	_IOW('C', 201, int)
-#define CMTPGETCONNLIST	_IOR('C', 210, int)
-#define CMTPGETCONNINFO	_IOR('C', 211, int)
-
-#define HIDPCONNADD	_IOW('H', 200, int)
-#define HIDPCONNDEL	_IOW('H', 201, int)
-#define HIDPGETCONNLIST	_IOR('H', 210, int)
-#define HIDPGETCONNINFO	_IOR('H', 211, int)
-
 #define RTC_IRQP_READ32		_IOR('p', 0x0b, compat_ulong_t)
 #define RTC_IRQP_SET32		_IOW('p', 0x0c, compat_ulong_t)
 #define RTC_EPOCH_READ32	_IOR('p', 0x0d, compat_ulong_t)
@@ -974,19 +920,6 @@ COMPATIBLE_IOCTL(RFCOMMRELEASEDEV)
 COMPATIBLE_IOCTL(RFCOMMGETDEVLIST)
 COMPATIBLE_IOCTL(RFCOMMGETDEVINFO)
 COMPATIBLE_IOCTL(RFCOMMSTEALDLC)
-COMPATIBLE_IOCTL(BNEPCONNADD)
-COMPATIBLE_IOCTL(BNEPCONNDEL)
-COMPATIBLE_IOCTL(BNEPGETCONNLIST)
-COMPATIBLE_IOCTL(BNEPGETCONNINFO)
-COMPATIBLE_IOCTL(BNEPGETSUPPFEAT)
-COMPATIBLE_IOCTL(CMTPCONNADD)
-COMPATIBLE_IOCTL(CMTPCONNDEL)
-COMPATIBLE_IOCTL(CMTPGETCONNLIST)
-COMPATIBLE_IOCTL(CMTPGETCONNINFO)
-COMPATIBLE_IOCTL(HIDPCONNADD)
-COMPATIBLE_IOCTL(HIDPCONNDEL)
-COMPATIBLE_IOCTL(HIDPGETCONNLIST)
-COMPATIBLE_IOCTL(HIDPGETCONNINFO)
 /* CAPI */
 COMPATIBLE_IOCTL(CAPI_REGISTER)
 COMPATIBLE_IOCTL(CAPI_GET_MANUFACTURER)
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 39c20ef26db4..79debfc9cef9 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -83,10 +83,6 @@ static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
 	    filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
 		return true;
 
-	if (contents_mode == FS_ENCRYPTION_MODE_SPECK128_256_XTS &&
-	    filenames_mode == FS_ENCRYPTION_MODE_SPECK128_256_CTS)
-		return true;
-
 	return false;
 }
 
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index e997ca51192f..7874c9bb2fc5 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -174,16 +174,6 @@ static struct fscrypt_mode {
 		.cipher_str = "cts(cbc(aes))",
 		.keysize = 16,
 	},
-	[FS_ENCRYPTION_MODE_SPECK128_256_XTS] = {
-		.friendly_name = "Speck128/256-XTS",
-		.cipher_str = "xts(speck128)",
-		.keysize = 64,
-	},
-	[FS_ENCRYPTION_MODE_SPECK128_256_CTS] = {
-		.friendly_name = "Speck128/256-CTS-CBC",
-		.cipher_str = "cts(cbc(speck128))",
-		.keysize = 32,
-	},
 };
 
 static struct fscrypt_mode *
diff --git a/fs/exec.c b/fs/exec.c
index 1ebf6e5a521d..fc281b738a98 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -908,14 +908,14 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size,
 		goto out;
 
 	i_size = i_size_read(file_inode(file));
-	if (max_size > 0 && i_size > max_size) {
-		ret = -EFBIG;
-		goto out;
-	}
 	if (i_size <= 0) {
 		ret = -EINVAL;
 		goto out;
 	}
+	if (i_size > SIZE_MAX || (max_size > 0 && i_size > max_size)) {
+		ret = -EFBIG;
+		goto out;
+	}
 
 	if (id != READING_FIRMWARE_PREALLOC_BUFFER)
 		*buf = vmalloc(i_size);
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 305b220af45d..162f43b80c84 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -72,6 +72,9 @@ static int kernfs_get_target_path(struct kernfs_node *parent,
 		if (base == kn)
 			break;
 
+		if ((s - path) + 3 >= PATH_MAX)
+			return -ENAMETOOLONG;
+
 		strcpy(s, "../");
 		s += 3;
 		base = base->parent;
@@ -88,7 +91,7 @@ static int kernfs_get_target_path(struct kernfs_node *parent,
 	if (len < 2)
 		return -EINVAL;
 	len--;
-	if ((s - path) + len > PATH_MAX)
+	if ((s - path) + len >= PATH_MAX)
 		return -ENAMETOOLONG;
 
 	/* reverse fillup of target string from target to base */
diff --git a/fs/read_write.c b/fs/read_write.c
index 8a2737f0d61d..603794b207eb 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -331,7 +331,7 @@ COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned i
 }
 #endif
 
-#ifdef __ARCH_WANT_SYS_LLSEEK
+#if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT)
 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 		unsigned long, offset_low, loff_t __user *, result,
 		unsigned int, whence)
diff --git a/fs/select.c b/fs/select.c
index 4a6b6e4b21cb..22b3bf89f051 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -1120,7 +1120,7 @@ int compat_poll_select_copy_remaining(struct timespec64 *end_time, void __user *
 		ts.tv_sec = ts.tv_nsec = 0;
 
 	if (timeval) {
-		struct compat_timeval rtv;
+		struct old_timeval32 rtv;
 
 		rtv.tv_sec = ts.tv_sec;
 		rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
@@ -1128,7 +1128,7 @@ int compat_poll_select_copy_remaining(struct timespec64 *end_time, void __user *
 		if (!copy_to_user(p, &rtv, sizeof(rtv)))
 			return ret;
 	} else {
-		if (!compat_put_timespec64(&ts, p))
+		if (!put_old_timespec32(&ts, p))
 			return ret;
 	}
 	/*
@@ -1257,10 +1257,10 @@ out_nofds:
 
 static int do_compat_select(int n, compat_ulong_t __user *inp,
 	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
-	struct compat_timeval __user *tvp)
+	struct old_timeval32 __user *tvp)
 {
 	struct timespec64 end_time, *to = NULL;
-	struct compat_timeval tv;
+	struct old_timeval32 tv;
 	int ret;
 
 	if (tvp) {
@@ -1282,7 +1282,7 @@ static int do_compat_select(int n, compat_ulong_t __user *inp,
 
 COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
 	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
-	struct compat_timeval __user *, tvp)
+	struct old_timeval32 __user *, tvp)
 {
 	return do_compat_select(n, inp, outp, exp, tvp);
 }
@@ -1307,7 +1307,7 @@ COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
 
 static long do_compat_pselect(int n, compat_ulong_t __user *inp,
 	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
-	struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
+	struct old_timespec32 __user *tsp, compat_sigset_t __user *sigmask,
 	compat_size_t sigsetsize)
 {
 	sigset_t ksigmask, sigsaved;
@@ -1315,7 +1315,7 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp,
 	int ret;
 
 	if (tsp) {
-		if (compat_get_timespec64(&ts, tsp))
+		if (get_old_timespec32(&ts, tsp))
 			return -EFAULT;
 
 		to = &end_time;
@@ -1355,7 +1355,7 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp,
 
 COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
 	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
-	struct compat_timespec __user *, tsp, void __user *, sig)
+	struct old_timespec32 __user *, tsp, void __user *, sig)
 {
 	compat_size_t sigsetsize = 0;
 	compat_uptr_t up = 0;
@@ -1373,7 +1373,7 @@ COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
 }
 
 COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
-	unsigned int,  nfds, struct compat_timespec __user *, tsp,
+	unsigned int,  nfds, struct old_timespec32 __user *, tsp,
 	const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
 {
 	sigset_t ksigmask, sigsaved;
@@ -1381,7 +1381,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
 	int ret;
 
 	if (tsp) {
-		if (compat_get_timespec64(&ts, tsp))
+		if (get_old_timespec32(&ts, tsp))
 			return -EFAULT;
 
 		to = &end_time;
diff --git a/fs/stat.c b/fs/stat.c
index f8e6fb2c3657..adbfcd86c81b 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -280,6 +280,8 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, stat
 
 #endif /* __ARCH_WANT_OLD_STAT */
 
+#ifdef __ARCH_WANT_NEW_STAT
+
 #if BITS_PER_LONG == 32
 #  define choose_32_64(a,b) a
 #else
@@ -378,6 +380,7 @@ SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf)
 
 	return error;
 }
+#endif
 
 static int do_readlinkat(int dfd, const char __user *pathname,
 			 char __user *buf, int bufsiz)
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d69ad801eb80..803ca070d42e 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -561,29 +561,29 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *,
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
 COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
-		const struct compat_itimerspec __user *, utmr,
-		struct compat_itimerspec __user *, otmr)
+		const struct old_itimerspec32 __user *, utmr,
+		struct old_itimerspec32 __user *, otmr)
 {
 	struct itimerspec64 new, old;
 	int ret;
 
-	if (get_compat_itimerspec64(&new, utmr))
+	if (get_old_itimerspec32(&new, utmr))
 		return -EFAULT;
 	ret = do_timerfd_settime(ufd, flags, &new, &old);
 	if (ret)
 		return ret;
-	if (otmr && put_compat_itimerspec64(&old, otmr))
+	if (otmr && put_old_itimerspec32(&old, otmr))
 		return -EFAULT;
 	return ret;
 }
 
 COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd,
-		struct compat_itimerspec __user *, otmr)
+		struct old_itimerspec32 __user *, otmr)
 {
 	struct itimerspec64 kotmr;
 	int ret = do_timerfd_gettime(ufd, &kotmr);
 	if (ret)
 		return ret;
-	return put_compat_itimerspec64(&kotmr, otmr) ? -EFAULT : 0;
+	return put_old_itimerspec32(&kotmr, otmr) ? -EFAULT : 0;
 }
 #endif
diff --git a/fs/utimes.c b/fs/utimes.c
index 69d4b6ba1bfb..bdcf2daf39c1 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -8,35 +8,6 @@
 #include <linux/compat.h>
 #include <asm/unistd.h>
 
-#ifdef __ARCH_WANT_SYS_UTIME
-
-/*
- * sys_utime() can be implemented in user-level using sys_utimes().
- * Is this for backwards compatibility?  If so, why not move it
- * into the appropriate arch directory (for those architectures that
- * need it).
- */
-
-/* If times==NULL, set access and modification to current time,
- * must be owner or have write permission.
- * Else, update from *times, must be owner or super user.
- */
-SYSCALL_DEFINE2(utime, char __user *, filename, struct utimbuf __user *, times)
-{
-	struct timespec64 tv[2];
-
-	if (times) {
-		if (get_user(tv[0].tv_sec, &times->actime) ||
-		    get_user(tv[1].tv_sec, &times->modtime))
-			return -EFAULT;
-		tv[0].tv_nsec = 0;
-		tv[1].tv_nsec = 0;
-	}
-	return do_utimes(AT_FDCWD, filename, times ? tv : NULL, 0);
-}
-
-#endif
-
 static bool nsec_valid(long nsec)
 {
 	if (nsec == UTIME_OMIT || nsec == UTIME_NOW)
@@ -166,7 +137,7 @@ out:
 }
 
 SYSCALL_DEFINE4(utimensat, int, dfd, const char __user *, filename,
-		struct timespec __user *, utimes, int, flags)
+		struct __kernel_timespec __user *, utimes, int, flags)
 {
 	struct timespec64 tstimes[2];
 
@@ -184,6 +155,13 @@ SYSCALL_DEFINE4(utimensat, int, dfd, const char __user *, filename,
 	return do_utimes(dfd, filename, utimes ? tstimes : NULL, flags);
 }
 
+#ifdef __ARCH_WANT_SYS_UTIME
+/*
+ * futimesat(), utimes() and utime() are older versions of utimensat()
+ * that are provided for compatibility with traditional C libraries.
+ * On modern architectures, we always use libc wrappers around
+ * utimensat() instead.
+ */
 static long do_futimesat(int dfd, const char __user *filename,
 			 struct timeval __user *utimes)
 {
@@ -225,13 +203,29 @@ SYSCALL_DEFINE2(utimes, char __user *, filename,
 	return do_futimesat(AT_FDCWD, filename, utimes);
 }
 
-#ifdef CONFIG_COMPAT
+SYSCALL_DEFINE2(utime, char __user *, filename, struct utimbuf __user *, times)
+{
+	struct timespec64 tv[2];
+
+	if (times) {
+		if (get_user(tv[0].tv_sec, &times->actime) ||
+		    get_user(tv[1].tv_sec, &times->modtime))
+			return -EFAULT;
+		tv[0].tv_nsec = 0;
+		tv[1].tv_nsec = 0;
+	}
+	return do_utimes(AT_FDCWD, filename, times ? tv : NULL, 0);
+}
+#endif
+
+#ifdef CONFIG_COMPAT_32BIT_TIME
 /*
  * Not all architectures have sys_utime, so implement this in terms
  * of sys_utimes.
  */
+#ifdef __ARCH_WANT_SYS_UTIME32
 COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename,
-		       struct compat_utimbuf __user *, t)
+		       struct old_utimbuf32 __user *, t)
 {
 	struct timespec64 tv[2];
 
@@ -244,14 +238,15 @@ COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename,
 	}
 	return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0);
 }
+#endif
 
-COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct compat_timespec __user *, t, int, flags)
+COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct old_timespec32 __user *, t, int, flags)
 {
 	struct timespec64 tv[2];
 
 	if  (t) {
-		if (compat_get_timespec64(&tv[0], &t[0]) ||
-		    compat_get_timespec64(&tv[1], &t[1]))
+		if (get_old_timespec32(&tv[0], &t[0]) ||
+		    get_old_timespec32(&tv[1], &t[1]))
 			return -EFAULT;
 
 		if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT)
@@ -260,8 +255,9 @@ COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filena
 	return do_utimes(dfd, filename, t ? tv : NULL, flags);
 }
 
+#ifdef __ARCH_WANT_SYS_UTIME32
 static long do_compat_futimesat(unsigned int dfd, const char __user *filename,
-				struct compat_timeval __user *t)
+				struct old_timeval32 __user *t)
 {
 	struct timespec64 tv[2];
 
@@ -282,13 +278,14 @@ static long do_compat_futimesat(unsigned int dfd, const char __user *filename,
 
 COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd,
 		       const char __user *, filename,
-		       struct compat_timeval __user *, t)
+		       struct old_timeval32 __user *, t)
 {
 	return do_compat_futimesat(dfd, filename, t);
 }
 
-COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t)
+COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct old_timeval32 __user *, t)
 {
 	return do_compat_futimesat(AT_FDCWD, filename, t);
 }
 #endif
+#endif
diff --git a/include/asm-generic/compat.h b/include/asm-generic/compat.h
index 28819451b6d1..a86f65bffab8 100644
--- a/include/asm-generic/compat.h
+++ b/include/asm-generic/compat.h
@@ -1,3 +1,25 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_GENERIC_COMPAT_H
+#define __ASM_GENERIC_COMPAT_H
 
-/* This is an empty stub for 32-bit-only architectures */
+/* These types are common across all compat ABIs */
+typedef u32 compat_size_t;
+typedef s32 compat_ssize_t;
+typedef s32 compat_clock_t;
+typedef s32 compat_pid_t;
+typedef u32 compat_ino_t;
+typedef s32 compat_off_t;
+typedef s64 compat_loff_t;
+typedef s32 compat_daddr_t;
+typedef s32 compat_timer_t;
+typedef s32 compat_key_t;
+typedef s16 compat_short_t;
+typedef s32 compat_int_t;
+typedef s32 compat_long_t;
+typedef u16 compat_ushort_t;
+typedef u32 compat_uint_t;
+typedef u32 compat_ulong_t;
+typedef u32 compat_uptr_t;
+typedef u32 compat_aio_context_t;
+
+#endif
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 849cd8eb5ca0..d79abca81a52 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -141,4 +141,18 @@ static inline bool init_section_intersects(void *virt, size_t size)
 	return memory_intersects(__init_begin, __init_end, virt, size);
 }
 
+/**
+ * is_kernel_rodata - checks if the pointer address is located in the
+ *                    .rodata section
+ *
+ * @addr: address to check
+ *
+ * Returns: true if the address is located in .rodata, false otherwise.
+ */
+static inline bool is_kernel_rodata(unsigned long addr)
+{
+	return addr >= (unsigned long)__start_rodata &&
+	       addr < (unsigned long)__end_rodata;
+}
+
 #endif /* _ASM_GENERIC_SECTIONS_H_ */
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
deleted file mode 100644
index cdf904265caf..000000000000
--- a/include/asm-generic/unistd.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <uapi/asm-generic/unistd.h>
-#include <linux/export.h>
-
-/*
- * These are required system calls, we should
- * invert the logic eventually and let them
- * be selected by default.
- */
-#if __BITS_PER_LONG == 32
-#define __ARCH_WANT_STAT64
-#define __ARCH_WANT_SYS_LLSEEK
-#endif
diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
index e328b52425a8..22e6f412c595 100644
--- a/include/crypto/acompress.h
+++ b/include/crypto/acompress.h
@@ -234,6 +234,34 @@ static inline void acomp_request_set_params(struct acomp_req *req,
 		req->flags |= CRYPTO_ACOMP_ALLOC_OUTPUT;
 }
 
+static inline void crypto_stat_compress(struct acomp_req *req, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->compress_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->compress_cnt);
+		atomic64_add(req->slen, &tfm->base.__crt_alg->compress_tlen);
+	}
+#endif
+}
+
+static inline void crypto_stat_decompress(struct acomp_req *req, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->compress_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->decompress_cnt);
+		atomic64_add(req->slen, &tfm->base.__crt_alg->decompress_tlen);
+	}
+#endif
+}
+
 /**
  * crypto_acomp_compress() -- Invoke asynchronous compress operation
  *
@@ -246,8 +274,11 @@ static inline void acomp_request_set_params(struct acomp_req *req,
 static inline int crypto_acomp_compress(struct acomp_req *req)
 {
 	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	int ret;
 
-	return tfm->compress(req);
+	ret = tfm->compress(req);
+	crypto_stat_compress(req, ret);
+	return ret;
 }
 
 /**
@@ -262,8 +293,11 @@ static inline int crypto_acomp_compress(struct acomp_req *req)
 static inline int crypto_acomp_decompress(struct acomp_req *req)
 {
 	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	int ret;
 
-	return tfm->decompress(req);
+	ret = tfm->decompress(req);
+	crypto_stat_decompress(req, ret);
+	return ret;
 }
 
 #endif
diff --git a/include/crypto/aead.h b/include/crypto/aead.h
index 1e26f790b03f..0d765d7bfb82 100644
--- a/include/crypto/aead.h
+++ b/include/crypto/aead.h
@@ -306,6 +306,34 @@ static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req)
 	return __crypto_aead_cast(req->base.tfm);
 }
 
+static inline void crypto_stat_aead_encrypt(struct aead_request *req, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->aead_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->encrypt_cnt);
+		atomic64_add(req->cryptlen, &tfm->base.__crt_alg->encrypt_tlen);
+	}
+#endif
+}
+
+static inline void crypto_stat_aead_decrypt(struct aead_request *req, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->aead_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->decrypt_cnt);
+		atomic64_add(req->cryptlen, &tfm->base.__crt_alg->decrypt_tlen);
+	}
+#endif
+}
+
 /**
  * crypto_aead_encrypt() - encrypt plaintext
  * @req: reference to the aead_request handle that holds all information
@@ -328,11 +356,14 @@ static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req)
 static inline int crypto_aead_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	int ret;
 
 	if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY)
-		return -ENOKEY;
-
-	return crypto_aead_alg(aead)->encrypt(req);
+		ret = -ENOKEY;
+	else
+		ret = crypto_aead_alg(aead)->encrypt(req);
+	crypto_stat_aead_encrypt(req, ret);
+	return ret;
 }
 
 /**
@@ -360,14 +391,16 @@ static inline int crypto_aead_encrypt(struct aead_request *req)
 static inline int crypto_aead_decrypt(struct aead_request *req)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	int ret;
 
 	if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY)
-		return -ENOKEY;
-
-	if (req->cryptlen < crypto_aead_authsize(aead))
-		return -EINVAL;
-
-	return crypto_aead_alg(aead)->decrypt(req);
+		ret = -ENOKEY;
+	else if (req->cryptlen < crypto_aead_authsize(aead))
+		ret = -EINVAL;
+	else
+		ret = crypto_aead_alg(aead)->decrypt(req);
+	crypto_stat_aead_decrypt(req, ret);
+	return ret;
 }
 
 /**
diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h
index b5e11de4d497..afac71119396 100644
--- a/include/crypto/akcipher.h
+++ b/include/crypto/akcipher.h
@@ -271,6 +271,62 @@ static inline unsigned int crypto_akcipher_maxsize(struct crypto_akcipher *tfm)
 	return alg->max_size(tfm);
 }
 
+static inline void crypto_stat_akcipher_encrypt(struct akcipher_request *req,
+						int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->encrypt_cnt);
+		atomic64_add(req->src_len, &tfm->base.__crt_alg->encrypt_tlen);
+	}
+#endif
+}
+
+static inline void crypto_stat_akcipher_decrypt(struct akcipher_request *req,
+						int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->decrypt_cnt);
+		atomic64_add(req->src_len, &tfm->base.__crt_alg->decrypt_tlen);
+	}
+#endif
+}
+
+static inline void crypto_stat_akcipher_sign(struct akcipher_request *req,
+					     int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY)
+		atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt);
+	else
+		atomic_inc(&tfm->base.__crt_alg->sign_cnt);
+#endif
+}
+
+static inline void crypto_stat_akcipher_verify(struct akcipher_request *req,
+					       int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY)
+		atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt);
+	else
+		atomic_inc(&tfm->base.__crt_alg->verify_cnt);
+#endif
+}
+
 /**
  * crypto_akcipher_encrypt() - Invoke public key encrypt operation
  *
@@ -285,8 +341,11 @@ static inline int crypto_akcipher_encrypt(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct akcipher_alg *alg = crypto_akcipher_alg(tfm);
+	int ret;
 
-	return alg->encrypt(req);
+	ret = alg->encrypt(req);
+	crypto_stat_akcipher_encrypt(req, ret);
+	return ret;
 }
 
 /**
@@ -303,8 +362,11 @@ static inline int crypto_akcipher_decrypt(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct akcipher_alg *alg = crypto_akcipher_alg(tfm);
+	int ret;
 
-	return alg->decrypt(req);
+	ret = alg->decrypt(req);
+	crypto_stat_akcipher_decrypt(req, ret);
+	return ret;
 }
 
 /**
@@ -321,8 +383,11 @@ static inline int crypto_akcipher_sign(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct akcipher_alg *alg = crypto_akcipher_alg(tfm);
+	int ret;
 
-	return alg->sign(req);
+	ret = alg->sign(req);
+	crypto_stat_akcipher_sign(req, ret);
+	return ret;
 }
 
 /**
@@ -339,8 +404,11 @@ static inline int crypto_akcipher_verify(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct akcipher_alg *alg = crypto_akcipher_alg(tfm);
+	int ret;
 
-	return alg->verify(req);
+	ret = alg->verify(req);
+	crypto_stat_akcipher_verify(req, ret);
+	return ret;
 }
 
 /**
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index bd5e8ccf1687..4a5ad10e75f0 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -20,8 +20,10 @@
 /*
  * Maximum values for blocksize and alignmask, used to allocate
  * static buffers that are big enough for any combination of
- * ciphers and architectures.
+ * algs and architectures. Ciphers have a lower maximum size.
  */
+#define MAX_ALGAPI_BLOCKSIZE		160
+#define MAX_ALGAPI_ALIGNMASK		63
 #define MAX_CIPHER_BLOCKSIZE		16
 #define MAX_CIPHER_ALIGNMASK		15
 
@@ -425,4 +427,14 @@ static inline void crypto_yield(u32 flags)
 #endif
 }
 
+int crypto_register_notifier(struct notifier_block *nb);
+int crypto_unregister_notifier(struct notifier_block *nb);
+
+/* Crypto notification events. */
+enum {
+	CRYPTO_MSG_ALG_REQUEST,
+	CRYPTO_MSG_ALG_REGISTER,
+	CRYPTO_MSG_ALG_LOADED,
+};
+
 #endif	/* _CRYPTO_ALGAPI_H */
diff --git a/include/crypto/cbc.h b/include/crypto/cbc.h
index f5b8bfc22e6d..3bf28beefa33 100644
--- a/include/crypto/cbc.h
+++ b/include/crypto/cbc.h
@@ -113,7 +113,7 @@ static inline int crypto_cbc_decrypt_inplace(
 	unsigned int bsize = crypto_skcipher_blocksize(tfm);
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
-	u8 last_iv[bsize];
+	u8 last_iv[MAX_CIPHER_BLOCKSIZE];
 
 	/* Start of the last block. */
 	src += nbytes - (nbytes & (bsize - 1)) - bsize;
diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h
index b83d66073db0..f76302d99e2b 100644
--- a/include/crypto/chacha20.h
+++ b/include/crypto/chacha20.h
@@ -13,13 +13,12 @@
 #define CHACHA20_IV_SIZE	16
 #define CHACHA20_KEY_SIZE	32
 #define CHACHA20_BLOCK_SIZE	64
-#define CHACHA20_BLOCK_WORDS	(CHACHA20_BLOCK_SIZE / sizeof(u32))
 
 struct chacha20_ctx {
 	u32 key[8];
 };
 
-void chacha20_block(u32 *state, u32 *stream);
+void chacha20_block(u32 *state, u8 *stream);
 void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv);
 int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			   unsigned int keysize);
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index 76e432cab75d..bc7796600338 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -151,9 +151,13 @@ struct shash_desc {
 	void *__ctx[] CRYPTO_MINALIGN_ATTR;
 };
 
+#define HASH_MAX_DIGESTSIZE	 64
+#define HASH_MAX_DESCSIZE	360
+#define HASH_MAX_STATESIZE	512
+
 #define SHASH_DESC_ON_STACK(shash, ctx)				  \
 	char __##shash##_desc[sizeof(struct shash_desc) +	  \
-		crypto_shash_descsize(ctx)] CRYPTO_MINALIGN_ATTR; \
+		HASH_MAX_DESCSIZE] CRYPTO_MINALIGN_ATTR; \
 	struct shash_desc *shash = (struct shash_desc *)__##shash##_desc
 
 /**
@@ -408,6 +412,32 @@ static inline void *ahash_request_ctx(struct ahash_request *req)
 int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
 			unsigned int keylen);
 
+static inline void crypto_stat_ahash_update(struct ahash_request *req, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY)
+		atomic_inc(&tfm->base.__crt_alg->hash_err_cnt);
+	else
+		atomic64_add(req->nbytes, &tfm->base.__crt_alg->hash_tlen);
+#endif
+}
+
+static inline void crypto_stat_ahash_final(struct ahash_request *req, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->hash_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->hash_cnt);
+		atomic64_add(req->nbytes, &tfm->base.__crt_alg->hash_tlen);
+	}
+#endif
+}
+
 /**
  * crypto_ahash_finup() - update and finalize message digest
  * @req: reference to the ahash_request handle that holds all information
@@ -522,7 +552,11 @@ static inline int crypto_ahash_init(struct ahash_request *req)
  */
 static inline int crypto_ahash_update(struct ahash_request *req)
 {
-	return crypto_ahash_reqtfm(req)->update(req);
+	int ret;
+
+	ret = crypto_ahash_reqtfm(req)->update(req);
+	crypto_stat_ahash_update(req, ret);
+	return ret;
 }
 
 /**
diff --git a/include/crypto/internal/cryptouser.h b/include/crypto/internal/cryptouser.h
new file mode 100644
index 000000000000..8db299c25566
--- /dev/null
+++ b/include/crypto/internal/cryptouser.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <net/netlink.h>
+
+struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact);
+
+int crypto_dump_reportstat(struct sk_buff *skb, struct netlink_callback *cb);
+int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct nlattr **attrs);
+int crypto_dump_reportstat_done(struct netlink_callback *cb);
diff --git a/include/crypto/internal/geniv.h b/include/crypto/internal/geniv.h
index 2bcfb931bc5b..71be24cd59bd 100644
--- a/include/crypto/internal/geniv.h
+++ b/include/crypto/internal/geniv.h
@@ -20,7 +20,7 @@
 struct aead_geniv_ctx {
 	spinlock_t lock;
 	struct crypto_aead *child;
-	struct crypto_skcipher *sknull;
+	struct crypto_sync_skcipher *sknull;
 	u8 salt[] __attribute__ ((aligned(__alignof__(u32))));
 };
 
diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h
index 1bde0a6514fa..f517ba6d3a27 100644
--- a/include/crypto/kpp.h
+++ b/include/crypto/kpp.h
@@ -268,6 +268,42 @@ struct kpp_secret {
 	unsigned short len;
 };
 
+static inline void crypto_stat_kpp_set_secret(struct crypto_kpp *tfm, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	if (ret)
+		atomic_inc(&tfm->base.__crt_alg->kpp_err_cnt);
+	else
+		atomic_inc(&tfm->base.__crt_alg->setsecret_cnt);
+#endif
+}
+
+static inline void crypto_stat_kpp_generate_public_key(struct kpp_request *req,
+						       int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+
+	if (ret)
+		atomic_inc(&tfm->base.__crt_alg->kpp_err_cnt);
+	else
+		atomic_inc(&tfm->base.__crt_alg->generate_public_key_cnt);
+#endif
+}
+
+static inline void crypto_stat_kpp_compute_shared_secret(struct kpp_request *req,
+							 int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+
+	if (ret)
+		atomic_inc(&tfm->base.__crt_alg->kpp_err_cnt);
+	else
+		atomic_inc(&tfm->base.__crt_alg->compute_shared_secret_cnt);
+#endif
+}
+
 /**
  * crypto_kpp_set_secret() - Invoke kpp operation
  *
@@ -287,8 +323,11 @@ static inline int crypto_kpp_set_secret(struct crypto_kpp *tfm,
 					const void *buffer, unsigned int len)
 {
 	struct kpp_alg *alg = crypto_kpp_alg(tfm);
+	int ret;
 
-	return alg->set_secret(tfm, buffer, len);
+	ret = alg->set_secret(tfm, buffer, len);
+	crypto_stat_kpp_set_secret(tfm, ret);
+	return ret;
 }
 
 /**
@@ -308,8 +347,11 @@ static inline int crypto_kpp_generate_public_key(struct kpp_request *req)
 {
 	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
 	struct kpp_alg *alg = crypto_kpp_alg(tfm);
+	int ret;
 
-	return alg->generate_public_key(req);
+	ret = alg->generate_public_key(req);
+	crypto_stat_kpp_generate_public_key(req, ret);
+	return ret;
 }
 
 /**
@@ -326,8 +368,11 @@ static inline int crypto_kpp_compute_shared_secret(struct kpp_request *req)
 {
 	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
 	struct kpp_alg *alg = crypto_kpp_alg(tfm);
+	int ret;
 
-	return alg->compute_shared_secret(req);
+	ret = alg->compute_shared_secret(req);
+	crypto_stat_kpp_compute_shared_secret(req, ret);
+	return ret;
 }
 
 /**
diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h
deleted file mode 100644
index b67404fc4b34..000000000000
--- a/include/crypto/mcryptd.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Software async multibuffer crypto daemon headers
- *
- *    Author:
- *             Tim Chen <tim.c.chen@linux.intel.com>
- *
- *    Copyright (c) 2014, Intel Corporation.
- */
-
-#ifndef _CRYPTO_MCRYPT_H
-#define _CRYPTO_MCRYPT_H
-
-#include <linux/crypto.h>
-#include <linux/kernel.h>
-#include <crypto/hash.h>
-
-struct mcryptd_ahash {
-	struct crypto_ahash base;
-};
-
-static inline struct mcryptd_ahash *__mcryptd_ahash_cast(
-	struct crypto_ahash *tfm)
-{
-	return (struct mcryptd_ahash *)tfm;
-}
-
-struct mcryptd_cpu_queue {
-	struct crypto_queue queue;
-	spinlock_t q_lock;
-	struct work_struct work;
-};
-
-struct mcryptd_queue {
-	struct mcryptd_cpu_queue __percpu *cpu_queue;
-};
-
-struct mcryptd_instance_ctx {
-	struct crypto_spawn spawn;
-	struct mcryptd_queue *queue;
-};
-
-struct mcryptd_hash_ctx {
-	struct crypto_ahash *child;
-	struct mcryptd_alg_state *alg_state;
-};
-
-struct mcryptd_tag {
-	/* seq number of request */
-	unsigned seq_num;
-	/* arrival time of request */
-	unsigned long arrival;
-	unsigned long expire;
-	int	cpu;
-};
-
-struct mcryptd_hash_request_ctx {
-	struct list_head waiter;
-	crypto_completion_t complete;
-	struct mcryptd_tag tag;
-	struct crypto_hash_walk walk;
-	u8 *out;
-	int flag;
-	struct ahash_request areq;
-};
-
-struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name,
-					u32 type, u32 mask);
-struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm);
-struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req);
-void mcryptd_free_ahash(struct mcryptd_ahash *tfm);
-void mcryptd_flusher(struct work_struct *work);
-
-enum mcryptd_req_type {
-	MCRYPTD_NONE,
-	MCRYPTD_UPDATE,
-	MCRYPTD_FINUP,
-	MCRYPTD_DIGEST,
-	MCRYPTD_FINAL
-};
-
-struct mcryptd_alg_cstate {
-	unsigned long next_flush;
-	unsigned next_seq_num;
-	bool	flusher_engaged;
-	struct  delayed_work flush;
-	int	cpu;
-	struct  mcryptd_alg_state *alg_state;
-	void	*mgr;
-	spinlock_t work_lock;
-	struct list_head work_list;
-	struct list_head flush_list;
-};
-
-struct mcryptd_alg_state {
-	struct mcryptd_alg_cstate __percpu *alg_cstate;
-	unsigned long (*flusher)(struct mcryptd_alg_cstate *cstate);
-};
-
-/* return delay in jiffies from current time */
-static inline unsigned long get_delay(unsigned long t)
-{
-	long delay;
-
-	delay = (long) t - (long) jiffies;
-	if (delay <= 0)
-		return 0;
-	else
-		return (unsigned long) delay;
-}
-
-void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay);
-
-#endif
diff --git a/include/crypto/morus1280_glue.h b/include/crypto/morus1280_glue.h
index b26dd70efd9a..ba782e10065e 100644
--- a/include/crypto/morus1280_glue.h
+++ b/include/crypto/morus1280_glue.h
@@ -82,7 +82,7 @@ void cryptd_morus1280_glue_exit_tfm(struct crypto_aead *aead);
 	{ \
 	} \
 	\
-	struct aead_alg crypto_morus1280_##id##_algs[] = {\
+	static struct aead_alg crypto_morus1280_##id##_algs[] = {\
 		{ \
 			.setkey = crypto_morus1280_glue_setkey, \
 			.setauthsize = crypto_morus1280_glue_setauthsize, \
diff --git a/include/crypto/morus640_glue.h b/include/crypto/morus640_glue.h
index 90c8db07e740..27fa790a2362 100644
--- a/include/crypto/morus640_glue.h
+++ b/include/crypto/morus640_glue.h
@@ -82,7 +82,7 @@ void cryptd_morus640_glue_exit_tfm(struct crypto_aead *aead);
 	{ \
 	} \
 	\
-	struct aead_alg crypto_morus640_##id##_algs[] = {\
+	static struct aead_alg crypto_morus640_##id##_algs[] = {\
 		{ \
 			.setkey = crypto_morus640_glue_setkey, \
 			.setauthsize = crypto_morus640_glue_setauthsize, \
diff --git a/include/crypto/null.h b/include/crypto/null.h
index 15aeef6e30ef..0ef577cc00e3 100644
--- a/include/crypto/null.h
+++ b/include/crypto/null.h
@@ -9,7 +9,7 @@
 #define NULL_DIGEST_SIZE	0
 #define NULL_IV_SIZE		0
 
-struct crypto_skcipher *crypto_get_default_null_skcipher(void);
+struct crypto_sync_skcipher *crypto_get_default_null_skcipher(void);
 void crypto_put_default_null_skcipher(void);
 
 #endif
diff --git a/include/crypto/rng.h b/include/crypto/rng.h
index b95ede354a66..6d258f5b68f1 100644
--- a/include/crypto/rng.h
+++ b/include/crypto/rng.h
@@ -122,6 +122,29 @@ static inline void crypto_free_rng(struct crypto_rng *tfm)
 	crypto_destroy_tfm(tfm, crypto_rng_tfm(tfm));
 }
 
+static inline void crypto_stat_rng_seed(struct crypto_rng *tfm, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY)
+		atomic_inc(&tfm->base.__crt_alg->rng_err_cnt);
+	else
+		atomic_inc(&tfm->base.__crt_alg->seed_cnt);
+#endif
+}
+
+static inline void crypto_stat_rng_generate(struct crypto_rng *tfm,
+					    unsigned int dlen, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->rng_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->generate_cnt);
+		atomic64_add(dlen, &tfm->base.__crt_alg->generate_tlen);
+	}
+#endif
+}
+
 /**
  * crypto_rng_generate() - get random number
  * @tfm: cipher handle
@@ -140,7 +163,11 @@ static inline int crypto_rng_generate(struct crypto_rng *tfm,
 				      const u8 *src, unsigned int slen,
 				      u8 *dst, unsigned int dlen)
 {
-	return crypto_rng_alg(tfm)->generate(tfm, src, slen, dst, dlen);
+	int ret;
+
+	ret = crypto_rng_alg(tfm)->generate(tfm, src, slen, dst, dlen);
+	crypto_stat_rng_generate(tfm, dlen, ret);
+	return ret;
 }
 
 /**
diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h
index 2f327f090c3e..925f547cdcfa 100644
--- a/include/crypto/skcipher.h
+++ b/include/crypto/skcipher.h
@@ -65,6 +65,10 @@ struct crypto_skcipher {
 	struct crypto_tfm base;
 };
 
+struct crypto_sync_skcipher {
+	struct crypto_skcipher base;
+};
+
 /**
  * struct skcipher_alg - symmetric key cipher definition
  * @min_keysize: Minimum key size supported by the transformation. This is the
@@ -139,9 +143,17 @@ struct skcipher_alg {
 	struct crypto_alg base;
 };
 
-#define SKCIPHER_REQUEST_ON_STACK(name, tfm) \
+#define MAX_SYNC_SKCIPHER_REQSIZE      384
+/*
+ * This performs a type-check against the "tfm" argument to make sure
+ * all users have the correct skcipher tfm for doing on-stack requests.
+ */
+#define SYNC_SKCIPHER_REQUEST_ON_STACK(name, tfm) \
 	char __##name##_desc[sizeof(struct skcipher_request) + \
-		crypto_skcipher_reqsize(tfm)] CRYPTO_MINALIGN_ATTR; \
+			     MAX_SYNC_SKCIPHER_REQSIZE + \
+			     (!(sizeof((struct crypto_sync_skcipher *)1 == \
+				       (typeof(tfm))1))) \
+			    ] CRYPTO_MINALIGN_ATTR; \
 	struct skcipher_request *name = (void *)__##name##_desc
 
 /**
@@ -197,6 +209,9 @@ static inline struct crypto_skcipher *__crypto_skcipher_cast(
 struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
 					      u32 type, u32 mask);
 
+struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(const char *alg_name,
+					      u32 type, u32 mask);
+
 static inline struct crypto_tfm *crypto_skcipher_tfm(
 	struct crypto_skcipher *tfm)
 {
@@ -212,6 +227,11 @@ static inline void crypto_free_skcipher(struct crypto_skcipher *tfm)
 	crypto_destroy_tfm(tfm, crypto_skcipher_tfm(tfm));
 }
 
+static inline void crypto_free_sync_skcipher(struct crypto_sync_skcipher *tfm)
+{
+	crypto_free_skcipher(&tfm->base);
+}
+
 /**
  * crypto_has_skcipher() - Search for the availability of an skcipher.
  * @alg_name: is the cra_name / name or cra_driver_name / driver name of the
@@ -280,6 +300,12 @@ static inline unsigned int crypto_skcipher_ivsize(struct crypto_skcipher *tfm)
 	return tfm->ivsize;
 }
 
+static inline unsigned int crypto_sync_skcipher_ivsize(
+	struct crypto_sync_skcipher *tfm)
+{
+	return crypto_skcipher_ivsize(&tfm->base);
+}
+
 static inline unsigned int crypto_skcipher_alg_chunksize(
 	struct skcipher_alg *alg)
 {
@@ -356,6 +382,12 @@ static inline unsigned int crypto_skcipher_blocksize(
 	return crypto_tfm_alg_blocksize(crypto_skcipher_tfm(tfm));
 }
 
+static inline unsigned int crypto_sync_skcipher_blocksize(
+	struct crypto_sync_skcipher *tfm)
+{
+	return crypto_skcipher_blocksize(&tfm->base);
+}
+
 static inline unsigned int crypto_skcipher_alignmask(
 	struct crypto_skcipher *tfm)
 {
@@ -379,6 +411,24 @@ static inline void crypto_skcipher_clear_flags(struct crypto_skcipher *tfm,
 	crypto_tfm_clear_flags(crypto_skcipher_tfm(tfm), flags);
 }
 
+static inline u32 crypto_sync_skcipher_get_flags(
+	struct crypto_sync_skcipher *tfm)
+{
+	return crypto_skcipher_get_flags(&tfm->base);
+}
+
+static inline void crypto_sync_skcipher_set_flags(
+	struct crypto_sync_skcipher *tfm, u32 flags)
+{
+	crypto_skcipher_set_flags(&tfm->base, flags);
+}
+
+static inline void crypto_sync_skcipher_clear_flags(
+	struct crypto_sync_skcipher *tfm, u32 flags)
+{
+	crypto_skcipher_clear_flags(&tfm->base, flags);
+}
+
 /**
  * crypto_skcipher_setkey() - set key for cipher
  * @tfm: cipher handle
@@ -401,6 +451,12 @@ static inline int crypto_skcipher_setkey(struct crypto_skcipher *tfm,
 	return tfm->setkey(tfm, key, keylen);
 }
 
+static inline int crypto_sync_skcipher_setkey(struct crypto_sync_skcipher *tfm,
+					 const u8 *key, unsigned int keylen)
+{
+	return crypto_skcipher_setkey(&tfm->base, key, keylen);
+}
+
 static inline unsigned int crypto_skcipher_default_keysize(
 	struct crypto_skcipher *tfm)
 {
@@ -422,6 +478,40 @@ static inline struct crypto_skcipher *crypto_skcipher_reqtfm(
 	return __crypto_skcipher_cast(req->base.tfm);
 }
 
+static inline struct crypto_sync_skcipher *crypto_sync_skcipher_reqtfm(
+	struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+
+	return container_of(tfm, struct crypto_sync_skcipher, base);
+}
+
+static inline void crypto_stat_skcipher_encrypt(struct skcipher_request *req,
+						int ret, struct crypto_alg *alg)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&alg->cipher_err_cnt);
+	} else {
+		atomic_inc(&alg->encrypt_cnt);
+		atomic64_add(req->cryptlen, &alg->encrypt_tlen);
+	}
+#endif
+}
+
+static inline void crypto_stat_skcipher_decrypt(struct skcipher_request *req,
+						int ret, struct crypto_alg *alg)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&alg->cipher_err_cnt);
+	} else {
+		atomic_inc(&alg->decrypt_cnt);
+		atomic64_add(req->cryptlen, &alg->decrypt_tlen);
+	}
+#endif
+}
+
 /**
  * crypto_skcipher_encrypt() - encrypt plaintext
  * @req: reference to the skcipher_request handle that holds all information
@@ -436,11 +526,14 @@ static inline struct crypto_skcipher *crypto_skcipher_reqtfm(
 static inline int crypto_skcipher_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	int ret;
 
 	if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
-		return -ENOKEY;
-
-	return tfm->encrypt(req);
+		ret = -ENOKEY;
+	else
+		ret = tfm->encrypt(req);
+	crypto_stat_skcipher_encrypt(req, ret, tfm->base.__crt_alg);
+	return ret;
 }
 
 /**
@@ -457,11 +550,14 @@ static inline int crypto_skcipher_encrypt(struct skcipher_request *req)
 static inline int crypto_skcipher_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	int ret;
 
 	if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
-		return -ENOKEY;
-
-	return tfm->decrypt(req);
+		ret = -ENOKEY;
+	else
+		ret = tfm->decrypt(req);
+	crypto_stat_skcipher_decrypt(req, ret, tfm->base.__crt_alg);
+	return ret;
 }
 
 /**
@@ -500,6 +596,12 @@ static inline void skcipher_request_set_tfm(struct skcipher_request *req,
 	req->base.tfm = crypto_skcipher_tfm(tfm);
 }
 
+static inline void skcipher_request_set_sync_tfm(struct skcipher_request *req,
+					    struct crypto_sync_skcipher *tfm)
+{
+	skcipher_request_set_tfm(req, &tfm->base);
+}
+
 static inline struct skcipher_request *skcipher_request_cast(
 	struct crypto_async_request *req)
 {
diff --git a/include/crypto/speck.h b/include/crypto/speck.h
deleted file mode 100644
index 73cfc952d405..000000000000
--- a/include/crypto/speck.h
+++ /dev/null
@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Common values for the Speck algorithm
- */
-
-#ifndef _CRYPTO_SPECK_H
-#define _CRYPTO_SPECK_H
-
-#include <linux/types.h>
-
-/* Speck128 */
-
-#define SPECK128_BLOCK_SIZE	16
-
-#define SPECK128_128_KEY_SIZE	16
-#define SPECK128_128_NROUNDS	32
-
-#define SPECK128_192_KEY_SIZE	24
-#define SPECK128_192_NROUNDS	33
-
-#define SPECK128_256_KEY_SIZE	32
-#define SPECK128_256_NROUNDS	34
-
-struct speck128_tfm_ctx {
-	u64 round_keys[SPECK128_256_NROUNDS];
-	int nrounds;
-};
-
-void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx,
-			     u8 *out, const u8 *in);
-
-void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx,
-			     u8 *out, const u8 *in);
-
-int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key,
-			   unsigned int keysize);
-
-/* Speck64 */
-
-#define SPECK64_BLOCK_SIZE	8
-
-#define SPECK64_96_KEY_SIZE	12
-#define SPECK64_96_NROUNDS	26
-
-#define SPECK64_128_KEY_SIZE	16
-#define SPECK64_128_NROUNDS	27
-
-struct speck64_tfm_ctx {
-	u32 round_keys[SPECK64_128_NROUNDS];
-	int nrounds;
-};
-
-void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx,
-			    u8 *out, const u8 *in);
-
-void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx,
-			    u8 *out, const u8 *in);
-
-int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key,
-			  unsigned int keysize);
-
-#endif /* _CRYPTO_SPECK_H */
diff --git a/include/dt-bindings/clock/exynos3250.h b/include/dt-bindings/clock/exynos3250.h
index c796ff02ceeb..fe8214017b46 100644
--- a/include/dt-bindings/clock/exynos3250.h
+++ b/include/dt-bindings/clock/exynos3250.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2014 Samsung Electronics Co., Ltd.
  * 	Author: Tomasz Figa <t.figa@samsung.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Device Tree binding constants for Samsung Exynos3250 clock controllers.
  */
 
diff --git a/include/dt-bindings/clock/exynos4.h b/include/dt-bindings/clock/exynos4.h
index e9f9d400c322..5b1d68512360 100644
--- a/include/dt-bindings/clock/exynos4.h
+++ b/include/dt-bindings/clock/exynos4.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2013 Samsung Electronics Co., Ltd.
  * Author: Andrzej Hajda <a.hajda@samsung.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Device Tree binding constants for Exynos4 clock controller.
-*/
+ */
 
 #ifndef _DT_BINDINGS_CLOCK_EXYNOS_4_H
 #define _DT_BINDINGS_CLOCK_EXYNOS_4_H
diff --git a/include/dt-bindings/clock/exynos5250.h b/include/dt-bindings/clock/exynos5250.h
index 15508adcdfde..bc8a3c53a54b 100644
--- a/include/dt-bindings/clock/exynos5250.h
+++ b/include/dt-bindings/clock/exynos5250.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2013 Samsung Electronics Co., Ltd.
  * Author: Andrzej Hajda <a.hajda@samsung.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Device Tree binding constants for Exynos5250 clock controller.
-*/
+ */
 
 #ifndef _DT_BINDINGS_CLOCK_EXYNOS_5250_H
 #define _DT_BINDINGS_CLOCK_EXYNOS_5250_H
diff --git a/include/dt-bindings/clock/exynos5260-clk.h b/include/dt-bindings/clock/exynos5260-clk.h
index a4bac9a1764f..98a58cbd81b2 100644
--- a/include/dt-bindings/clock/exynos5260-clk.h
+++ b/include/dt-bindings/clock/exynos5260-clk.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2014 Samsung Electronics Co., Ltd.
  * Author: Rahul Sharma <rahul.sharma@samsung.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Provides Constants for Exynos5260 clocks.
-*/
+ */
 
 #ifndef _DT_BINDINGS_CLK_EXYNOS5260_H
 #define _DT_BINDINGS_CLK_EXYNOS5260_H
diff --git a/include/dt-bindings/clock/exynos5410.h b/include/dt-bindings/clock/exynos5410.h
index 6cb4e90f81fc..f179eabbcdb7 100644
--- a/include/dt-bindings/clock/exynos5410.h
+++ b/include/dt-bindings/clock/exynos5410.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2014 Samsung Electronics Co., Ltd.
  * Copyright (c) 2016 Krzysztof Kozlowski
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Device Tree binding constants for Exynos5421 clock controller.
-*/
+ */
 
 #ifndef _DT_BINDINGS_CLOCK_EXYNOS_5410_H
 #define _DT_BINDINGS_CLOCK_EXYNOS_5410_H
diff --git a/include/dt-bindings/clock/exynos5420.h b/include/dt-bindings/clock/exynos5420.h
index 2740ae0424a9..355f469943f1 100644
--- a/include/dt-bindings/clock/exynos5420.h
+++ b/include/dt-bindings/clock/exynos5420.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2013 Samsung Electronics Co., Ltd.
  * Author: Andrzej Hajda <a.hajda@samsung.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Device Tree binding constants for Exynos5420 clock controller.
-*/
+ */
 
 #ifndef _DT_BINDINGS_CLOCK_EXYNOS_5420_H
 #define _DT_BINDINGS_CLOCK_EXYNOS_5420_H
diff --git a/include/dt-bindings/clock/exynos5433.h b/include/dt-bindings/clock/exynos5433.h
index be39d23e6a32..98bd85ce1e45 100644
--- a/include/dt-bindings/clock/exynos5433.h
+++ b/include/dt-bindings/clock/exynos5433.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2014 Samsung Electronics Co., Ltd.
  * Author: Chanwoo Choi <cw00.choi@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef _DT_BINDINGS_CLOCK_EXYNOS5433_H
diff --git a/include/dt-bindings/clock/exynos7-clk.h b/include/dt-bindings/clock/exynos7-clk.h
index 10c558611085..fce33c7050c8 100644
--- a/include/dt-bindings/clock/exynos7-clk.h
+++ b/include/dt-bindings/clock/exynos7-clk.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2014 Samsung Electronics Co., Ltd.
  * Author: Naveen Krishna Ch <naveenkrishna.ch@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
+ */
 
 #ifndef _DT_BINDINGS_CLOCK_EXYNOS7_H
 #define _DT_BINDINGS_CLOCK_EXYNOS7_H
diff --git a/include/dt-bindings/clock/s3c2410.h b/include/dt-bindings/clock/s3c2410.h
index 352a7673fc69..0fb65c3f2f59 100644
--- a/include/dt-bindings/clock/s3c2410.h
+++ b/include/dt-bindings/clock/s3c2410.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Device Tree binding constants clock controllers of Samsung S3C2410 and later.
  */
 
diff --git a/include/dt-bindings/clock/s3c2412.h b/include/dt-bindings/clock/s3c2412.h
index aac1dcfda81c..b4656156cc0f 100644
--- a/include/dt-bindings/clock/s3c2412.h
+++ b/include/dt-bindings/clock/s3c2412.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Device Tree binding constants clock controllers of Samsung S3C2412.
  */
 
diff --git a/include/dt-bindings/clock/s3c2443.h b/include/dt-bindings/clock/s3c2443.h
index f3ba68a25ecb..a9d2f105d536 100644
--- a/include/dt-bindings/clock/s3c2443.h
+++ b/include/dt-bindings/clock/s3c2443.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Device Tree binding constants clock controllers of Samsung S3C2443 and later.
  */
 
diff --git a/include/dt-bindings/interrupt-controller/arm-gic.h b/include/dt-bindings/interrupt-controller/arm-gic.h
index 0c85f65c81c7..35b6f69b7db6 100644
--- a/include/dt-bindings/interrupt-controller/arm-gic.h
+++ b/include/dt-bindings/interrupt-controller/arm-gic.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
 /*
  * This header provides constants for the ARM GIC.
  */
diff --git a/include/dt-bindings/interrupt-controller/irq.h b/include/dt-bindings/interrupt-controller/irq.h
index a8b310555f14..9e3d183e1381 100644
--- a/include/dt-bindings/interrupt-controller/irq.h
+++ b/include/dt-bindings/interrupt-controller/irq.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
 /*
  * This header provides constants for most IRQ bindings.
  *
diff --git a/include/dt-bindings/thermal/thermal_exynos.h b/include/dt-bindings/thermal/thermal_exynos.h
index 0646500bca69..642e4e7f4084 100644
--- a/include/dt-bindings/thermal/thermal_exynos.h
+++ b/include/dt-bindings/thermal/thermal_exynos.h
@@ -1,19 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * thermal_exynos.h - Samsung EXYNOS TMU device tree definitions
  *
  *  Copyright (C) 2014 Samsung Electronics
  *  Lukasz Majewski <l.majewski@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #ifndef _EXYNOS_THERMAL_TMU_DT_H
diff --git a/include/dt-bindings/usb/pd.h b/include/dt-bindings/usb/pd.h
index 7b7a92fefa0a..985f2bbd4d24 100644
--- a/include/dt-bindings/usb/pd.h
+++ b/include/dt-bindings/usb/pd.h
@@ -59,4 +59,30 @@
 	(PDO_TYPE(PDO_TYPE_VAR) | PDO_VAR_MIN_VOLT(min_mv) |	\
 	 PDO_VAR_MAX_VOLT(max_mv) | PDO_VAR_MAX_CURR(max_ma))
 
+#define APDO_TYPE_PPS		0
+
+#define PDO_APDO_TYPE_SHIFT	28	/* Only valid value currently is 0x0 - PPS */
+#define PDO_APDO_TYPE_MASK	0x3
+
+#define PDO_APDO_TYPE(t)	((t) << PDO_APDO_TYPE_SHIFT)
+
+#define PDO_PPS_APDO_MAX_VOLT_SHIFT	17	/* 100mV units */
+#define PDO_PPS_APDO_MIN_VOLT_SHIFT	8	/* 100mV units */
+#define PDO_PPS_APDO_MAX_CURR_SHIFT	0	/* 50mA units */
+
+#define PDO_PPS_APDO_VOLT_MASK	0xff
+#define PDO_PPS_APDO_CURR_MASK	0x7f
+
+#define PDO_PPS_APDO_MIN_VOLT(mv)	\
+	((((mv) / 100) & PDO_PPS_APDO_VOLT_MASK) << PDO_PPS_APDO_MIN_VOLT_SHIFT)
+#define PDO_PPS_APDO_MAX_VOLT(mv)	\
+	((((mv) / 100) & PDO_PPS_APDO_VOLT_MASK) << PDO_PPS_APDO_MAX_VOLT_SHIFT)
+#define PDO_PPS_APDO_MAX_CURR(ma)	\
+	((((ma) / 50) & PDO_PPS_APDO_CURR_MASK) << PDO_PPS_APDO_MAX_CURR_SHIFT)
+
+#define PDO_PPS_APDO(min_mv, max_mv, max_ma)					\
+	(PDO_TYPE(PDO_TYPE_APDO) | PDO_APDO_TYPE(APDO_TYPE_PPS) |		\
+	 PDO_PPS_APDO_MIN_VOLT(min_mv) | PDO_PPS_APDO_MAX_VOLT(max_mv) |	\
+	 PDO_PPS_APDO_MAX_CURR(max_ma))
+
  #endif /* __DT_POWER_DELIVERY_H */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b8bcbdeb2eac..b622d6608605 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -569,20 +569,11 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
 static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp,
 					     int ancestor_level)
 {
-	struct cgroup *ptr;
-
 	if (cgrp->level < ancestor_level)
 		return NULL;
-
-	for (ptr = cgrp;
-	     ptr && ptr->level > ancestor_level;
-	     ptr = cgroup_parent(ptr))
-		;
-
-	if (ptr && ptr->level == ancestor_level)
-		return ptr;
-
-	return NULL;
+	while (cgrp && cgrp->level > ancestor_level)
+		cgrp = cgroup_parent(cgrp);
+	return cgrp;
 }
 
 /**
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 6e6b86f9046d..b21db536fd52 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -262,9 +262,6 @@ extern int clocksource_i8253_init(void);
 #define TIMER_OF_DECLARE(name, compat, fn) \
 	OF_DECLARE_1_RET(timer, name, compat, fn)
 
-#define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \
-	TIMER_OF_DECLARE(name, compat, fn)
-
 #ifdef CONFIG_TIMER_PROBE
 extern void timer_probe(void);
 #else
diff --git a/include/linux/compat.h b/include/linux/compat.h
index e75b926bc5df..d30e4dbd4be2 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -7,7 +7,7 @@
  */
 
 #include <linux/types.h>
-#include <linux/compat_time.h>
+#include <linux/time.h>
 
 #include <linux/stat.h>
 #include <linux/param.h>	/* for HZ */
@@ -113,19 +113,12 @@ typedef struct compat_sigaltstack {
 typedef __compat_uid32_t	compat_uid_t;
 typedef __compat_gid32_t	compat_gid_t;
 
-typedef	compat_ulong_t		compat_aio_context_t;
-
 struct compat_sel_arg_struct;
 struct rusage;
 
-struct compat_utimbuf {
-	compat_time_t		actime;
-	compat_time_t		modtime;
-};
-
 struct compat_itimerval {
-	struct compat_timeval	it_interval;
-	struct compat_timeval	it_value;
+	struct old_timeval32	it_interval;
+	struct old_timeval32	it_value;
 };
 
 struct itimerval;
@@ -149,7 +142,7 @@ struct compat_timex {
 	compat_long_t constant;
 	compat_long_t precision;
 	compat_long_t tolerance;
-	struct compat_timeval time;
+	struct old_timeval32 time;
 	compat_long_t tick;
 	compat_long_t ppsfreq;
 	compat_long_t jitter;
@@ -310,8 +303,8 @@ struct compat_rlimit {
 };
 
 struct compat_rusage {
-	struct compat_timeval ru_utime;
-	struct compat_timeval ru_stime;
+	struct old_timeval32 ru_utime;
+	struct old_timeval32 ru_stime;
 	compat_long_t	ru_maxrss;
 	compat_long_t	ru_ixrss;
 	compat_long_t	ru_idrss;
@@ -460,8 +453,8 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginf
 int get_compat_sigevent(struct sigevent *event,
 		const struct compat_sigevent __user *u_event);
 
-static inline int compat_timeval_compare(struct compat_timeval *lhs,
-					struct compat_timeval *rhs)
+static inline int old_timeval32_compare(struct old_timeval32 *lhs,
+					struct old_timeval32 *rhs)
 {
 	if (lhs->tv_sec < rhs->tv_sec)
 		return -1;
@@ -470,8 +463,8 @@ static inline int compat_timeval_compare(struct compat_timeval *lhs,
 	return lhs->tv_usec - rhs->tv_usec;
 }
 
-static inline int compat_timespec_compare(struct compat_timespec *lhs,
-					struct compat_timespec *rhs)
+static inline int old_timespec32_compare(struct old_timespec32 *lhs,
+					struct old_timespec32 *rhs)
 {
 	if (lhs->tv_sec < rhs->tv_sec)
 		return -1;
@@ -555,12 +548,12 @@ asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id,
 					compat_long_t min_nr,
 					compat_long_t nr,
 					struct io_event __user *events,
-					struct compat_timespec __user *timeout);
+					struct old_timespec32 __user *timeout);
 asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id,
 					compat_long_t min_nr,
 					compat_long_t nr,
 					struct io_event __user *events,
-					struct compat_timespec __user *timeout,
+					struct old_timespec32 __user *timeout,
 					const struct __compat_aio_sigset __user *usig);
 
 /* fs/cookies.c */
@@ -645,11 +638,11 @@ asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd,
 asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp,
 				    compat_ulong_t __user *outp,
 				    compat_ulong_t __user *exp,
-				    struct compat_timespec __user *tsp,
+				    struct old_timespec32 __user *tsp,
 				    void __user *sig);
 asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
 				 unsigned int nfds,
-				 struct compat_timespec __user *tsp,
+				 struct old_timespec32 __user *tsp,
 				 const compat_sigset_t __user *sigmask,
 				 compat_size_t sigsetsize);
 
@@ -674,15 +667,15 @@ asmlinkage long compat_sys_newfstat(unsigned int fd,
 
 /* fs/timerfd.c */
 asmlinkage long compat_sys_timerfd_gettime(int ufd,
-				   struct compat_itimerspec __user *otmr);
+				   struct old_itimerspec32 __user *otmr);
 asmlinkage long compat_sys_timerfd_settime(int ufd, int flags,
-				   const struct compat_itimerspec __user *utmr,
-				   struct compat_itimerspec __user *otmr);
+				   const struct old_itimerspec32 __user *utmr,
+				   struct old_itimerspec32 __user *otmr);
 
 /* fs/utimes.c */
 asmlinkage long compat_sys_utimensat(unsigned int dfd,
 				     const char __user *filename,
-				     struct compat_timespec __user *t,
+				     struct old_timespec32 __user *t,
 				     int flags);
 
 /* kernel/exit.c */
@@ -694,7 +687,7 @@ asmlinkage long compat_sys_waitid(int, compat_pid_t,
 
 /* kernel/futex.c */
 asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
-		struct compat_timespec __user *utime, u32 __user *uaddr2,
+		struct old_timespec32 __user *utime, u32 __user *uaddr2,
 		u32 val3);
 asmlinkage long
 compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
@@ -704,8 +697,8 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 			   compat_size_t __user *len_ptr);
 
 /* kernel/hrtimer.c */
-asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp,
-				     struct compat_timespec __user *rmtp);
+asmlinkage long compat_sys_nanosleep(struct old_timespec32 __user *rqtp,
+				     struct old_timespec32 __user *rmtp);
 
 /* kernel/itimer.c */
 asmlinkage long compat_sys_getitimer(int which,
@@ -725,19 +718,19 @@ asmlinkage long compat_sys_timer_create(clockid_t which_clock,
 			struct compat_sigevent __user *timer_event_spec,
 			timer_t __user *created_timer_id);
 asmlinkage long compat_sys_timer_gettime(timer_t timer_id,
-				 struct compat_itimerspec __user *setting);
+				 struct old_itimerspec32 __user *setting);
 asmlinkage long compat_sys_timer_settime(timer_t timer_id, int flags,
-					 struct compat_itimerspec __user *new,
-					 struct compat_itimerspec __user *old);
+					 struct old_itimerspec32 __user *new,
+					 struct old_itimerspec32 __user *old);
 asmlinkage long compat_sys_clock_settime(clockid_t which_clock,
-					 struct compat_timespec __user *tp);
+					 struct old_timespec32 __user *tp);
 asmlinkage long compat_sys_clock_gettime(clockid_t which_clock,
-					 struct compat_timespec __user *tp);
+					 struct old_timespec32 __user *tp);
 asmlinkage long compat_sys_clock_getres(clockid_t which_clock,
-					struct compat_timespec __user *tp);
+					struct old_timespec32 __user *tp);
 asmlinkage long compat_sys_clock_nanosleep(clockid_t which_clock, int flags,
-					   struct compat_timespec __user *rqtp,
-					   struct compat_timespec __user *rmtp);
+					   struct old_timespec32 __user *rqtp,
+					   struct old_timespec32 __user *rmtp);
 
 /* kernel/ptrace.c */
 asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
@@ -751,7 +744,7 @@ asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid,
 				     unsigned int len,
 				     compat_ulong_t __user *user_mask_ptr);
 asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
-						 struct compat_timespec __user *interval);
+						 struct old_timespec32 __user *interval);
 
 /* kernel/signal.c */
 asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
@@ -771,7 +764,7 @@ asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset,
 					 compat_size_t sigsetsize);
 asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese,
 		struct compat_siginfo __user *uinfo,
-		struct compat_timespec __user *uts, compat_size_t sigsetsize);
+		struct old_timespec32 __user *uts, compat_size_t sigsetsize);
 asmlinkage long compat_sys_rt_sigqueueinfo(compat_pid_t pid, int sig,
 				struct compat_siginfo __user *uinfo);
 /* No generic prototype for rt_sigreturn */
@@ -785,9 +778,9 @@ asmlinkage long compat_sys_setrlimit(unsigned int resource,
 asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru);
 
 /* kernel/time.c */
-asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
+asmlinkage long compat_sys_gettimeofday(struct old_timeval32 __user *tv,
 		struct timezone __user *tz);
-asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
+asmlinkage long compat_sys_settimeofday(struct old_timeval32 __user *tv,
 		struct timezone __user *tz);
 asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp);
 
@@ -801,11 +794,11 @@ asmlinkage long compat_sys_mq_open(const char __user *u_name,
 asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes,
 			const char __user *u_msg_ptr,
 			compat_size_t msg_len, unsigned int msg_prio,
-			const struct compat_timespec __user *u_abs_timeout);
+			const struct old_timespec32 __user *u_abs_timeout);
 asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes,
 			char __user *u_msg_ptr,
 			compat_size_t msg_len, unsigned int __user *u_msg_prio,
-			const struct compat_timespec __user *u_abs_timeout);
+			const struct old_timespec32 __user *u_abs_timeout);
 asmlinkage long compat_sys_mq_notify(mqd_t mqdes,
 			const struct compat_sigevent __user *u_notification);
 asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes,
@@ -822,7 +815,7 @@ asmlinkage long compat_sys_msgsnd(int msqid, compat_uptr_t msgp,
 /* ipc/sem.c */
 asmlinkage long compat_sys_semctl(int semid, int semnum, int cmd, int arg);
 asmlinkage long compat_sys_semtimedop(int semid, struct sembuf __user *tsems,
-		unsigned nsems, const struct compat_timespec __user *timeout);
+		unsigned nsems, const struct old_timespec32 __user *timeout);
 
 /* ipc/shm.c */
 asmlinkage long compat_sys_shmctl(int first, int second, void __user *uptr);
@@ -879,7 +872,7 @@ asmlinkage long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid,
 					struct compat_siginfo __user *uinfo);
 asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
 				    unsigned vlen, unsigned int flags,
-				    struct compat_timespec __user *timeout);
+				    struct old_timespec32 __user *timeout);
 asmlinkage long compat_sys_wait4(compat_pid_t pid,
 				 compat_uint_t __user *stat_addr, int options,
 				 struct compat_rusage __user *ru);
@@ -931,7 +924,7 @@ asmlinkage long compat_sys_pwritev64v2(unsigned long fd,
 asmlinkage long compat_sys_open(const char __user *filename, int flags,
 				umode_t mode);
 asmlinkage long compat_sys_utimes(const char __user *filename,
-				  struct compat_timeval __user *t);
+				  struct old_timeval32 __user *t);
 
 /* __ARCH_WANT_SYSCALL_NO_FLAGS */
 asmlinkage long compat_sys_signalfd(int ufd,
@@ -945,15 +938,15 @@ asmlinkage long compat_sys_newlstat(const char __user *filename,
 				    struct compat_stat __user *statbuf);
 
 /* __ARCH_WANT_SYSCALL_DEPRECATED */
-asmlinkage long compat_sys_time(compat_time_t __user *tloc);
+asmlinkage long compat_sys_time(old_time32_t __user *tloc);
 asmlinkage long compat_sys_utime(const char __user *filename,
-				 struct compat_utimbuf __user *t);
+				 struct old_utimbuf32 __user *t);
 asmlinkage long compat_sys_futimesat(unsigned int dfd,
 				     const char __user *filename,
-				     struct compat_timeval __user *t);
+				     struct old_timeval32 __user *t);
 asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 		compat_ulong_t __user *outp, compat_ulong_t __user *exp,
-		struct compat_timeval __user *tvp);
+		struct old_timeval32 __user *tvp);
 asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32);
 asmlinkage long compat_sys_recv(int fd, void __user *buf, compat_size_t len,
 				unsigned flags);
@@ -986,7 +979,7 @@ asmlinkage long compat_sys_sigaction(int sig,
 #endif
 
 /* obsolete: kernel/time/time.c */
-asmlinkage long compat_sys_stime(compat_time_t __user *tptr);
+asmlinkage long compat_sys_stime(old_time32_t __user *tptr);
 
 /* obsolete: net/socket.c */
 asmlinkage long compat_sys_socketcall(int call, u32 __user *args);
@@ -1005,15 +998,15 @@ static inline bool in_compat_syscall(void) { return is_compat_task(); }
 #endif
 
 /**
- * ns_to_compat_timeval - Compat version of ns_to_timeval
+ * ns_to_old_timeval32 - Compat version of ns_to_timeval
  * @nsec:	the nanoseconds value to be converted
  *
- * Returns the compat_timeval representation of the nsec parameter.
+ * Returns the old_timeval32 representation of the nsec parameter.
  */
-static inline struct compat_timeval ns_to_compat_timeval(s64 nsec)
+static inline struct old_timeval32 ns_to_old_timeval32(s64 nsec)
 {
 	struct timeval tv;
-	struct compat_timeval ctv;
+	struct old_timeval32 ctv;
 
 	tv = ns_to_timeval(nsec);
 	ctv.tv_sec = tv.tv_sec;
diff --git a/include/linux/compat_time.h b/include/linux/compat_time.h
deleted file mode 100644
index e70bfd1d2c3f..000000000000
--- a/include/linux/compat_time.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_COMPAT_TIME_H
-#define _LINUX_COMPAT_TIME_H
-
-#include <linux/types.h>
-#include <linux/time64.h>
-
-typedef s32		compat_time_t;
-
-struct compat_timespec {
-	compat_time_t	tv_sec;
-	s32		tv_nsec;
-};
-
-struct compat_timeval {
-	compat_time_t	tv_sec;
-	s32		tv_usec;
-};
-
-struct compat_itimerspec {
-	struct compat_timespec it_interval;
-	struct compat_timespec it_value;
-};
-
-extern int compat_get_timespec64(struct timespec64 *, const void __user *);
-extern int compat_put_timespec64(const struct timespec64 *, void __user *);
-extern int get_compat_itimerspec64(struct itimerspec64 *its,
-			const struct compat_itimerspec __user *uits);
-extern int put_compat_itimerspec64(const struct itimerspec64 *its,
-			struct compat_itimerspec __user *uits);
-
-#endif /* _LINUX_COMPAT_TIME_H */
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index db192becfec4..97cfe29b3f0a 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -198,7 +198,6 @@ struct ftrace_likely_data {
  */
 #define __pure			__attribute__((pure))
 #define __aligned(x)		__attribute__((aligned(x)))
-#define __aligned_largest	__attribute__((aligned))
 #define __printf(a, b)		__attribute__((format(printf, a, b)))
 #define __scanf(a, b)		__attribute__((format(scanf, a, b)))
 #define __maybe_unused		__attribute__((unused))
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index d828a6efe0b1..46c67a764877 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -94,20 +94,15 @@ union coresight_dev_subtype {
  * @cpu:	the CPU a source belongs to. Only applicable for ETM/PTMs.
  * @name:	name of the component as shown under sysfs.
  * @nr_inport:	number of input ports for this component.
- * @outports:	list of remote endpoint port number.
- * @child_names:name of all child components connected to this device.
- * @child_ports:child component port number the current component is
-		connected  to.
  * @nr_outport:	number of output ports for this component.
+ * @conns:	Array of nr_outport connections from this component
  */
 struct coresight_platform_data {
 	int cpu;
 	const char *name;
 	int nr_inport;
-	int *outports;
-	const char **child_names;
-	int *child_ports;
 	int nr_outport;
+	struct coresight_connection *conns;
 };
 
 /**
@@ -190,23 +185,15 @@ struct coresight_device {
  * @disable:		disables the sink.
  * @alloc_buffer:	initialises perf's ring buffer for trace collection.
  * @free_buffer:	release memory allocated in @get_config.
- * @set_buffer:		initialises buffer mechanic before a trace session.
- * @reset_buffer:	finalises buffer mechanic after a trace session.
  * @update_buffer:	update buffer pointers after a trace session.
  */
 struct coresight_ops_sink {
-	int (*enable)(struct coresight_device *csdev, u32 mode);
+	int (*enable)(struct coresight_device *csdev, u32 mode, void *data);
 	void (*disable)(struct coresight_device *csdev);
 	void *(*alloc_buffer)(struct coresight_device *csdev, int cpu,
 			      void **pages, int nr_pages, bool overwrite);
 	void (*free_buffer)(void *config);
-	int (*set_buffer)(struct coresight_device *csdev,
-			  struct perf_output_handle *handle,
-			  void *sink_config);
-	unsigned long (*reset_buffer)(struct coresight_device *csdev,
-				      struct perf_output_handle *handle,
-				      void *sink_config);
-	void (*update_buffer)(struct coresight_device *csdev,
+	unsigned long (*update_buffer)(struct coresight_device *csdev,
 			      struct perf_output_handle *handle,
 			      void *sink_config);
 };
@@ -270,6 +257,13 @@ extern int coresight_enable(struct coresight_device *csdev);
 extern void coresight_disable(struct coresight_device *csdev);
 extern int coresight_timeout(void __iomem *addr, u32 offset,
 			     int position, int value);
+
+extern int coresight_claim_device(void __iomem *base);
+extern int coresight_claim_device_unlocked(void __iomem *base);
+
+extern void coresight_disclaim_device(void __iomem *base);
+extern void coresight_disclaim_device_unlocked(void __iomem *base);
+
 #else
 static inline struct coresight_device *
 coresight_register(struct coresight_desc *desc) { return NULL; }
@@ -279,6 +273,19 @@ coresight_enable(struct coresight_device *csdev) { return -ENOSYS; }
 static inline void coresight_disable(struct coresight_device *csdev) {}
 static inline int coresight_timeout(void __iomem *addr, u32 offset,
 				     int position, int value) { return 1; }
+static inline int coresight_claim_device_unlocked(void __iomem *base)
+{
+	return -EINVAL;
+}
+
+static inline int coresight_claim_device(void __iomem *base)
+{
+	return -EINVAL;
+}
+
+static inline void coresight_disclaim_device(void __iomem *base) {}
+static inline void coresight_disclaim_device_unlocked(void __iomem *base) {}
+
 #endif
 
 #ifdef CONFIG_OF
diff --git a/include/linux/cpufeature.h b/include/linux/cpufeature.h
index 986c06c88d81..84d3c81b5978 100644
--- a/include/linux/cpufeature.h
+++ b/include/linux/cpufeature.h
@@ -45,7 +45,7 @@
  * 'asm/cpufeature.h' of your favorite architecture.
  */
 #define module_cpu_feature_match(x, __initfunc)			\
-static struct cpu_feature const cpu_feature_match_ ## x[] =	\
+static struct cpu_feature const __maybe_unused cpu_feature_match_ ## x[] = \
 	{ { .feature = cpu_feature(x) }, { } };			\
 MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x);		\
 								\
diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h
index 1fe0cfcdea30..6bb0c0bf357b 100644
--- a/include/linux/crc-t10dif.h
+++ b/include/linux/crc-t10dif.h
@@ -6,6 +6,7 @@
 
 #define CRC_T10DIF_DIGEST_SIZE 2
 #define CRC_T10DIF_BLOCK_SIZE 1
+#define CRC_T10DIF_STRING "crct10dif"
 
 extern __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer,
 				size_t len);
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index e8839d3a7559..3634ad6fe202 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -454,6 +454,33 @@ struct compress_alg {
  * @cra_refcnt: internally used
  * @cra_destroy: internally used
  *
+ * All following statistics are for this crypto_alg
+ * @encrypt_cnt:	number of encrypt requests
+ * @decrypt_cnt:	number of decrypt requests
+ * @compress_cnt:	number of compress requests
+ * @decompress_cnt:	number of decompress requests
+ * @generate_cnt:	number of RNG generate requests
+ * @seed_cnt:		number of times the rng was seeded
+ * @hash_cnt:		number of hash requests
+ * @sign_cnt:		number of sign requests
+ * @setsecret_cnt:	number of setsecrey operation
+ * @generate_public_key_cnt:	number of generate_public_key operation
+ * @verify_cnt:			number of verify operation
+ * @compute_shared_secret_cnt:	number of compute_shared_secret operation
+ * @encrypt_tlen:	total data size handled by encrypt requests
+ * @decrypt_tlen:	total data size handled by decrypt requests
+ * @compress_tlen:	total data size handled by compress requests
+ * @decompress_tlen:	total data size handled by decompress requests
+ * @generate_tlen:	total data size of generated data by the RNG
+ * @hash_tlen:		total data size hashed
+ * @akcipher_err_cnt:	number of error for akcipher requests
+ * @cipher_err_cnt:	number of error for akcipher requests
+ * @compress_err_cnt:	number of error for akcipher requests
+ * @aead_err_cnt:	number of error for akcipher requests
+ * @hash_err_cnt:	number of error for akcipher requests
+ * @rng_err_cnt:	number of error for akcipher requests
+ * @kpp_err_cnt:	number of error for akcipher requests
+ *
  * The struct crypto_alg describes a generic Crypto API algorithm and is common
  * for all of the transformations. Any variable not documented here shall not
  * be used by a cipher implementation as it is internal to the Crypto API.
@@ -487,6 +514,45 @@ struct crypto_alg {
 	void (*cra_destroy)(struct crypto_alg *alg);
 	
 	struct module *cra_module;
+
+	union {
+		atomic_t encrypt_cnt;
+		atomic_t compress_cnt;
+		atomic_t generate_cnt;
+		atomic_t hash_cnt;
+		atomic_t setsecret_cnt;
+	};
+	union {
+		atomic64_t encrypt_tlen;
+		atomic64_t compress_tlen;
+		atomic64_t generate_tlen;
+		atomic64_t hash_tlen;
+	};
+	union {
+		atomic_t akcipher_err_cnt;
+		atomic_t cipher_err_cnt;
+		atomic_t compress_err_cnt;
+		atomic_t aead_err_cnt;
+		atomic_t hash_err_cnt;
+		atomic_t rng_err_cnt;
+		atomic_t kpp_err_cnt;
+	};
+	union {
+		atomic_t decrypt_cnt;
+		atomic_t decompress_cnt;
+		atomic_t seed_cnt;
+		atomic_t generate_public_key_cnt;
+	};
+	union {
+		atomic64_t decrypt_tlen;
+		atomic64_t decompress_tlen;
+	};
+	union {
+		atomic_t verify_cnt;
+		atomic_t compute_shared_secret_cnt;
+	};
+	atomic_t sign_cnt;
+
 } CRYPTO_MINALIGN_ATTR;
 
 /*
@@ -907,6 +973,38 @@ static inline struct crypto_ablkcipher *crypto_ablkcipher_reqtfm(
 	return __crypto_ablkcipher_cast(req->base.tfm);
 }
 
+static inline void crypto_stat_ablkcipher_encrypt(struct ablkcipher_request *req,
+						  int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct ablkcipher_tfm *crt =
+		crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req));
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&crt->base->base.__crt_alg->cipher_err_cnt);
+	} else {
+		atomic_inc(&crt->base->base.__crt_alg->encrypt_cnt);
+		atomic64_add(req->nbytes, &crt->base->base.__crt_alg->encrypt_tlen);
+	}
+#endif
+}
+
+static inline void crypto_stat_ablkcipher_decrypt(struct ablkcipher_request *req,
+						  int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct ablkcipher_tfm *crt =
+		crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req));
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&crt->base->base.__crt_alg->cipher_err_cnt);
+	} else {
+		atomic_inc(&crt->base->base.__crt_alg->decrypt_cnt);
+		atomic64_add(req->nbytes, &crt->base->base.__crt_alg->decrypt_tlen);
+	}
+#endif
+}
+
 /**
  * crypto_ablkcipher_encrypt() - encrypt plaintext
  * @req: reference to the ablkcipher_request handle that holds all information
@@ -922,7 +1020,11 @@ static inline int crypto_ablkcipher_encrypt(struct ablkcipher_request *req)
 {
 	struct ablkcipher_tfm *crt =
 		crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req));
-	return crt->encrypt(req);
+	int ret;
+
+	ret = crt->encrypt(req);
+	crypto_stat_ablkcipher_encrypt(req, ret);
+	return ret;
 }
 
 /**
@@ -940,7 +1042,11 @@ static inline int crypto_ablkcipher_decrypt(struct ablkcipher_request *req)
 {
 	struct ablkcipher_tfm *crt =
 		crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req));
-	return crt->decrypt(req);
+	int ret;
+
+	ret = crt->decrypt(req);
+	crypto_stat_ablkcipher_decrypt(req, ret);
+	return ret;
 }
 
 /**
diff --git a/include/linux/device.h b/include/linux/device.h
index 983506789402..1b25c7a43f4c 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -55,6 +55,8 @@ struct bus_attribute {
 	struct bus_attribute bus_attr_##_name = __ATTR_RW(_name)
 #define BUS_ATTR_RO(_name) \
 	struct bus_attribute bus_attr_##_name = __ATTR_RO(_name)
+#define BUS_ATTR_WO(_name) \
+	struct bus_attribute bus_attr_##_name = __ATTR_WO(_name)
 
 extern int __must_check bus_create_file(struct bus_type *,
 					struct bus_attribute *);
@@ -692,8 +694,10 @@ static inline void *devm_kcalloc(struct device *dev,
 {
 	return devm_kmalloc_array(dev, n, size, flags | __GFP_ZERO);
 }
-extern void devm_kfree(struct device *dev, void *p);
+extern void devm_kfree(struct device *dev, const void *p);
 extern char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) __malloc;
+extern const char *devm_kstrdup_const(struct device *dev,
+				      const char *s, gfp_t gfp);
 extern void *devm_kmemdup(struct device *dev, const void *src, size_t len,
 			  gfp_t gfp);
 
@@ -774,6 +778,30 @@ void device_connection_add(struct device_connection *con);
 void device_connection_remove(struct device_connection *con);
 
 /**
+ * device_connections_add - Add multiple device connections at once
+ * @cons: Zero terminated array of device connection descriptors
+ */
+static inline void device_connections_add(struct device_connection *cons)
+{
+	struct device_connection *c;
+
+	for (c = cons; c->endpoint[0]; c++)
+		device_connection_add(c);
+}
+
+/**
+ * device_connections_remove - Remove multiple device connections at once
+ * @cons: Zero terminated array of device connection descriptors
+ */
+static inline void device_connections_remove(struct device_connection *cons)
+{
+	struct device_connection *c;
+
+	for (c = cons; c->endpoint[0]; c++)
+		device_connection_remove(c);
+}
+
+/**
  * enum device_link_state - Device link states.
  * @DL_STATE_NONE: The presence of the drivers is not being tracked.
  * @DL_STATE_DORMANT: None of the supplier/consumer drivers is present.
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index fbca184ff5a0..bd73e7a91410 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -5,6 +5,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/mem_encrypt.h>
 
+#define DIRECT_MAPPING_ERROR		0
+
 #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
 #include <asm/dma-direct.h>
 #else
diff --git a/include/linux/elfcore-compat.h b/include/linux/elfcore-compat.h
index b5f2efdd05e0..7a37f4ce9fd2 100644
--- a/include/linux/elfcore-compat.h
+++ b/include/linux/elfcore-compat.h
@@ -27,10 +27,10 @@ struct compat_elf_prstatus
 	compat_pid_t			pr_ppid;
 	compat_pid_t			pr_pgrp;
 	compat_pid_t			pr_sid;
-	struct compat_timeval		pr_utime;
-	struct compat_timeval		pr_stime;
-	struct compat_timeval		pr_cutime;
-	struct compat_timeval		pr_cstime;
+	struct old_timeval32		pr_utime;
+	struct old_timeval32		pr_stime;
+	struct old_timeval32		pr_cutime;
+	struct old_timeval32		pr_cstime;
 	compat_elf_gregset_t		pr_reg;
 #ifdef CONFIG_BINFMT_ELF_FDPIC
 	compat_ulong_t			pr_exec_fdpic_loadmap;
diff --git a/include/linux/fpga/fpga-bridge.h b/include/linux/fpga/fpga-bridge.h
index ce550fcf6360..817600a32c93 100644
--- a/include/linux/fpga/fpga-bridge.h
+++ b/include/linux/fpga/fpga-bridge.h
@@ -69,4 +69,8 @@ void fpga_bridge_free(struct fpga_bridge *br);
 int fpga_bridge_register(struct fpga_bridge *br);
 void fpga_bridge_unregister(struct fpga_bridge *br);
 
+struct fpga_bridge
+*devm_fpga_bridge_create(struct device *dev, const char *name,
+			 const struct fpga_bridge_ops *br_ops, void *priv);
+
 #endif /* _LINUX_FPGA_BRIDGE_H */
diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index 8ab5df769923..e8ca62b2cb5b 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -198,4 +198,8 @@ void fpga_mgr_free(struct fpga_manager *mgr);
 int fpga_mgr_register(struct fpga_manager *mgr);
 void fpga_mgr_unregister(struct fpga_manager *mgr);
 
+struct fpga_manager *devm_fpga_mgr_create(struct device *dev, const char *name,
+					  const struct fpga_manager_ops *mops,
+					  void *priv);
+
 #endif /*_LINUX_FPGA_MGR_H */
diff --git a/include/linux/fpga/fpga-region.h b/include/linux/fpga/fpga-region.h
index 0521b7f577a4..27cb706275db 100644
--- a/include/linux/fpga/fpga-region.h
+++ b/include/linux/fpga/fpga-region.h
@@ -44,4 +44,8 @@ void fpga_region_free(struct fpga_region *region);
 int fpga_region_register(struct fpga_region *region);
 void fpga_region_unregister(struct fpga_region *region);
 
+struct fpga_region
+*devm_fpga_region_create(struct device *dev, struct fpga_manager *mgr,
+			int (*get_bridges)(struct fpga_region *));
+
 #endif /* _FPGA_REGION_H */
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index f27cb14088a4..9d3f668df7df 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -351,6 +351,14 @@ int mc_send_command(struct fsl_mc_io *mc_io, struct fsl_mc_command *cmd);
 #define dev_is_fsl_mc(_dev) (0)
 #endif
 
+/* Macro to check if a device is a container device */
+#define fsl_mc_is_cont_dev(_dev) (to_fsl_mc_device(_dev)->flags & \
+	FSL_MC_IS_DPRC)
+
+/* Macro to get the container device of a MC device */
+#define fsl_mc_cont_dev(_dev) (fsl_mc_is_cont_dev(_dev) ? \
+	(_dev) : (_dev)->parent)
+
 /*
  * module_fsl_mc_driver() - Helper macro for drivers that don't do
  * anything special in module init/exit.  This eliminates a lot of
@@ -405,6 +413,7 @@ extern struct device_type fsl_mc_bus_dpcon_type;
 extern struct device_type fsl_mc_bus_dpmcp_type;
 extern struct device_type fsl_mc_bus_dpmac_type;
 extern struct device_type fsl_mc_bus_dprtc_type;
+extern struct device_type fsl_mc_bus_dpseci_type;
 
 static inline bool is_fsl_mc_bus_dprc(const struct fsl_mc_device *mc_dev)
 {
@@ -451,6 +460,11 @@ static inline bool is_fsl_mc_bus_dprtc(const struct fsl_mc_device *mc_dev)
 	return mc_dev->dev.type == &fsl_mc_bus_dprtc_type;
 }
 
+static inline bool is_fsl_mc_bus_dpseci(const struct fsl_mc_device *mc_dev)
+{
+	return mc_dev->dev.type == &fsl_mc_bus_dpseci_type;
+}
+
 /*
  * Data Path Buffer Pool (DPBP) API
  * Contains initialization APIs and runtime control APIs for DPBP
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index bee0827766a3..c0b93e0ff0c0 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -33,7 +33,8 @@
  *			and max is a multiple of 4 and >= 32 bytes.
  * @priv:		Private data, for use by the RNG driver.
  * @quality:		Estimation of true entropy in RNG's bitstream
- *			(per mill).
+ *			(in bits of entropy per 1024 bits of input;
+ *			valid values: 1 to 1024, or 0 for unknown).
  */
 struct hwrng {
 	const char *name;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index efda23cf32c7..b3e24368930a 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -739,8 +739,9 @@ struct vmbus_channel {
 	u32 ringbuffer_gpadlhandle;
 
 	/* Allocated memory for ring buffer */
-	void *ringbuffer_pages;
+	struct page *ringbuffer_page;
 	u32 ringbuffer_pagecount;
+	u32 ringbuffer_send_offset;
 	struct hv_ring_buffer_info outbound;	/* send to parent */
 	struct hv_ring_buffer_info inbound;	/* receive from parent */
 
@@ -1021,6 +1022,14 @@ struct vmbus_packet_mpb_array {
 	struct hv_mpb_array range;
 } __packed;
 
+int vmbus_alloc_ring(struct vmbus_channel *channel,
+		     u32 send_size, u32 recv_size);
+void vmbus_free_ring(struct vmbus_channel *channel);
+
+int vmbus_connect_ring(struct vmbus_channel *channel,
+		       void (*onchannel_callback)(void *context),
+		       void *context);
+int vmbus_disconnect_ring(struct vmbus_channel *channel);
 
 extern int vmbus_open(struct vmbus_channel *channel,
 			    u32 send_ringbuffersize,
@@ -1125,6 +1134,7 @@ struct hv_device {
 	u16 device_id;
 
 	struct device device;
+	char *driver_override; /* Driver name to force a match */
 
 	struct vmbus_channel *channel;
 	struct kset	     *channels_kset;
@@ -1442,7 +1452,7 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
 				const int *srv_version, int srv_vercnt,
 				int *nego_fw_version, int *nego_srv_version);
 
-void hv_process_channel_removal(u32 relid);
+void hv_process_channel_removal(struct vmbus_channel *channel);
 
 void vmbus_setevent(struct vmbus_channel *channel);
 /*
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 28004d74ae04..b0ae25837361 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -72,6 +72,42 @@
 #define	DMAR_PEDATA_REG	0xe4	/* Page request event interrupt data register */
 #define	DMAR_PEADDR_REG	0xe8	/* Page request event interrupt addr register */
 #define	DMAR_PEUADDR_REG 0xec	/* Page request event Upper address register */
+#define DMAR_MTRRCAP_REG 0x100	/* MTRR capability register */
+#define DMAR_MTRRDEF_REG 0x108	/* MTRR default type register */
+#define DMAR_MTRR_FIX64K_00000_REG 0x120 /* MTRR Fixed range registers */
+#define DMAR_MTRR_FIX16K_80000_REG 0x128
+#define DMAR_MTRR_FIX16K_A0000_REG 0x130
+#define DMAR_MTRR_FIX4K_C0000_REG 0x138
+#define DMAR_MTRR_FIX4K_C8000_REG 0x140
+#define DMAR_MTRR_FIX4K_D0000_REG 0x148
+#define DMAR_MTRR_FIX4K_D8000_REG 0x150
+#define DMAR_MTRR_FIX4K_E0000_REG 0x158
+#define DMAR_MTRR_FIX4K_E8000_REG 0x160
+#define DMAR_MTRR_FIX4K_F0000_REG 0x168
+#define DMAR_MTRR_FIX4K_F8000_REG 0x170
+#define DMAR_MTRR_PHYSBASE0_REG 0x180 /* MTRR Variable range registers */
+#define DMAR_MTRR_PHYSMASK0_REG 0x188
+#define DMAR_MTRR_PHYSBASE1_REG 0x190
+#define DMAR_MTRR_PHYSMASK1_REG 0x198
+#define DMAR_MTRR_PHYSBASE2_REG 0x1a0
+#define DMAR_MTRR_PHYSMASK2_REG 0x1a8
+#define DMAR_MTRR_PHYSBASE3_REG 0x1b0
+#define DMAR_MTRR_PHYSMASK3_REG 0x1b8
+#define DMAR_MTRR_PHYSBASE4_REG 0x1c0
+#define DMAR_MTRR_PHYSMASK4_REG 0x1c8
+#define DMAR_MTRR_PHYSBASE5_REG 0x1d0
+#define DMAR_MTRR_PHYSMASK5_REG 0x1d8
+#define DMAR_MTRR_PHYSBASE6_REG 0x1e0
+#define DMAR_MTRR_PHYSMASK6_REG 0x1e8
+#define DMAR_MTRR_PHYSBASE7_REG 0x1f0
+#define DMAR_MTRR_PHYSMASK7_REG 0x1f8
+#define DMAR_MTRR_PHYSBASE8_REG 0x200
+#define DMAR_MTRR_PHYSMASK8_REG 0x208
+#define DMAR_MTRR_PHYSBASE9_REG 0x210
+#define DMAR_MTRR_PHYSMASK9_REG 0x218
+#define DMAR_VCCAP_REG		0xe00 /* Virtual command capability register */
+#define DMAR_VCMD_REG		0xe10 /* Virtual command register */
+#define DMAR_VCRSP_REG		0xe20 /* Virtual command response register */
 
 #define OFFSET_STRIDE		(9)
 
@@ -389,6 +425,33 @@ struct pasid_entry;
 struct pasid_state_entry;
 struct page_req_dsc;
 
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+	u64     lo;
+	u64     hi;
+};
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+	u64 lo;
+	u64 hi;
+};
+
 struct dmar_domain {
 	int	nid;			/* node id */
 
@@ -558,6 +621,15 @@ extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_
 extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
 #endif
 
+#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
+void intel_iommu_debugfs_init(void);
+#else
+static inline void intel_iommu_debugfs_init(void) {}
+#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */
+
 extern const struct attribute_group *intel_iommu_groups[];
+bool context_present(struct context_entry *context);
+struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+					 u8 devfn, int alloc);
 
 #endif
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index eeceac3376fc..1d6711c28271 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -45,7 +45,7 @@
  * IRQF_PERCPU - Interrupt is per cpu
  * IRQF_NOBALANCING - Flag to exclude this interrupt from irq balancing
  * IRQF_IRQPOLL - Interrupt is used for polling (only the interrupt that is
- *                registered first in an shared interrupt is considered for
+ *                registered first in a shared interrupt is considered for
  *                performance reasons)
  * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished.
  *                Used by threaded interrupts which need to keep the
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 87994c265bf5..a1d28f42cb77 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -124,6 +124,7 @@ enum iommu_attr {
 	DOMAIN_ATTR_FSL_PAMU_ENABLE,
 	DOMAIN_ATTR_FSL_PAMUV1,
 	DOMAIN_ATTR_NESTING,	/* two stages of translation */
+	DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
 	DOMAIN_ATTR_MAX,
 };
 
@@ -181,8 +182,6 @@ struct iommu_resv_region {
  * @apply_resv_region: Temporary helper call-back for iova reserved ranges
  * @domain_window_enable: Configure and enable a particular window for a domain
  * @domain_window_disable: Disable a particular window for a domain
- * @domain_set_windows: Set the number of windows for a domain
- * @domain_get_windows: Return the number of windows for a domain
  * @of_xlate: add OF master IDs to iommu grouping
  * @pgsize_bitmap: bitmap of all possible supported page sizes
  */
@@ -223,10 +222,6 @@ struct iommu_ops {
 	int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
 				    phys_addr_t paddr, u64 size, int prot);
 	void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
-	/* Set the number of windows per domain */
-	int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
-	/* Get the number of windows per domain */
-	u32 (*domain_get_windows)(struct iommu_domain *domain);
 
 	int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
 	bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
@@ -293,6 +288,7 @@ extern int iommu_attach_device(struct iommu_domain *domain,
 extern void iommu_detach_device(struct iommu_domain *domain,
 				struct device *dev);
 extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
+extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
 extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
 		     phys_addr_t paddr, size_t size, int prot);
 extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
@@ -377,6 +373,8 @@ static inline void iommu_tlb_sync(struct iommu_domain *domain)
 extern struct iommu_group *pci_device_group(struct device *dev);
 /* Generic device grouping function */
 extern struct iommu_group *generic_device_group(struct device *dev);
+/* FSL-MC device grouping function */
+struct iommu_group *fsl_mc_device_group(struct device *dev);
 
 /**
  * struct iommu_fwspec - per-device IOMMU instance data
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 928442dda565..0b93bf96693e 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -75,6 +75,7 @@ struct iova_domain {
 	unsigned long	granule;	/* pfn granularity for this domain */
 	unsigned long	start_pfn;	/* Lower limit for this domain */
 	unsigned long	dma_32bit_pfn;
+	unsigned long	max32_alloc_size; /* Size of last failed allocation */
 	struct iova	anchor;		/* rbtree lookup anchor */
 	struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE];	/* IOVA range caches */
 
diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h
index 0a83b4379f34..9a1a479a2bf4 100644
--- a/include/linux/irqchip/arm-gic-common.h
+++ b/include/linux/irqchip/arm-gic-common.h
@@ -13,6 +13,12 @@
 #include <linux/types.h>
 #include <linux/ioport.h>
 
+#define GICD_INT_DEF_PRI		0xa0
+#define GICD_INT_DEF_PRI_X4		((GICD_INT_DEF_PRI << 24) |\
+					(GICD_INT_DEF_PRI << 16) |\
+					(GICD_INT_DEF_PRI << 8) |\
+					GICD_INT_DEF_PRI)
+
 enum gic_type {
 	GIC_V2,
 	GIC_V3,
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 8bdbb5f29494..071b4cbdf010 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -357,6 +357,8 @@
 #define GITS_CBASER_RaWaWt	GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt)
 #define GITS_CBASER_RaWaWb	GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb)
 
+#define GITS_CBASER_ADDRESS(cbaser)	((cbaser) & GENMASK_ULL(51, 12))
+
 #define GITS_BASER_NR_REGS		8
 
 #define GITS_BASER_VALID			(1ULL << 63)
@@ -388,6 +390,9 @@
 #define GITS_BASER_ENTRY_SIZE_MASK	GENMASK_ULL(52, 48)
 #define GITS_BASER_PHYS_52_to_48(phys)					\
 	(((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12)
+#define GITS_BASER_ADDR_48_to_52(baser)					\
+	(((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48)
+
 #define GITS_BASER_SHAREABILITY_SHIFT	(10)
 #define GITS_BASER_InnerShareable					\
 	GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
@@ -585,8 +590,10 @@ struct rdists {
 		void __iomem	*rd_base;
 		struct page	*pend_page;
 		phys_addr_t	phys_base;
+		bool		lpi_enabled;
 	} __percpu		*rdist;
-	struct page		*prop_page;
+	phys_addr_t		prop_table_pa;
+	void			*prop_table_va;
 	u64			flags;
 	u32			gicd_typer;
 	bool			has_vlpis;
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 6c4aaf04046c..626179077bb0 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -65,11 +65,6 @@
 #define GICD_INT_EN_CLR_X32		0xffffffff
 #define GICD_INT_EN_SET_SGI		0x0000ffff
 #define GICD_INT_EN_CLR_PPI		0xffff0000
-#define GICD_INT_DEF_PRI		0xa0
-#define GICD_INT_DEF_PRI_X4		((GICD_INT_DEF_PRI << 24) |\
-					(GICD_INT_DEF_PRI << 16) |\
-					(GICD_INT_DEF_PRI << 8) |\
-					GICD_INT_DEF_PRI)
 
 #define GICD_IIDR_IMPLEMENTER_SHIFT	0
 #define GICD_IIDR_IMPLEMENTER_MASK	(0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index dccfa65aee96..068aa46f0d55 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -75,6 +75,7 @@ struct irq_fwspec {
 enum irq_domain_bus_token {
 	DOMAIN_BUS_ANY		= 0,
 	DOMAIN_BUS_WIRED,
+	DOMAIN_BUS_GENERIC_MSI,
 	DOMAIN_BUS_PCI_MSI,
 	DOMAIN_BUS_PLATFORM_MSI,
 	DOMAIN_BUS_NEXUS,
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 814643f7ee52..5b36b1287a5a 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -477,10 +477,11 @@ static inline void kernfs_init(void) { }
  * @buf: buffer to copy @kn's name into
  * @buflen: size of @buf
  *
- * Builds and returns the full path of @kn in @buf of @buflen bytes.  The
- * path is built from the end of @buf so the returned pointer usually
- * doesn't match @buf.  If @buf isn't long enough, @buf is nul terminated
- * and %NULL is returned.
+ * If @kn is NULL result will be "(null)".
+ *
+ * Returns the length of the full path.  If the full length is equal to or
+ * greater than @buflen, @buf contains the truncated path with the trailing
+ * '\0'.  On error, -errno is returned.
  */
 static inline int kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
 {
diff --git a/include/linux/libfdt_env.h b/include/linux/libfdt_env.h
index c6ac1fe7ec68..edb0f0c30904 100644
--- a/include/linux/libfdt_env.h
+++ b/include/linux/libfdt_env.h
@@ -2,6 +2,7 @@
 #ifndef LIBFDT_ENV_H
 #define LIBFDT_ENV_H
 
+#include <linux/kernel.h>	/* For INT_MAX */
 #include <linux/string.h>
 
 #include <asm/byteorder.h>
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 31460eeb6fe0..aa5963b5d38e 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -97,14 +97,15 @@ enum {
 };
 
 enum {
-	MLX5_ATOMIC_MODE_IB_COMP	= 1 << 16,
-	MLX5_ATOMIC_MODE_CX		= 2 << 16,
-	MLX5_ATOMIC_MODE_8B		= 3 << 16,
-	MLX5_ATOMIC_MODE_16B		= 4 << 16,
-	MLX5_ATOMIC_MODE_32B		= 5 << 16,
-	MLX5_ATOMIC_MODE_64B		= 6 << 16,
-	MLX5_ATOMIC_MODE_128B		= 7 << 16,
-	MLX5_ATOMIC_MODE_256B		= 8 << 16,
+	MLX5_ATOMIC_MODE_OFFSET = 16,
+	MLX5_ATOMIC_MODE_IB_COMP = 1,
+	MLX5_ATOMIC_MODE_CX = 2,
+	MLX5_ATOMIC_MODE_8B = 3,
+	MLX5_ATOMIC_MODE_16B = 4,
+	MLX5_ATOMIC_MODE_32B = 5,
+	MLX5_ATOMIC_MODE_64B = 6,
+	MLX5_ATOMIC_MODE_128B = 7,
+	MLX5_ATOMIC_MODE_256B = 8,
 };
 
 enum {
@@ -163,13 +164,11 @@ enum mlx5_dcbx_oper_mode {
 	MLX5E_DCBX_PARAM_VER_OPER_AUTO  = 0x3,
 };
 
-enum mlx5_dct_atomic_mode {
-	MLX5_ATOMIC_MODE_DCT_CX         = 2,
-};
-
 enum {
 	MLX5_ATOMIC_OPS_CMP_SWAP	= 1 << 0,
 	MLX5_ATOMIC_OPS_FETCH_ADD	= 1 << 1,
+	MLX5_ATOMIC_OPS_EXTENDED_CMP_SWAP = 1 << 2,
+	MLX5_ATOMIC_OPS_EXTENDED_FETCH_ADD = 1 << 3,
 };
 
 enum mlx5_page_fault_resume_flags {
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 5839d8062dfc..0e9c50052ff3 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -317,11 +317,18 @@ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
 int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
 			     int virq, int nvec, msi_alloc_info_t *args);
 struct irq_domain *
-platform_msi_create_device_domain(struct device *dev,
-				  unsigned int nvec,
-				  irq_write_msi_msg_t write_msi_msg,
-				  const struct irq_domain_ops *ops,
-				  void *host_data);
+__platform_msi_create_device_domain(struct device *dev,
+				    unsigned int nvec,
+				    bool is_tree,
+				    irq_write_msi_msg_t write_msi_msg,
+				    const struct irq_domain_ops *ops,
+				    void *host_data);
+
+#define platform_msi_create_device_domain(dev, nvec, write, ops, data)	\
+	__platform_msi_create_device_domain(dev, nvec, false, write, ops, data)
+#define platform_msi_create_device_tree_domain(dev, nvec, write, ops, data) \
+	__platform_msi_create_device_domain(dev, nvec, true, write, ops, data)
+
 int platform_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
 			      unsigned int nr_irqs);
 void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq,
diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h
index 4e85447f7860..312bfa5efd80 100644
--- a/include/linux/nvmem-consumer.h
+++ b/include/linux/nvmem-consumer.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * nvmem framework consumer.
  *
  * Copyright (C) 2015 Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
  * Copyright (C) 2013 Maxime Ripard <maxime.ripard@free-electrons.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2.  This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
  */
 
 #ifndef _LINUX_NVMEM_CONSUMER_H
@@ -14,6 +11,7 @@
 
 #include <linux/err.h>
 #include <linux/errno.h>
+#include <linux/notifier.h>
 
 struct device;
 struct device_node;
@@ -29,11 +27,36 @@ struct nvmem_cell_info {
 	unsigned int		nbits;
 };
 
+/**
+ * struct nvmem_cell_lookup - cell lookup entry
+ *
+ * @nvmem_name:	Name of the provider.
+ * @cell_name:	Name of the nvmem cell as defined in the name field of
+ *		struct nvmem_cell_info.
+ * @dev_id:	Name of the consumer device that will be associated with
+ *		this cell.
+ * @con_id:	Connector id for this cell lookup.
+ */
+struct nvmem_cell_lookup {
+	const char		*nvmem_name;
+	const char		*cell_name;
+	const char		*dev_id;
+	const char		*con_id;
+	struct list_head	node;
+};
+
+enum {
+	NVMEM_ADD = 1,
+	NVMEM_REMOVE,
+	NVMEM_CELL_ADD,
+	NVMEM_CELL_REMOVE,
+};
+
 #if IS_ENABLED(CONFIG_NVMEM)
 
 /* Cell based interface */
-struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *name);
-struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, const char *name);
+struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *id);
+struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, const char *id);
 void nvmem_cell_put(struct nvmem_cell *cell);
 void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell);
 void *nvmem_cell_read(struct nvmem_cell *cell, size_t *len);
@@ -55,18 +78,28 @@ ssize_t nvmem_device_cell_read(struct nvmem_device *nvmem,
 int nvmem_device_cell_write(struct nvmem_device *nvmem,
 			    struct nvmem_cell_info *info, void *buf);
 
+const char *nvmem_dev_name(struct nvmem_device *nvmem);
+
+void nvmem_add_cell_lookups(struct nvmem_cell_lookup *entries,
+			    size_t nentries);
+void nvmem_del_cell_lookups(struct nvmem_cell_lookup *entries,
+			    size_t nentries);
+
+int nvmem_register_notifier(struct notifier_block *nb);
+int nvmem_unregister_notifier(struct notifier_block *nb);
+
 #else
 
 static inline struct nvmem_cell *nvmem_cell_get(struct device *dev,
-						const char *name)
+						const char *id)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static inline struct nvmem_cell *devm_nvmem_cell_get(struct device *dev,
-				       const char *name)
+						     const char *id)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static inline void devm_nvmem_cell_put(struct device *dev,
@@ -80,31 +113,31 @@ static inline void nvmem_cell_put(struct nvmem_cell *cell)
 
 static inline void *nvmem_cell_read(struct nvmem_cell *cell, size_t *len)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static inline int nvmem_cell_write(struct nvmem_cell *cell,
 				    const char *buf, size_t len)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 
 static inline int nvmem_cell_read_u32(struct device *dev,
 				      const char *cell_id, u32 *val)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 
 static inline struct nvmem_device *nvmem_device_get(struct device *dev,
 						    const char *name)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static inline struct nvmem_device *devm_nvmem_device_get(struct device *dev,
 							 const char *name)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static inline void nvmem_device_put(struct nvmem_device *nvmem)
@@ -120,47 +153,68 @@ static inline ssize_t nvmem_device_cell_read(struct nvmem_device *nvmem,
 					 struct nvmem_cell_info *info,
 					 void *buf)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 
 static inline int nvmem_device_cell_write(struct nvmem_device *nvmem,
 					  struct nvmem_cell_info *info,
 					  void *buf)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 
 static inline int nvmem_device_read(struct nvmem_device *nvmem,
 				    unsigned int offset, size_t bytes,
 				    void *buf)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 
 static inline int nvmem_device_write(struct nvmem_device *nvmem,
 				     unsigned int offset, size_t bytes,
 				     void *buf)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
+
+static inline const char *nvmem_dev_name(struct nvmem_device *nvmem)
+{
+	return NULL;
+}
+
+static inline void
+nvmem_add_cell_lookups(struct nvmem_cell_lookup *entries, size_t nentries) {}
+static inline void
+nvmem_del_cell_lookups(struct nvmem_cell_lookup *entries, size_t nentries) {}
+
+static inline int nvmem_register_notifier(struct notifier_block *nb)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int nvmem_unregister_notifier(struct notifier_block *nb)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif /* CONFIG_NVMEM */
 
 #if IS_ENABLED(CONFIG_NVMEM) && IS_ENABLED(CONFIG_OF)
 struct nvmem_cell *of_nvmem_cell_get(struct device_node *np,
-				     const char *name);
+				     const char *id);
 struct nvmem_device *of_nvmem_device_get(struct device_node *np,
 					 const char *name);
 #else
 static inline struct nvmem_cell *of_nvmem_cell_get(struct device_node *np,
-				     const char *name)
+						   const char *id)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static inline struct nvmem_device *of_nvmem_device_get(struct device_node *np,
 						       const char *name)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 #endif /* CONFIG_NVMEM && CONFIG_OF */
 
diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index 24def6ad09bb..1e3283c2af77 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * nvmem framework provider.
  *
  * Copyright (C) 2015 Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
  * Copyright (C) 2013 Maxime Ripard <maxime.ripard@free-electrons.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2.  This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
  */
 
 #ifndef _LINUX_NVMEM_PROVIDER_H
@@ -67,30 +64,46 @@ struct nvmem_config {
 	struct device		*base_dev;
 };
 
+/**
+ * struct nvmem_cell_table - NVMEM cell definitions for given provider
+ *
+ * @nvmem_name:		Provider name.
+ * @cells:		Array of cell definitions.
+ * @ncells:		Number of cell definitions in the array.
+ * @node:		List node.
+ *
+ * This structure together with related helper functions is provided for users
+ * that don't can't access the nvmem provided structure but wish to register
+ * cell definitions for it e.g. board files registering an EEPROM device.
+ */
+struct nvmem_cell_table {
+	const char		*nvmem_name;
+	const struct nvmem_cell_info	*cells;
+	size_t			ncells;
+	struct list_head	node;
+};
+
 #if IS_ENABLED(CONFIG_NVMEM)
 
 struct nvmem_device *nvmem_register(const struct nvmem_config *cfg);
-int nvmem_unregister(struct nvmem_device *nvmem);
+void nvmem_unregister(struct nvmem_device *nvmem);
 
 struct nvmem_device *devm_nvmem_register(struct device *dev,
 					 const struct nvmem_config *cfg);
 
 int devm_nvmem_unregister(struct device *dev, struct nvmem_device *nvmem);
 
-int nvmem_add_cells(struct nvmem_device *nvmem,
-		    const struct nvmem_cell_info *info,
-		    int ncells);
+void nvmem_add_cell_table(struct nvmem_cell_table *table);
+void nvmem_del_cell_table(struct nvmem_cell_table *table);
+
 #else
 
 static inline struct nvmem_device *nvmem_register(const struct nvmem_config *c)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
-static inline int nvmem_unregister(struct nvmem_device *nvmem)
-{
-	return -ENOSYS;
-}
+static inline void nvmem_unregister(struct nvmem_device *nvmem) {}
 
 static inline struct nvmem_device *
 devm_nvmem_register(struct device *dev, const struct nvmem_config *c)
@@ -101,16 +114,11 @@ devm_nvmem_register(struct device *dev, const struct nvmem_config *c)
 static inline int
 devm_nvmem_unregister(struct device *dev, struct nvmem_device *nvmem)
 {
-	return nvmem_unregister(nvmem);
-
+	return -EOPNOTSUPP;
 }
 
-static inline int nvmem_add_cells(struct nvmem_device *nvmem,
-				  const struct nvmem_cell_info *info,
-				  int ncells)
-{
-	return -ENOSYS;
-}
+static inline void nvmem_add_cell_table(struct nvmem_cell_table *table) {}
+static inline void nvmem_del_cell_table(struct nvmem_cell_table *table) {}
 
 #endif /* CONFIG_NVMEM */
 #endif  /* ifndef _LINUX_NVMEM_PROVIDER_H */
diff --git a/include/linux/of.h b/include/linux/of.h
index 99b0ebf49632..ab96025b2382 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -247,12 +247,6 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
 #include <asm/prom.h>
 #endif
 
-/* Default #address and #size cells.  Allow arch asm/prom.h to override */
-#if !defined(OF_ROOT_NODE_ADDR_CELLS_DEFAULT)
-#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 1
-#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT 1
-#endif
-
 #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags)
 #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags)
 
@@ -353,6 +347,8 @@ extern const void *of_get_property(const struct device_node *node,
 				const char *name,
 				int *lenp);
 extern struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
+extern struct device_node *of_get_next_cpu_node(struct device_node *prev);
+
 #define for_each_property_of_node(dn, pp) \
 	for (pp = dn->properties; pp != NULL; pp = pp->next)
 
@@ -550,6 +546,10 @@ bool of_console_check(struct device_node *dn, char *name, int index);
 
 extern int of_cpu_node_to_id(struct device_node *np);
 
+int of_map_rid(struct device_node *np, u32 rid,
+	       const char *map_name, const char *map_mask_name,
+	       struct device_node **target, u32 *id_out);
+
 #else /* CONFIG_OF */
 
 static inline void of_core_init(void)
@@ -754,6 +754,11 @@ static inline struct device_node *of_get_cpu_node(int cpu,
 	return NULL;
 }
 
+static inline struct device_node *of_get_next_cpu_node(struct device_node *prev)
+{
+	return NULL;
+}
+
 static inline int of_n_addr_cells(struct device_node *np)
 {
 	return 0;
@@ -952,6 +957,13 @@ static inline int of_cpu_node_to_id(struct device_node *np)
 	return -ENODEV;
 }
 
+static inline int of_map_rid(struct device_node *np, u32 rid,
+			     const char *map_name, const char *map_mask_name,
+			     struct device_node **target, u32 *id_out)
+{
+	return -EINVAL;
+}
+
 #define of_match_ptr(_ptr)	NULL
 #define of_match_node(_matches, _node)	NULL
 #endif /* CONFIG_OF */
@@ -990,7 +1002,7 @@ static inline struct device_node *of_find_matching_node(
 
 static inline const char *of_node_get_device_type(const struct device_node *np)
 {
-	return of_get_property(np, "type", NULL);
+	return of_get_property(np, "device_type", NULL);
 }
 
 static inline bool of_node_is_type(const struct device_node *np, const char *type)
@@ -1217,6 +1229,10 @@ static inline int of_property_read_s32(const struct device_node *np,
 	for (child = of_get_next_available_child(parent, NULL); child != NULL; \
 	     child = of_get_next_available_child(parent, child))
 
+#define for_each_of_cpu_node(cpu) \
+	for (cpu = of_get_next_cpu_node(NULL); cpu != NULL; \
+	     cpu = of_get_next_cpu_node(cpu))
+
 #define for_each_node_with_property(dn, prop_name) \
 	for (dn = of_find_node_with_property(NULL, prop_name); dn; \
 	     dn = of_find_node_with_property(dn, prop_name))
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index e83d87fc5673..21a89c4880fa 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -14,9 +14,6 @@ struct device_node *of_pci_find_child_device(struct device_node *parent,
 					     unsigned int devfn);
 int of_pci_get_devfn(struct device_node *np);
 void of_pci_check_probe_only(void);
-int of_pci_map_rid(struct device_node *np, u32 rid,
-		   const char *map_name, const char *map_mask_name,
-		   struct device_node **target, u32 *id_out);
 #else
 static inline struct device_node *of_pci_find_child_device(struct device_node *parent,
 					     unsigned int devfn)
@@ -29,13 +26,6 @@ static inline int of_pci_get_devfn(struct device_node *np)
 	return -EINVAL;
 }
 
-static inline int of_pci_map_rid(struct device_node *np, u32 rid,
-			const char *map_name, const char *map_mask_name,
-			struct device_node **target, u32 *id_out)
-{
-	return -EINVAL;
-}
-
 static inline void of_pci_check_probe_only(void) { }
 #endif
 
diff --git a/include/linux/phy/phy-qcom-ufs.h b/include/linux/phy/phy-qcom-ufs.h
deleted file mode 100644
index 0a2c18a9771d..000000000000
--- a/include/linux/phy/phy-qcom-ufs.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef PHY_QCOM_UFS_H_
-#define PHY_QCOM_UFS_H_
-
-#include "phy.h"
-
-/**
- * ufs_qcom_phy_enable_dev_ref_clk() - Enable the device
- * ref clock.
- * @phy: reference to a generic phy.
- */
-void ufs_qcom_phy_enable_dev_ref_clk(struct phy *phy);
-
-/**
- * ufs_qcom_phy_disable_dev_ref_clk() - Disable the device
- * ref clock.
- * @phy: reference to a generic phy.
- */
-void ufs_qcom_phy_disable_dev_ref_clk(struct phy *phy);
-
-int ufs_qcom_phy_set_tx_lane_enable(struct phy *phy, u32 tx_lanes);
-void ufs_qcom_phy_save_controller_version(struct phy *phy,
-			u8 major, u16 minor, u16 step);
-
-#endif /* PHY_QCOM_UFS_H_ */
diff --git a/include/linux/platform_data/ehci-sh.h b/include/linux/platform_data/ehci-sh.h
index 5c15a738e116..219bd79dabfc 100644
--- a/include/linux/platform_data/ehci-sh.h
+++ b/include/linux/platform_data/ehci-sh.h
@@ -1,21 +1,9 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0
+ *
  * EHCI SuperH driver platform data
  *
  * Copyright (C) 2012  Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
  * Copyright (C) 2012  Renesas Solutions Corp.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #ifndef __USB_EHCI_SH_H
diff --git a/include/linux/platform_data/mv_usb.h b/include/linux/platform_data/mv_usb.h
index 98b7925f1a2d..c0f624aca81c 100644
--- a/include/linux/platform_data/mv_usb.h
+++ b/include/linux/platform_data/mv_usb.h
@@ -48,6 +48,5 @@ struct mv_usb_platform_data {
 	int	(*phy_init)(void __iomem *regbase);
 	void	(*phy_deinit)(void __iomem *regbase);
 	int	(*set_vbus)(unsigned int vbus);
-	int     (*private_init)(void __iomem *opregs, void __iomem *phyregs);
 };
 #endif
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index df4d13f7e191..d15f8e4815e3 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -39,15 +39,6 @@
 #include <linux/qed/qed_ll2_if.h>
 #include <linux/qed/rdma_common.h>
 
-enum qed_roce_ll2_tx_dest {
-	/* Light L2 TX Destination to the Network */
-	QED_ROCE_LL2_TX_DEST_NW,
-
-	/* Light L2 TX Destination to the Loopback */
-	QED_ROCE_LL2_TX_DEST_LB,
-	QED_ROCE_LL2_TX_DEST_MAX
-};
-
 #define QED_RDMA_MAX_CNQ_SIZE               (0xFFFF)
 
 /* rdma interface */
@@ -581,7 +572,7 @@ struct qed_roce_ll2_packet {
 	int n_seg;
 	struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE];
 	int roce_mode;
-	enum qed_roce_ll2_tx_dest tx_dest;
+	enum qed_ll2_tx_dest tx_dest;
 };
 
 enum qed_rdma_type {
diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h
index 5d83d0c1d06c..bba2920e9c05 100644
--- a/include/linux/restart_block.h
+++ b/include/linux/restart_block.h
@@ -10,7 +10,7 @@
 #include <linux/time64.h>
 
 struct timespec;
-struct compat_timespec;
+struct old_timespec32;
 struct pollfd;
 
 enum timespec_type {
@@ -40,7 +40,7 @@ struct restart_block {
 			enum timespec_type type;
 			union {
 				struct __kernel_timespec __user *rmtp;
-				struct compat_timespec __user *compat_rmtp;
+				struct old_timespec32 __user *compat_rmtp;
 			};
 			u64 expires;
 		} nanosleep;
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 7ed4713d5337..8b571e9b9f76 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -348,7 +348,7 @@ struct ucred {
 extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
 extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
 
-struct timespec;
+struct timespec64;
 
 /* The __sys_...msg variants allow MSG_CMSG_COMPAT iff
  * forbid_cmsg_compat==false
@@ -358,7 +358,7 @@ extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg,
 extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg,
 			  unsigned int flags, bool forbid_cmsg_compat);
 extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
-			  unsigned int flags, struct timespec *timeout);
+			  unsigned int flags, struct timespec64 *timeout);
 extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
 			  unsigned int vlen, unsigned int flags,
 			  bool forbid_cmsg_compat);
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 962971e6a9c7..df313913e856 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -678,6 +678,9 @@ struct sdw_master_ops {
  * @defer_msg: Defer message
  * @clk_stop_timeout: Clock stop timeout computed
  * @bank_switch_timeout: Bank switch timeout computed
+ * @multi_link: Store bus property that indicates if multi links
+ * are supported. This flag is populated by drivers after reading
+ * appropriate firmware (ACPI/DT).
  */
 struct sdw_bus {
 	struct device *dev;
@@ -694,6 +697,7 @@ struct sdw_bus {
 	struct sdw_defer defer_msg;
 	unsigned int clk_stop_timeout;
 	u32 bank_switch_timeout;
+	bool multi_link;
 };
 
 int sdw_add_bus_master(struct sdw_bus *bus);
@@ -768,14 +772,18 @@ struct sdw_stream_params {
  * @params: Stream parameters
  * @state: Current state of the stream
  * @type: Stream type PCM or PDM
- * @m_rt: Master runtime
+ * @master_list: List of Master runtime(s) in this stream.
+ * master_list can contain only one m_rt per Master instance
+ * for a stream
+ * @m_rt_count: Count of Master runtime(s) in this stream
  */
 struct sdw_stream_runtime {
 	char *name;
 	struct sdw_stream_params params;
 	enum sdw_stream_state state;
 	enum sdw_stream_type type;
-	struct sdw_master_runtime *m_rt;
+	struct list_head master_list;
+	int m_rt_count;
 };
 
 struct sdw_stream_runtime *sdw_alloc_stream(char *stream_name);
diff --git a/include/linux/string.h b/include/linux/string.h
index 4a5a0eb7df51..27d0482e5e05 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -131,6 +131,13 @@ static inline void *memset_p(void **p, void *v, __kernel_size_t n)
 		return memset64((uint64_t *)p, (uintptr_t)v, n);
 }
 
+extern void **__memcat_p(void **a, void **b);
+#define memcat_p(a, b) ({					\
+	BUILD_BUG_ON_MSG(!__same_type(*(a), *(b)),		\
+			 "type mismatch in memcat_p()");	\
+	(typeof(*a) *)__memcat_p((void **)(a), (void **)(b));	\
+})
+
 #ifndef __HAVE_ARCH_MEMCPY
 extern void * memcpy(void *,const void *,__kernel_size_t);
 #endif
diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 7df625d41e35..f6e8ceafafd8 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -71,10 +71,10 @@ struct gss_krb5_enctype {
 	const u32		keyed_cksum;	/* is it a keyed cksum? */
 	const u32		keybytes;	/* raw key len, in bytes */
 	const u32		keylength;	/* final key len, in bytes */
-	u32 (*encrypt) (struct crypto_skcipher *tfm,
+	u32 (*encrypt) (struct crypto_sync_skcipher *tfm,
 			void *iv, void *in, void *out,
 			int length);		/* encryption function */
-	u32 (*decrypt) (struct crypto_skcipher *tfm,
+	u32 (*decrypt) (struct crypto_sync_skcipher *tfm,
 			void *iv, void *in, void *out,
 			int length);		/* decryption function */
 	u32 (*mk_key) (const struct gss_krb5_enctype *gk5e,
@@ -98,12 +98,12 @@ struct krb5_ctx {
 	u32			enctype;
 	u32			flags;
 	const struct gss_krb5_enctype *gk5e; /* enctype-specific info */
-	struct crypto_skcipher	*enc;
-	struct crypto_skcipher	*seq;
-	struct crypto_skcipher *acceptor_enc;
-	struct crypto_skcipher *initiator_enc;
-	struct crypto_skcipher *acceptor_enc_aux;
-	struct crypto_skcipher *initiator_enc_aux;
+	struct crypto_sync_skcipher *enc;
+	struct crypto_sync_skcipher *seq;
+	struct crypto_sync_skcipher *acceptor_enc;
+	struct crypto_sync_skcipher *initiator_enc;
+	struct crypto_sync_skcipher *acceptor_enc_aux;
+	struct crypto_sync_skcipher *initiator_enc_aux;
 	u8			Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */
 	u8			cksum[GSS_KRB5_MAX_KEYLEN];
 	s32			endtime;
@@ -262,24 +262,24 @@ gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset,
 
 
 u32
-krb5_encrypt(struct crypto_skcipher *key,
+krb5_encrypt(struct crypto_sync_skcipher *key,
 	     void *iv, void *in, void *out, int length);
 
 u32
-krb5_decrypt(struct crypto_skcipher *key,
+krb5_decrypt(struct crypto_sync_skcipher *key,
 	     void *iv, void *in, void *out, int length); 
 
 int
-gss_encrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *outbuf,
+gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *outbuf,
 		    int offset, struct page **pages);
 
 int
-gss_decrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *inbuf,
+gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *inbuf,
 		    int offset);
 
 s32
 krb5_make_seq_num(struct krb5_ctx *kctx,
-		struct crypto_skcipher *key,
+		struct crypto_sync_skcipher *key,
 		int direction,
 		u32 seqnum, unsigned char *cksum, unsigned char *buf);
 
@@ -320,12 +320,12 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset,
 
 int
 krb5_rc4_setup_seq_key(struct krb5_ctx *kctx,
-		       struct crypto_skcipher *cipher,
+		       struct crypto_sync_skcipher *cipher,
 		       unsigned char *cksum);
 
 int
 krb5_rc4_setup_enc_key(struct krb5_ctx *kctx,
-		       struct crypto_skcipher *cipher,
+		       struct crypto_sync_skcipher *cipher,
 		       s32 seqnum);
 void
 gss_krb5_make_confounder(char *p, u32 conflen);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 965be92c33b5..a387b59640a4 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -67,11 +67,6 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev,
 
 /* Accessory functions. */
 
-void *swiotlb_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle,
-		gfp_t flags, unsigned long attrs);
-void swiotlb_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_addr, unsigned long attrs);
-
 extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 				   unsigned long offset, size_t size,
 				   enum dma_data_direction dir,
@@ -107,9 +102,6 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 			   int nelems, enum dma_data_direction dir);
 
 extern int
-swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
-
-extern int
 swiotlb_dma_supported(struct device *hwdev, u64 mask);
 
 #ifdef CONFIG_SWIOTLB
@@ -121,7 +113,6 @@ static inline unsigned int swiotlb_max_segment(void) { return 0; }
 #endif
 
 extern void swiotlb_print_info(void);
-extern int is_swiotlb_buffer(phys_addr_t paddr);
 extern void swiotlb_set_max_segment(unsigned int);
 
 extern const struct dma_map_ops swiotlb_dma_ops;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2ff814c92f7f..2ac3d13a915b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -60,7 +60,7 @@ struct tms;
 struct utimbuf;
 struct mq_attr;
 struct compat_stat;
-struct compat_timeval;
+struct old_timeval32;
 struct robust_list_head;
 struct getcpu_cache;
 struct old_linux_dirent;
@@ -513,7 +513,8 @@ asmlinkage long sys_timerfd_gettime(int ufd, struct __kernel_itimerspec __user *
 
 /* fs/utimes.c */
 asmlinkage long sys_utimensat(int dfd, const char __user *filename,
-				struct timespec __user *utimes, int flags);
+				struct __kernel_timespec __user *utimes,
+				int flags);
 
 /* kernel/acct.c */
 asmlinkage long sys_acct(const char __user *name);
@@ -613,7 +614,7 @@ asmlinkage long sys_sched_yield(void);
 asmlinkage long sys_sched_get_priority_max(int policy);
 asmlinkage long sys_sched_get_priority_min(int policy);
 asmlinkage long sys_sched_rr_get_interval(pid_t pid,
-					struct timespec __user *interval);
+				struct __kernel_timespec __user *interval);
 
 /* kernel/signal.c */
 asmlinkage long sys_restart_syscall(void);
@@ -634,7 +635,7 @@ asmlinkage long sys_rt_sigprocmask(int how, sigset_t __user *set,
 asmlinkage long sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize);
 asmlinkage long sys_rt_sigtimedwait(const sigset_t __user *uthese,
 				siginfo_t __user *uinfo,
-				const struct timespec __user *uts,
+				const struct __kernel_timespec __user *uts,
 				size_t sigsetsize);
 asmlinkage long sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo);
 
@@ -829,7 +830,7 @@ asmlinkage long sys_perf_event_open(
 asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int);
 asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg,
 			     unsigned int vlen, unsigned flags,
-			     struct timespec __user *timeout);
+			     struct __kernel_timespec __user *timeout);
 
 asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr,
 				int options, struct rusage __user *ru);
@@ -954,8 +955,6 @@ asmlinkage long sys_access(const char __user *filename, int mode);
 asmlinkage long sys_rename(const char __user *oldname,
 				const char __user *newname);
 asmlinkage long sys_symlink(const char __user *old, const char __user *new);
-asmlinkage long sys_utimes(char __user *filename,
-				struct timeval __user *utimes);
 #if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64)
 asmlinkage long sys_stat64(const char __user *filename,
 				struct stat64 __user *statbuf);
@@ -985,14 +984,18 @@ asmlinkage long sys_alarm(unsigned int seconds);
 asmlinkage long sys_getpgrp(void);
 asmlinkage long sys_pause(void);
 asmlinkage long sys_time(time_t __user *tloc);
+#ifdef __ARCH_WANT_SYS_UTIME
 asmlinkage long sys_utime(char __user *filename,
 				struct utimbuf __user *times);
+asmlinkage long sys_utimes(char __user *filename,
+				struct timeval __user *utimes);
+asmlinkage long sys_futimesat(int dfd, const char __user *filename,
+			      struct timeval __user *utimes);
+#endif
 asmlinkage long sys_creat(const char __user *pathname, umode_t mode);
 asmlinkage long sys_getdents(unsigned int fd,
 				struct linux_dirent __user *dirent,
 				unsigned int count);
-asmlinkage long sys_futimesat(int dfd, const char __user *filename,
-			      struct timeval __user *utimes);
 asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 			fd_set __user *exp, struct timeval __user *tvp);
 asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h
index a3ed26082bc1..bf6ec83e60ee 100644
--- a/include/linux/thunderbolt.h
+++ b/include/linux/thunderbolt.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Thunderbolt service API
  *
@@ -5,10 +6,6 @@
  * Copyright (C) 2017, Intel Corporation
  * Authors: Michael Jamet <michael.jamet@intel.com>
  *          Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef THUNDERBOLT_H_
diff --git a/include/linux/time32.h b/include/linux/time32.h
index d1ae43c13e25..61904a6c098f 100644
--- a/include/linux/time32.h
+++ b/include/linux/time32.h
@@ -13,6 +13,36 @@
 
 #define TIME_T_MAX	(time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
 
+typedef s32		old_time32_t;
+
+struct old_timespec32 {
+	old_time32_t	tv_sec;
+	s32		tv_nsec;
+};
+
+struct old_timeval32 {
+	old_time32_t	tv_sec;
+	s32		tv_usec;
+};
+
+struct old_itimerspec32 {
+	struct old_timespec32 it_interval;
+	struct old_timespec32 it_value;
+};
+
+struct old_utimbuf32 {
+	old_time32_t	actime;
+	old_time32_t	modtime;
+};
+
+extern int get_old_timespec32(struct timespec64 *, const void __user *);
+extern int put_old_timespec32(const struct timespec64 *, void __user *);
+extern int get_old_itimerspec32(struct itimerspec64 *its,
+			const struct old_itimerspec32 __user *uits);
+extern int put_old_itimerspec32(const struct itimerspec64 *its,
+			struct old_itimerspec32 __user *uits);
+
+
 #if __BITS_PER_LONG == 64
 
 /* timespec64 is defined as timespec here */
@@ -105,16 +135,6 @@ static inline bool timespec_valid(const struct timespec *ts)
 	return true;
 }
 
-static inline bool timespec_valid_strict(const struct timespec *ts)
-{
-	if (!timespec_valid(ts))
-		return false;
-	/* Disallow values that could overflow ktime_t */
-	if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
-		return false;
-	return true;
-}
-
 /**
  * timespec_to_ns - Convert timespec to nanoseconds
  * @ts:		pointer to the timespec variable to be converted
@@ -149,19 +169,6 @@ static __always_inline void timespec_add_ns(struct timespec *a, u64 ns)
 	a->tv_nsec = ns;
 }
 
-/**
- * time_to_tm - converts the calendar time to local broken-down time
- *
- * @totalsecs	the number of seconds elapsed since 00:00:00 on January 1, 1970,
- *		Coordinated Universal Time (UTC).
- * @offset	offset seconds adding to totalsecs.
- * @result	pointer to struct tm variable to receive broken-down time
- */
-static inline void time_to_tm(time_t totalsecs, int offset, struct tm *result)
-{
-	time64_to_tm(totalsecs, offset, result);
-}
-
 static inline unsigned long mktime(const unsigned int year,
 			const unsigned int mon, const unsigned int day,
 			const unsigned int hour, const unsigned int min,
@@ -183,8 +190,6 @@ static inline bool timeval_valid(const struct timeval *tv)
 	return true;
 }
 
-extern struct timespec timespec_trunc(struct timespec t, unsigned int gran);
-
 /**
  * timeval_to_ns - Convert timeval to nanoseconds
  * @ts:		pointer to the timeval variable to be converted
@@ -208,18 +213,17 @@ extern struct timeval ns_to_timeval(const s64 nsec);
 extern struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec);
 
 /*
- * New aliases for compat time functions. These will be used to replace
- * the compat code so it can be shared between 32-bit and 64-bit builds
- * both of which provide compatibility with old 32-bit tasks.
+ * Old names for the 32-bit time_t interfaces, these will be removed
+ * when everything uses the new names.
  */
-#define old_time32_t		compat_time_t
-#define old_timeval32		compat_timeval
-#define old_timespec32		compat_timespec
-#define old_itimerspec32	compat_itimerspec
-#define ns_to_old_timeval32	ns_to_compat_timeval
-#define get_old_itimerspec32	get_compat_itimerspec64
-#define put_old_itimerspec32	put_compat_itimerspec64
-#define get_old_timespec32	compat_get_timespec64
-#define put_old_timespec32	compat_put_timespec64
+#define compat_time_t		old_time32_t
+#define compat_timeval		old_timeval32
+#define compat_timespec		old_timespec32
+#define compat_itimerspec	old_itimerspec32
+#define ns_to_compat_timeval	ns_to_old_timeval32
+#define get_compat_itimerspec64	get_old_itimerspec32
+#define put_compat_itimerspec64	put_old_itimerspec32
+#define compat_get_timespec64	get_old_timespec32
+#define compat_put_timespec64	put_old_timespec32
 
 #endif
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index a5a3cfc3c2fa..29975e93fcb8 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -266,9 +266,6 @@ extern int update_persistent_clock64(struct timespec64 now);
  * deprecated aliases, don't use in new code
  */
 #define getnstimeofday64(ts)		ktime_get_real_ts64(ts)
-#define get_monotonic_boottime64(ts)	ktime_get_boottime_ts64(ts)
-#define getrawmonotonic64(ts)		ktime_get_raw_ts64(ts)
-#define timekeeping_clocktai64(ts)	ktime_get_clocktai_ts64(ts)
 
 static inline struct timespec64 current_kernel_time64(void)
 {
@@ -279,13 +276,4 @@ static inline struct timespec64 current_kernel_time64(void)
 	return ts;
 }
 
-static inline struct timespec64 get_monotonic_coarse64(void)
-{
-	struct timespec64 ts;
-
-	ktime_get_coarse_ts64(&ts);
-
-	return ts;
-}
-
 #endif
diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h
index 8762c2f45f8b..a502616f7e1c 100644
--- a/include/linux/timekeeping32.h
+++ b/include/linux/timekeeping32.h
@@ -6,27 +6,18 @@
  * over time so we can remove the file here.
  */
 
-extern void do_gettimeofday(struct timeval *tv);
-unsigned long get_seconds(void);
-
-static inline struct timespec current_kernel_time(void)
+static inline void do_gettimeofday(struct timeval *tv)
 {
-	struct timespec64 ts64;
+	struct timespec64 now;
 
-	ktime_get_coarse_real_ts64(&ts64);
-
-	return timespec64_to_timespec(ts64);
+	ktime_get_real_ts64(&now);
+	tv->tv_sec = now.tv_sec;
+	tv->tv_usec = now.tv_nsec/1000;
 }
 
-/**
- * Deprecated. Use do_settimeofday64().
- */
-static inline int do_settimeofday(const struct timespec *ts)
+static inline unsigned long get_seconds(void)
 {
-	struct timespec64 ts64;
-
-	ts64 = timespec_to_timespec64(*ts);
-	return do_settimeofday64(&ts64);
+	return ktime_get_real_seconds();
 }
 
 static inline void getnstimeofday(struct timespec *ts)
@@ -45,14 +36,6 @@ static inline void ktime_get_ts(struct timespec *ts)
 	*ts = timespec64_to_timespec(ts64);
 }
 
-static inline void ktime_get_real_ts(struct timespec *ts)
-{
-	struct timespec64 ts64;
-
-	ktime_get_real_ts64(&ts64);
-	*ts = timespec64_to_timespec(ts64);
-}
-
 static inline void getrawmonotonic(struct timespec *ts)
 {
 	struct timespec64 ts64;
@@ -61,15 +44,6 @@ static inline void getrawmonotonic(struct timespec *ts)
 	*ts = timespec64_to_timespec(ts64);
 }
 
-static inline struct timespec get_monotonic_coarse(void)
-{
-	struct timespec64 ts64;
-
-	ktime_get_coarse_ts64(&ts64);
-
-	return timespec64_to_timespec(ts64);
-}
-
 static inline void getboottime(struct timespec *ts)
 {
 	struct timespec64 ts64;
@@ -79,19 +53,6 @@ static inline void getboottime(struct timespec *ts)
 }
 
 /*
- * Timespec interfaces utilizing the ktime based ones
- */
-static inline void get_monotonic_boottime(struct timespec *ts)
-{
-	*ts = ktime_to_timespec(ktime_get_boottime());
-}
-
-static inline void timekeeping_clocktai(struct timespec *ts)
-{
-	*ts = ktime_to_timespec(ktime_get_clocktai());
-}
-
-/*
  * Persistent clock related interfaces
  */
 extern void read_persistent_clock(struct timespec *ts);
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 6f8b68cd460f..a3cd7cb67a69 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -133,6 +133,7 @@ extern void uio_event_notify(struct uio_info *info);
 #define UIO_MEM_PHYS	1
 #define UIO_MEM_LOGICAL	2
 #define UIO_MEM_VIRTUAL 3
+#define UIO_MEM_IOVA	4
 
 /* defines for uio_port->porttype */
 #define UIO_PORT_NONE	0
diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h
index 07f99362bc90..63758c399e4e 100644
--- a/include/linux/usb/chipidea.h
+++ b/include/linux/usb/chipidea.h
@@ -77,6 +77,12 @@ struct ci_hdrc_platform_data {
 	struct ci_hdrc_cable		vbus_extcon;
 	struct ci_hdrc_cable		id_extcon;
 	u32			phy_clkgate_delay_us;
+
+	/* pins */
+	struct pinctrl *pctl;
+	struct pinctrl_state *pins_default;
+	struct pinctrl_state *pins_host;
+	struct pinctrl_state *pins_device;
 };
 
 /* Default offset of capability registers */
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 77c7908b7d73..2734c895c1bf 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -46,7 +46,6 @@
 #include <net/ip.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_pack.h>
-#include <net/ipv6.h>
 #include <net/net_namespace.h>
 
 /**
@@ -95,20 +94,18 @@ int rdma_translate_ip(const struct sockaddr *addr,
  * @timeout_ms: Amount of time to wait for the address resolution to complete.
  * @callback: Call invoked once address resolution has completed, timed out,
  *   or been canceled.  A status of 0 indicates success.
+ * @resolve_by_gid_attr:	Resolve the ip based on the GID attribute from
+ *				rdma_dev_addr.
  * @context: User-specified context associated with the call.
  */
 int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
-		    struct rdma_dev_addr *addr, int timeout_ms,
+		    struct rdma_dev_addr *addr, unsigned long timeout_ms,
 		    void (*callback)(int status, struct sockaddr *src_addr,
 				     struct rdma_dev_addr *addr, void *context),
-		    void *context);
+		    bool resolve_by_gid_attr, void *context);
 
 void rdma_addr_cancel(struct rdma_dev_addr *addr);
 
-void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
-		    const struct net_device *dev,
-		    const unsigned char *dst_dev_addr);
-
 int rdma_addr_size(const struct sockaddr *addr);
 int rdma_addr_size_in6(struct sockaddr_in6 *addr);
 int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr);
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index c10f4b5ea8ab..49f4f75499b3 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -583,7 +583,7 @@ struct ib_cm_sidr_req_param {
 	struct sa_path_rec	*path;
 	const struct ib_gid_attr *sgid_attr;
 	__be64			service_id;
-	int			timeout_ms;
+	unsigned long		timeout_ms;
 	const void		*private_data;
 	u8			private_data_len;
 	u8			max_cm_retries;
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index b6ddf2a1b9d8..19520979b84c 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -449,28 +449,23 @@ struct ib_sa_query;
 
 void ib_sa_cancel_query(int id, struct ib_sa_query *query);
 
-int ib_sa_path_rec_get(struct ib_sa_client *client,
-		       struct ib_device *device, u8 port_num,
-		       struct sa_path_rec *rec,
-		       ib_sa_comp_mask comp_mask,
-		       int timeout_ms, gfp_t gfp_mask,
-		       void (*callback)(int status,
-					struct sa_path_rec *resp,
+int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device,
+		       u8 port_num, struct sa_path_rec *rec,
+		       ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
+		       gfp_t gfp_mask,
+		       void (*callback)(int status, struct sa_path_rec *resp,
 					void *context),
-		       void *context,
-		       struct ib_sa_query **query);
+		       void *context, struct ib_sa_query **query);
 
 int ib_sa_service_rec_query(struct ib_sa_client *client,
-			 struct ib_device *device, u8 port_num,
-			 u8 method,
-			 struct ib_sa_service_rec *rec,
-			 ib_sa_comp_mask comp_mask,
-			 int timeout_ms, gfp_t gfp_mask,
-			 void (*callback)(int status,
-					  struct ib_sa_service_rec *resp,
-					  void *context),
-			 void *context,
-			 struct ib_sa_query **sa_query);
+			    struct ib_device *device, u8 port_num, u8 method,
+			    struct ib_sa_service_rec *rec,
+			    ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
+			    gfp_t gfp_mask,
+			    void (*callback)(int status,
+					     struct ib_sa_service_rec *resp,
+					     void *context),
+			    void *context, struct ib_sa_query **sa_query);
 
 struct ib_sa_multicast {
 	struct ib_sa_mcmember_rec rec;
@@ -573,12 +568,11 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
 			      struct ib_device *device, u8 port_num,
 			      struct ib_sa_guidinfo_rec *rec,
 			      ib_sa_comp_mask comp_mask, u8 method,
-			      int timeout_ms, gfp_t gfp_mask,
+			      unsigned long timeout_ms, gfp_t gfp_mask,
 			      void (*callback)(int status,
 					       struct ib_sa_guidinfo_rec *resp,
 					       void *context),
-			      void *context,
-			      struct ib_sa_query **sa_query);
+			      void *context, struct ib_sa_query **sa_query);
 
 bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
 				    struct ib_device *device,
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index a1fd63871d17..5d3755ec5afa 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -42,15 +42,14 @@ struct ib_umem_odp;
 
 struct ib_umem {
 	struct ib_ucontext     *context;
+	struct mm_struct       *owning_mm;
 	size_t			length;
 	unsigned long		address;
 	int			page_shift;
-	int                     writable;
-	int                     hugetlb;
+	u32 writable : 1;
+	u32 hugetlb : 1;
+	u32 is_odp : 1;
 	struct work_struct	work;
-	struct mm_struct       *mm;
-	unsigned long		diff;
-	struct ib_umem_odp     *odp_data;
 	struct sg_table sg_head;
 	int             nmap;
 	int             npages;
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 381cdf5a9bd1..0b1446fe2fab 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -43,6 +43,9 @@ struct umem_odp_node {
 };
 
 struct ib_umem_odp {
+	struct ib_umem umem;
+	struct ib_ucontext_per_mm *per_mm;
+
 	/*
 	 * An array of the pages included in the on-demand paging umem.
 	 * Indices of pages that are currently not mapped into the device will
@@ -64,16 +67,9 @@ struct ib_umem_odp {
 	struct mutex		umem_mutex;
 	void			*private; /* for the HW driver to use. */
 
-	/* When false, use the notifier counter in the ucontext struct. */
-	bool mn_counters_active;
 	int notifiers_seq;
 	int notifiers_count;
 
-	/* A linked list of umems that don't have private mmu notifier
-	 * counters yet. */
-	struct list_head no_private_counters;
-	struct ib_umem		*umem;
-
 	/* Tree tracking */
 	struct umem_odp_node	interval_tree;
 
@@ -82,15 +78,34 @@ struct ib_umem_odp {
 	struct work_struct	work;
 };
 
+static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
+{
+	return container_of(umem, struct ib_umem_odp, umem);
+}
+
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 
-int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
-		    int access);
-struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
-				  unsigned long addr,
-				  size_t size);
+struct ib_ucontext_per_mm {
+	struct ib_ucontext *context;
+	struct mm_struct *mm;
+	struct pid *tgid;
+	bool active;
+
+	struct rb_root_cached umem_tree;
+	/* Protects umem_tree */
+	struct rw_semaphore umem_rwsem;
 
-void ib_umem_odp_release(struct ib_umem *umem);
+	struct mmu_notifier mn;
+	unsigned int odp_mrs_count;
+
+	struct list_head ucontext_list;
+	struct rcu_head rcu;
+};
+
+int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access);
+struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
+				      unsigned long addr, size_t size);
+void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
 
 /*
  * The lower 2 bits of the DMA address signal the R/W permissions for
@@ -105,13 +120,14 @@ void ib_umem_odp_release(struct ib_umem *umem);
 
 #define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
 
-int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt,
-			      u64 access_mask, unsigned long current_seq);
+int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
+			      u64 bcnt, u64 access_mask,
+			      unsigned long current_seq);
 
-void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset,
+void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
 				 u64 bound);
 
-typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
+typedef int (*umem_call_back)(struct ib_umem_odp *item, u64 start, u64 end,
 			      void *cookie);
 /*
  * Call the callback on each ib_umem in the range. Returns the logical or of
@@ -129,46 +145,37 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
 struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
 				       u64 addr, u64 length);
 
-static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
+static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
 					     unsigned long mmu_seq)
 {
 	/*
 	 * This code is strongly based on the KVM code from
 	 * mmu_notifier_retry. Should be called with
-	 * the relevant locks taken (item->odp_data->umem_mutex
+	 * the relevant locks taken (umem_odp->umem_mutex
 	 * and the ucontext umem_mutex semaphore locked for read).
 	 */
 
-	/* Do not allow page faults while the new ib_umem hasn't seen a state
-	 * with zero notifiers yet, and doesn't have its own valid set of
-	 * private counters. */
-	if (!item->odp_data->mn_counters_active)
-		return 1;
-
-	if (unlikely(item->odp_data->notifiers_count))
+	if (unlikely(umem_odp->notifiers_count))
 		return 1;
-	if (item->odp_data->notifiers_seq != mmu_seq)
+	if (umem_odp->notifiers_seq != mmu_seq)
 		return 1;
 	return 0;
 }
 
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
-static inline int ib_umem_odp_get(struct ib_ucontext *context,
-				  struct ib_umem *umem,
-				  int access)
+static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 {
 	return -EINVAL;
 }
 
-static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
-						unsigned long addr,
-						size_t size)
+static inline struct ib_umem_odp *
+ib_alloc_odp_umem(struct ib_ucontext *context, unsigned long addr, size_t size)
 {
 	return ERR_PTR(-EINVAL);
 }
 
-static inline void ib_umem_odp_release(struct ib_umem *umem) {}
+static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {}
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 0ed5d913a492..9c0c2132a2d6 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -69,8 +69,11 @@
 
 #define IB_FW_VERSION_NAME_MAX	ETHTOOL_FWVERS_LEN
 
+struct ib_umem_odp;
+
 extern struct workqueue_struct *ib_wq;
 extern struct workqueue_struct *ib_comp_wq;
+extern struct workqueue_struct *ib_comp_unbound_wq;
 
 union ib_gid {
 	u8	raw[16];
@@ -1137,7 +1140,9 @@ enum ib_qp_create_flags {
  */
 
 struct ib_qp_init_attr {
+	/* Consumer's event_handler callback must not block */
 	void                  (*event_handler)(struct ib_event *, void *);
+
 	void		       *qp_context;
 	struct ib_cq	       *send_cq;
 	struct ib_cq	       *recv_cq;
@@ -1146,7 +1151,7 @@ struct ib_qp_init_attr {
 	struct ib_qp_cap	cap;
 	enum ib_sig_type	sq_sig_type;
 	enum ib_qp_type		qp_type;
-	enum ib_qp_create_flags	create_flags;
+	u32			create_flags;
 
 	/*
 	 * Only needed for special QP types, or when using the RW API.
@@ -1278,21 +1283,27 @@ struct ib_qp_attr {
 };
 
 enum ib_wr_opcode {
-	IB_WR_RDMA_WRITE,
-	IB_WR_RDMA_WRITE_WITH_IMM,
-	IB_WR_SEND,
-	IB_WR_SEND_WITH_IMM,
-	IB_WR_RDMA_READ,
-	IB_WR_ATOMIC_CMP_AND_SWP,
-	IB_WR_ATOMIC_FETCH_AND_ADD,
-	IB_WR_LSO,
-	IB_WR_SEND_WITH_INV,
-	IB_WR_RDMA_READ_WITH_INV,
-	IB_WR_LOCAL_INV,
-	IB_WR_REG_MR,
-	IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
-	IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+	/* These are shared with userspace */
+	IB_WR_RDMA_WRITE = IB_UVERBS_WR_RDMA_WRITE,
+	IB_WR_RDMA_WRITE_WITH_IMM = IB_UVERBS_WR_RDMA_WRITE_WITH_IMM,
+	IB_WR_SEND = IB_UVERBS_WR_SEND,
+	IB_WR_SEND_WITH_IMM = IB_UVERBS_WR_SEND_WITH_IMM,
+	IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
+	IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
+	IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
+	IB_WR_LSO = IB_UVERBS_WR_TSO,
+	IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
+	IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
+	IB_WR_LOCAL_INV = IB_UVERBS_WR_LOCAL_INV,
+	IB_WR_MASKED_ATOMIC_CMP_AND_SWP =
+		IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
+	IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
+		IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+
+	/* These are kernel only and can not be issued by userspace */
+	IB_WR_REG_MR = 0x20,
 	IB_WR_REG_SIG_MR,
+
 	/* reserve values for low level drivers' internal use.
 	 * These values will not be used at all in the ib core layer.
 	 */
@@ -1485,26 +1496,15 @@ struct ib_ucontext {
 	 * it is set when we are closing the file descriptor and indicates
 	 * that mm_sem may be locked.
 	 */
-	int			closing;
+	bool closing;
 
 	bool cleanup_retryable;
 
-	struct pid             *tgid;
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	struct rb_root_cached   umem_tree;
-	/*
-	 * Protects .umem_rbroot and tree, as well as odp_mrs_count and
-	 * mmu notifiers registration.
-	 */
-	struct rw_semaphore	umem_rwsem;
-	void (*invalidate_range)(struct ib_umem *umem,
+	void (*invalidate_range)(struct ib_umem_odp *umem_odp,
 				 unsigned long start, unsigned long end);
-
-	struct mmu_notifier	mn;
-	atomic_t		notifier_count;
-	/* A list of umems that don't have private mmu notifier counters yet. */
-	struct list_head	no_private_counters;
-	int                     odp_mrs_count;
+	struct mutex per_mm_list_lock;
+	struct list_head per_mm_list;
 #endif
 
 	struct ib_rdmacg_object	cg_obj;
@@ -1570,9 +1570,10 @@ struct ib_ah {
 typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
 
 enum ib_poll_context {
-	IB_POLL_DIRECT,		/* caller context, no hw completions */
-	IB_POLL_SOFTIRQ,	/* poll from softirq context */
-	IB_POLL_WORKQUEUE,	/* poll from workqueue */
+	IB_POLL_DIRECT,		   /* caller context, no hw completions */
+	IB_POLL_SOFTIRQ,	   /* poll from softirq context */
+	IB_POLL_WORKQUEUE,	   /* poll from workqueue */
+	IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
 };
 
 struct ib_cq {
@@ -1589,6 +1590,7 @@ struct ib_cq {
 		struct irq_poll		iop;
 		struct work_struct	work;
 	};
+	struct workqueue_struct *comp_wq;
 	/*
 	 * Implementation details of the RDMA core, don't use in drivers:
 	 */
@@ -2263,10 +2265,11 @@ struct ib_device {
 	struct list_head              event_handler_list;
 	spinlock_t                    event_handler_lock;
 
-	spinlock_t                    client_data_lock;
+	rwlock_t			client_data_lock;
 	struct list_head              core_list;
 	/* Access to the client_data_list is protected by the client_data_lock
-	 * spinlock and the lists_rwsem read-write semaphore */
+	 * rwlock and the lists_rwsem read-write semaphore
+	 */
 	struct list_head              client_data_list;
 
 	struct ib_cache               cache;
@@ -2550,7 +2553,13 @@ struct ib_device {
 
 	struct module               *owner;
 	struct device                dev;
-	struct kobject               *ports_parent;
+	/* First group for device attributes,
+	 * Second group for driver provided attributes (optional).
+	 * It is NULL terminated array.
+	 */
+	const struct attribute_group	*groups[3];
+
+	struct kobject			*ports_kobj;
 	struct list_head             port_list;
 
 	enum {
@@ -2633,9 +2642,9 @@ void ib_dealloc_device(struct ib_device *device);
 
 void ib_get_device_fw_str(struct ib_device *device, char *str);
 
-int ib_register_device(struct ib_device *device,
-		       int (*port_callback)(struct ib_device *,
-					    u8, struct kobject *));
+int ib_register_device(struct ib_device *device, const char *name,
+		       int (*port_callback)(struct ib_device *, u8,
+					    struct kobject *));
 void ib_unregister_device(struct ib_device *device);
 
 int ib_register_client   (struct ib_client *client);
@@ -2645,6 +2654,28 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
 void  ib_set_client_data(struct ib_device *device, struct ib_client *client,
 			 void *data);
 
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+		      unsigned long pfn, unsigned long size, pgprot_t prot);
+int rdma_user_mmap_page(struct ib_ucontext *ucontext,
+			struct vm_area_struct *vma, struct page *page,
+			unsigned long size);
+#else
+static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
+				    struct vm_area_struct *vma,
+				    unsigned long pfn, unsigned long size,
+				    pgprot_t prot)
+{
+	return -EINVAL;
+}
+static inline int rdma_user_mmap_page(struct ib_ucontext *ucontext,
+				struct vm_area_struct *vma, struct page *page,
+				unsigned long size)
+{
+	return -EINVAL;
+}
+#endif
+
 static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
 {
 	return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
@@ -2728,7 +2759,6 @@ static inline int ib_destroy_usecnt(atomic_t *usecnt,
  * @next_state: Next QP state
  * @type: QP type
  * @mask: Mask of supplied QP attributes
- * @ll : link layer of port
  *
  * This function is a helper function that a low-level driver's
  * modify_qp method can use to validate the consumer's input.  It
@@ -2737,8 +2767,7 @@ static inline int ib_destroy_usecnt(atomic_t *usecnt,
  * and that the attribute mask supplied is allowed for the transition.
  */
 bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
-			enum ib_qp_type type, enum ib_qp_attr_mask mask,
-			enum rdma_link_layer ll);
+			enum ib_qp_type type, enum ib_qp_attr_mask mask);
 
 void ib_register_event_handler(struct ib_event_handler *event_handler);
 void ib_unregister_event_handler(struct ib_event_handler *event_handler);
@@ -4167,20 +4196,6 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
 
 }
 
-static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
-			       struct ib_qp *qp, struct ib_device *device)
-{
-	uobj->object = ibflow;
-	ibflow->uobject = uobj;
-
-	if (qp) {
-		atomic_inc(&qp->usecnt);
-		ibflow->qp = qp;
-	}
-
-	ibflow->device = device;
-}
-
 /**
  * rdma_roce_rescan_device - Rescan all of the network devices in the system
  * and add their gids, as needed, to the relevant RoCE devices.
@@ -4205,4 +4220,26 @@ int rdma_init_netdev(struct ib_device *device, u8 port_num,
 		     void (*setup)(struct net_device *),
 		     struct net_device *netdev);
 
+/**
+ * rdma_set_device_sysfs_group - Set device attributes group to have
+ *				 driver specific sysfs entries at
+ *				 for infiniband class.
+ *
+ * @device:	device pointer for which attributes to be created
+ * @group:	Pointer to group which should be added when device
+ *		is registered with sysfs.
+ * rdma_set_device_sysfs_group() allows existing drivers to expose one
+ * group per device to have sysfs attributes.
+ *
+ * NOTE: New drivers should not make use of this API; instead new device
+ * parameter should be exposed via netlink command. This API and mechanism
+ * exist only for existing drivers.
+ */
+static inline void
+rdma_set_device_sysfs_group(struct ib_device *dev,
+			    const struct attribute_group *group)
+{
+	dev->groups[1] = group;
+}
+
 #endif /* IB_VERBS_H */
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 5d71a7f51a9f..60987a5903b7 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -152,7 +152,11 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
  * @ps: RDMA port space.
  * @qp_type: type of queue pair associated with the id.
  *
- * The id holds a reference on the network namespace until it is destroyed.
+ * Returns a new rdma_cm_id. The id holds a reference on the network
+ * namespace until it is destroyed.
+ *
+ * The event handler callback serializes on the id's mutex and is
+ * allowed to sleep.
  */
 #define rdma_create_id(net, event_handler, context, ps, qp_type) \
 	__rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
@@ -192,7 +196,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr);
  * @timeout_ms: Time to wait for resolution to complete.
  */
 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
-		      const struct sockaddr *dst_addr, int timeout_ms);
+		      const struct sockaddr *dst_addr,
+		      unsigned long timeout_ms);
 
 /**
  * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier
@@ -202,7 +207,7 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
  * Users must have first called rdma_resolve_addr to resolve a dst_addr
  * into an RDMA address before calling this routine.
  */
-int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms);
+int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms);
 
 /**
  * rdma_create_qp - Allocate a QP and associate it with the specified RDMA
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index c369703fcd69..70218e6b5187 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -96,7 +96,7 @@ int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags);
 /**
  * Check if there are any listeners to the netlink group
  * @group: the netlink group ID
- * Returns 0 on success or a negative for no listeners.
+ * Returns true on success or false if no listeners.
  */
-int rdma_nl_chk_listeners(unsigned int group);
+bool rdma_nl_chk_listeners(unsigned int group);
 #endif /* _RDMA_NETLINK_H */
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index e79229a0cf01..3584d0816fcd 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -149,6 +149,10 @@ struct rvt_ibport {
 
 #define RVT_CQN_MAX 16 /* maximum length of cq name */
 
+#define RVT_SGE_COPY_MEMCPY	0
+#define RVT_SGE_COPY_CACHELESS	1
+#define RVT_SGE_COPY_ADAPTIVE	2
+
 /*
  * Things that are driver specific, module parameters in hfi1 and qib
  */
@@ -161,6 +165,9 @@ struct rvt_driver_params {
 	 */
 	unsigned int lkey_table_size;
 	unsigned int qp_table_size;
+	unsigned int sge_copy_mode;
+	unsigned int wss_threshold;
+	unsigned int wss_clean_period;
 	int qpn_start;
 	int qpn_inc;
 	int qpn_res_start;
@@ -193,6 +200,19 @@ struct rvt_ah {
 	u8 log_pmtu;
 };
 
+/* memory working set size */
+struct rvt_wss {
+	unsigned long *entries;
+	atomic_t total_count;
+	atomic_t clean_counter;
+	atomic_t clean_entry;
+
+	int threshold;
+	int num_entries;
+	long pages_mask;
+	unsigned int clean_period;
+};
+
 struct rvt_dev_info;
 struct rvt_swqe;
 struct rvt_driver_provided {
@@ -211,11 +231,18 @@ struct rvt_driver_provided {
 	 * version requires the s_lock not to be held. The other assumes the
 	 * s_lock is held.
 	 */
-	void (*schedule_send)(struct rvt_qp *qp);
-	void (*schedule_send_no_lock)(struct rvt_qp *qp);
+	bool (*schedule_send)(struct rvt_qp *qp);
+	bool (*schedule_send_no_lock)(struct rvt_qp *qp);
 
-	/* Driver specific work request checking */
-	int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
+	/*
+	 * Driver specific work request setup and checking.
+	 * This function is allowed to perform any setup, checks, or
+	 * adjustments required to the SWQE in order to be usable by
+	 * underlying protocols. This includes private data structure
+	 * allocations.
+	 */
+	int (*setup_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
+			 bool *call_send);
 
 	/*
 	 * Sometimes rdmavt needs to kick the driver's send progress. That is
@@ -371,6 +398,9 @@ struct rvt_dev_info {
 	/* post send table */
 	const struct rvt_operation_params *post_parms;
 
+	/* opcode translation table */
+	const enum ib_wc_opcode *wc_opcode;
+
 	/* Driver specific helper functions */
 	struct rvt_driver_provided driver_f;
 
@@ -411,6 +441,8 @@ struct rvt_dev_info {
 	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
 	spinlock_t n_mcast_grps_lock;
 
+	/* Memory Working Set Size */
+	struct rvt_wss *wss;
 };
 
 /**
@@ -423,7 +455,14 @@ static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi,
 				      const char *fmt, const char *name,
 				      const int unit)
 {
-	snprintf(rdi->ibdev.name, sizeof(rdi->ibdev.name), fmt, name, unit);
+	/*
+	 * FIXME: rvt and its users want to touch the ibdev before
+	 * registration and have things like the name work. We don't have the
+	 * infrastructure in the core to support this directly today, hack it
+	 * to work by setting the name manually here.
+	 */
+	dev_set_name(&rdi->ibdev.dev, fmt, name, unit);
+	strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX);
 }
 
 /**
@@ -434,7 +473,7 @@ static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi,
  */
 static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi)
 {
-	return rdi->ibdev.name;
+	return dev_name(&rdi->ibdev.dev);
 }
 
 static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 927f6d5b6d0f..cbafb1878669 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -678,6 +678,13 @@ void rvt_del_timers_sync(struct rvt_qp *qp);
 void rvt_stop_rc_timers(struct rvt_qp *qp);
 void rvt_add_retry_timer(struct rvt_qp *qp);
 
+void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
+		  void *data, u32 length,
+		  bool release, bool copy_last);
+void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
+		       enum ib_wc_status status);
+void rvt_ruc_loopback(struct rvt_qp *qp);
+
 /**
  * struct rvt_qp_iter - the iterator for QPs
  * @qp - the current QP
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 9654d33edd98..2638fa7cd702 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -173,16 +173,10 @@ int rdma_restrack_put(struct rdma_restrack_entry *res);
 /**
  * rdma_restrack_set_task() - set the task for this resource
  * @res:  resource entry
- * @task: task struct
+ * @caller: kernel name, the current task will be used if the caller is NULL.
  */
-static inline void rdma_restrack_set_task(struct rdma_restrack_entry *res,
-					  struct task_struct *task)
-{
-	if (res->task)
-		put_task_struct(res->task);
-	get_task_struct(task);
-	res->task = task;
-}
+void rdma_restrack_set_task(struct rdma_restrack_entry *res,
+			    const char *caller);
 
 /*
  * Helper functions for rdma drivers when filling out
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 9e997c3c2f04..84d3d15f1f38 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -52,6 +52,7 @@ enum uverbs_attr_type {
 	UVERBS_ATTR_TYPE_IDR,
 	UVERBS_ATTR_TYPE_FD,
 	UVERBS_ATTR_TYPE_ENUM_IN,
+	UVERBS_ATTR_TYPE_IDRS_ARRAY,
 };
 
 enum uverbs_obj_access {
@@ -101,7 +102,7 @@ struct uverbs_attr_spec {
 		} enum_def;
 	} u;
 
-	/* This weird split of the enum lets us remove some padding */
+	/* This weird split lets us remove some padding */
 	union {
 		struct {
 			/*
@@ -111,6 +112,17 @@ struct uverbs_attr_spec {
 			 */
 			const struct uverbs_attr_spec *ids;
 		} enum_def;
+
+		struct {
+			/*
+			 * higher bits mean the namespace and lower bits mean
+			 * the type id within the namespace.
+			 */
+			u16				obj_type;
+			u16				min_len;
+			u16				max_len;
+			u8				access;
+		} objs_arr;
 	} u2;
 };
 
@@ -251,6 +263,11 @@ static inline __attribute_const__ u32 uapi_bkey_attr(u32 attr_key)
 	return attr_key - 1;
 }
 
+static inline __attribute_const__ u32 uapi_bkey_to_key_attr(u32 attr_bkey)
+{
+	return attr_bkey + 1;
+}
+
 /*
  * =======================================
  *	Verbs definitions
@@ -323,6 +340,27 @@ struct uverbs_object_tree_def {
 #define UA_MANDATORY .mandatory = 1
 #define UA_OPTIONAL .mandatory = 0
 
+/*
+ * min_len must be bigger than 0 and _max_len must be smaller than 4095.  Only
+ * READ\WRITE accesses are supported.
+ */
+#define UVERBS_ATTR_IDRS_ARR(_attr_id, _idr_type, _access, _min_len, _max_len, \
+			     ...)                                              \
+	(&(const struct uverbs_attr_def){                                      \
+		.id = (_attr_id) +                                             \
+		      BUILD_BUG_ON_ZERO((_min_len) == 0 ||                     \
+					(_max_len) >                           \
+						PAGE_SIZE / sizeof(void *) ||  \
+					(_min_len) > (_max_len) ||             \
+					(_access) == UVERBS_ACCESS_NEW ||      \
+					(_access) == UVERBS_ACCESS_DESTROY),   \
+		.attr = { .type = UVERBS_ATTR_TYPE_IDRS_ARRAY,                 \
+			  .u2.objs_arr.obj_type = _idr_type,                   \
+			  .u2.objs_arr.access = _access,                       \
+			  .u2.objs_arr.min_len = _min_len,                     \
+			  .u2.objs_arr.max_len = _max_len,                     \
+			  __VA_ARGS__ } })
+
 #define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...)                     \
 	(&(const struct uverbs_attr_def){                                      \
 		.id = _attr_id,                                                \
@@ -365,6 +403,15 @@ struct uverbs_object_tree_def {
 			  __VA_ARGS__ },                                       \
 	})
 
+/* An input value that is a member in the enum _enum_type. */
+#define UVERBS_ATTR_CONST_IN(_attr_id, _enum_type, ...)                        \
+	UVERBS_ATTR_PTR_IN(                                                    \
+		_attr_id,                                                      \
+		UVERBS_ATTR_SIZE(                                              \
+			sizeof(u64) + BUILD_BUG_ON_ZERO(!sizeof(_enum_type)),  \
+			sizeof(u64)),                                          \
+		__VA_ARGS__)
+
 /*
  * An input value that is a bitwise combination of values of _enum_type.
  * This permits the flag value to be passed as either a u32 or u64, it must
@@ -431,10 +478,16 @@ struct uverbs_obj_attr {
 	const struct uverbs_api_attr	*attr_elm;
 };
 
+struct uverbs_objs_arr_attr {
+	struct ib_uobject **uobjects;
+	u16 len;
+};
+
 struct uverbs_attr {
 	union {
 		struct uverbs_ptr_attr	ptr_attr;
 		struct uverbs_obj_attr	obj_attr;
+		struct uverbs_objs_arr_attr objs_arr_attr;
 	};
 };
 
@@ -507,6 +560,31 @@ uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
 	return attr->ptr_attr.len;
 }
 
+/**
+ * uverbs_attr_get_uobjs_arr() - Provides array's properties for attribute for
+ * UVERBS_ATTR_TYPE_IDRS_ARRAY.
+ * @arr: Returned pointer to array of pointers for uobjects or NULL if
+ *       the attribute isn't provided.
+ *
+ * Return: The array length or 0 if no attribute was provided.
+ */
+static inline int uverbs_attr_get_uobjs_arr(
+	const struct uverbs_attr_bundle *attrs_bundle, u16 attr_idx,
+	struct ib_uobject ***arr)
+{
+	const struct uverbs_attr *attr =
+			uverbs_attr_get(attrs_bundle, attr_idx);
+
+	if (IS_ERR(attr)) {
+		*arr = NULL;
+		return 0;
+	}
+
+	*arr = attr->objs_arr_attr.uobjects;
+
+	return attr->objs_arr_attr.len;
+}
+
 static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
 {
 	return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
@@ -603,6 +681,9 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
 {
 	return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO);
 }
+int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		      size_t idx, s64 lower_bound, u64 upper_bound,
+		      s64 *def_val);
 #else
 static inline int
 uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
@@ -631,6 +712,34 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
 {
 	return ERR_PTR(-EINVAL);
 }
+static inline int
+_uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		  size_t idx, s64 lower_bound, u64 upper_bound,
+		  s64 *def_val)
+{
+	return -EINVAL;
+}
 #endif
 
+#define uverbs_get_const(_to, _attrs_bundle, _idx)                             \
+	({                                                                     \
+		s64 _val;                                                      \
+		int _ret = _uverbs_get_const(&_val, _attrs_bundle, _idx,       \
+					     type_min(typeof(*_to)),           \
+					     type_max(typeof(*_to)), NULL);    \
+		(*_to) = _val;                                                 \
+		_ret;                                                          \
+	})
+
+#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default)           \
+	({                                                                     \
+		s64 _val;                                                      \
+		s64 _def_val = _default;                                       \
+		int _ret =                                                     \
+			_uverbs_get_const(&_val, _attrs_bundle, _idx,          \
+					  type_min(typeof(*_to)),              \
+					  type_max(typeof(*_to)), &_def_val);  \
+		(*_to) = _val;                                                 \
+		_ret;                                                          \
+	})
 #endif
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 3b00231cc084..3db2802fbc68 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -140,5 +140,56 @@ __uobj_alloc(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile,
 #define uobj_alloc(_type, _ufile, _ib_dev)                                     \
 	__uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev)
 
+static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action,
+						  struct ib_uobject *uobj,
+						  struct ib_device *ib_dev,
+						  enum ib_flow_action_type type)
+{
+	atomic_set(&action->usecnt, 0);
+	action->device = ib_dev;
+	action->type = type;
+	action->uobject = uobj;
+	uobj->object = action;
+}
+
+struct ib_uflow_resources {
+	size_t			max;
+	size_t			num;
+	size_t			collection_num;
+	size_t			counters_num;
+	struct ib_counters	**counters;
+	struct ib_flow_action	**collection;
+};
+
+struct ib_uflow_object {
+	struct ib_uobject		uobject;
+	struct ib_uflow_resources	*resources;
+};
+
+struct ib_uflow_resources *flow_resources_alloc(size_t num_specs);
+void flow_resources_add(struct ib_uflow_resources *uflow_res,
+			enum ib_flow_spec_type type,
+			void *ibobj);
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
+
+static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
+			       struct ib_qp *qp, struct ib_device *device,
+			       struct ib_uflow_resources *uflow_res)
+{
+	struct ib_uflow_object *uflow;
+
+	uobj->object = ibflow;
+	ibflow->uobject = uobj;
+
+	if (qp) {
+		atomic_inc(&qp->usecnt);
+		ibflow->qp = qp;
+	}
+
+	ibflow->device = device;
+	uflow = container_of(uobj, typeof(*uflow), uobject);
+	uflow->resources = uflow_res;
+}
+
 #endif
 
diff --git a/include/soc/fsl/dpaa2-fd.h b/include/soc/fsl/dpaa2-fd.h
index 2576abaa7779..90ae8d191f1a 100644
--- a/include/soc/fsl/dpaa2-fd.h
+++ b/include/soc/fsl/dpaa2-fd.h
@@ -66,6 +66,15 @@ struct dpaa2_fd {
 #define SG_BPID_MASK		0x3FFF
 #define SG_FINAL_FLAG_MASK	0x1
 #define SG_FINAL_FLAG_SHIFT	15
+#define FL_SHORT_LEN_FLAG_MASK	0x1
+#define FL_SHORT_LEN_FLAG_SHIFT	14
+#define FL_SHORT_LEN_MASK	0x3FFFF
+#define FL_OFFSET_MASK		0x0FFF
+#define FL_FORMAT_MASK		0x3
+#define FL_FORMAT_SHIFT		12
+#define FL_BPID_MASK		0x3FFF
+#define FL_FINAL_FLAG_MASK	0x1
+#define FL_FINAL_FLAG_SHIFT	15
 
 /* Error bits in FD CTRL */
 #define FD_CTRL_ERR_MASK	0x000000FF
@@ -435,4 +444,237 @@ static inline void dpaa2_sg_set_final(struct dpaa2_sg_entry *sg, bool final)
 	sg->format_offset |= cpu_to_le16(final << SG_FINAL_FLAG_SHIFT);
 }
 
+/**
+ * struct dpaa2_fl_entry - structure for frame list entry.
+ * @addr:          address in the FLE
+ * @len:           length in the FLE
+ * @bpid:          buffer pool ID
+ * @format_offset: format, offset, and short-length fields
+ * @frc:           frame context
+ * @ctrl:          control bits...including pta, pvt1, pvt2, err, etc
+ * @flc:           flow context address
+ */
+struct dpaa2_fl_entry {
+	__le64 addr;
+	__le32 len;
+	__le16 bpid;
+	__le16 format_offset;
+	__le32 frc;
+	__le32 ctrl;
+	__le64 flc;
+};
+
+enum dpaa2_fl_format {
+	dpaa2_fl_single = 0,
+	dpaa2_fl_res,
+	dpaa2_fl_sg
+};
+
+/**
+ * dpaa2_fl_get_addr() - get the addr field of FLE
+ * @fle: the given frame list entry
+ *
+ * Return the address in the frame list entry.
+ */
+static inline dma_addr_t dpaa2_fl_get_addr(const struct dpaa2_fl_entry *fle)
+{
+	return (dma_addr_t)le64_to_cpu(fle->addr);
+}
+
+/**
+ * dpaa2_fl_set_addr() - Set the addr field of FLE
+ * @fle: the given frame list entry
+ * @addr: the address needs to be set in frame list entry
+ */
+static inline void dpaa2_fl_set_addr(struct dpaa2_fl_entry *fle,
+				     dma_addr_t addr)
+{
+	fle->addr = cpu_to_le64(addr);
+}
+
+/**
+ * dpaa2_fl_get_frc() - Get the frame context in the FLE
+ * @fle: the given frame list entry
+ *
+ * Return the frame context field in the frame lsit entry.
+ */
+static inline u32 dpaa2_fl_get_frc(const struct dpaa2_fl_entry *fle)
+{
+	return le32_to_cpu(fle->frc);
+}
+
+/**
+ * dpaa2_fl_set_frc() - Set the frame context in the FLE
+ * @fle: the given frame list entry
+ * @frc: the frame context needs to be set in frame list entry
+ */
+static inline void dpaa2_fl_set_frc(struct dpaa2_fl_entry *fle, u32 frc)
+{
+	fle->frc = cpu_to_le32(frc);
+}
+
+/**
+ * dpaa2_fl_get_ctrl() - Get the control bits in the FLE
+ * @fle: the given frame list entry
+ *
+ * Return the control bits field in the frame list entry.
+ */
+static inline u32 dpaa2_fl_get_ctrl(const struct dpaa2_fl_entry *fle)
+{
+	return le32_to_cpu(fle->ctrl);
+}
+
+/**
+ * dpaa2_fl_set_ctrl() - Set the control bits in the FLE
+ * @fle: the given frame list entry
+ * @ctrl: the control bits to be set in the frame list entry
+ */
+static inline void dpaa2_fl_set_ctrl(struct dpaa2_fl_entry *fle, u32 ctrl)
+{
+	fle->ctrl = cpu_to_le32(ctrl);
+}
+
+/**
+ * dpaa2_fl_get_flc() - Get the flow context in the FLE
+ * @fle: the given frame list entry
+ *
+ * Return the flow context in the frame list entry.
+ */
+static inline dma_addr_t dpaa2_fl_get_flc(const struct dpaa2_fl_entry *fle)
+{
+	return (dma_addr_t)le64_to_cpu(fle->flc);
+}
+
+/**
+ * dpaa2_fl_set_flc() - Set the flow context field of FLE
+ * @fle: the given frame list entry
+ * @flc_addr: the flow context needs to be set in frame list entry
+ */
+static inline void dpaa2_fl_set_flc(struct dpaa2_fl_entry *fle,
+				    dma_addr_t flc_addr)
+{
+	fle->flc = cpu_to_le64(flc_addr);
+}
+
+static inline bool dpaa2_fl_short_len(const struct dpaa2_fl_entry *fle)
+{
+	return !!((le16_to_cpu(fle->format_offset) >>
+		  FL_SHORT_LEN_FLAG_SHIFT) & FL_SHORT_LEN_FLAG_MASK);
+}
+
+/**
+ * dpaa2_fl_get_len() - Get the length in the FLE
+ * @fle: the given frame list entry
+ *
+ * Return the length field in the frame list entry.
+ */
+static inline u32 dpaa2_fl_get_len(const struct dpaa2_fl_entry *fle)
+{
+	if (dpaa2_fl_short_len(fle))
+		return le32_to_cpu(fle->len) & FL_SHORT_LEN_MASK;
+
+	return le32_to_cpu(fle->len);
+}
+
+/**
+ * dpaa2_fl_set_len() - Set the length field of FLE
+ * @fle: the given frame list entry
+ * @len: the length needs to be set in frame list entry
+ */
+static inline void dpaa2_fl_set_len(struct dpaa2_fl_entry *fle, u32 len)
+{
+	fle->len = cpu_to_le32(len);
+}
+
+/**
+ * dpaa2_fl_get_offset() - Get the offset field in the frame list entry
+ * @fle: the given frame list entry
+ *
+ * Return the offset.
+ */
+static inline u16 dpaa2_fl_get_offset(const struct dpaa2_fl_entry *fle)
+{
+	return le16_to_cpu(fle->format_offset) & FL_OFFSET_MASK;
+}
+
+/**
+ * dpaa2_fl_set_offset() - Set the offset field of FLE
+ * @fle: the given frame list entry
+ * @offset: the offset needs to be set in frame list entry
+ */
+static inline void dpaa2_fl_set_offset(struct dpaa2_fl_entry *fle, u16 offset)
+{
+	fle->format_offset &= cpu_to_le16(~FL_OFFSET_MASK);
+	fle->format_offset |= cpu_to_le16(offset);
+}
+
+/**
+ * dpaa2_fl_get_format() - Get the format field in the FLE
+ * @fle: the given frame list entry
+ *
+ * Return the format.
+ */
+static inline enum dpaa2_fl_format dpaa2_fl_get_format(const struct dpaa2_fl_entry *fle)
+{
+	return (enum dpaa2_fl_format)((le16_to_cpu(fle->format_offset) >>
+				       FL_FORMAT_SHIFT) & FL_FORMAT_MASK);
+}
+
+/**
+ * dpaa2_fl_set_format() - Set the format field of FLE
+ * @fle: the given frame list entry
+ * @format: the format needs to be set in frame list entry
+ */
+static inline void dpaa2_fl_set_format(struct dpaa2_fl_entry *fle,
+				       enum dpaa2_fl_format format)
+{
+	fle->format_offset &= cpu_to_le16(~(FL_FORMAT_MASK << FL_FORMAT_SHIFT));
+	fle->format_offset |= cpu_to_le16(format << FL_FORMAT_SHIFT);
+}
+
+/**
+ * dpaa2_fl_get_bpid() - Get the bpid field in the FLE
+ * @fle: the given frame list entry
+ *
+ * Return the buffer pool id.
+ */
+static inline u16 dpaa2_fl_get_bpid(const struct dpaa2_fl_entry *fle)
+{
+	return le16_to_cpu(fle->bpid) & FL_BPID_MASK;
+}
+
+/**
+ * dpaa2_fl_set_bpid() - Set the bpid field of FLE
+ * @fle: the given frame list entry
+ * @bpid: buffer pool id to be set
+ */
+static inline void dpaa2_fl_set_bpid(struct dpaa2_fl_entry *fle, u16 bpid)
+{
+	fle->bpid &= cpu_to_le16(~(FL_BPID_MASK));
+	fle->bpid |= cpu_to_le16(bpid);
+}
+
+/**
+ * dpaa2_fl_is_final() - Check final bit in FLE
+ * @fle: the given frame list entry
+ *
+ * Return bool.
+ */
+static inline bool dpaa2_fl_is_final(const struct dpaa2_fl_entry *fle)
+{
+	return !!(le16_to_cpu(fle->format_offset) >> FL_FINAL_FLAG_SHIFT);
+}
+
+/**
+ * dpaa2_fl_set_final() - Set the final bit in FLE
+ * @fle: the given frame list entry
+ * @final: the final boolean to be set
+ */
+static inline void dpaa2_fl_set_final(struct dpaa2_fl_entry *fle, bool final)
+{
+	fle->format_offset &= cpu_to_le16((~(FL_FINAL_FLAG_MASK <<
+					     FL_FINAL_FLAG_SHIFT)) & 0xFFFF);
+	fle->format_offset |= cpu_to_le16(final << FL_FINAL_FLAG_SHIFT);
+}
+
 #endif /* __FSL_DPAA2_FD_H */
diff --git a/include/soc/fsl/dpaa2-global.h b/include/soc/fsl/dpaa2-global.h
index 9bc0713346a8..2bfc379d3dc9 100644
--- a/include/soc/fsl/dpaa2-global.h
+++ b/include/soc/fsl/dpaa2-global.h
@@ -174,4 +174,19 @@ static inline const struct dpaa2_fd *dpaa2_dq_fd(const struct dpaa2_dq *dq)
 	return (const struct dpaa2_fd *)&dq->dq.fd[0];
 }
 
+#define DPAA2_CSCN_SIZE		sizeof(struct dpaa2_dq)
+#define DPAA2_CSCN_ALIGN	16
+#define DPAA2_CSCN_STATE_CG	BIT(0)
+
+/**
+ * dpaa2_cscn_state_congested() - Check congestion state
+ * @cscn: congestion SCN (delivered to WQ or memory)
+ *
+i * Return true is congested.
+ */
+static inline bool dpaa2_cscn_state_congested(struct dpaa2_dq *cscn)
+{
+	return !!(cscn->scn.state & DPAA2_CSCN_STATE_CG);
+}
+
 #endif /* __FSL_DPAA2_GLOBAL_H */
diff --git a/include/soc/fsl/dpaa2-io.h b/include/soc/fsl/dpaa2-io.h
index ab51e40d11db..70997ab2146c 100644
--- a/include/soc/fsl/dpaa2-io.h
+++ b/include/soc/fsl/dpaa2-io.h
@@ -97,9 +97,13 @@ void dpaa2_io_service_deregister(struct dpaa2_io *service,
 int dpaa2_io_service_rearm(struct dpaa2_io *service,
 			   struct dpaa2_io_notification_ctx *ctx);
 
+int dpaa2_io_service_pull_fq(struct dpaa2_io *d, u32 fqid,
+			     struct dpaa2_io_store *s);
 int dpaa2_io_service_pull_channel(struct dpaa2_io *d, u32 channelid,
 				  struct dpaa2_io_store *s);
 
+int dpaa2_io_service_enqueue_fq(struct dpaa2_io *d, u32 fqid,
+				const struct dpaa2_fd *fd);
 int dpaa2_io_service_enqueue_qd(struct dpaa2_io *d, u32 qdid, u8 prio,
 				u16 qdbin, const struct dpaa2_fd *fd);
 int dpaa2_io_service_release(struct dpaa2_io *d, u32 bpid,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index df4bedb9b01c..538546edbfbd 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -242,10 +242,12 @@ __SYSCALL(__NR_tee, sys_tee)
 /* fs/stat.c */
 #define __NR_readlinkat 78
 __SYSCALL(__NR_readlinkat, sys_readlinkat)
+#if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64)
 #define __NR3264_fstatat 79
 __SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat)
 #define __NR3264_fstat 80
 __SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat)
+#endif
 
 /* fs/sync.c */
 #define __NR_sync 81
diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index bfaec6903b8b..b9ba520f7e4b 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -200,6 +200,15 @@ struct binder_node_debug_info {
 	__u32            has_weak_ref;
 };
 
+struct binder_node_info_for_ref {
+	__u32            handle;
+	__u32            strong_count;
+	__u32            weak_count;
+	__u32            reserved1;
+	__u32            reserved2;
+	__u32            reserved3;
+};
+
 #define BINDER_WRITE_READ		_IOWR('b', 1, struct binder_write_read)
 #define BINDER_SET_IDLE_TIMEOUT		_IOW('b', 3, __s64)
 #define BINDER_SET_MAX_THREADS		_IOW('b', 5, __u32)
@@ -208,6 +217,7 @@ struct binder_node_debug_info {
 #define BINDER_THREAD_EXIT		_IOW('b', 8, __s32)
 #define BINDER_VERSION			_IOWR('b', 9, struct binder_version)
 #define BINDER_GET_NODE_DEBUG_INFO	_IOWR('b', 11, struct binder_node_debug_info)
+#define BINDER_GET_NODE_INFO_FOR_REF	_IOWR('b', 12, struct binder_node_info_for_ref)
 
 /*
  * NOTE: Two special error codes you should check for when calling
diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h
index 19bf0ca6d635..6dafbc3e4414 100644
--- a/include/uapi/linux/cryptouser.h
+++ b/include/uapi/linux/cryptouser.h
@@ -29,6 +29,7 @@ enum {
 	CRYPTO_MSG_UPDATEALG,
 	CRYPTO_MSG_GETALG,
 	CRYPTO_MSG_DELRNG,
+	CRYPTO_MSG_GETSTAT,
 	__CRYPTO_MSG_MAX
 };
 #define CRYPTO_MSG_MAX (__CRYPTO_MSG_MAX - 1)
@@ -50,6 +51,16 @@ enum crypto_attr_type_t {
 	CRYPTOCFGA_REPORT_AKCIPHER,	/* struct crypto_report_akcipher */
 	CRYPTOCFGA_REPORT_KPP,		/* struct crypto_report_kpp */
 	CRYPTOCFGA_REPORT_ACOMP,	/* struct crypto_report_acomp */
+	CRYPTOCFGA_STAT_LARVAL,		/* struct crypto_stat */
+	CRYPTOCFGA_STAT_HASH,		/* struct crypto_stat */
+	CRYPTOCFGA_STAT_BLKCIPHER,	/* struct crypto_stat */
+	CRYPTOCFGA_STAT_AEAD,		/* struct crypto_stat */
+	CRYPTOCFGA_STAT_COMPRESS,	/* struct crypto_stat */
+	CRYPTOCFGA_STAT_RNG,		/* struct crypto_stat */
+	CRYPTOCFGA_STAT_CIPHER,		/* struct crypto_stat */
+	CRYPTOCFGA_STAT_AKCIPHER,	/* struct crypto_stat */
+	CRYPTOCFGA_STAT_KPP,		/* struct crypto_stat */
+	CRYPTOCFGA_STAT_ACOMP,		/* struct crypto_stat */
 	__CRYPTOCFGA_MAX
 
 #define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1)
@@ -65,6 +76,47 @@ struct crypto_user_alg {
 	__u32 cru_flags;
 };
 
+struct crypto_stat {
+	char type[CRYPTO_MAX_NAME];
+	union {
+		__u32 stat_encrypt_cnt;
+		__u32 stat_compress_cnt;
+		__u32 stat_generate_cnt;
+		__u32 stat_hash_cnt;
+		__u32 stat_setsecret_cnt;
+	};
+	union {
+		__u64 stat_encrypt_tlen;
+		__u64 stat_compress_tlen;
+		__u64 stat_generate_tlen;
+		__u64 stat_hash_tlen;
+	};
+	union {
+		__u32 stat_akcipher_err_cnt;
+		__u32 stat_cipher_err_cnt;
+		__u32 stat_compress_err_cnt;
+		__u32 stat_aead_err_cnt;
+		__u32 stat_hash_err_cnt;
+		__u32 stat_rng_err_cnt;
+		__u32 stat_kpp_err_cnt;
+	};
+	union {
+		__u32 stat_decrypt_cnt;
+		__u32 stat_decompress_cnt;
+		__u32 stat_seed_cnt;
+		__u32 stat_generate_public_key_cnt;
+	};
+	union {
+		__u64 stat_decrypt_tlen;
+		__u64 stat_decompress_tlen;
+	};
+	union {
+		__u32 stat_verify_cnt;
+		__u32 stat_compute_shared_secret_cnt;
+	};
+	__u32 stat_sign_cnt;
+};
+
 struct crypto_report_larval {
 	char type[CRYPTO_MAX_NAME];
 };
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 73e01918f996..a441ea1bfe6d 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -279,8 +279,8 @@ struct fsxattr {
 #define FS_ENCRYPTION_MODE_AES_256_CTS		4
 #define FS_ENCRYPTION_MODE_AES_128_CBC		5
 #define FS_ENCRYPTION_MODE_AES_128_CTS		6
-#define FS_ENCRYPTION_MODE_SPECK128_256_XTS	7
-#define FS_ENCRYPTION_MODE_SPECK128_256_CTS	8
+#define FS_ENCRYPTION_MODE_SPECK128_256_XTS	7 /* Removed, do not use. */
+#define FS_ENCRYPTION_MODE_SPECK128_256_CTS	8 /* Removed, do not use. */
 
 struct fscrypt_policy {
 	__u8 version;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 251be353f950..2b7a652c9fa4 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -420,13 +420,19 @@ struct kvm_run {
 struct kvm_coalesced_mmio_zone {
 	__u64 addr;
 	__u32 size;
-	__u32 pad;
+	union {
+		__u32 pad;
+		__u32 pio;
+	};
 };
 
 struct kvm_coalesced_mmio {
 	__u64 phys_addr;
 	__u32 len;
-	__u32 pad;
+	union {
+		__u32 pad;
+		__u32 pio;
+	};
 	__u8  data[8];
 };
 
@@ -719,6 +725,7 @@ struct kvm_ppc_one_seg_page_size {
 
 #define KVM_PPC_PAGE_SIZES_REAL		0x00000001
 #define KVM_PPC_1T_SEGMENTS		0x00000002
+#define KVM_PPC_NO_HASH			0x00000004
 
 struct kvm_ppc_smmu_info {
 	__u64 flags;
@@ -751,6 +758,15 @@ struct kvm_ppc_resize_hpt {
 #define KVM_S390_SIE_PAGE_OFFSET 1
 
 /*
+ * On arm64, machine type can be used to request the physical
+ * address size for the VM. Bits[7-0] are reserved for the guest
+ * PA size shift (i.e, log2(PA_Size)). For backward compatibility,
+ * value 0 implies the default IPA size, 40bits.
+ */
+#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK	0xffULL
+#define KVM_VM_TYPE_ARM_IPA_SIZE(x)		\
+	((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
+/*
  * ioctls for /dev/kvm fds:
  */
 #define KVM_GET_API_VERSION       _IO(KVMIO,   0x00)
@@ -953,6 +969,12 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_NESTED_STATE 157
 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158
 #define KVM_CAP_MSR_PLATFORM_INFO 159
+#define KVM_CAP_PPC_NESTED_HV 160
+#define KVM_CAP_HYPERV_SEND_IPI 161
+#define KVM_CAP_COALESCED_PIO 162
+#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
+#define KVM_CAP_EXCEPTION_PAYLOAD 164
+#define KVM_CAP_ARM_VM_IPA_SIZE 165
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/include/uapi/linux/usb/tmc.h b/include/uapi/linux/usb/tmc.h
index 729af2f861a4..fdd4d88a7b95 100644
--- a/include/uapi/linux/usb/tmc.h
+++ b/include/uapi/linux/usb/tmc.h
@@ -4,6 +4,7 @@
  * Copyright (C) 2008 Novell, Inc.
  * Copyright (C) 2008 Greg Kroah-Hartman <gregkh@suse.de>
  * Copyright (C) 2015 Dave Penkler <dpenkler@gmail.com>
+ * Copyright (C) 2018 IVI Foundation, Inc.
  *
  * This file holds USB constants defined by the USB Device Class
  * and USB488 Subclass Definitions for Test and Measurement devices
@@ -40,11 +41,38 @@
 #define USBTMC488_REQUEST_GOTO_LOCAL			161
 #define USBTMC488_REQUEST_LOCAL_LOCKOUT			162
 
+struct usbtmc_request {
+	__u8 bRequestType;
+	__u8 bRequest;
+	__u16 wValue;
+	__u16 wIndex;
+	__u16 wLength;
+} __attribute__ ((packed));
+
+struct usbtmc_ctrlrequest {
+	struct usbtmc_request req;
+	void __user *data; /* pointer to user space */
+} __attribute__ ((packed));
+
 struct usbtmc_termchar {
 	__u8 term_char;
 	__u8 term_char_enabled;
 } __attribute__ ((packed));
 
+/*
+ * usbtmc_message->flags:
+ */
+#define USBTMC_FLAG_ASYNC		0x0001
+#define USBTMC_FLAG_APPEND		0x0002
+#define USBTMC_FLAG_IGNORE_TRAILER	0x0004
+
+struct usbtmc_message {
+	__u32 transfer_size; /* size of bytes to transfer */
+	__u32 transferred; /* size of received/written bytes */
+	__u32 flags; /* bit 0: 0 = synchronous; 1 = asynchronous */
+	void __user *message; /* pointer to header and data in user space */
+} __attribute__ ((packed));
+
 /* Request values for USBTMC driver's ioctl entry point */
 #define USBTMC_IOC_NR			91
 #define USBTMC_IOCTL_INDICATOR_PULSE	_IO(USBTMC_IOC_NR, 1)
@@ -53,10 +81,15 @@ struct usbtmc_termchar {
 #define USBTMC_IOCTL_ABORT_BULK_IN	_IO(USBTMC_IOC_NR, 4)
 #define USBTMC_IOCTL_CLEAR_OUT_HALT	_IO(USBTMC_IOC_NR, 6)
 #define USBTMC_IOCTL_CLEAR_IN_HALT	_IO(USBTMC_IOC_NR, 7)
+#define USBTMC_IOCTL_CTRL_REQUEST	_IOWR(USBTMC_IOC_NR, 8, struct usbtmc_ctrlrequest)
 #define USBTMC_IOCTL_GET_TIMEOUT	_IOR(USBTMC_IOC_NR, 9, __u32)
 #define USBTMC_IOCTL_SET_TIMEOUT	_IOW(USBTMC_IOC_NR, 10, __u32)
 #define USBTMC_IOCTL_EOM_ENABLE	        _IOW(USBTMC_IOC_NR, 11, __u8)
 #define USBTMC_IOCTL_CONFIG_TERMCHAR	_IOW(USBTMC_IOC_NR, 12, struct usbtmc_termchar)
+#define USBTMC_IOCTL_WRITE		_IOWR(USBTMC_IOC_NR, 13, struct usbtmc_message)
+#define USBTMC_IOCTL_READ		_IOWR(USBTMC_IOC_NR, 14, struct usbtmc_message)
+#define USBTMC_IOCTL_WRITE_RESULT	_IOWR(USBTMC_IOC_NR, 15, __u32)
+#define USBTMC_IOCTL_API_VERSION	_IOR(USBTMC_IOC_NR, 16, __u32)
 
 #define USBTMC488_IOCTL_GET_CAPS	_IOR(USBTMC_IOC_NR, 17, unsigned char)
 #define USBTMC488_IOCTL_READ_STB	_IOR(USBTMC_IOC_NR, 18, unsigned char)
@@ -64,6 +97,14 @@ struct usbtmc_termchar {
 #define USBTMC488_IOCTL_GOTO_LOCAL	_IO(USBTMC_IOC_NR, 20)
 #define USBTMC488_IOCTL_LOCAL_LOCKOUT	_IO(USBTMC_IOC_NR, 21)
 #define USBTMC488_IOCTL_TRIGGER		_IO(USBTMC_IOC_NR, 22)
+#define USBTMC488_IOCTL_WAIT_SRQ	_IOW(USBTMC_IOC_NR, 23, __u32)
+
+#define USBTMC_IOCTL_MSG_IN_ATTR	_IOR(USBTMC_IOC_NR, 24, __u8)
+#define USBTMC_IOCTL_AUTO_ABORT		_IOW(USBTMC_IOC_NR, 25, __u8)
+
+/* Cancel and cleanup asynchronous calls */
+#define USBTMC_IOCTL_CANCEL_IO		_IO(USBTMC_IOC_NR, 35)
+#define USBTMC_IOCTL_CLEANUP_IO		_IO(USBTMC_IOC_NR, 36)
 
 /* Driver encoded usb488 capabilities */
 #define USBTMC488_CAPABILITY_TRIGGER         1
diff --git a/include/uapi/linux/usb/video.h b/include/uapi/linux/usb/video.h
index ff6cc6cb4227..d854cb19c42c 100644
--- a/include/uapi/linux/usb/video.h
+++ b/include/uapi/linux/usb/video.h
@@ -192,14 +192,14 @@ struct uvc_descriptor_header {
 
 /* 3.7.2. Video Control Interface Header Descriptor */
 struct uvc_header_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u16 bcdUVC;
-	__u16 wTotalLength;
-	__u32 dwClockFrequency;
-	__u8  bInCollection;
-	__u8  baInterfaceNr[];
+	__u8   bLength;
+	__u8   bDescriptorType;
+	__u8   bDescriptorSubType;
+	__le16 bcdUVC;
+	__le16 wTotalLength;
+	__le32 dwClockFrequency;
+	__u8   bInCollection;
+	__u8   baInterfaceNr[];
 } __attribute__((__packed__));
 
 #define UVC_DT_HEADER_SIZE(n)				(12+(n))
@@ -209,57 +209,57 @@ struct uvc_header_descriptor {
 
 #define DECLARE_UVC_HEADER_DESCRIPTOR(n)		\
 struct UVC_HEADER_DESCRIPTOR(n) {			\
-	__u8  bLength;					\
-	__u8  bDescriptorType;				\
-	__u8  bDescriptorSubType;			\
-	__u16 bcdUVC;					\
-	__u16 wTotalLength;				\
-	__u32 dwClockFrequency;				\
-	__u8  bInCollection;				\
-	__u8  baInterfaceNr[n];				\
+	__u8   bLength;					\
+	__u8   bDescriptorType;				\
+	__u8   bDescriptorSubType;			\
+	__le16 bcdUVC;					\
+	__le16 wTotalLength;				\
+	__le32 dwClockFrequency;			\
+	__u8   bInCollection;				\
+	__u8   baInterfaceNr[n];			\
 } __attribute__ ((packed))
 
 /* 3.7.2.1. Input Terminal Descriptor */
 struct uvc_input_terminal_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bTerminalID;
-	__u16 wTerminalType;
-	__u8  bAssocTerminal;
-	__u8  iTerminal;
+	__u8   bLength;
+	__u8   bDescriptorType;
+	__u8   bDescriptorSubType;
+	__u8   bTerminalID;
+	__le16 wTerminalType;
+	__u8   bAssocTerminal;
+	__u8   iTerminal;
 } __attribute__((__packed__));
 
 #define UVC_DT_INPUT_TERMINAL_SIZE			8
 
 /* 3.7.2.2. Output Terminal Descriptor */
 struct uvc_output_terminal_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bTerminalID;
-	__u16 wTerminalType;
-	__u8  bAssocTerminal;
-	__u8  bSourceID;
-	__u8  iTerminal;
+	__u8   bLength;
+	__u8   bDescriptorType;
+	__u8   bDescriptorSubType;
+	__u8   bTerminalID;
+	__le16 wTerminalType;
+	__u8   bAssocTerminal;
+	__u8   bSourceID;
+	__u8   iTerminal;
 } __attribute__((__packed__));
 
 #define UVC_DT_OUTPUT_TERMINAL_SIZE			9
 
 /* 3.7.2.3. Camera Terminal Descriptor */
 struct uvc_camera_terminal_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bTerminalID;
-	__u16 wTerminalType;
-	__u8  bAssocTerminal;
-	__u8  iTerminal;
-	__u16 wObjectiveFocalLengthMin;
-	__u16 wObjectiveFocalLengthMax;
-	__u16 wOcularFocalLength;
-	__u8  bControlSize;
-	__u8  bmControls[3];
+	__u8   bLength;
+	__u8   bDescriptorType;
+	__u8   bDescriptorSubType;
+	__u8   bTerminalID;
+	__le16 wTerminalType;
+	__u8   bAssocTerminal;
+	__u8   iTerminal;
+	__le16 wObjectiveFocalLengthMin;
+	__le16 wObjectiveFocalLengthMax;
+	__le16 wOcularFocalLength;
+	__u8   bControlSize;
+	__u8   bmControls[3];
 } __attribute__((__packed__));
 
 #define UVC_DT_CAMERA_TERMINAL_SIZE(n)			(15+(n))
@@ -293,15 +293,15 @@ struct UVC_SELECTOR_UNIT_DESCRIPTOR(n) {		\
 
 /* 3.7.2.5. Processing Unit Descriptor */
 struct uvc_processing_unit_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bUnitID;
-	__u8  bSourceID;
-	__u16 wMaxMultiplier;
-	__u8  bControlSize;
-	__u8  bmControls[2];
-	__u8  iProcessing;
+	__u8   bLength;
+	__u8   bDescriptorType;
+	__u8   bDescriptorSubType;
+	__u8   bUnitID;
+	__u8   bSourceID;
+	__le16 wMaxMultiplier;
+	__u8   bControlSize;
+	__u8   bmControls[2];
+	__u8   iProcessing;
 } __attribute__((__packed__));
 
 #define UVC_DT_PROCESSING_UNIT_SIZE(n)			(9+(n))
@@ -343,29 +343,29 @@ struct UVC_EXTENSION_UNIT_DESCRIPTOR(p, n) {		\
 
 /* 3.8.2.2. Video Control Interrupt Endpoint Descriptor */
 struct uvc_control_endpoint_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u16 wMaxTransferSize;
+	__u8   bLength;
+	__u8   bDescriptorType;
+	__u8   bDescriptorSubType;
+	__le16 wMaxTransferSize;
 } __attribute__((__packed__));
 
 #define UVC_DT_CONTROL_ENDPOINT_SIZE			5
 
 /* 3.9.2.1. Input Header Descriptor */
 struct uvc_input_header_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bNumFormats;
-	__u16 wTotalLength;
-	__u8  bEndpointAddress;
-	__u8  bmInfo;
-	__u8  bTerminalLink;
-	__u8  bStillCaptureMethod;
-	__u8  bTriggerSupport;
-	__u8  bTriggerUsage;
-	__u8  bControlSize;
-	__u8  bmaControls[];
+	__u8   bLength;
+	__u8   bDescriptorType;
+	__u8   bDescriptorSubType;
+	__u8   bNumFormats;
+	__le16 wTotalLength;
+	__u8   bEndpointAddress;
+	__u8   bmInfo;
+	__u8   bTerminalLink;
+	__u8   bStillCaptureMethod;
+	__u8   bTriggerSupport;
+	__u8   bTriggerUsage;
+	__u8   bControlSize;
+	__u8   bmaControls[];
 } __attribute__((__packed__));
 
 #define UVC_DT_INPUT_HEADER_SIZE(n, p)			(13+(n*p))
@@ -375,32 +375,32 @@ struct uvc_input_header_descriptor {
 
 #define DECLARE_UVC_INPUT_HEADER_DESCRIPTOR(n, p)	\
 struct UVC_INPUT_HEADER_DESCRIPTOR(n, p) {		\
-	__u8  bLength;					\
-	__u8  bDescriptorType;				\
-	__u8  bDescriptorSubType;			\
-	__u8  bNumFormats;				\
-	__u16 wTotalLength;				\
-	__u8  bEndpointAddress;				\
-	__u8  bmInfo;					\
-	__u8  bTerminalLink;				\
-	__u8  bStillCaptureMethod;			\
-	__u8  bTriggerSupport;				\
-	__u8  bTriggerUsage;				\
-	__u8  bControlSize;				\
-	__u8  bmaControls[p][n];			\
+	__u8   bLength;					\
+	__u8   bDescriptorType;				\
+	__u8   bDescriptorSubType;			\
+	__u8   bNumFormats;				\
+	__le16 wTotalLength;				\
+	__u8   bEndpointAddress;			\
+	__u8   bmInfo;					\
+	__u8   bTerminalLink;				\
+	__u8   bStillCaptureMethod;			\
+	__u8   bTriggerSupport;				\
+	__u8   bTriggerUsage;				\
+	__u8   bControlSize;				\
+	__u8   bmaControls[p][n];			\
 } __attribute__ ((packed))
 
 /* 3.9.2.2. Output Header Descriptor */
 struct uvc_output_header_descriptor {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bNumFormats;
-	__u16 wTotalLength;
-	__u8  bEndpointAddress;
-	__u8  bTerminalLink;
-	__u8  bControlSize;
-	__u8  bmaControls[];
+	__u8   bLength;
+	__u8   bDescriptorType;
+	__u8   bDescriptorSubType;
+	__u8   bNumFormats;
+	__le16 wTotalLength;
+	__u8   bEndpointAddress;
+	__u8   bTerminalLink;
+	__u8   bControlSize;
+	__u8   bmaControls[];
 } __attribute__((__packed__));
 
 #define UVC_DT_OUTPUT_HEADER_SIZE(n, p)			(9+(n*p))
@@ -410,15 +410,15 @@ struct uvc_output_header_descriptor {
 
 #define DECLARE_UVC_OUTPUT_HEADER_DESCRIPTOR(n, p)	\
 struct UVC_OUTPUT_HEADER_DESCRIPTOR(n, p) {		\
-	__u8  bLength;					\
-	__u8  bDescriptorType;				\
-	__u8  bDescriptorSubType;			\
-	__u8  bNumFormats;				\
-	__u16 wTotalLength;				\
-	__u8  bEndpointAddress;				\
-	__u8  bTerminalLink;				\
-	__u8  bControlSize;				\
-	__u8  bmaControls[p][n];			\
+	__u8   bLength;					\
+	__u8   bDescriptorType;				\
+	__u8   bDescriptorSubType;			\
+	__u8   bNumFormats;				\
+	__le16 wTotalLength;				\
+	__u8   bEndpointAddress;			\
+	__u8   bTerminalLink;				\
+	__u8   bControlSize;				\
+	__u8   bmaControls[p][n];			\
 } __attribute__ ((packed))
 
 /* 3.9.2.6. Color matching descriptor */
@@ -473,19 +473,19 @@ struct uvc_format_uncompressed {
 
 /* Uncompressed Payload - 3.1.2. Uncompressed Video Frame Descriptor */
 struct uvc_frame_uncompressed {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bFrameIndex;
-	__u8  bmCapabilities;
-	__u16 wWidth;
-	__u16 wHeight;
-	__u32 dwMinBitRate;
-	__u32 dwMaxBitRate;
-	__u32 dwMaxVideoFrameBufferSize;
-	__u32 dwDefaultFrameInterval;
-	__u8  bFrameIntervalType;
-	__u32 dwFrameInterval[];
+	__u8   bLength;
+	__u8   bDescriptorType;
+	__u8   bDescriptorSubType;
+	__u8   bFrameIndex;
+	__u8   bmCapabilities;
+	__le16 wWidth;
+	__le16 wHeight;
+	__le32 dwMinBitRate;
+	__le32 dwMaxBitRate;
+	__le32 dwMaxVideoFrameBufferSize;
+	__le32 dwDefaultFrameInterval;
+	__u8   bFrameIntervalType;
+	__le32 dwFrameInterval[];
 } __attribute__((__packed__));
 
 #define UVC_DT_FRAME_UNCOMPRESSED_SIZE(n)		(26+4*(n))
@@ -495,19 +495,19 @@ struct uvc_frame_uncompressed {
 
 #define DECLARE_UVC_FRAME_UNCOMPRESSED(n)		\
 struct UVC_FRAME_UNCOMPRESSED(n) {			\
-	__u8  bLength;					\
-	__u8  bDescriptorType;				\
-	__u8  bDescriptorSubType;			\
-	__u8  bFrameIndex;				\
-	__u8  bmCapabilities;				\
-	__u16 wWidth;					\
-	__u16 wHeight;					\
-	__u32 dwMinBitRate;				\
-	__u32 dwMaxBitRate;				\
-	__u32 dwMaxVideoFrameBufferSize;		\
-	__u32 dwDefaultFrameInterval;			\
-	__u8  bFrameIntervalType;			\
-	__u32 dwFrameInterval[n];			\
+	__u8   bLength;					\
+	__u8   bDescriptorType;				\
+	__u8   bDescriptorSubType;			\
+	__u8   bFrameIndex;				\
+	__u8   bmCapabilities;				\
+	__le16 wWidth;					\
+	__le16 wHeight;					\
+	__le32 dwMinBitRate;				\
+	__le32 dwMaxBitRate;				\
+	__le32 dwMaxVideoFrameBufferSize;		\
+	__le32 dwDefaultFrameInterval;			\
+	__u8   bFrameIntervalType;			\
+	__le32 dwFrameInterval[n];			\
 } __attribute__ ((packed))
 
 /* MJPEG Payload - 3.1.1. MJPEG Video Format Descriptor */
@@ -529,19 +529,19 @@ struct uvc_format_mjpeg {
 
 /* MJPEG Payload - 3.1.2. MJPEG Video Frame Descriptor */
 struct uvc_frame_mjpeg {
-	__u8  bLength;
-	__u8  bDescriptorType;
-	__u8  bDescriptorSubType;
-	__u8  bFrameIndex;
-	__u8  bmCapabilities;
-	__u16 wWidth;
-	__u16 wHeight;
-	__u32 dwMinBitRate;
-	__u32 dwMaxBitRate;
-	__u32 dwMaxVideoFrameBufferSize;
-	__u32 dwDefaultFrameInterval;
-	__u8  bFrameIntervalType;
-	__u32 dwFrameInterval[];
+	__u8   bLength;
+	__u8   bDescriptorType;
+	__u8   bDescriptorSubType;
+	__u8   bFrameIndex;
+	__u8   bmCapabilities;
+	__le16 wWidth;
+	__le16 wHeight;
+	__le32 dwMinBitRate;
+	__le32 dwMaxBitRate;
+	__le32 dwMaxVideoFrameBufferSize;
+	__le32 dwDefaultFrameInterval;
+	__u8   bFrameIntervalType;
+	__le32 dwFrameInterval[];
 } __attribute__((__packed__));
 
 #define UVC_DT_FRAME_MJPEG_SIZE(n)			(26+4*(n))
@@ -551,19 +551,19 @@ struct uvc_frame_mjpeg {
 
 #define DECLARE_UVC_FRAME_MJPEG(n)			\
 struct UVC_FRAME_MJPEG(n) {				\
-	__u8  bLength;					\
-	__u8  bDescriptorType;				\
-	__u8  bDescriptorSubType;			\
-	__u8  bFrameIndex;				\
-	__u8  bmCapabilities;				\
-	__u16 wWidth;					\
-	__u16 wHeight;					\
-	__u32 dwMinBitRate;				\
-	__u32 dwMaxBitRate;				\
-	__u32 dwMaxVideoFrameBufferSize;		\
-	__u32 dwDefaultFrameInterval;			\
-	__u8  bFrameIntervalType;			\
-	__u32 dwFrameInterval[n];			\
+	__u8   bLength;					\
+	__u8   bDescriptorType;				\
+	__u8   bDescriptorSubType;			\
+	__u8   bFrameIndex;				\
+	__u8   bmCapabilities;				\
+	__le16 wWidth;					\
+	__le16 wHeight;					\
+	__le32 dwMinBitRate;				\
+	__le32 dwMaxBitRate;				\
+	__le32 dwMaxVideoFrameBufferSize;		\
+	__le32 dwDefaultFrameInterval;			\
+	__u8   bFrameIntervalType;			\
+	__le32 dwFrameInterval[n];			\
 } __attribute__ ((packed))
 
 #endif /* __LINUX_USB_VIDEO_H */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 1aa7b82e8169..f378b9802d8b 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -200,6 +200,7 @@ struct vfio_device_info {
 #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2)	/* vfio-platform device */
 #define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)	/* vfio-amba device */
 #define VFIO_DEVICE_FLAGS_CCW	(1 << 4)	/* vfio-ccw device */
+#define VFIO_DEVICE_FLAGS_AP	(1 << 5)	/* vfio-ap device */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 };
@@ -215,6 +216,7 @@ struct vfio_device_info {
 #define VFIO_DEVICE_API_PLATFORM_STRING		"vfio-platform"
 #define VFIO_DEVICE_API_AMBA_STRING		"vfio-amba"
 #define VFIO_DEVICE_API_CCW_STRING		"vfio-ccw"
+#define VFIO_DEVICE_API_AP_STRING		"vfio-ap"
 
 /**
  * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 25a16760de2a..1254b51a551a 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -763,10 +763,28 @@ struct ib_uverbs_sge {
 	__u32 lkey;
 };
 
+enum ib_uverbs_wr_opcode {
+	IB_UVERBS_WR_RDMA_WRITE = 0,
+	IB_UVERBS_WR_RDMA_WRITE_WITH_IMM = 1,
+	IB_UVERBS_WR_SEND = 2,
+	IB_UVERBS_WR_SEND_WITH_IMM = 3,
+	IB_UVERBS_WR_RDMA_READ = 4,
+	IB_UVERBS_WR_ATOMIC_CMP_AND_SWP = 5,
+	IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD = 6,
+	IB_UVERBS_WR_LOCAL_INV = 7,
+	IB_UVERBS_WR_BIND_MW = 8,
+	IB_UVERBS_WR_SEND_WITH_INV = 9,
+	IB_UVERBS_WR_TSO = 10,
+	IB_UVERBS_WR_RDMA_READ_WITH_INV = 11,
+	IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12,
+	IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13,
+	/* Review enum ib_wr_opcode before modifying this */
+};
+
 struct ib_uverbs_send_wr {
 	__aligned_u64 wr_id;
 	__u32 num_sge;
-	__u32 opcode;
+	__u32 opcode;		/* see enum ib_uverbs_wr_opcode */
 	__u32 send_flags;
 	union {
 		__be32 imm_data;
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index addbb9c4529e..8fa9f90e2bb1 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -45,6 +45,9 @@ enum {
 	MLX5_QP_FLAG_BFREG_INDEX	= 1 << 3,
 	MLX5_QP_FLAG_TYPE_DCT		= 1 << 4,
 	MLX5_QP_FLAG_TYPE_DCI		= 1 << 5,
+	MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC = 1 << 6,
+	MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC = 1 << 7,
+	MLX5_QP_FLAG_ALLOW_SCATTER_CQE	= 1 << 8,
 };
 
 enum {
@@ -349,9 +352,22 @@ struct mlx5_ib_create_qp_rss {
 	__u32	flags;
 };
 
+enum mlx5_ib_create_qp_resp_mask {
+	MLX5_IB_CREATE_QP_RESP_MASK_TIRN = 1UL << 0,
+	MLX5_IB_CREATE_QP_RESP_MASK_TISN = 1UL << 1,
+	MLX5_IB_CREATE_QP_RESP_MASK_RQN  = 1UL << 2,
+	MLX5_IB_CREATE_QP_RESP_MASK_SQN  = 1UL << 3,
+};
+
 struct mlx5_ib_create_qp_resp {
 	__u32	bfreg_index;
 	__u32   reserved;
+	__u32	comp_mask;
+	__u32	tirn;
+	__u32	tisn;
+	__u32	rqn;
+	__u32	sqn;
+	__u32   reserved1;
 };
 
 struct mlx5_ib_alloc_mw {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 9c51801b9e64..408e220034de 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -125,6 +125,7 @@ enum mlx5_ib_flow_matcher_create_attrs {
 	MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
 	MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
 	MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
+	MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
 };
 
 enum mlx5_ib_flow_matcher_destroy_attrs {
@@ -155,6 +156,8 @@ enum mlx5_ib_create_flow_attrs {
 	MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
 	MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
 	MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
+	MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
+	MLX5_IB_ATTR_CREATE_FLOW_TAG,
 };
 
 enum mlx5_ib_destoy_flow_attrs {
@@ -166,4 +169,22 @@ enum mlx5_ib_flow_methods {
 	MLX5_IB_METHOD_DESTROY_FLOW,
 };
 
+enum mlx5_ib_flow_action_methods {
+	MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
+};
+
+enum mlx5_ib_create_flow_action_create_modify_header_attrs {
+	MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+	MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
+};
+
+enum mlx5_ib_create_flow_action_create_packet_reformat_attrs {
+	MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
+	MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
+	MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
+};
+
 #endif
diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
index 8a2fb33f3ed4..4ef62c0e8452 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -39,5 +39,17 @@ enum mlx5_ib_uapi_flow_action_flags {
 	MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA	= 1 << 0,
 };
 
+enum mlx5_ib_uapi_flow_table_type {
+	MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX     = 0x0,
+	MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX	= 0x1,
+};
+
+enum mlx5_ib_uapi_flow_action_packet_reformat_type {
+	MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 = 0x0,
+	MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x1,
+	MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x2,
+	MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3,
+};
+
 #endif
 
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index edba6351ac13..f9c41bf59efc 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -227,8 +227,9 @@ enum rdma_nldev_command {
 	RDMA_NLDEV_CMD_UNSPEC,
 
 	RDMA_NLDEV_CMD_GET, /* can dump */
+	RDMA_NLDEV_CMD_SET,
 
-	/* 2 - 4 are free to use */
+	/* 3 - 4 are free to use */
 
 	RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */
 
diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h
index 24800c6c1f32..06c34d99be85 100644
--- a/include/uapi/rdma/rdma_user_ioctl_cmds.h
+++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h
@@ -53,7 +53,7 @@ enum {
 
 struct ib_uverbs_attr {
 	__u16 attr_id;		/* command specific type attribute */
-	__u16 len;		/* only for pointers */
+	__u16 len;		/* only for pointers and IDRs array */
 	__u16 flags;		/* combination of UVERBS_ATTR_F_XXXX */
 	union {
 		struct {
@@ -63,7 +63,10 @@ struct ib_uverbs_attr {
 		__u16 reserved;
 	} attr_data;
 	union {
-		/* Used by PTR_IN/OUT, ENUM_IN and IDR */
+		/*
+		 * ptr to command, inline data, idr/fd or
+		 * ptr to __u32 array of IDRs
+		 */
 		__aligned_u64 data;
 		/* Used by FD_IN and FD_OUT */
 		__s64 data_s64;
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index cc41de3b8deb..c595bed7bfcb 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1461,10 +1461,10 @@ COMPAT_SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
 #endif
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
-static int compat_prepare_timeout(const struct compat_timespec __user *p,
+static int compat_prepare_timeout(const struct old_timespec32 __user *p,
 				   struct timespec64 *ts)
 {
-	if (compat_get_timespec64(ts, p))
+	if (get_old_timespec32(ts, p))
 		return -EFAULT;
 	if (!timespec64_valid(ts))
 		return -EINVAL;
@@ -1474,7 +1474,7 @@ static int compat_prepare_timeout(const struct compat_timespec __user *p,
 COMPAT_SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes,
 		       const char __user *, u_msg_ptr,
 		       compat_size_t, msg_len, unsigned int, msg_prio,
-		       const struct compat_timespec __user *, u_abs_timeout)
+		       const struct old_timespec32 __user *, u_abs_timeout)
 {
 	struct timespec64 ts, *p = NULL;
 	if (u_abs_timeout) {
@@ -1489,7 +1489,7 @@ COMPAT_SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes,
 COMPAT_SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes,
 		       char __user *, u_msg_ptr,
 		       compat_size_t, msg_len, unsigned int __user *, u_msg_prio,
-		       const struct compat_timespec __user *, u_abs_timeout)
+		       const struct old_timespec32 __user *, u_abs_timeout)
 {
 	struct timespec64 ts, *p = NULL;
 	if (u_abs_timeout) {
diff --git a/ipc/msg.c b/ipc/msg.c
index 883642cf2b27..0833c6405915 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -622,9 +622,9 @@ struct compat_msqid_ds {
 	struct compat_ipc_perm msg_perm;
 	compat_uptr_t msg_first;
 	compat_uptr_t msg_last;
-	compat_time_t msg_stime;
-	compat_time_t msg_rtime;
-	compat_time_t msg_ctime;
+	old_time32_t msg_stime;
+	old_time32_t msg_rtime;
+	old_time32_t msg_ctime;
 	compat_ulong_t msg_lcbytes;
 	compat_ulong_t msg_lqbytes;
 	unsigned short msg_cbytes;
diff --git a/ipc/sem.c b/ipc/sem.c
index 26f8e37fcdcb..745dc6187e84 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1698,8 +1698,8 @@ SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
 
 struct compat_semid_ds {
 	struct compat_ipc_perm sem_perm;
-	compat_time_t sem_otime;
-	compat_time_t sem_ctime;
+	old_time32_t sem_otime;
+	old_time32_t sem_ctime;
 	compat_uptr_t sem_base;
 	compat_uptr_t sem_pending;
 	compat_uptr_t sem_pending_last;
@@ -2214,11 +2214,11 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 #ifdef CONFIG_COMPAT_32BIT_TIME
 long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
 			    unsigned int nsops,
-			    const struct compat_timespec __user *timeout)
+			    const struct old_timespec32 __user *timeout)
 {
 	if (timeout) {
 		struct timespec64 ts;
-		if (compat_get_timespec64(&ts, timeout))
+		if (get_old_timespec32(&ts, timeout))
 			return -EFAULT;
 		return do_semtimedop(semid, tsems, nsops, &ts);
 	}
@@ -2227,7 +2227,7 @@ long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
 
 COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems,
 		       unsigned int, nsops,
-		       const struct compat_timespec __user *, timeout)
+		       const struct old_timespec32 __user *, timeout)
 {
 	return compat_ksys_semtimedop(semid, tsems, nsops, timeout);
 }
diff --git a/ipc/shm.c b/ipc/shm.c
index 1c65fb357395..0842411cb0e9 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1202,9 +1202,9 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 struct compat_shmid_ds {
 	struct compat_ipc_perm shm_perm;
 	int shm_segsz;
-	compat_time_t shm_atime;
-	compat_time_t shm_dtime;
-	compat_time_t shm_ctime;
+	old_time32_t shm_atime;
+	old_time32_t shm_dtime;
+	old_time32_t shm_ctime;
 	compat_ipc_pid_t shm_cpid;
 	compat_ipc_pid_t shm_lpid;
 	unsigned short shm_nattch;
diff --git a/ipc/syscall.c b/ipc/syscall.c
index 65d405f1ba0c..1ac06e3983c0 100644
--- a/ipc/syscall.c
+++ b/ipc/syscall.c
@@ -35,7 +35,7 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second,
 			        (const struct __kernel_timespec __user *)fifth);
 		else if (IS_ENABLED(CONFIG_COMPAT_32BIT_TIME))
 			return compat_ksys_semtimedop(first, ptr, second,
-			        (const struct compat_timespec __user *)fifth);
+			        (const struct old_timespec32 __user *)fifth);
 		else
 			return -ENOSYS;
 
diff --git a/ipc/util.h b/ipc/util.h
index 0a159f69b3bb..1ee81bce25e9 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -266,7 +266,7 @@ long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
 /* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */
 long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
 			    unsigned int nsops,
-			    const struct compat_timespec __user *timeout);
+			    const struct old_timespec32 __user *timeout);
 #ifdef CONFIG_COMPAT
 long compat_ksys_semctl(int semid, int semnum, int cmd, int arg);
 long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr);
diff --git a/kernel/compat.c b/kernel/compat.c
index 8e40efc2928a..089d00d0da9c 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -93,28 +93,28 @@ int compat_put_timex(struct compat_timex __user *utp, const struct timex *txc)
 	return 0;
 }
 
-static int __compat_get_timeval(struct timeval *tv, const struct compat_timeval __user *ctv)
+static int __compat_get_timeval(struct timeval *tv, const struct old_timeval32 __user *ctv)
 {
 	return (!access_ok(VERIFY_READ, ctv, sizeof(*ctv)) ||
 			__get_user(tv->tv_sec, &ctv->tv_sec) ||
 			__get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
 }
 
-static int __compat_put_timeval(const struct timeval *tv, struct compat_timeval __user *ctv)
+static int __compat_put_timeval(const struct timeval *tv, struct old_timeval32 __user *ctv)
 {
 	return (!access_ok(VERIFY_WRITE, ctv, sizeof(*ctv)) ||
 			__put_user(tv->tv_sec, &ctv->tv_sec) ||
 			__put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
 }
 
-static int __compat_get_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
+static int __compat_get_timespec(struct timespec *ts, const struct old_timespec32 __user *cts)
 {
 	return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
 			__get_user(ts->tv_sec, &cts->tv_sec) ||
 			__get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
 }
 
-static int __compat_put_timespec(const struct timespec *ts, struct compat_timespec __user *cts)
+static int __compat_put_timespec(const struct timespec *ts, struct old_timespec32 __user *cts)
 {
 	return (!access_ok(VERIFY_WRITE, cts, sizeof(*cts)) ||
 			__put_user(ts->tv_sec, &cts->tv_sec) ||
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 87a6bc2a96c0..f14c376937e5 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -14,8 +14,6 @@
 #include <linux/pfn.h>
 #include <linux/set_memory.h>
 
-#define DIRECT_MAPPING_ERROR		0
-
 /*
  * Most architectures use ZONE_DMA for the first 16 Megabytes, but
  * some use it for entirely different regions:
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 4f8a6dbf0b60..ebecaf255ea2 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -21,6 +21,7 @@
 
 #include <linux/cache.h>
 #include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/mm.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
@@ -73,13 +74,6 @@ static phys_addr_t io_tlb_start, io_tlb_end;
 static unsigned long io_tlb_nslabs;
 
 /*
- * When the IOMMU overflows we return a fallback buffer. This sets the size.
- */
-static unsigned long io_tlb_overflow = 32*1024;
-
-static phys_addr_t io_tlb_overflow_buffer;
-
-/*
  * This is a free list describing the number of free entries available from
  * each index
  */
@@ -126,7 +120,6 @@ setup_io_tlb_npages(char *str)
 	return 0;
 }
 early_param("swiotlb", setup_io_tlb_npages);
-/* make io_tlb_overflow tunable too? */
 
 unsigned long swiotlb_nr_tbl(void)
 {
@@ -194,16 +187,10 @@ void __init swiotlb_update_mem_attributes(void)
 	bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
 	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
 	memset(vaddr, 0, bytes);
-
-	vaddr = phys_to_virt(io_tlb_overflow_buffer);
-	bytes = PAGE_ALIGN(io_tlb_overflow);
-	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-	memset(vaddr, 0, bytes);
 }
 
 int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 {
-	void *v_overflow_buffer;
 	unsigned long i, bytes;
 
 	bytes = nslabs << IO_TLB_SHIFT;
@@ -213,17 +200,6 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 	io_tlb_end = io_tlb_start + bytes;
 
 	/*
-	 * Get the overflow emergency buffer
-	 */
-	v_overflow_buffer = memblock_virt_alloc_low_nopanic(
-						PAGE_ALIGN(io_tlb_overflow),
-						PAGE_SIZE);
-	if (!v_overflow_buffer)
-		return -ENOMEM;
-
-	io_tlb_overflow_buffer = __pa(v_overflow_buffer);
-
-	/*
 	 * Allocate and initialize the free list array.  This array is used
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
 	 * between io_tlb_start and io_tlb_end.
@@ -330,7 +306,6 @@ int
 swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 {
 	unsigned long i, bytes;
-	unsigned char *v_overflow_buffer;
 
 	bytes = nslabs << IO_TLB_SHIFT;
 
@@ -342,19 +317,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	memset(tlb, 0, bytes);
 
 	/*
-	 * Get the overflow emergency buffer
-	 */
-	v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
-						     get_order(io_tlb_overflow));
-	if (!v_overflow_buffer)
-		goto cleanup2;
-
-	set_memory_decrypted((unsigned long)v_overflow_buffer,
-			io_tlb_overflow >> PAGE_SHIFT);
-	memset(v_overflow_buffer, 0, io_tlb_overflow);
-	io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
-
-	/*
 	 * Allocate and initialize the free list array.  This array is used
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
 	 * between io_tlb_start and io_tlb_end.
@@ -390,10 +352,6 @@ cleanup4:
 	                                                 sizeof(int)));
 	io_tlb_list = NULL;
 cleanup3:
-	free_pages((unsigned long)v_overflow_buffer,
-		   get_order(io_tlb_overflow));
-	io_tlb_overflow_buffer = 0;
-cleanup2:
 	io_tlb_end = 0;
 	io_tlb_start = 0;
 	io_tlb_nslabs = 0;
@@ -407,8 +365,6 @@ void __init swiotlb_exit(void)
 		return;
 
 	if (late_alloc) {
-		free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
-			   get_order(io_tlb_overflow));
 		free_pages((unsigned long)io_tlb_orig_addr,
 			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
 		free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
@@ -416,8 +372,6 @@ void __init swiotlb_exit(void)
 		free_pages((unsigned long)phys_to_virt(io_tlb_start),
 			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
 	} else {
-		memblock_free_late(io_tlb_overflow_buffer,
-				   PAGE_ALIGN(io_tlb_overflow));
 		memblock_free_late(__pa(io_tlb_orig_addr),
 				   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
 		memblock_free_late(__pa(io_tlb_list),
@@ -429,7 +383,7 @@ void __init swiotlb_exit(void)
 	max_segment = 0;
 }
 
-int is_swiotlb_buffer(phys_addr_t paddr)
+static int is_swiotlb_buffer(phys_addr_t paddr)
 {
 	return paddr >= io_tlb_start && paddr < io_tlb_end;
 }
@@ -591,26 +545,6 @@ found:
 }
 
 /*
- * Allocates bounce buffer and returns its physical address.
- */
-static phys_addr_t
-map_single(struct device *hwdev, phys_addr_t phys, size_t size,
-	   enum dma_data_direction dir, unsigned long attrs)
-{
-	dma_addr_t start_dma_addr;
-
-	if (swiotlb_force == SWIOTLB_NO_FORCE) {
-		dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n",
-				     &phys);
-		return SWIOTLB_MAP_ERROR;
-	}
-
-	start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
-	return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
-				      dir, attrs);
-}
-
-/*
  * tlb_addr is the physical address of the bounce buffer to unmap.
  */
 void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
@@ -689,104 +623,32 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
 	}
 }
 
-static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
-		size_t size)
-{
-	u64 mask = DMA_BIT_MASK(32);
-
-	if (dev && dev->coherent_dma_mask)
-		mask = dev->coherent_dma_mask;
-	return addr + size - 1 <= mask;
-}
-
-static void *
-swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		unsigned long attrs)
+static dma_addr_t swiotlb_bounce_page(struct device *dev, phys_addr_t *phys,
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
-	phys_addr_t phys_addr;
-
-	if (swiotlb_force == SWIOTLB_NO_FORCE)
-		goto out_warn;
-
-	phys_addr = swiotlb_tbl_map_single(dev,
-			__phys_to_dma(dev, io_tlb_start),
-			0, size, DMA_FROM_DEVICE, attrs);
-	if (phys_addr == SWIOTLB_MAP_ERROR)
-		goto out_warn;
-
-	*dma_handle = __phys_to_dma(dev, phys_addr);
-	if (!dma_coherent_ok(dev, *dma_handle, size))
-		goto out_unmap;
-
-	memset(phys_to_virt(phys_addr), 0, size);
-	return phys_to_virt(phys_addr);
+	dma_addr_t dma_addr;
 
-out_unmap:
-	dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
-		(unsigned long long)dev->coherent_dma_mask,
-		(unsigned long long)*dma_handle);
-
-	/*
-	 * DMA_TO_DEVICE to avoid memcpy in unmap_single.
-	 * DMA_ATTR_SKIP_CPU_SYNC is optional.
-	 */
-	swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
-			DMA_ATTR_SKIP_CPU_SYNC);
-out_warn:
-	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
-		dev_warn(dev,
-			"swiotlb: coherent allocation failed, size=%zu\n",
-			size);
-		dump_stack();
+	if (unlikely(swiotlb_force == SWIOTLB_NO_FORCE)) {
+		dev_warn_ratelimited(dev,
+			"Cannot do DMA to address %pa\n", phys);
+		return DIRECT_MAPPING_ERROR;
 	}
-	return NULL;
-}
-
-static bool swiotlb_free_buffer(struct device *dev, size_t size,
-		dma_addr_t dma_addr)
-{
-	phys_addr_t phys_addr = dma_to_phys(dev, dma_addr);
 
-	WARN_ON_ONCE(irqs_disabled());
-
-	if (!is_swiotlb_buffer(phys_addr))
-		return false;
-
-	/*
-	 * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
-	 * DMA_ATTR_SKIP_CPU_SYNC is optional.
-	 */
-	swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
-				 DMA_ATTR_SKIP_CPU_SYNC);
-	return true;
-}
-
-static void
-swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
-	     int do_panic)
-{
-	if (swiotlb_force == SWIOTLB_NO_FORCE)
-		return;
-
-	/*
-	 * Ran out of IOMMU space for this operation. This is very bad.
-	 * Unfortunately the drivers cannot handle this operation properly.
-	 * unless they check for dma_mapping_error (most don't)
-	 * When the mapping is small enough return a static buffer to limit
-	 * the damage, or panic when the transfer is too big.
-	 */
-	dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n",
-			    size);
+	/* Oh well, have to allocate and map a bounce buffer. */
+	*phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start),
+			*phys, size, dir, attrs);
+	if (*phys == SWIOTLB_MAP_ERROR)
+		return DIRECT_MAPPING_ERROR;
 
-	if (size <= io_tlb_overflow || !do_panic)
-		return;
+	/* Ensure that the address returned is DMA'ble */
+	dma_addr = __phys_to_dma(dev, *phys);
+	if (unlikely(!dma_capable(dev, dma_addr, size))) {
+		swiotlb_tbl_unmap_single(dev, *phys, size, dir,
+			attrs | DMA_ATTR_SKIP_CPU_SYNC);
+		return DIRECT_MAPPING_ERROR;
+	}
 
-	if (dir == DMA_BIDIRECTIONAL)
-		panic("DMA: Random memory could be DMA accessed\n");
-	if (dir == DMA_FROM_DEVICE)
-		panic("DMA: Random memory could be DMA written\n");
-	if (dir == DMA_TO_DEVICE)
-		panic("DMA: Random memory could be DMA read\n");
+	return dma_addr;
 }
 
 /*
@@ -801,7 +663,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 			    enum dma_data_direction dir,
 			    unsigned long attrs)
 {
-	phys_addr_t map, phys = page_to_phys(page) + offset;
+	phys_addr_t phys = page_to_phys(page) + offset;
 	dma_addr_t dev_addr = phys_to_dma(dev, phys);
 
 	BUG_ON(dir == DMA_NONE);
@@ -810,28 +672,17 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE)
-		return dev_addr;
-
-	trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
-
-	/* Oh well, have to allocate and map a bounce buffer. */
-	map = map_single(dev, phys, size, dir, attrs);
-	if (map == SWIOTLB_MAP_ERROR) {
-		swiotlb_full(dev, size, dir, 1);
-		return __phys_to_dma(dev, io_tlb_overflow_buffer);
+	if (!dma_capable(dev, dev_addr, size) ||
+	    swiotlb_force == SWIOTLB_FORCE) {
+		trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
+		dev_addr = swiotlb_bounce_page(dev, &phys, size, dir, attrs);
 	}
 
-	dev_addr = __phys_to_dma(dev, map);
+	if (!dev_is_dma_coherent(dev) &&
+	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+		arch_sync_dma_for_device(dev, phys, size, dir);
 
-	/* Ensure that the address returned is DMA'ble */
-	if (dma_capable(dev, dev_addr, size))
-		return dev_addr;
-
-	attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-	swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
-
-	return __phys_to_dma(dev, io_tlb_overflow_buffer);
+	return dev_addr;
 }
 
 /*
@@ -842,14 +693,18 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
  * After this call, reads by the cpu to the buffer are guaranteed to see
  * whatever the device wrote there.
  */
-static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
-			 size_t size, enum dma_data_direction dir,
-			 unsigned long attrs)
+void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
+			size_t size, enum dma_data_direction dir,
+			unsigned long attrs)
 {
 	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 
+	if (!dev_is_dma_coherent(hwdev) &&
+	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+		arch_sync_dma_for_cpu(hwdev, paddr, size, dir);
+
 	if (is_swiotlb_buffer(paddr)) {
 		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
 		return;
@@ -867,13 +722,6 @@ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 	dma_mark_clean(phys_to_virt(paddr), size);
 }
 
-void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
-			size_t size, enum dma_data_direction dir,
-			unsigned long attrs)
-{
-	unmap_single(hwdev, dev_addr, size, dir, attrs);
-}
-
 /*
  * Make physical memory consistent for a single streaming mode DMA translation
  * after a transfer.
@@ -893,15 +741,17 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 
 	BUG_ON(dir == DMA_NONE);
 
-	if (is_swiotlb_buffer(paddr)) {
+	if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_CPU)
+		arch_sync_dma_for_cpu(hwdev, paddr, size, dir);
+
+	if (is_swiotlb_buffer(paddr))
 		swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
-		return;
-	}
 
-	if (dir != DMA_FROM_DEVICE)
-		return;
+	if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_DEVICE)
+		arch_sync_dma_for_device(hwdev, paddr, size, dir);
 
-	dma_mark_clean(phys_to_virt(paddr), size);
+	if (!is_swiotlb_buffer(paddr) && dir == DMA_FROM_DEVICE)
+		dma_mark_clean(phys_to_virt(paddr), size);
 }
 
 void
@@ -925,48 +775,31 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
  * appropriate dma address and length.  They are obtained via
  * sg_dma_{address,length}(SG).
  *
- * NOTE: An implementation may be able to use a smaller number of
- *       DMA address/length pairs than there are SG table elements.
- *       (for example via virtual mapping capabilities)
- *       The routine returns the number of addr/length pairs actually
- *       used, at most nents.
- *
  * Device ownership issues as mentioned above for swiotlb_map_page are the
  * same here.
  */
 int
-swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
+swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems,
 		     enum dma_data_direction dir, unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
 
-	BUG_ON(dir == DMA_NONE);
-
 	for_each_sg(sgl, sg, nelems, i) {
-		phys_addr_t paddr = sg_phys(sg);
-		dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
-
-		if (swiotlb_force == SWIOTLB_FORCE ||
-		    !dma_capable(hwdev, dev_addr, sg->length)) {
-			phys_addr_t map = map_single(hwdev, sg_phys(sg),
-						     sg->length, dir, attrs);
-			if (map == SWIOTLB_MAP_ERROR) {
-				/* Don't panic here, we expect map_sg users
-				   to do proper error handling. */
-				swiotlb_full(hwdev, sg->length, dir, 0);
-				attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-				swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
-						       attrs);
-				sg_dma_len(sgl) = 0;
-				return 0;
-			}
-			sg->dma_address = __phys_to_dma(hwdev, map);
-		} else
-			sg->dma_address = dev_addr;
+		sg->dma_address = swiotlb_map_page(dev, sg_page(sg), sg->offset,
+				sg->length, dir, attrs);
+		if (sg->dma_address == DIRECT_MAPPING_ERROR)
+			goto out_error;
 		sg_dma_len(sg) = sg->length;
 	}
+
 	return nelems;
+
+out_error:
+	swiotlb_unmap_sg_attrs(dev, sgl, i, dir,
+			attrs | DMA_ATTR_SKIP_CPU_SYNC);
+	sg_dma_len(sgl) = 0;
+	return 0;
 }
 
 /*
@@ -984,7 +817,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i)
-		unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir,
+		swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg), dir,
 			     attrs);
 }
 
@@ -1022,12 +855,6 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
 }
 
-int
-swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
-{
-	return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
-}
-
 /*
  * Return whether the given device DMA address mask can be supported
  * properly.  For example, if your device can only drive the low 24-bits
@@ -1040,39 +867,10 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask)
 	return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
 }
 
-void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		gfp_t gfp, unsigned long attrs)
-{
-	void *vaddr;
-
-	/* temporary workaround: */
-	if (gfp & __GFP_NOWARN)
-		attrs |= DMA_ATTR_NO_WARN;
-
-	/*
-	 * Don't print a warning when the first allocation attempt fails.
-	 * swiotlb_alloc_coherent() will print a warning when the DMA memory
-	 * allocation ultimately failed.
-	 */
-	gfp |= __GFP_NOWARN;
-
-	vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
-	if (!vaddr)
-		vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs);
-	return vaddr;
-}
-
-void swiotlb_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_addr, unsigned long attrs)
-{
-	if (!swiotlb_free_buffer(dev, size, dma_addr))
-		dma_direct_free(dev, size, vaddr, dma_addr, attrs);
-}
-
 const struct dma_map_ops swiotlb_dma_ops = {
-	.mapping_error		= swiotlb_dma_mapping_error,
-	.alloc			= swiotlb_alloc,
-	.free			= swiotlb_free,
+	.mapping_error		= dma_direct_mapping_error,
+	.alloc			= dma_direct_alloc,
+	.free			= dma_direct_free,
 	.sync_single_for_cpu	= swiotlb_sync_single_for_cpu,
 	.sync_single_for_device	= swiotlb_sync_single_for_device,
 	.sync_sg_for_cpu	= swiotlb_sync_sg_for_cpu,
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 83f830acbb5f..410a77a8f6e2 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -173,7 +173,7 @@ err_unlock:
 }
 
 COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
-		struct compat_timespec __user *, utime, u32 __user *, uaddr2,
+		struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
 		u32, val3)
 {
 	struct timespec ts;
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 5d9fc01b60a6..3366d11c3e02 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -183,7 +183,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 		 * unhappy about. Replace them with ':', which does
 		 * the trick and is not as offensive as '\'...
 		 */
-		name = kstrdup(of_node_full_name(of_node), GFP_KERNEL);
+		name = kasprintf(GFP_KERNEL, "%pOF", of_node);
 		if (!name) {
 			kfree(domain);
 			return NULL;
@@ -867,7 +867,7 @@ void irq_dispose_mapping(unsigned int virq)
 EXPORT_SYMBOL_GPL(irq_dispose_mapping);
 
 /**
- * irq_find_mapping() - Find a linux irq from an hw irq number.
+ * irq_find_mapping() - Find a linux irq from a hw irq number.
  * @domain: domain owning this hardware interrupt
  * @hwirq: hardware irq number in that domain space
  */
@@ -1741,6 +1741,7 @@ static void debugfs_add_domain_dir(struct irq_domain *d)
 static void debugfs_remove_domain_dir(struct irq_domain *d)
 {
 	debugfs_remove(d->debugfs_file);
+	d->debugfs_file = NULL;
 }
 
 void __init irq_domain_debugfs_init(struct dentry *root)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index fb86146037a7..9dbdccab3b6a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -927,6 +927,9 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
 
 	local_bh_disable();
 	ret = action->thread_fn(action->irq, action->dev_id);
+	if (ret == IRQ_HANDLED)
+		atomic_inc(&desc->threads_handled);
+
 	irq_finalize_oneshot(desc, action);
 	local_bh_enable();
 	return ret;
@@ -943,6 +946,9 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc,
 	irqreturn_t ret;
 
 	ret = action->thread_fn(action->irq, action->dev_id);
+	if (ret == IRQ_HANDLED)
+		atomic_inc(&desc->threads_handled);
+
 	irq_finalize_oneshot(desc, action);
 	return ret;
 }
@@ -1020,8 +1026,6 @@ static int irq_thread(void *data)
 		irq_thread_check_affinity(desc, action);
 
 		action_ret = handler_fn(desc, action);
-		if (action_ret == IRQ_HANDLED)
-			atomic_inc(&desc->threads_handled);
 		if (action_ret == IRQ_WAKE_THREAD)
 			irq_wake_secondary(desc, action);
 
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 9bf5404397e0..b77150ad1965 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -16,6 +16,8 @@
  *	01Mar01 Andrew Morton
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/tty.h>
@@ -192,16 +194,7 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
 	return 0;
 }
 
-/*
- * Number of registered extended console drivers.
- *
- * If extended consoles are present, in-kernel cont reassembly is disabled
- * and each fragment is stored as a separate log entry with proper
- * continuation flag so that every emitted message has full metadata.  This
- * doesn't change the result for regular consoles or /proc/kmsg.  For
- * /dev/kmsg, as long as the reader concatenates messages according to
- * consecutive continuation flags, the end result should be the same too.
- */
+/* Number of registered extended console drivers. */
 static int nr_ext_console_drivers;
 
 /*
@@ -423,6 +416,7 @@ static u32 log_next_idx;
 /* the next printk record to write to the console */
 static u64 console_seq;
 static u32 console_idx;
+static u64 exclusive_console_stop_seq;
 
 /* the next printk record to read after the last 'clear' command */
 static u64 clear_seq;
@@ -437,6 +431,7 @@ static u32 clear_idx;
 /* record buffer */
 #define LOG_ALIGN __alignof__(struct printk_log)
 #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
+#define LOG_BUF_LEN_MAX (u32)(1 << 31)
 static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
 static char *log_buf = __log_buf;
 static u32 log_buf_len = __LOG_BUF_LEN;
@@ -1037,18 +1032,28 @@ void log_buf_vmcoreinfo_setup(void)
 static unsigned long __initdata new_log_buf_len;
 
 /* we practice scaling the ring buffer by powers of 2 */
-static void __init log_buf_len_update(unsigned size)
+static void __init log_buf_len_update(u64 size)
 {
+	if (size > (u64)LOG_BUF_LEN_MAX) {
+		size = (u64)LOG_BUF_LEN_MAX;
+		pr_err("log_buf over 2G is not supported.\n");
+	}
+
 	if (size)
 		size = roundup_pow_of_two(size);
 	if (size > log_buf_len)
-		new_log_buf_len = size;
+		new_log_buf_len = (unsigned long)size;
 }
 
 /* save requested log_buf_len since it's too early to process it */
 static int __init log_buf_len_setup(char *str)
 {
-	unsigned size = memparse(str, &str);
+	u64 size;
+
+	if (!str)
+		return -EINVAL;
+
+	size = memparse(str, &str);
 
 	log_buf_len_update(size);
 
@@ -1093,7 +1098,7 @@ void __init setup_log_buf(int early)
 {
 	unsigned long flags;
 	char *new_log_buf;
-	int free;
+	unsigned int free;
 
 	if (log_buf != __log_buf)
 		return;
@@ -1113,7 +1118,7 @@ void __init setup_log_buf(int early)
 	}
 
 	if (unlikely(!new_log_buf)) {
-		pr_err("log_buf_len: %ld bytes not available\n",
+		pr_err("log_buf_len: %lu bytes not available\n",
 			new_log_buf_len);
 		return;
 	}
@@ -1126,8 +1131,8 @@ void __init setup_log_buf(int early)
 	memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
 	logbuf_unlock_irqrestore(flags);
 
-	pr_info("log_buf_len: %d bytes\n", log_buf_len);
-	pr_info("early log buf free: %d(%d%%)\n",
+	pr_info("log_buf_len: %u bytes\n", log_buf_len);
+	pr_info("early log buf free: %u(%u%%)\n",
 		free, (free * 100) / __LOG_BUF_LEN);
 }
 
@@ -1767,12 +1772,8 @@ static void cont_flush(void)
 
 static bool cont_add(int facility, int level, enum log_flags flags, const char *text, size_t len)
 {
-	/*
-	 * If ext consoles are present, flush and skip in-kernel
-	 * continuation.  See nr_ext_console_drivers definition.  Also, if
-	 * the line gets too long, split it up in separate records.
-	 */
-	if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) {
+	/* If the line gets too long, split it up in separate records. */
+	if (cont.len + len > sizeof(cont.buf)) {
 		cont_flush();
 		return false;
 	}
@@ -1795,9 +1796,6 @@ static bool cont_add(int facility, int level, enum log_flags flags, const char *
 		cont_flush();
 	}
 
-	if (cont.len > (sizeof(cont.buf) * 80) / 100)
-		cont_flush();
-
 	return true;
 }
 
@@ -1889,8 +1887,9 @@ asmlinkage int vprintk_emit(int facility, int level,
 			    const char *fmt, va_list args)
 {
 	int printed_len;
-	bool in_sched = false;
+	bool in_sched = false, pending_output;
 	unsigned long flags;
+	u64 curr_log_seq;
 
 	if (level == LOGLEVEL_SCHED) {
 		level = LOGLEVEL_DEFAULT;
@@ -1902,11 +1901,13 @@ asmlinkage int vprintk_emit(int facility, int level,
 
 	/* This stops the holder of console_sem just where we want him */
 	logbuf_lock_irqsave(flags);
+	curr_log_seq = log_next_seq;
 	printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args);
+	pending_output = (curr_log_seq != log_next_seq);
 	logbuf_unlock_irqrestore(flags);
 
 	/* If called from the scheduler, we can not call up(). */
-	if (!in_sched) {
+	if (!in_sched && pending_output) {
 		/*
 		 * Disable preemption to avoid being preempted while holding
 		 * console_sem which would prevent anyone from printing to
@@ -1923,7 +1924,8 @@ asmlinkage int vprintk_emit(int facility, int level,
 		preempt_enable();
 	}
 
-	wake_up_klogd();
+	if (pending_output)
+		wake_up_klogd();
 	return printed_len;
 }
 EXPORT_SYMBOL(vprintk_emit);
@@ -2009,6 +2011,7 @@ static u64 syslog_seq;
 static u32 syslog_idx;
 static u64 console_seq;
 static u32 console_idx;
+static u64 exclusive_console_stop_seq;
 static u64 log_first_seq;
 static u32 log_first_idx;
 static u64 log_next_seq;
@@ -2351,8 +2354,9 @@ again:
 		printk_safe_enter_irqsave(flags);
 		raw_spin_lock(&logbuf_lock);
 		if (console_seq < log_first_seq) {
-			len = sprintf(text, "** %u printk messages dropped **\n",
-				      (unsigned)(log_first_seq - console_seq));
+			len = sprintf(text,
+				      "** %llu printk messages dropped **\n",
+				      log_first_seq - console_seq);
 
 			/* messages are gone, move to first one */
 			console_seq = log_first_seq;
@@ -2376,6 +2380,12 @@ skip:
 			goto skip;
 		}
 
+		/* Output to all consoles once old messages replayed. */
+		if (unlikely(exclusive_console &&
+			     console_seq >= exclusive_console_stop_seq)) {
+			exclusive_console = NULL;
+		}
+
 		len += msg_print_text(msg,
 				console_msg_format & MSG_FORMAT_SYSLOG,
 				text + len,
@@ -2418,10 +2428,6 @@ skip:
 
 	console_locked = 0;
 
-	/* Release the exclusive_console once it is used */
-	if (unlikely(exclusive_console))
-		exclusive_console = NULL;
-
 	raw_spin_unlock(&logbuf_lock);
 
 	up_console_sem();
@@ -2688,8 +2694,7 @@ void register_console(struct console *newcon)
 	}
 
 	if (newcon->flags & CON_EXTENDED)
-		if (!nr_ext_console_drivers++)
-			pr_info("printk: continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n");
+		nr_ext_console_drivers++;
 
 	if (newcon->flags & CON_PRINTBUFFER) {
 		/*
@@ -2699,13 +2704,18 @@ void register_console(struct console *newcon)
 		logbuf_lock_irqsave(flags);
 		console_seq = syslog_seq;
 		console_idx = syslog_idx;
-		logbuf_unlock_irqrestore(flags);
 		/*
 		 * We're about to replay the log buffer.  Only do this to the
 		 * just-registered console to avoid excessive message spam to
 		 * the already-registered consoles.
+		 *
+		 * Set exclusive_console with disabled interrupts to reduce
+		 * race window with eventual console_flush_on_panic() that
+		 * ignores console_lock.
 		 */
 		exclusive_console = newcon;
+		exclusive_console_stop_seq = console_seq;
+		logbuf_unlock_irqrestore(flags);
 	}
 	console_unlock();
 	console_sysfs_notify();
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe0223121883..2e696b03e99d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5244,7 +5244,7 @@ out_unlock:
  * an error code.
  */
 SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
-		struct timespec __user *, interval)
+		struct __kernel_timespec __user *, interval)
 {
 	struct timespec64 t;
 	int retval = sched_rr_get_interval(pid, &t);
@@ -5255,16 +5255,16 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 	return retval;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
 COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval,
 		       compat_pid_t, pid,
-		       struct compat_timespec __user *, interval)
+		       struct old_timespec32 __user *, interval)
 {
 	struct timespec64 t;
 	int retval = sched_rr_get_interval(pid, &t);
 
 	if (retval == 0)
-		retval = compat_put_timespec64(&t, interval);
+		retval = put_old_timespec32(&t, interval);
 	return retval;
 }
 #endif
diff --git a/kernel/signal.c b/kernel/signal.c
index dbd2e4db24cf..17565240b1c6 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3165,7 +3165,7 @@ int copy_siginfo_from_user32(struct kernel_siginfo *to,
  *  @ts: upper bound on process time suspension
  */
 static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info,
-		    const struct timespec *ts)
+		    const struct timespec64 *ts)
 {
 	ktime_t *to = NULL, timeout = KTIME_MAX;
 	struct task_struct *tsk = current;
@@ -3173,9 +3173,9 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info,
 	int sig, ret = 0;
 
 	if (ts) {
-		if (!timespec_valid(ts))
+		if (!timespec64_valid(ts))
 			return -EINVAL;
-		timeout = timespec_to_ktime(*ts);
+		timeout = timespec64_to_ktime(*ts);
 		to = &timeout;
 	}
 
@@ -3223,11 +3223,12 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info,
  *  @sigsetsize: size of sigset_t type
  */
 SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
-		siginfo_t __user *, uinfo, const struct timespec __user *, uts,
+		siginfo_t __user *, uinfo,
+		const struct __kernel_timespec __user *, uts,
 		size_t, sigsetsize)
 {
 	sigset_t these;
-	struct timespec ts;
+	struct timespec64 ts;
 	kernel_siginfo_t info;
 	int ret;
 
@@ -3239,7 +3240,7 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
 		return -EFAULT;
 
 	if (uts) {
-		if (copy_from_user(&ts, uts, sizeof(ts)))
+		if (get_timespec64(&ts, uts))
 			return -EFAULT;
 	}
 
@@ -3256,10 +3257,10 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
 #ifdef CONFIG_COMPAT
 COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
 		struct compat_siginfo __user *, uinfo,
-		struct compat_timespec __user *, uts, compat_size_t, sigsetsize)
+		struct old_timespec32 __user *, uts, compat_size_t, sigsetsize)
 {
 	sigset_t s;
-	struct timespec t;
+	struct timespec64 t;
 	kernel_siginfo_t info;
 	long ret;
 
@@ -3270,7 +3271,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
 		return -EFAULT;
 
 	if (uts) {
-		if (compat_get_timespec(&t, uts))
+		if (get_old_timespec32(&t, uts))
 			return -EFAULT;
 	}
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 7a0720a20003..d28813306b2c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -257,9 +257,9 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 	int softirq_bit;
 
 	/*
-	 * Mask out PF_MEMALLOC s current task context is borrowed for the
-	 * softirq. A softirq handled such as network RX might set PF_MEMALLOC
-	 * again if the socket is related to swap
+	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
+	 * softirq. A softirq handled, such as network RX, might set PF_MEMALLOC
+	 * again if the socket is related to swapping.
 	 */
 	current->flags &= ~PF_MEMALLOC;
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index e1a549c9e399..9cdd74bd2d27 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1660,7 +1660,7 @@ int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
 	switch(restart->nanosleep.type) {
 #ifdef CONFIG_COMPAT_32BIT_TIME
 	case TT_COMPAT:
-		if (compat_put_timespec64(ts, restart->nanosleep.compat_rmtp))
+		if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp))
 			return -EFAULT;
 		break;
 #endif
@@ -1780,12 +1780,12 @@ SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
 
-COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp,
-		       struct compat_timespec __user *, rmtp)
+COMPAT_SYSCALL_DEFINE2(nanosleep, struct old_timespec32 __user *, rqtp,
+		       struct old_timespec32 __user *, rmtp)
 {
 	struct timespec64 tu;
 
-	if (compat_get_timespec64(&tu, rqtp))
+	if (get_old_timespec32(&tu, rqtp))
 		return -EFAULT;
 
 	if (!timespec64_valid(&tu))
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index 2c6847d5d69b..989ccf028bde 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -162,20 +162,20 @@ COMPAT_SYS_NI(setitimer);
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
 COMPAT_SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
-		       struct compat_timespec __user *, tp)
+		       struct old_timespec32 __user *, tp)
 {
 	struct timespec64 new_tp;
 
 	if (which_clock != CLOCK_REALTIME)
 		return -EINVAL;
-	if (compat_get_timespec64(&new_tp, tp))
+	if (get_old_timespec32(&new_tp, tp))
 		return -EFAULT;
 
 	return do_sys_settimeofday64(&new_tp, NULL);
 }
 
 COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
-		       struct compat_timespec __user *, tp)
+		       struct old_timespec32 __user *, tp)
 {
 	int ret;
 	struct timespec64 kernel_tp;
@@ -184,13 +184,13 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
 	if (ret)
 		return ret;
 
-	if (compat_put_timespec64(&kernel_tp, tp))
+	if (put_old_timespec32(&kernel_tp, tp))
 		return -EFAULT;
 	return 0;
 }
 
 COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
-		       struct compat_timespec __user *, tp)
+		       struct old_timespec32 __user *, tp)
 {
 	struct timespec64 rtn_tp = {
 		.tv_sec = 0,
@@ -201,7 +201,7 @@ COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
 	case CLOCK_REALTIME:
 	case CLOCK_MONOTONIC:
 	case CLOCK_BOOTTIME:
-		if (compat_put_timespec64(&rtn_tp, tp))
+		if (put_old_timespec32(&rtn_tp, tp))
 			return -EFAULT;
 		return 0;
 	default:
@@ -210,8 +210,8 @@ COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
 }
 
 COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
-		       struct compat_timespec __user *, rqtp,
-		       struct compat_timespec __user *, rmtp)
+		       struct old_timespec32 __user *, rqtp,
+		       struct old_timespec32 __user *, rmtp)
 {
 	struct timespec64 t;
 
@@ -224,7 +224,7 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
 		return -EINVAL;
 	}
 
-	if (compat_get_timespec64(&t, rqtp))
+	if (get_old_timespec32(&t, rqtp))
 		return -EFAULT;
 	if (!timespec64_valid(&t))
 		return -EINVAL;
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index eabb4c22728d..bd62b5eeb5a0 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -755,13 +755,13 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
 #ifdef CONFIG_COMPAT_32BIT_TIME
 
 COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
-		       struct compat_itimerspec __user *, setting)
+		       struct old_itimerspec32 __user *, setting)
 {
 	struct itimerspec64 cur_setting;
 
 	int ret = do_timer_gettime(timer_id, &cur_setting);
 	if (!ret) {
-		if (put_compat_itimerspec64(&cur_setting, setting))
+		if (put_old_itimerspec32(&cur_setting, setting))
 			ret = -EFAULT;
 	}
 	return ret;
@@ -928,8 +928,8 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
 COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
-		       struct compat_itimerspec __user *, new,
-		       struct compat_itimerspec __user *, old)
+		       struct old_itimerspec32 __user *, new,
+		       struct old_itimerspec32 __user *, old)
 {
 	struct itimerspec64 new_spec, old_spec;
 	struct itimerspec64 *rtn = old ? &old_spec : NULL;
@@ -937,12 +937,12 @@ COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
 
 	if (!new)
 		return -EINVAL;
-	if (get_compat_itimerspec64(&new_spec, new))
+	if (get_old_itimerspec32(&new_spec, new))
 		return -EFAULT;
 
 	error = do_timer_settime(timer_id, flags, &new_spec, rtn);
 	if (!error && old) {
-		if (put_compat_itimerspec64(&old_spec, old))
+		if (put_old_itimerspec32(&old_spec, old))
 			error = -EFAULT;
 	}
 	return error;
@@ -1115,7 +1115,7 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
 #ifdef CONFIG_COMPAT_32BIT_TIME
 
 COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock,
-		       struct compat_timespec __user *, tp)
+		       struct old_timespec32 __user *, tp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 ts;
@@ -1123,14 +1123,14 @@ COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock,
 	if (!kc || !kc->clock_set)
 		return -EINVAL;
 
-	if (compat_get_timespec64(&ts, tp))
+	if (get_old_timespec32(&ts, tp))
 		return -EFAULT;
 
 	return kc->clock_set(which_clock, &ts);
 }
 
 COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
-		       struct compat_timespec __user *, tp)
+		       struct old_timespec32 __user *, tp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 ts;
@@ -1141,7 +1141,7 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
 
 	err = kc->clock_get(which_clock, &ts);
 
-	if (!err && compat_put_timespec64(&ts, tp))
+	if (!err && put_old_timespec32(&ts, tp))
 		err = -EFAULT;
 
 	return err;
@@ -1180,7 +1180,7 @@ COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock,
 #ifdef CONFIG_COMPAT_32BIT_TIME
 
 COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
-		       struct compat_timespec __user *, tp)
+		       struct old_timespec32 __user *, tp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 ts;
@@ -1190,7 +1190,7 @@ COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
 		return -EINVAL;
 
 	err = kc->clock_getres(which_clock, &ts);
-	if (!err && tp && compat_put_timespec64(&ts, tp))
+	if (!err && tp && put_old_timespec32(&ts, tp))
 		return -EFAULT;
 
 	return err;
@@ -1237,8 +1237,8 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
 #ifdef CONFIG_COMPAT_32BIT_TIME
 
 COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
-		       struct compat_timespec __user *, rqtp,
-		       struct compat_timespec __user *, rmtp)
+		       struct old_timespec32 __user *, rqtp,
+		       struct old_timespec32 __user *, rmtp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 t;
@@ -1248,7 +1248,7 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
 	if (!kc->nsleep)
 		return -EOPNOTSUPP;
 
-	if (compat_get_timespec64(&t, rqtp))
+	if (get_old_timespec32(&t, rqtp))
 		return -EFAULT;
 
 	if (!timespec64_valid(&t))
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index aa2094d5dd27..be0aac2b4300 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -400,8 +400,6 @@ void tick_broadcast_control(enum tick_broadcast_mode mode)
 		if (tick_broadcast_forced)
 			break;
 		cpumask_clear_cpu(cpu, tick_broadcast_on);
-		if (!tick_device_is_functional(dev))
-			break;
 		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
 			if (tick_broadcast_device.mode ==
 			    TICKDEV_MODE_PERIODIC)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 5b33e2f5c0ed..69e673b88474 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -885,7 +885,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 	if (need_resched())
 		return false;
 
-	if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
+	if (unlikely(local_softirq_pending())) {
 		static int ratelimit;
 
 		if (ratelimit < 10 &&
diff --git a/kernel/time/time.c b/kernel/time/time.c
index ccdb351277ee..e3a7f7fd3abc 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -104,12 +104,12 @@ SYSCALL_DEFINE1(stime, time_t __user *, tptr)
 #ifdef CONFIG_COMPAT
 #ifdef __ARCH_WANT_COMPAT_SYS_TIME
 
-/* compat_time_t is a 32 bit "long" and needs to get converted. */
-COMPAT_SYSCALL_DEFINE1(time, compat_time_t __user *, tloc)
+/* old_time32_t is a 32 bit "long" and needs to get converted. */
+COMPAT_SYSCALL_DEFINE1(time, old_time32_t __user *, tloc)
 {
-	compat_time_t i;
+	old_time32_t i;
 
-	i = (compat_time_t)ktime_get_real_seconds();
+	i = (old_time32_t)ktime_get_real_seconds();
 
 	if (tloc) {
 		if (put_user(i,tloc))
@@ -119,7 +119,7 @@ COMPAT_SYSCALL_DEFINE1(time, compat_time_t __user *, tloc)
 	return i;
 }
 
-COMPAT_SYSCALL_DEFINE1(stime, compat_time_t __user *, tptr)
+COMPAT_SYSCALL_DEFINE1(stime, old_time32_t __user *, tptr)
 {
 	struct timespec64 tv;
 	int err;
@@ -144,9 +144,11 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
 		struct timezone __user *, tz)
 {
 	if (likely(tv != NULL)) {
-		struct timeval ktv;
-		do_gettimeofday(&ktv);
-		if (copy_to_user(tv, &ktv, sizeof(ktv)))
+		struct timespec64 ts;
+
+		ktime_get_real_ts64(&ts);
+		if (put_user(ts.tv_sec, &tv->tv_sec) ||
+		    put_user(ts.tv_nsec / 1000, &tv->tv_usec))
 			return -EFAULT;
 	}
 	if (unlikely(tz != NULL)) {
@@ -223,14 +225,15 @@ SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv,
 }
 
 #ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE2(gettimeofday, struct compat_timeval __user *, tv,
+COMPAT_SYSCALL_DEFINE2(gettimeofday, struct old_timeval32 __user *, tv,
 		       struct timezone __user *, tz)
 {
 	if (tv) {
-		struct timeval ktv;
+		struct timespec64 ts;
 
-		do_gettimeofday(&ktv);
-		if (compat_put_timeval(&ktv, tv))
+		ktime_get_real_ts64(&ts);
+		if (put_user(ts.tv_sec, &tv->tv_sec) ||
+		    put_user(ts.tv_nsec / 1000, &tv->tv_usec))
 			return -EFAULT;
 	}
 	if (tz) {
@@ -241,7 +244,7 @@ COMPAT_SYSCALL_DEFINE2(gettimeofday, struct compat_timeval __user *, tv,
 	return 0;
 }
 
-COMPAT_SYSCALL_DEFINE2(settimeofday, struct compat_timeval __user *, tv,
+COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv,
 		       struct timezone __user *, tz)
 {
 	struct timespec64 new_ts;
@@ -342,30 +345,6 @@ unsigned int jiffies_to_usecs(const unsigned long j)
 }
 EXPORT_SYMBOL(jiffies_to_usecs);
 
-/**
- * timespec_trunc - Truncate timespec to a granularity
- * @t: Timespec
- * @gran: Granularity in ns.
- *
- * Truncate a timespec to a granularity. Always rounds down. gran must
- * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
- */
-struct timespec timespec_trunc(struct timespec t, unsigned gran)
-{
-	/* Avoid division in the common cases 1 ns and 1 s. */
-	if (gran == 1) {
-		/* nothing */
-	} else if (gran == NSEC_PER_SEC) {
-		t.tv_nsec = 0;
-	} else if (gran > 1 && gran < NSEC_PER_SEC) {
-		t.tv_nsec -= t.tv_nsec % gran;
-	} else {
-		WARN(1, "illegal file time granularity: %u", gran);
-	}
-	return t;
-}
-EXPORT_SYMBOL(timespec_trunc);
-
 /*
  * mktime64 - Converts date to seconds.
  * Converts Gregorian date to seconds since 1970-01-01 00:00:00.
@@ -884,10 +863,10 @@ int put_timespec64(const struct timespec64 *ts,
 }
 EXPORT_SYMBOL_GPL(put_timespec64);
 
-int __compat_get_timespec64(struct timespec64 *ts64,
-				   const struct compat_timespec __user *cts)
+static int __get_old_timespec32(struct timespec64 *ts64,
+				   const struct old_timespec32 __user *cts)
 {
-	struct compat_timespec ts;
+	struct old_timespec32 ts;
 	int ret;
 
 	ret = copy_from_user(&ts, cts, sizeof(ts));
@@ -900,33 +879,33 @@ int __compat_get_timespec64(struct timespec64 *ts64,
 	return 0;
 }
 
-int __compat_put_timespec64(const struct timespec64 *ts64,
-				   struct compat_timespec __user *cts)
+static int __put_old_timespec32(const struct timespec64 *ts64,
+				   struct old_timespec32 __user *cts)
 {
-	struct compat_timespec ts = {
+	struct old_timespec32 ts = {
 		.tv_sec = ts64->tv_sec,
 		.tv_nsec = ts64->tv_nsec
 	};
 	return copy_to_user(cts, &ts, sizeof(ts)) ? -EFAULT : 0;
 }
 
-int compat_get_timespec64(struct timespec64 *ts, const void __user *uts)
+int get_old_timespec32(struct timespec64 *ts, const void __user *uts)
 {
 	if (COMPAT_USE_64BIT_TIME)
 		return copy_from_user(ts, uts, sizeof(*ts)) ? -EFAULT : 0;
 	else
-		return __compat_get_timespec64(ts, uts);
+		return __get_old_timespec32(ts, uts);
 }
-EXPORT_SYMBOL_GPL(compat_get_timespec64);
+EXPORT_SYMBOL_GPL(get_old_timespec32);
 
-int compat_put_timespec64(const struct timespec64 *ts, void __user *uts)
+int put_old_timespec32(const struct timespec64 *ts, void __user *uts)
 {
 	if (COMPAT_USE_64BIT_TIME)
 		return copy_to_user(uts, ts, sizeof(*ts)) ? -EFAULT : 0;
 	else
-		return __compat_put_timespec64(ts, uts);
+		return __put_old_timespec32(ts, uts);
 }
-EXPORT_SYMBOL_GPL(compat_put_timespec64);
+EXPORT_SYMBOL_GPL(put_old_timespec32);
 
 int get_itimerspec64(struct itimerspec64 *it,
 			const struct __kernel_itimerspec __user *uit)
@@ -958,23 +937,23 @@ int put_itimerspec64(const struct itimerspec64 *it,
 }
 EXPORT_SYMBOL_GPL(put_itimerspec64);
 
-int get_compat_itimerspec64(struct itimerspec64 *its,
-			const struct compat_itimerspec __user *uits)
+int get_old_itimerspec32(struct itimerspec64 *its,
+			const struct old_itimerspec32 __user *uits)
 {
 
-	if (__compat_get_timespec64(&its->it_interval, &uits->it_interval) ||
-	    __compat_get_timespec64(&its->it_value, &uits->it_value))
+	if (__get_old_timespec32(&its->it_interval, &uits->it_interval) ||
+	    __get_old_timespec32(&its->it_value, &uits->it_value))
 		return -EFAULT;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(get_compat_itimerspec64);
+EXPORT_SYMBOL_GPL(get_old_itimerspec32);
 
-int put_compat_itimerspec64(const struct itimerspec64 *its,
-			struct compat_itimerspec __user *uits)
+int put_old_itimerspec32(const struct itimerspec64 *its,
+			struct old_itimerspec32 __user *uits)
 {
-	if (__compat_put_timespec64(&its->it_interval, &uits->it_interval) ||
-	    __compat_put_timespec64(&its->it_value, &uits->it_value))
+	if (__put_old_timespec32(&its->it_interval, &uits->it_interval) ||
+	    __put_old_timespec32(&its->it_value, &uits->it_value))
 		return -EFAULT;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(put_compat_itimerspec64);
+EXPORT_SYMBOL_GPL(put_old_itimerspec32);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f3b22f456fac..2d110c948805 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1212,22 +1212,6 @@ int get_device_system_crosststamp(int (*get_time_fn)
 EXPORT_SYMBOL_GPL(get_device_system_crosststamp);
 
 /**
- * do_gettimeofday - Returns the time of day in a timeval
- * @tv:		pointer to the timeval to be set
- *
- * NOTE: Users should be converted to using getnstimeofday()
- */
-void do_gettimeofday(struct timeval *tv)
-{
-	struct timespec64 now;
-
-	getnstimeofday64(&now);
-	tv->tv_sec = now.tv_sec;
-	tv->tv_usec = now.tv_nsec/1000;
-}
-EXPORT_SYMBOL(do_gettimeofday);
-
-/**
  * do_settimeofday64 - Sets the time of day.
  * @ts:     pointer to the timespec64 variable containing the new time
  *
@@ -2174,14 +2158,6 @@ void getboottime64(struct timespec64 *ts)
 }
 EXPORT_SYMBOL_GPL(getboottime64);
 
-unsigned long get_seconds(void)
-{
-	struct timekeeper *tk = &tk_core.timekeeper;
-
-	return tk->xtime_sec;
-}
-EXPORT_SYMBOL(get_seconds);
-
 void ktime_get_coarse_real_ts64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
diff --git a/lib/Kconfig b/lib/Kconfig
index a3928d4438b5..d82f20609939 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -621,3 +621,6 @@ config GENERIC_LIB_CMPDI2
 
 config GENERIC_LIB_UCMPDI2
 	bool
+
+config GENERIC_LIB_UMODDI3
+	bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8d24f4ed66fd..04adfc3b185e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1965,6 +1965,14 @@ config TEST_DEBUG_VIRTUAL
 
 	  If unsure, say N.
 
+config TEST_MEMCAT_P
+	tristate "Test memcat_p() helper function"
+	help
+	  Test the memcat_p() helper for correctly merging two
+	  pointer arrays together.
+
+	  If unsure, say N.
+
 endif # RUNTIME_TESTING_MENU
 
 config MEMTEST
diff --git a/lib/Makefile b/lib/Makefile
index 423876446810..fa3eb1b4c0e3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -24,7 +24,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
 	 earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
-	 nmi_backtrace.o nodemask.o win_minmax.o
+	 nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o
 
 lib-$(CONFIG_PRINTK) += dump_stack.o
 lib-$(CONFIG_MMU) += ioremap.o
@@ -71,6 +71,7 @@ obj-$(CONFIG_TEST_UUID) += test_uuid.o
 obj-$(CONFIG_TEST_PARMAN) += test_parman.o
 obj-$(CONFIG_TEST_KMOD) += test_kmod.o
 obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o
+obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
@@ -270,3 +271,4 @@ obj-$(CONFIG_GENERIC_LIB_LSHRDI3) += lshrdi3.o
 obj-$(CONFIG_GENERIC_LIB_MULDI3) += muldi3.o
 obj-$(CONFIG_GENERIC_LIB_CMPDI2) += cmpdi2.o
 obj-$(CONFIG_GENERIC_LIB_UCMPDI2) += ucmpdi2.o
+obj-$(CONFIG_GENERIC_LIB_UMODDI3) += umoddi3.o udivmoddi4.o
diff --git a/lib/chacha20.c b/lib/chacha20.c
index c1cc50fb68c9..d907fec6a9ed 100644
--- a/lib/chacha20.c
+++ b/lib/chacha20.c
@@ -16,9 +16,9 @@
 #include <asm/unaligned.h>
 #include <crypto/chacha20.h>
 
-void chacha20_block(u32 *state, u32 *stream)
+void chacha20_block(u32 *state, u8 *stream)
 {
-	u32 x[16], *out = stream;
+	u32 x[16];
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(x); i++)
@@ -67,7 +67,7 @@ void chacha20_block(u32 *state, u32 *stream)
 	}
 
 	for (i = 0; i < ARRAY_SIZE(x); i++)
-		out[i] = cpu_to_le32(x[i] + state[i]);
+		put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
 
 	state[12]++;
 }
diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c
index 1ad33e555805..4d0d47c1ffbd 100644
--- a/lib/crc-t10dif.c
+++ b/lib/crc-t10dif.c
@@ -14,10 +14,47 @@
 #include <linux/err.h>
 #include <linux/init.h>
 #include <crypto/hash.h>
+#include <crypto/algapi.h>
 #include <linux/static_key.h>
+#include <linux/notifier.h>
 
-static struct crypto_shash *crct10dif_tfm;
+static struct crypto_shash __rcu *crct10dif_tfm;
 static struct static_key crct10dif_fallback __read_mostly;
+static DEFINE_MUTEX(crc_t10dif_mutex);
+
+static int crc_t10dif_rehash(struct notifier_block *self, unsigned long val, void *data)
+{
+	struct crypto_alg *alg = data;
+	struct crypto_shash *new, *old;
+
+	if (val != CRYPTO_MSG_ALG_LOADED ||
+	    static_key_false(&crct10dif_fallback) ||
+	    strncmp(alg->cra_name, CRC_T10DIF_STRING, strlen(CRC_T10DIF_STRING)))
+		return 0;
+
+	mutex_lock(&crc_t10dif_mutex);
+	old = rcu_dereference_protected(crct10dif_tfm,
+					lockdep_is_held(&crc_t10dif_mutex));
+	if (!old) {
+		mutex_unlock(&crc_t10dif_mutex);
+		return 0;
+	}
+	new = crypto_alloc_shash("crct10dif", 0, 0);
+	if (IS_ERR(new)) {
+		mutex_unlock(&crc_t10dif_mutex);
+		return 0;
+	}
+	rcu_assign_pointer(crct10dif_tfm, new);
+	mutex_unlock(&crc_t10dif_mutex);
+
+	synchronize_rcu();
+	crypto_free_shash(old);
+	return 0;
+}
+
+static struct notifier_block crc_t10dif_nb = {
+	.notifier_call = crc_t10dif_rehash,
+};
 
 __u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len)
 {
@@ -30,11 +67,14 @@ __u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len)
 	if (static_key_false(&crct10dif_fallback))
 		return crc_t10dif_generic(crc, buffer, len);
 
-	desc.shash.tfm = crct10dif_tfm;
+	rcu_read_lock();
+	desc.shash.tfm = rcu_dereference(crct10dif_tfm);
 	desc.shash.flags = 0;
 	*(__u16 *)desc.ctx = crc;
 
 	err = crypto_shash_update(&desc.shash, buffer, len);
+	rcu_read_unlock();
+
 	BUG_ON(err);
 
 	return *(__u16 *)desc.ctx;
@@ -49,6 +89,7 @@ EXPORT_SYMBOL(crc_t10dif);
 
 static int __init crc_t10dif_mod_init(void)
 {
+	crypto_register_notifier(&crc_t10dif_nb);
 	crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0);
 	if (IS_ERR(crct10dif_tfm)) {
 		static_key_slow_inc(&crct10dif_fallback);
@@ -59,12 +100,24 @@ static int __init crc_t10dif_mod_init(void)
 
 static void __exit crc_t10dif_mod_fini(void)
 {
+	crypto_unregister_notifier(&crc_t10dif_nb);
 	crypto_free_shash(crct10dif_tfm);
 }
 
 module_init(crc_t10dif_mod_init);
 module_exit(crc_t10dif_mod_fini);
 
+static int crc_t10dif_transform_show(char *buffer, const struct kernel_param *kp)
+{
+	if (static_key_false(&crct10dif_fallback))
+		return sprintf(buffer, "fallback\n");
+
+	return sprintf(buffer, "%s\n",
+		crypto_tfm_alg_driver_name(crypto_shash_tfm(crct10dif_tfm)));
+}
+
+module_param_call(transform, NULL, crc_t10dif_transform_show, NULL, 0644);
+
 MODULE_DESCRIPTION("T10 DIF CRC calculation");
 MODULE_LICENSE("GPL");
 MODULE_SOFTDEP("pre: crct10dif");
diff --git a/lib/memcat_p.c b/lib/memcat_p.c
new file mode 100644
index 000000000000..b810fbc66962
--- /dev/null
+++ b/lib/memcat_p.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/slab.h>
+
+/*
+ * Merge two NULL-terminated pointer arrays into a newly allocated
+ * array, which is also NULL-terminated. Nomenclature is inspired by
+ * memset_p() and memcat() found elsewhere in the kernel source tree.
+ */
+void **__memcat_p(void **a, void **b)
+{
+	void **p = a, **new;
+	int nr;
+
+	/* count the elements in both arrays */
+	for (nr = 0, p = a; *p; nr++, p++)
+		;
+	for (p = b; *p; nr++, p++)
+		;
+	/* one for the NULL-terminator */
+	nr++;
+
+	new = kmalloc_array(nr, sizeof(void *), GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	/* nr -> last index; p points to NULL in b[] */
+	for (nr--; nr >= 0; nr--, p = p == b ? &a[nr] : p - 1)
+		new[nr] = *p;
+
+	return new;
+}
+EXPORT_SYMBOL_GPL(__memcat_p);
+
diff --git a/lib/string.c b/lib/string.c
index 2c0900a5d51a..38e4ca08e757 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -27,6 +27,7 @@
 #include <linux/export.h>
 #include <linux/bug.h>
 #include <linux/errno.h>
+#include <linux/slab.h>
 
 #include <asm/byteorder.h>
 #include <asm/word-at-a-time.h>
diff --git a/lib/test_memcat_p.c b/lib/test_memcat_p.c
new file mode 100644
index 000000000000..849c477d49d0
--- /dev/null
+++ b/lib/test_memcat_p.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test cases for memcat_p() in lib/memcat_p.c
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+struct test_struct {
+	int		num;
+	unsigned int	magic;
+};
+
+#define MAGIC		0xf00ff00f
+/* Size of each of the NULL-terminated input arrays */
+#define INPUT_MAX	128
+/* Expected number of non-NULL elements in the output array */
+#define EXPECT		(INPUT_MAX * 2 - 2)
+
+static int __init test_memcat_p_init(void)
+{
+	struct test_struct **in0, **in1, **out, **p;
+	int err = -ENOMEM, i, r, total = 0;
+
+	in0 = kcalloc(INPUT_MAX, sizeof(*in0), GFP_KERNEL);
+	if (!in0)
+		return err;
+
+	in1 = kcalloc(INPUT_MAX, sizeof(*in1), GFP_KERNEL);
+	if (!in1)
+		goto err_free_in0;
+
+	for (i = 0, r = 1; i < INPUT_MAX - 1; i++) {
+		in0[i] = kmalloc(sizeof(**in0), GFP_KERNEL);
+		if (!in0[i])
+			goto err_free_elements;
+
+		in1[i] = kmalloc(sizeof(**in1), GFP_KERNEL);
+		if (!in1[i]) {
+			kfree(in0[i]);
+			goto err_free_elements;
+		}
+
+		/* lifted from test_sort.c */
+		r = (r * 725861) % 6599;
+		in0[i]->num = r;
+		in1[i]->num = -r;
+		in0[i]->magic = MAGIC;
+		in1[i]->magic = MAGIC;
+	}
+
+	in0[i] = in1[i] = NULL;
+
+	out = memcat_p(in0, in1);
+	if (!out)
+		goto err_free_all_elements;
+
+	err = -EINVAL;
+	for (i = 0, p = out; *p && (i < INPUT_MAX * 2 - 1); p++, i++) {
+		total += (*p)->num;
+
+		if ((*p)->magic != MAGIC) {
+			pr_err("test failed: wrong magic at %d: %u\n", i,
+			       (*p)->magic);
+			goto err_free_out;
+		}
+	}
+
+	if (total) {
+		pr_err("test failed: expected zero total, got %d\n", total);
+		goto err_free_out;
+	}
+
+	if (i != EXPECT) {
+		pr_err("test failed: expected output size %d, got %d\n",
+		       EXPECT, i);
+		goto err_free_out;
+	}
+
+	for (i = 0; i < INPUT_MAX - 1; i++)
+		if (out[i] != in0[i] || out[i + INPUT_MAX - 1] != in1[i]) {
+			pr_err("test failed: wrong element order at %d\n", i);
+			goto err_free_out;
+		}
+
+	err = 0;
+	pr_info("test passed\n");
+
+err_free_out:
+	kfree(out);
+err_free_all_elements:
+	i = INPUT_MAX;
+err_free_elements:
+	for (i--; i >= 0; i--) {
+		kfree(in1[i]);
+		kfree(in0[i]);
+	}
+
+	kfree(in1);
+err_free_in0:
+	kfree(in0);
+
+	return err;
+}
+
+static void __exit test_memcat_p_exit(void)
+{
+}
+
+module_init(test_memcat_p_init);
+module_exit(test_memcat_p_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/lib/udivmoddi4.c b/lib/udivmoddi4.c
new file mode 100644
index 000000000000..c08bc8a5f1cf
--- /dev/null
+++ b/lib/udivmoddi4.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.
+ */
+
+#include <linux/libgcc.h>
+
+#define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz(X))
+
+#define W_TYPE_SIZE 32
+
+#define __ll_B ((unsigned long) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((unsigned long) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((unsigned long) (t) >> (W_TYPE_SIZE / 2))
+
+/* If we still don't have umul_ppmm, define it using plain C. */
+#if !defined(umul_ppmm)
+#define umul_ppmm(w1, w0, u, v)						\
+	do {								\
+		unsigned long __x0, __x1, __x2, __x3;			\
+		unsigned short __ul, __vl, __uh, __vh;			\
+									\
+		__ul = __ll_lowpart(u);					\
+		__uh = __ll_highpart(u);				\
+		__vl = __ll_lowpart(v);					\
+		__vh = __ll_highpart(v);				\
+									\
+		__x0 = (unsigned long) __ul * __vl;			\
+		__x1 = (unsigned long) __ul * __vh;			\
+		__x2 = (unsigned long) __uh * __vl;			\
+		__x3 = (unsigned long) __uh * __vh;			\
+									\
+		__x1 += __ll_highpart(__x0);				\
+		__x1 += __x2;						\
+		if (__x1 < __x2)					\
+			__x3 += __ll_B;					\
+									\
+		(w1) = __x3 + __ll_highpart(__x1);			\
+		(w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\
+	} while (0)
+#endif
+
+#if !defined(sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
+	do {								\
+		unsigned long __x;					\
+		__x = (al) - (bl);					\
+		(sh) = (ah) - (bh) - (__x > (al));			\
+		(sl) = __x;						\
+	} while (0)
+#endif
+
+/* Define this unconditionally, so it can be used for debugging. */
+#define __udiv_qrnnd_c(q, r, n1, n0, d)					\
+	do {								\
+		unsigned long __d1, __d0, __q1, __q0;			\
+		unsigned long __r1, __r0, __m;				\
+		__d1 = __ll_highpart(d);				\
+		__d0 = __ll_lowpart(d);				\
+									\
+		__r1 = (n1) % __d1;					\
+		__q1 = (n1) / __d1;					\
+		__m = (unsigned long) __q1 * __d0;			\
+		__r1 = __r1 * __ll_B | __ll_highpart(n0);		\
+		if (__r1 < __m) {					\
+			__q1--, __r1 += (d);				\
+			if (__r1 >= (d))				\
+				if (__r1 < __m)				\
+					__q1--, __r1 += (d);		\
+		}							\
+		__r1 -= __m;						\
+									\
+		__r0 = __r1 % __d1;					\
+		__q0 = __r1 / __d1;					\
+		__m = (unsigned long) __q0 * __d0;			\
+		__r0 = __r0 * __ll_B | __ll_lowpart(n0);		\
+		if (__r0 < __m) {					\
+			__q0--, __r0 += (d);				\
+			if (__r0 >= (d))				\
+				if (__r0 < __m)				\
+					__q0--, __r0 += (d);		\
+		}							\
+		__r0 -= __m;						\
+									\
+		(q) = (unsigned long) __q1 * __ll_B | __q0;		\
+		(r) = __r0;						\
+	} while (0)
+
+/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
+#if !defined(udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+unsigned long long __udivmoddi4(unsigned long long u, unsigned long long v,
+				unsigned long long *rp)
+{
+	const DWunion nn = {.ll = u };
+	const DWunion dd = {.ll = v };
+	DWunion rr, ww;
+	unsigned long d0, d1, n0, n1, n2;
+	unsigned long q0 = 0, q1 = 0;
+	unsigned long b, bm;
+
+	d0 = dd.s.low;
+	d1 = dd.s.high;
+	n0 = nn.s.low;
+	n1 = nn.s.high;
+
+#if !UDIV_NEEDS_NORMALIZATION
+
+	if (d1 == 0) {
+		if (d0 > n1) {
+			/* 0q = nn / 0D */
+
+			udiv_qrnnd(q0, n0, n1, n0, d0);
+			q1 = 0;
+
+			/* Remainder in n0. */
+		} else {
+			/* qq = NN / 0d */
+
+			if (d0 == 0)
+				/* Divide intentionally by zero. */
+				d0 = 1 / d0;
+
+			udiv_qrnnd(q1, n1, 0, n1, d0);
+			udiv_qrnnd(q0, n0, n1, n0, d0);
+
+			/* Remainder in n0. */
+		}
+
+		if (rp != 0) {
+			rr.s.low = n0;
+			rr.s.high = 0;
+			*rp = rr.ll;
+		}
+
+#else /* UDIV_NEEDS_NORMALIZATION */
+
+	if (d1 == 0) {
+		if (d0 > n1) {
+			/* 0q = nn / 0D */
+
+			count_leading_zeros(bm, d0);
+
+			if (bm != 0) {
+				/*
+				 * Normalize, i.e. make the most significant bit
+				 * of the denominator set.
+				 */
+
+				d0 = d0 << bm;
+				n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
+				n0 = n0 << bm;
+			}
+
+			udiv_qrnnd(q0, n0, n1, n0, d0);
+			q1 = 0;
+
+			/* Remainder in n0 >> bm. */
+		} else {
+			/* qq = NN / 0d */
+
+			if (d0 == 0)
+				/* Divide intentionally by zero. */
+				d0 = 1 / d0;
+
+			count_leading_zeros(bm, d0);
+
+			if (bm == 0) {
+				/*
+				 * From (n1 >= d0) /\ (the most significant bit
+				 * of d0 is set), conclude (the most significant
+				 * bit of n1 is set) /\ (theleading quotient
+				 * digit q1 = 1).
+				 *
+				 * This special case is necessary, not an
+				 * optimization. (Shifts counts of W_TYPE_SIZE
+				 * are undefined.)
+				 */
+
+				n1 -= d0;
+				q1 = 1;
+			} else {
+				/* Normalize. */
+
+				b = W_TYPE_SIZE - bm;
+
+				d0 = d0 << bm;
+				n2 = n1 >> b;
+				n1 = (n1 << bm) | (n0 >> b);
+				n0 = n0 << bm;
+
+				udiv_qrnnd(q1, n1, n2, n1, d0);
+			}
+
+			/* n1 != d0... */
+
+			udiv_qrnnd(q0, n0, n1, n0, d0);
+
+			/* Remainder in n0 >> bm. */
+		}
+
+		if (rp != 0) {
+			rr.s.low = n0 >> bm;
+			rr.s.high = 0;
+			*rp = rr.ll;
+		}
+
+#endif /* UDIV_NEEDS_NORMALIZATION */
+
+	} else {
+		if (d1 > n1) {
+			/* 00 = nn / DD */
+
+			q0 = 0;
+			q1 = 0;
+
+			/* Remainder in n1n0. */
+			if (rp != 0) {
+				rr.s.low = n0;
+				rr.s.high = n1;
+				*rp = rr.ll;
+			}
+		} else {
+			/* 0q = NN / dd */
+
+			count_leading_zeros(bm, d1);
+			if (bm == 0) {
+				/*
+				 * From (n1 >= d1) /\ (the most significant bit
+				 * of d1 is set), conclude (the most significant
+				 * bit of n1 is set) /\ (the quotient digit q0 =
+				 * 0 or 1).
+				 *
+				 * This special case is necessary, not an
+				 * optimization.
+				 */
+
+				/*
+				 * The condition on the next line takes
+				 * advantage of that n1 >= d1 (true due to
+				 * program flow).
+				 */
+				if (n1 > d1 || n0 >= d0) {
+					q0 = 1;
+					sub_ddmmss(n1, n0, n1, n0, d1, d0);
+				} else {
+					q0 = 0;
+				}
+
+				q1 = 0;
+
+				if (rp != 0) {
+					rr.s.low = n0;
+					rr.s.high = n1;
+					*rp = rr.ll;
+				}
+			} else {
+				unsigned long m1, m0;
+				/* Normalize. */
+
+				b = W_TYPE_SIZE - bm;
+
+				d1 = (d1 << bm) | (d0 >> b);
+				d0 = d0 << bm;
+				n2 = n1 >> b;
+				n1 = (n1 << bm) | (n0 >> b);
+				n0 = n0 << bm;
+
+				udiv_qrnnd(q0, n1, n2, n1, d1);
+				umul_ppmm(m1, m0, q0, d0);
+
+				if (m1 > n1 || (m1 == n1 && m0 > n0)) {
+					q0--;
+					sub_ddmmss(m1, m0, m1, m0, d1, d0);
+				}
+
+				q1 = 0;
+
+				/* Remainder in (n1n0 - m1m0) >> bm. */
+				if (rp != 0) {
+					sub_ddmmss(n1, n0, n1, n0, m1, m0);
+					rr.s.low = (n1 << b) | (n0 >> bm);
+					rr.s.high = n1 >> bm;
+					*rp = rr.ll;
+				}
+			}
+		}
+	}
+
+	ww.s.low = q0;
+	ww.s.high = q1;
+
+	return ww.ll;
+}
diff --git a/arch/nios2/boot/linked_dtb.S b/lib/umoddi3.c
index 071f922db338..d7bbf0f85197 100644
--- a/arch/nios2/boot/linked_dtb.S
+++ b/lib/umoddi3.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+
 /*
- * Copyright (C) 2011 Thomas Chou <thomas@wytron.com.tw>
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -12,8 +12,21 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.
  */
-.section .dtb.init.rodata,"a"
-.incbin "arch/nios2/boot/system.dtb"
+
+#include <linux/module.h>
+#include <linux/libgcc.h>
+
+extern unsigned long long __udivmoddi4(unsigned long long u,
+				       unsigned long long v,
+				       unsigned long long *rp);
+
+unsigned long long __umoddi3(unsigned long long u, unsigned long long v)
+{
+	unsigned long long w;
+	(void)__udivmoddi4(u, v, &w);
+	return w;
+}
+EXPORT_SYMBOL(__umoddi3);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 376de10929b3..37a54a6dd594 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -613,6 +613,109 @@ char *string(char *buf, char *end, const char *s, struct printf_spec spec)
 }
 
 static noinline_for_stack
+char *pointer_string(char *buf, char *end, const void *ptr,
+		     struct printf_spec spec)
+{
+	spec.base = 16;
+	spec.flags |= SMALL;
+	if (spec.field_width == -1) {
+		spec.field_width = 2 * sizeof(ptr);
+		spec.flags |= ZEROPAD;
+	}
+
+	return number(buf, end, (unsigned long int)ptr, spec);
+}
+
+/* Make pointers available for printing early in the boot sequence. */
+static int debug_boot_weak_hash __ro_after_init;
+
+static int __init debug_boot_weak_hash_enable(char *str)
+{
+	debug_boot_weak_hash = 1;
+	pr_info("debug_boot_weak_hash enabled\n");
+	return 0;
+}
+early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable);
+
+static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key);
+static siphash_key_t ptr_key __read_mostly;
+
+static void enable_ptr_key_workfn(struct work_struct *work)
+{
+	get_random_bytes(&ptr_key, sizeof(ptr_key));
+	/* Needs to run from preemptible context */
+	static_branch_disable(&not_filled_random_ptr_key);
+}
+
+static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
+
+static void fill_random_ptr_key(struct random_ready_callback *unused)
+{
+	/* This may be in an interrupt handler. */
+	queue_work(system_unbound_wq, &enable_ptr_key_work);
+}
+
+static struct random_ready_callback random_ready = {
+	.func = fill_random_ptr_key
+};
+
+static int __init initialize_ptr_random(void)
+{
+	int key_size = sizeof(ptr_key);
+	int ret;
+
+	/* Use hw RNG if available. */
+	if (get_random_bytes_arch(&ptr_key, key_size) == key_size) {
+		static_branch_disable(&not_filled_random_ptr_key);
+		return 0;
+	}
+
+	ret = add_random_ready_callback(&random_ready);
+	if (!ret) {
+		return 0;
+	} else if (ret == -EALREADY) {
+		/* This is in preemptible context */
+		enable_ptr_key_workfn(&enable_ptr_key_work);
+		return 0;
+	}
+
+	return ret;
+}
+early_initcall(initialize_ptr_random);
+
+/* Maps a pointer to a 32 bit unique identifier. */
+static char *ptr_to_id(char *buf, char *end, const void *ptr,
+		       struct printf_spec spec)
+{
+	const char *str = sizeof(ptr) == 8 ? "(____ptrval____)" : "(ptrval)";
+	unsigned long hashval;
+
+	/* When debugging early boot use non-cryptographically secure hash. */
+	if (unlikely(debug_boot_weak_hash)) {
+		hashval = hash_long((unsigned long)ptr, 32);
+		return pointer_string(buf, end, (const void *)hashval, spec);
+	}
+
+	if (static_branch_unlikely(&not_filled_random_ptr_key)) {
+		spec.field_width = 2 * sizeof(ptr);
+		/* string length must be less than default_width */
+		return string(buf, end, str, spec);
+	}
+
+#ifdef CONFIG_64BIT
+	hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key);
+	/*
+	 * Mask off the first 32 bits, this makes explicit that we have
+	 * modified the address (and 32 bits is plenty for a unique ID).
+	 */
+	hashval = hashval & 0xffffffff;
+#else
+	hashval = (unsigned long)siphash_1u32((u32)ptr, &ptr_key);
+#endif
+	return pointer_string(buf, end, (const void *)hashval, spec);
+}
+
+static noinline_for_stack
 char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_spec spec,
 		  const char *fmt)
 {
@@ -1357,20 +1460,6 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
 	return string(buf, end, uuid, spec);
 }
 
-static noinline_for_stack
-char *pointer_string(char *buf, char *end, const void *ptr,
-		     struct printf_spec spec)
-{
-	spec.base = 16;
-	spec.flags |= SMALL;
-	if (spec.field_width == -1) {
-		spec.field_width = 2 * sizeof(ptr);
-		spec.flags |= ZEROPAD;
-	}
-
-	return number(buf, end, (unsigned long int)ptr, spec);
-}
-
 int kptr_restrict __read_mostly;
 
 static noinline_for_stack
@@ -1421,7 +1510,8 @@ char *restricted_pointer(char *buf, char *end, const void *ptr,
 }
 
 static noinline_for_stack
-char *netdev_bits(char *buf, char *end, const void *addr, const char *fmt)
+char *netdev_bits(char *buf, char *end, const void *addr,
+		  struct printf_spec spec,  const char *fmt)
 {
 	unsigned long long num;
 	int size;
@@ -1432,9 +1522,7 @@ char *netdev_bits(char *buf, char *end, const void *addr, const char *fmt)
 		size = sizeof(netdev_features_t);
 		break;
 	default:
-		num = (unsigned long)addr;
-		size = sizeof(unsigned long);
-		break;
+		return ptr_to_id(buf, end, addr, spec);
 	}
 
 	return special_hex_number(buf, end, num, size);
@@ -1474,7 +1562,7 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec,
 #ifdef CONFIG_COMMON_CLK
 		return string(buf, end, __clk_get_name(clk), spec);
 #else
-		return special_hex_number(buf, end, (unsigned long)clk, sizeof(unsigned long));
+		return ptr_to_id(buf, end, clk, spec);
 #endif
 	}
 }
@@ -1596,6 +1684,7 @@ char *device_node_string(char *buf, char *end, struct device_node *dn,
 		fmt = "f";
 
 	for (pass = false; strspn(fmt,"fnpPFcC"); fmt++, pass = true) {
+		int precision;
 		if (pass) {
 			if (buf < end)
 				*buf = ':';
@@ -1607,7 +1696,11 @@ char *device_node_string(char *buf, char *end, struct device_node *dn,
 			buf = device_node_gen_full_name(dn, buf, end);
 			break;
 		case 'n':	/* name */
-			buf = string(buf, end, dn->name, str_spec);
+			p = kbasename(of_node_full_name(dn));
+			precision = str_spec.precision;
+			str_spec.precision = strchrnul(p, '@') - p;
+			buf = string(buf, end, p, str_spec);
+			str_spec.precision = precision;
 			break;
 		case 'p':	/* phandle */
 			buf = number(buf, end, (unsigned int)dn->phandle, num_spec);
@@ -1651,94 +1744,6 @@ char *device_node_string(char *buf, char *end, struct device_node *dn,
 	return widen_string(buf, buf - buf_start, end, spec);
 }
 
-/* Make pointers available for printing early in the boot sequence. */
-static int debug_boot_weak_hash __ro_after_init;
-
-static int __init debug_boot_weak_hash_enable(char *str)
-{
-	debug_boot_weak_hash = 1;
-	pr_info("debug_boot_weak_hash enabled\n");
-	return 0;
-}
-early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable);
-
-static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key);
-static siphash_key_t ptr_key __read_mostly;
-
-static void enable_ptr_key_workfn(struct work_struct *work)
-{
-	get_random_bytes(&ptr_key, sizeof(ptr_key));
-	/* Needs to run from preemptible context */
-	static_branch_disable(&not_filled_random_ptr_key);
-}
-
-static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
-
-static void fill_random_ptr_key(struct random_ready_callback *unused)
-{
-	/* This may be in an interrupt handler. */
-	queue_work(system_unbound_wq, &enable_ptr_key_work);
-}
-
-static struct random_ready_callback random_ready = {
-	.func = fill_random_ptr_key
-};
-
-static int __init initialize_ptr_random(void)
-{
-	int key_size = sizeof(ptr_key);
-	int ret;
-
-	/* Use hw RNG if available. */
-	if (get_random_bytes_arch(&ptr_key, key_size) == key_size) {
-		static_branch_disable(&not_filled_random_ptr_key);
-		return 0;
-	}
-
-	ret = add_random_ready_callback(&random_ready);
-	if (!ret) {
-		return 0;
-	} else if (ret == -EALREADY) {
-		/* This is in preemptible context */
-		enable_ptr_key_workfn(&enable_ptr_key_work);
-		return 0;
-	}
-
-	return ret;
-}
-early_initcall(initialize_ptr_random);
-
-/* Maps a pointer to a 32 bit unique identifier. */
-static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec)
-{
-	const char *str = sizeof(ptr) == 8 ? "(____ptrval____)" : "(ptrval)";
-	unsigned long hashval;
-
-	/* When debugging early boot use non-cryptographically secure hash. */
-	if (unlikely(debug_boot_weak_hash)) {
-		hashval = hash_long((unsigned long)ptr, 32);
-		return pointer_string(buf, end, (const void *)hashval, spec);
-	}
-
-	if (static_branch_unlikely(&not_filled_random_ptr_key)) {
-		spec.field_width = 2 * sizeof(ptr);
-		/* string length must be less than default_width */
-		return string(buf, end, str, spec);
-	}
-
-#ifdef CONFIG_64BIT
-	hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key);
-	/*
-	 * Mask off the first 32 bits, this makes explicit that we have
-	 * modified the address (and 32 bits is plenty for a unique ID).
-	 */
-	hashval = hashval & 0xffffffff;
-#else
-	hashval = (unsigned long)siphash_1u32((u32)ptr, &ptr_key);
-#endif
-	return pointer_string(buf, end, (const void *)hashval, spec);
-}
-
 /*
  * Show a '%p' thing.  A kernel extension is that the '%p' is followed
  * by an extra set of alphanumeric characters that are extended format
@@ -1942,7 +1947,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 			break;
 		return restricted_pointer(buf, end, ptr, spec);
 	case 'N':
-		return netdev_bits(buf, end, ptr, fmt);
+		return netdev_bits(buf, end, ptr, spec, fmt);
 	case 'a':
 		return address_val(buf, end, ptr, fmt);
 	case 'd':
diff --git a/mm/util.c b/mm/util.c
index 9e3ebd2ef65f..470f5cd80b64 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -15,17 +15,10 @@
 #include <linux/vmalloc.h>
 #include <linux/userfaultfd_k.h>
 
-#include <asm/sections.h>
 #include <linux/uaccess.h>
 
 #include "internal.h"
 
-static inline int is_kernel_rodata(unsigned long addr)
-{
-	return addr >= (unsigned long)__start_rodata &&
-		addr < (unsigned long)__end_rodata;
-}
-
 /**
  * kfree_const - conditionally free memory
  * @x: pointer to the memory
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 00deacdcb51c..cfd83c5521ae 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -49,18 +49,17 @@ static int bnep_sock_release(struct socket *sock)
 	return 0;
 }
 
-static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+static int do_bnep_sock_ioctl(struct socket *sock, unsigned int cmd, void __user *argp)
 {
 	struct bnep_connlist_req cl;
 	struct bnep_connadd_req  ca;
 	struct bnep_conndel_req  cd;
 	struct bnep_conninfo ci;
 	struct socket *nsock;
-	void __user *argp = (void __user *)arg;
 	__u32 supp_feat = BIT(BNEP_SETUP_RESPONSE);
 	int err;
 
-	BT_DBG("cmd %x arg %lx", cmd, arg);
+	BT_DBG("cmd %x arg %p", cmd, argp);
 
 	switch (cmd) {
 	case BNEPCONNADD:
@@ -134,16 +133,22 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 	return 0;
 }
 
+static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	return do_bnep_sock_ioctl(sock, cmd, (void __user *)arg);
+}
+
 #ifdef CONFIG_COMPAT
 static int bnep_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
+	void __user *argp = compat_ptr(arg);
 	if (cmd == BNEPGETCONNLIST) {
 		struct bnep_connlist_req cl;
+		unsigned __user *p = argp;
 		u32 uci;
 		int err;
 
-		if (get_user(cl.cnum, (u32 __user *) arg) ||
-				get_user(uci, (u32 __user *) (arg + 4)))
+		if (get_user(cl.cnum, p) || get_user(uci, p + 1))
 			return -EFAULT;
 
 		cl.ci = compat_ptr(uci);
@@ -153,13 +158,13 @@ static int bnep_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigne
 
 		err = bnep_get_connlist(&cl);
 
-		if (!err && put_user(cl.cnum, (u32 __user *) arg))
+		if (!err && put_user(cl.cnum, p))
 			err = -EFAULT;
 
 		return err;
 	}
 
-	return bnep_sock_ioctl(sock, cmd, arg);
+	return do_bnep_sock_ioctl(sock, cmd, argp);
 }
 #endif
 
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index e08f28fadd65..defdd4871919 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -63,17 +63,16 @@ static int cmtp_sock_release(struct socket *sock)
 	return 0;
 }
 
-static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+static int do_cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, void __user *argp)
 {
 	struct cmtp_connadd_req ca;
 	struct cmtp_conndel_req cd;
 	struct cmtp_connlist_req cl;
 	struct cmtp_conninfo ci;
 	struct socket *nsock;
-	void __user *argp = (void __user *)arg;
 	int err;
 
-	BT_DBG("cmd %x arg %lx", cmd, arg);
+	BT_DBG("cmd %x arg %p", cmd, argp);
 
 	switch (cmd) {
 	case CMTPCONNADD:
@@ -137,16 +136,22 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 	return -EINVAL;
 }
 
+static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	return do_cmtp_sock_ioctl(sock, cmd, (void __user *)arg);
+}
+
 #ifdef CONFIG_COMPAT
 static int cmtp_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
+	void __user *argp = compat_ptr(arg);
 	if (cmd == CMTPGETCONNLIST) {
 		struct cmtp_connlist_req cl;
+		u32 __user *p = argp;
 		u32 uci;
 		int err;
 
-		if (get_user(cl.cnum, (u32 __user *) arg) ||
-				get_user(uci, (u32 __user *) (arg + 4)))
+		if (get_user(cl.cnum, p) || get_user(uci, p + 1))
 			return -EFAULT;
 
 		cl.ci = compat_ptr(uci);
@@ -156,13 +161,13 @@ static int cmtp_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigne
 
 		err = cmtp_get_connlist(&cl);
 
-		if (!err && put_user(cl.cnum, (u32 __user *) arg))
+		if (!err && put_user(cl.cnum, p))
 			err = -EFAULT;
 
 		return err;
 	}
 
-	return cmtp_sock_ioctl(sock, cmd, arg);
+	return do_cmtp_sock_ioctl(sock, cmd, argp);
 }
 #endif
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 3734dc1788b4..a442e21f3894 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -649,7 +649,7 @@ static void hidp_process_transmit(struct hidp_session *session,
 }
 
 static int hidp_setup_input(struct hidp_session *session,
-				struct hidp_connadd_req *req)
+				const struct hidp_connadd_req *req)
 {
 	struct input_dev *input;
 	int i;
@@ -748,7 +748,7 @@ EXPORT_SYMBOL_GPL(hidp_hid_driver);
 /* This function sets up the hid device. It does not add it
    to the HID system. That is done in hidp_add_connection(). */
 static int hidp_setup_hid(struct hidp_session *session,
-				struct hidp_connadd_req *req)
+				const struct hidp_connadd_req *req)
 {
 	struct hid_device *hid;
 	int err;
@@ -807,7 +807,7 @@ fault:
 
 /* initialize session devices */
 static int hidp_session_dev_init(struct hidp_session *session,
-				 struct hidp_connadd_req *req)
+				 const struct hidp_connadd_req *req)
 {
 	int ret;
 
@@ -906,7 +906,7 @@ static void hidp_session_dev_work(struct work_struct *work)
 static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr,
 			    struct socket *ctrl_sock,
 			    struct socket *intr_sock,
-			    struct hidp_connadd_req *req,
+			    const struct hidp_connadd_req *req,
 			    struct l2cap_conn *conn)
 {
 	struct hidp_session *session;
@@ -1338,7 +1338,7 @@ static int hidp_verify_sockets(struct socket *ctrl_sock,
 	return 0;
 }
 
-int hidp_connection_add(struct hidp_connadd_req *req,
+int hidp_connection_add(const struct hidp_connadd_req *req,
 			struct socket *ctrl_sock,
 			struct socket *intr_sock)
 {
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index 8798492a6e99..6ef88d0a1919 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -122,7 +122,7 @@ struct hidp_connlist_req {
 	struct hidp_conninfo __user *ci;
 };
 
-int hidp_connection_add(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock);
+int hidp_connection_add(const struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock);
 int hidp_connection_del(struct hidp_conndel_req *req);
 int hidp_get_connlist(struct hidp_connlist_req *req);
 int hidp_get_conninfo(struct hidp_conninfo *ci);
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 1eaac01f85de..9f85a1943be9 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -46,9 +46,8 @@ static int hidp_sock_release(struct socket *sock)
 	return 0;
 }
 
-static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+static int do_hidp_sock_ioctl(struct socket *sock, unsigned int cmd, void __user *argp)
 {
-	void __user *argp = (void __user *) arg;
 	struct hidp_connadd_req ca;
 	struct hidp_conndel_req cd;
 	struct hidp_connlist_req cl;
@@ -57,7 +56,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 	struct socket *isock;
 	int err;
 
-	BT_DBG("cmd %x arg %lx", cmd, arg);
+	BT_DBG("cmd %x arg %p", cmd, argp);
 
 	switch (cmd) {
 	case HIDPCONNADD:
@@ -122,6 +121,11 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 	return -EINVAL;
 }
 
+static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	return do_hidp_sock_ioctl(sock, cmd, (void __user *)arg);
+}
+
 #ifdef CONFIG_COMPAT
 struct compat_hidp_connadd_req {
 	int   ctrl_sock;	/* Connected control socket */
@@ -141,13 +145,15 @@ struct compat_hidp_connadd_req {
 
 static int hidp_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
+	void __user *argp = compat_ptr(arg);
+	int err;
+
 	if (cmd == HIDPGETCONNLIST) {
 		struct hidp_connlist_req cl;
+		u32 __user *p = argp;
 		u32 uci;
-		int err;
 
-		if (get_user(cl.cnum, (u32 __user *) arg) ||
-				get_user(uci, (u32 __user *) (arg + 4)))
+		if (get_user(cl.cnum, p) || get_user(uci, p + 1))
 			return -EFAULT;
 
 		cl.ci = compat_ptr(uci);
@@ -157,39 +163,54 @@ static int hidp_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigne
 
 		err = hidp_get_connlist(&cl);
 
-		if (!err && put_user(cl.cnum, (u32 __user *) arg))
+		if (!err && put_user(cl.cnum, p))
 			err = -EFAULT;
 
 		return err;
 	} else if (cmd == HIDPCONNADD) {
-		struct compat_hidp_connadd_req ca;
-		struct hidp_connadd_req __user *uca;
+		struct compat_hidp_connadd_req ca32;
+		struct hidp_connadd_req ca;
+		struct socket *csock;
+		struct socket *isock;
 
-		uca = compat_alloc_user_space(sizeof(*uca));
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
 
-		if (copy_from_user(&ca, (void __user *) arg, sizeof(ca)))
+		if (copy_from_user(&ca32, (void __user *) arg, sizeof(ca32)))
 			return -EFAULT;
 
-		if (put_user(ca.ctrl_sock, &uca->ctrl_sock) ||
-				put_user(ca.intr_sock, &uca->intr_sock) ||
-				put_user(ca.parser, &uca->parser) ||
-				put_user(ca.rd_size, &uca->rd_size) ||
-				put_user(compat_ptr(ca.rd_data), &uca->rd_data) ||
-				put_user(ca.country, &uca->country) ||
-				put_user(ca.subclass, &uca->subclass) ||
-				put_user(ca.vendor, &uca->vendor) ||
-				put_user(ca.product, &uca->product) ||
-				put_user(ca.version, &uca->version) ||
-				put_user(ca.flags, &uca->flags) ||
-				put_user(ca.idle_to, &uca->idle_to) ||
-				copy_to_user(&uca->name[0], &ca.name[0], 128))
-			return -EFAULT;
+		ca.ctrl_sock = ca32.ctrl_sock;
+		ca.intr_sock = ca32.intr_sock;
+		ca.parser = ca32.parser;
+		ca.rd_size = ca32.rd_size;
+		ca.rd_data = compat_ptr(ca32.rd_data);
+		ca.country = ca32.country;
+		ca.subclass = ca32.subclass;
+		ca.vendor = ca32.vendor;
+		ca.product = ca32.product;
+		ca.version = ca32.version;
+		ca.flags = ca32.flags;
+		ca.idle_to = ca32.idle_to;
+		memcpy(ca.name, ca32.name, 128);
+
+		csock = sockfd_lookup(ca.ctrl_sock, &err);
+		if (!csock)
+			return err;
 
-		arg = (unsigned long) uca;
+		isock = sockfd_lookup(ca.intr_sock, &err);
+		if (!isock) {
+			sockfd_put(csock);
+			return err;
+		}
 
-		/* Fall through. We don't actually write back any _changes_
-		   to the structure anyway, so there's no need to copy back
-		   into the original compat version */
+		err = hidp_connection_add(&ca, csock, isock);
+		if (!err && copy_to_user(argp, &ca32, sizeof(ca32)))
+			err = -EFAULT;
+
+		sockfd_put(csock);
+		sockfd_put(isock);
+
+		return err;
 	}
 
 	return hidp_sock_ioctl(sock, cmd, arg);
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 02172c408ff2..5d6724cee38f 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -46,9 +46,9 @@ static int set_secret(struct ceph_crypto_key *key, void *buf)
 		goto fail;
 	}
 
-	/* crypto_alloc_skcipher() allocates with GFP_KERNEL */
+	/* crypto_alloc_sync_skcipher() allocates with GFP_KERNEL */
 	noio_flag = memalloc_noio_save();
-	key->tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
+	key->tfm = crypto_alloc_sync_skcipher("cbc(aes)", 0, 0);
 	memalloc_noio_restore(noio_flag);
 	if (IS_ERR(key->tfm)) {
 		ret = PTR_ERR(key->tfm);
@@ -56,7 +56,7 @@ static int set_secret(struct ceph_crypto_key *key, void *buf)
 		goto fail;
 	}
 
-	ret = crypto_skcipher_setkey(key->tfm, key->key, key->len);
+	ret = crypto_sync_skcipher_setkey(key->tfm, key->key, key->len);
 	if (ret)
 		goto fail;
 
@@ -136,7 +136,7 @@ void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
 	if (key) {
 		kfree(key->key);
 		key->key = NULL;
-		crypto_free_skcipher(key->tfm);
+		crypto_free_sync_skcipher(key->tfm);
 		key->tfm = NULL;
 	}
 }
@@ -216,7 +216,7 @@ static void teardown_sgtable(struct sg_table *sgt)
 static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt,
 			  void *buf, int buf_len, int in_len, int *pout_len)
 {
-	SKCIPHER_REQUEST_ON_STACK(req, key->tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, key->tfm);
 	struct sg_table sgt;
 	struct scatterlist prealloc_sg;
 	char iv[AES_BLOCK_SIZE] __aligned(8);
@@ -232,7 +232,7 @@ static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt,
 		return ret;
 
 	memcpy(iv, aes_iv, AES_BLOCK_SIZE);
-	skcipher_request_set_tfm(req, key->tfm);
+	skcipher_request_set_sync_tfm(req, key->tfm);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sgt.sgl, sgt.sgl, crypt_len, iv);
 
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index bb45c7d43739..96ef4d860bc9 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -13,7 +13,7 @@ struct ceph_crypto_key {
 	struct ceph_timespec created;
 	int len;
 	void *key;
-	struct crypto_skcipher *tfm;
+	struct crypto_sync_skcipher *tfm;
 };
 
 int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
diff --git a/net/compat.c b/net/compat.c
index 3b2105f6549d..47a614b370cd 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -812,21 +812,21 @@ COMPAT_SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, buf, compat_size_t, len
 
 static int __compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
 				 unsigned int vlen, unsigned int flags,
-				 struct compat_timespec __user *timeout)
+				 struct old_timespec32 __user *timeout)
 {
 	int datagrams;
-	struct timespec ktspec;
+	struct timespec64 ktspec;
 
 	if (timeout == NULL)
 		return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
 				      flags | MSG_CMSG_COMPAT, NULL);
 
-	if (compat_get_timespec(&ktspec, timeout))
+	if (compat_get_timespec64(&ktspec, timeout))
 		return -EFAULT;
 
 	datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
 				   flags | MSG_CMSG_COMPAT, &ktspec);
-	if (datagrams > 0 && compat_put_timespec(&ktspec, timeout))
+	if (datagrams > 0 && compat_put_timespec64(&ktspec, timeout))
 		datagrams = -EFAULT;
 
 	return datagrams;
@@ -834,7 +834,7 @@ static int __compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
 
 COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg,
 		       unsigned int, vlen, unsigned int, flags,
-		       struct compat_timespec __user *, timeout)
+		       struct old_timespec32 __user *, timeout)
 {
 	return __compat_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
 }
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 5e4f04004a49..7bf833598615 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -106,6 +106,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
 		iterate_fd(p->files, 0, update_classid_sock,
 			   (void *)(unsigned long)cs->classid);
 		task_unlock(p);
+		cond_resched();
 	}
 	css_task_iter_end(&it);
 
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 2fb703d70803..7e29f88dbf6a 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -146,18 +146,18 @@ llsec_key_alloc(const struct ieee802154_llsec_key *template)
 			goto err_tfm;
 	}
 
-	key->tfm0 = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
+	key->tfm0 = crypto_alloc_sync_skcipher("ctr(aes)", 0, 0);
 	if (IS_ERR(key->tfm0))
 		goto err_tfm;
 
-	if (crypto_skcipher_setkey(key->tfm0, template->key,
+	if (crypto_sync_skcipher_setkey(key->tfm0, template->key,
 				   IEEE802154_LLSEC_KEY_SIZE))
 		goto err_tfm0;
 
 	return key;
 
 err_tfm0:
-	crypto_free_skcipher(key->tfm0);
+	crypto_free_sync_skcipher(key->tfm0);
 err_tfm:
 	for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
 		if (key->tfm[i])
@@ -177,7 +177,7 @@ static void llsec_key_release(struct kref *ref)
 	for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
 		crypto_free_aead(key->tfm[i]);
 
-	crypto_free_skcipher(key->tfm0);
+	crypto_free_sync_skcipher(key->tfm0);
 	kzfree(key);
 }
 
@@ -622,7 +622,7 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
 {
 	u8 iv[16];
 	struct scatterlist src;
-	SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
 	int err, datalen;
 	unsigned char *data;
 
@@ -632,7 +632,7 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
 	datalen = skb_tail_pointer(skb) - data;
 	sg_init_one(&src, data, datalen);
 
-	skcipher_request_set_tfm(req, key->tfm0);
+	skcipher_request_set_sync_tfm(req, key->tfm0);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, &src, &src, datalen, iv);
 	err = crypto_skcipher_encrypt(req);
@@ -840,7 +840,7 @@ llsec_do_decrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
 	unsigned char *data;
 	int datalen;
 	struct scatterlist src;
-	SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
 	int err;
 
 	llsec_geniv(iv, dev_addr, &hdr->sec);
@@ -849,7 +849,7 @@ llsec_do_decrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
 
 	sg_init_one(&src, data, datalen);
 
-	skcipher_request_set_tfm(req, key->tfm0);
+	skcipher_request_set_sync_tfm(req, key->tfm0);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, &src, &src, datalen, iv);
 
diff --git a/net/mac802154/llsec.h b/net/mac802154/llsec.h
index 6f3b658e3279..8be46d74dc39 100644
--- a/net/mac802154/llsec.h
+++ b/net/mac802154/llsec.h
@@ -29,7 +29,7 @@ struct mac802154_llsec_key {
 
 	/* one tfm for each authsize (4/8/16) */
 	struct crypto_aead *tfm[3];
-	struct crypto_skcipher *tfm0;
+	struct crypto_sync_skcipher *tfm0;
 
 	struct kref ref;
 };
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 0a7c49e8e053..382196e57a26 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -435,7 +435,7 @@ struct rxrpc_connection {
 	struct sk_buff_head	rx_queue;	/* received conn-level packets */
 	const struct rxrpc_security *security;	/* applied security module */
 	struct key		*server_key;	/* security for this service */
-	struct crypto_skcipher	*cipher;	/* encryption handle */
+	struct crypto_sync_skcipher *cipher;	/* encryption handle */
 	struct rxrpc_crypt	csum_iv;	/* packet checksum base */
 	unsigned long		flags;
 	unsigned long		events;
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index cea16838d588..cbef9ea43dec 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -46,7 +46,7 @@ struct rxkad_level2_hdr {
  * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE
  * packets
  */
-static struct crypto_skcipher *rxkad_ci;
+static struct crypto_sync_skcipher *rxkad_ci;
 static DEFINE_MUTEX(rxkad_ci_mutex);
 
 /*
@@ -54,7 +54,7 @@ static DEFINE_MUTEX(rxkad_ci_mutex);
  */
 static int rxkad_init_connection_security(struct rxrpc_connection *conn)
 {
-	struct crypto_skcipher *ci;
+	struct crypto_sync_skcipher *ci;
 	struct rxrpc_key_token *token;
 	int ret;
 
@@ -63,14 +63,14 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn)
 	token = conn->params.key->payload.data[0];
 	conn->security_ix = token->security_index;
 
-	ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
+	ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0);
 	if (IS_ERR(ci)) {
 		_debug("no cipher");
 		ret = PTR_ERR(ci);
 		goto error;
 	}
 
-	if (crypto_skcipher_setkey(ci, token->kad->session_key,
+	if (crypto_sync_skcipher_setkey(ci, token->kad->session_key,
 				   sizeof(token->kad->session_key)) < 0)
 		BUG();
 
@@ -104,7 +104,7 @@ error:
 static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
 {
 	struct rxrpc_key_token *token;
-	SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
 	struct scatterlist sg;
 	struct rxrpc_crypt iv;
 	__be32 *tmpbuf;
@@ -128,7 +128,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
 	tmpbuf[3] = htonl(conn->security_ix);
 
 	sg_init_one(&sg, tmpbuf, tmpsize);
-	skcipher_request_set_tfm(req, conn->cipher);
+	skcipher_request_set_sync_tfm(req, conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x);
 	crypto_skcipher_encrypt(req);
@@ -167,7 +167,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
 	memset(&iv, 0, sizeof(iv));
 
 	sg_init_one(&sg, sechdr, 8);
-	skcipher_request_set_tfm(req, call->conn->cipher);
+	skcipher_request_set_sync_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
 	crypto_skcipher_encrypt(req);
@@ -212,7 +212,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
 	memcpy(&iv, token->kad->session_key, sizeof(iv));
 
 	sg_init_one(&sg[0], sechdr, sizeof(rxkhdr));
-	skcipher_request_set_tfm(req, call->conn->cipher);
+	skcipher_request_set_sync_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, &sg[0], &sg[0], sizeof(rxkhdr), iv.x);
 	crypto_skcipher_encrypt(req);
@@ -250,7 +250,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
 			       void *sechdr)
 {
 	struct rxrpc_skb_priv *sp;
-	SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
 	struct rxrpc_crypt iv;
 	struct scatterlist sg;
 	u32 x, y;
@@ -279,7 +279,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
 	call->crypto_buf[1] = htonl(x);
 
 	sg_init_one(&sg, call->crypto_buf, 8);
-	skcipher_request_set_tfm(req, call->conn->cipher);
+	skcipher_request_set_sync_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
 	crypto_skcipher_encrypt(req);
@@ -352,7 +352,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
 	/* start the decryption afresh */
 	memset(&iv, 0, sizeof(iv));
 
-	skcipher_request_set_tfm(req, call->conn->cipher);
+	skcipher_request_set_sync_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, 8, iv.x);
 	crypto_skcipher_decrypt(req);
@@ -450,7 +450,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
 	token = call->conn->params.key->payload.data[0];
 	memcpy(&iv, token->kad->session_key, sizeof(iv));
 
-	skcipher_request_set_tfm(req, call->conn->cipher);
+	skcipher_request_set_sync_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, len, iv.x);
 	crypto_skcipher_decrypt(req);
@@ -506,7 +506,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
 			       unsigned int offset, unsigned int len,
 			       rxrpc_seq_t seq, u16 expected_cksum)
 {
-	SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
 	struct rxrpc_crypt iv;
 	struct scatterlist sg;
 	bool aborted;
@@ -529,7 +529,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
 	call->crypto_buf[1] = htonl(x);
 
 	sg_init_one(&sg, call->crypto_buf, 8);
-	skcipher_request_set_tfm(req, call->conn->cipher);
+	skcipher_request_set_sync_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
 	crypto_skcipher_encrypt(req);
@@ -755,7 +755,7 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn,
 				   struct rxkad_response *resp,
 				   const struct rxkad_key *s2)
 {
-	SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
 	struct rxrpc_crypt iv;
 	struct scatterlist sg[1];
 
@@ -764,7 +764,7 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn,
 
 	sg_init_table(sg, 1);
 	sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
-	skcipher_request_set_tfm(req, conn->cipher);
+	skcipher_request_set_sync_tfm(req, conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
 	crypto_skcipher_encrypt(req);
@@ -1021,7 +1021,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
 				   struct rxkad_response *resp,
 				   const struct rxrpc_crypt *session_key)
 {
-	SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci);
 	struct scatterlist sg[1];
 	struct rxrpc_crypt iv;
 
@@ -1031,7 +1031,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
 	ASSERT(rxkad_ci != NULL);
 
 	mutex_lock(&rxkad_ci_mutex);
-	if (crypto_skcipher_setkey(rxkad_ci, session_key->x,
+	if (crypto_sync_skcipher_setkey(rxkad_ci, session_key->x,
 				   sizeof(*session_key)) < 0)
 		BUG();
 
@@ -1039,7 +1039,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
 
 	sg_init_table(sg, 1);
 	sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
-	skcipher_request_set_tfm(req, rxkad_ci);
+	skcipher_request_set_sync_tfm(req, rxkad_ci);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
 	crypto_skcipher_decrypt(req);
@@ -1218,7 +1218,7 @@ static void rxkad_clear(struct rxrpc_connection *conn)
 	_enter("");
 
 	if (conn->cipher)
-		crypto_free_skcipher(conn->cipher);
+		crypto_free_sync_skcipher(conn->cipher);
 }
 
 /*
@@ -1228,7 +1228,7 @@ static int rxkad_init(void)
 {
 	/* pin the cipher we need so that the crypto layer doesn't invoke
 	 * keventd to go get it */
-	rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
+	rxkad_ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0);
 	return PTR_ERR_OR_ZERO(rxkad_ci);
 }
 
@@ -1238,7 +1238,7 @@ static int rxkad_init(void)
 static void rxkad_exit(void)
 {
 	if (rxkad_ci)
-		crypto_free_skcipher(rxkad_ci);
+		crypto_free_sync_skcipher(rxkad_ci);
 }
 
 /*
diff --git a/net/socket.c b/net/socket.c
index b68801c7d0ab..99c96851469f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2342,7 +2342,7 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
  */
 
 int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
-		   unsigned int flags, struct timespec *timeout)
+		   unsigned int flags, struct timespec64 *timeout)
 {
 	int fput_needed, err, datagrams;
 	struct socket *sock;
@@ -2407,8 +2407,7 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 
 		if (timeout) {
 			ktime_get_ts64(&timeout64);
-			*timeout = timespec64_to_timespec(
-					timespec64_sub(end_time, timeout64));
+			*timeout = timespec64_sub(end_time, timeout64);
 			if (timeout->tv_sec < 0) {
 				timeout->tv_sec = timeout->tv_nsec = 0;
 				break;
@@ -2454,10 +2453,10 @@ out_put:
 
 static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
 			   unsigned int vlen, unsigned int flags,
-			   struct timespec __user *timeout)
+			   struct __kernel_timespec __user *timeout)
 {
 	int datagrams;
-	struct timespec timeout_sys;
+	struct timespec64 timeout_sys;
 
 	if (flags & MSG_CMSG_COMPAT)
 		return -EINVAL;
@@ -2465,13 +2464,12 @@ static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
 	if (!timeout)
 		return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
 
-	if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
+	if (get_timespec64(&timeout_sys, timeout))
 		return -EFAULT;
 
 	datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
 
-	if (datagrams > 0 &&
-	    copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
+	if (datagrams > 0 && put_timespec64(&timeout_sys, timeout))
 		datagrams = -EFAULT;
 
 	return datagrams;
@@ -2479,7 +2477,7 @@ static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
 
 SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
 		unsigned int, vlen, unsigned int, flags,
-		struct timespec __user *, timeout)
+		struct __kernel_timespec __user *, timeout)
 {
 	return do_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
 }
@@ -2603,7 +2601,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
 		break;
 	case SYS_RECVMMSG:
 		err = do_sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2],
-				      a[3], (struct timespec __user *)a[4]);
+				      a[3], (struct __kernel_timespec __user *)a[4]);
 		break;
 	case SYS_ACCEPT4:
 		err = __sys_accept4(a0, (struct sockaddr __user *)a1,
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 0220e1ca5280..4f43383971ba 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -53,7 +53,7 @@
 
 u32
 krb5_encrypt(
-	struct crypto_skcipher *tfm,
+	struct crypto_sync_skcipher *tfm,
 	void * iv,
 	void * in,
 	void * out,
@@ -62,24 +62,24 @@ krb5_encrypt(
 	u32 ret = -EINVAL;
 	struct scatterlist sg[1];
 	u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
-	SKCIPHER_REQUEST_ON_STACK(req, tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
 
-	if (length % crypto_skcipher_blocksize(tfm) != 0)
+	if (length % crypto_sync_skcipher_blocksize(tfm) != 0)
 		goto out;
 
-	if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+	if (crypto_sync_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
 		dprintk("RPC:       gss_k5encrypt: tfm iv size too large %d\n",
-			crypto_skcipher_ivsize(tfm));
+			crypto_sync_skcipher_ivsize(tfm));
 		goto out;
 	}
 
 	if (iv)
-		memcpy(local_iv, iv, crypto_skcipher_ivsize(tfm));
+		memcpy(local_iv, iv, crypto_sync_skcipher_ivsize(tfm));
 
 	memcpy(out, in, length);
 	sg_init_one(sg, out, length);
 
-	skcipher_request_set_tfm(req, tfm);
+	skcipher_request_set_sync_tfm(req, tfm);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, length, local_iv);
 
@@ -92,7 +92,7 @@ out:
 
 u32
 krb5_decrypt(
-     struct crypto_skcipher *tfm,
+     struct crypto_sync_skcipher *tfm,
      void * iv,
      void * in,
      void * out,
@@ -101,23 +101,23 @@ krb5_decrypt(
 	u32 ret = -EINVAL;
 	struct scatterlist sg[1];
 	u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
-	SKCIPHER_REQUEST_ON_STACK(req, tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
 
-	if (length % crypto_skcipher_blocksize(tfm) != 0)
+	if (length % crypto_sync_skcipher_blocksize(tfm) != 0)
 		goto out;
 
-	if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+	if (crypto_sync_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
 		dprintk("RPC:       gss_k5decrypt: tfm iv size too large %d\n",
-			crypto_skcipher_ivsize(tfm));
+			crypto_sync_skcipher_ivsize(tfm));
 		goto out;
 	}
 	if (iv)
-		memcpy(local_iv,iv, crypto_skcipher_ivsize(tfm));
+		memcpy(local_iv, iv, crypto_sync_skcipher_ivsize(tfm));
 
 	memcpy(out, in, length);
 	sg_init_one(sg, out, length);
 
-	skcipher_request_set_tfm(req, tfm);
+	skcipher_request_set_sync_tfm(req, tfm);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, length, local_iv);
 
@@ -466,7 +466,8 @@ encryptor(struct scatterlist *sg, void *data)
 {
 	struct encryptor_desc *desc = data;
 	struct xdr_buf *outbuf = desc->outbuf;
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req);
+	struct crypto_sync_skcipher *tfm =
+		crypto_sync_skcipher_reqtfm(desc->req);
 	struct page *in_page;
 	int thislen = desc->fraglen + sg->length;
 	int fraglen, ret;
@@ -492,7 +493,7 @@ encryptor(struct scatterlist *sg, void *data)
 	desc->fraglen += sg->length;
 	desc->pos += sg->length;
 
-	fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1);
+	fraglen = thislen & (crypto_sync_skcipher_blocksize(tfm) - 1);
 	thislen -= fraglen;
 
 	if (thislen == 0)
@@ -526,16 +527,16 @@ encryptor(struct scatterlist *sg, void *data)
 }
 
 int
-gss_encrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf,
+gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf,
 		    int offset, struct page **pages)
 {
 	int ret;
 	struct encryptor_desc desc;
-	SKCIPHER_REQUEST_ON_STACK(req, tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
 
-	BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0);
+	BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0);
 
-	skcipher_request_set_tfm(req, tfm);
+	skcipher_request_set_sync_tfm(req, tfm);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 
 	memset(desc.iv, 0, sizeof(desc.iv));
@@ -567,7 +568,8 @@ decryptor(struct scatterlist *sg, void *data)
 {
 	struct decryptor_desc *desc = data;
 	int thislen = desc->fraglen + sg->length;
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req);
+	struct crypto_sync_skcipher *tfm =
+		crypto_sync_skcipher_reqtfm(desc->req);
 	int fraglen, ret;
 
 	/* Worst case is 4 fragments: head, end of page 1, start
@@ -578,7 +580,7 @@ decryptor(struct scatterlist *sg, void *data)
 	desc->fragno++;
 	desc->fraglen += sg->length;
 
-	fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1);
+	fraglen = thislen & (crypto_sync_skcipher_blocksize(tfm) - 1);
 	thislen -= fraglen;
 
 	if (thislen == 0)
@@ -608,17 +610,17 @@ decryptor(struct scatterlist *sg, void *data)
 }
 
 int
-gss_decrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf,
+gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf,
 		    int offset)
 {
 	int ret;
 	struct decryptor_desc desc;
-	SKCIPHER_REQUEST_ON_STACK(req, tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
 
 	/* XXXJBF: */
-	BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0);
+	BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0);
 
-	skcipher_request_set_tfm(req, tfm);
+	skcipher_request_set_sync_tfm(req, tfm);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 
 	memset(desc.iv, 0, sizeof(desc.iv));
@@ -672,12 +674,12 @@ xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
 }
 
 static u32
-gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf,
+gss_krb5_cts_crypt(struct crypto_sync_skcipher *cipher, struct xdr_buf *buf,
 		   u32 offset, u8 *iv, struct page **pages, int encrypt)
 {
 	u32 ret;
 	struct scatterlist sg[1];
-	SKCIPHER_REQUEST_ON_STACK(req, cipher);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, cipher);
 	u8 *data;
 	struct page **save_pages;
 	u32 len = buf->len - offset;
@@ -706,7 +708,7 @@ gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf,
 
 	sg_init_one(sg, data, len);
 
-	skcipher_request_set_tfm(req, cipher);
+	skcipher_request_set_sync_tfm(req, cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, len, iv);
 
@@ -735,7 +737,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
 	struct xdr_netobj hmac;
 	u8 *cksumkey;
 	u8 *ecptr;
-	struct crypto_skcipher *cipher, *aux_cipher;
+	struct crypto_sync_skcipher *cipher, *aux_cipher;
 	int blocksize;
 	struct page **save_pages;
 	int nblocks, nbytes;
@@ -754,7 +756,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
 		cksumkey = kctx->acceptor_integ;
 		usage = KG_USAGE_ACCEPTOR_SEAL;
 	}
-	blocksize = crypto_skcipher_blocksize(cipher);
+	blocksize = crypto_sync_skcipher_blocksize(cipher);
 
 	/* hide the gss token header and insert the confounder */
 	offset += GSS_KRB5_TOK_HDR_LEN;
@@ -807,7 +809,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
 	memset(desc.iv, 0, sizeof(desc.iv));
 
 	if (cbcbytes) {
-		SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
 
 		desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
 		desc.fragno = 0;
@@ -816,7 +818,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
 		desc.outbuf = buf;
 		desc.req = req;
 
-		skcipher_request_set_tfm(req, aux_cipher);
+		skcipher_request_set_sync_tfm(req, aux_cipher);
 		skcipher_request_set_callback(req, 0, NULL, NULL);
 
 		sg_init_table(desc.infrags, 4);
@@ -855,7 +857,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
 	struct xdr_buf subbuf;
 	u32 ret = 0;
 	u8 *cksum_key;
-	struct crypto_skcipher *cipher, *aux_cipher;
+	struct crypto_sync_skcipher *cipher, *aux_cipher;
 	struct xdr_netobj our_hmac_obj;
 	u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
 	u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
@@ -874,7 +876,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
 		cksum_key = kctx->initiator_integ;
 		usage = KG_USAGE_INITIATOR_SEAL;
 	}
-	blocksize = crypto_skcipher_blocksize(cipher);
+	blocksize = crypto_sync_skcipher_blocksize(cipher);
 
 
 	/* create a segment skipping the header and leaving out the checksum */
@@ -891,13 +893,13 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
 	memset(desc.iv, 0, sizeof(desc.iv));
 
 	if (cbcbytes) {
-		SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
 
 		desc.fragno = 0;
 		desc.fraglen = 0;
 		desc.req = req;
 
-		skcipher_request_set_tfm(req, aux_cipher);
+		skcipher_request_set_sync_tfm(req, aux_cipher);
 		skcipher_request_set_callback(req, 0, NULL, NULL);
 
 		sg_init_table(desc.frags, 4);
@@ -946,7 +948,8 @@ out_err:
  * Set the key of the given cipher.
  */
 int
-krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
+krb5_rc4_setup_seq_key(struct krb5_ctx *kctx,
+		       struct crypto_sync_skcipher *cipher,
 		       unsigned char *cksum)
 {
 	struct crypto_shash *hmac;
@@ -994,7 +997,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
 	if (err)
 		goto out_err;
 
-	err = crypto_skcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
+	err = crypto_sync_skcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
 	if (err)
 		goto out_err;
 
@@ -1012,7 +1015,8 @@ out_err:
  * Set the key of cipher kctx->enc.
  */
 int
-krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
+krb5_rc4_setup_enc_key(struct krb5_ctx *kctx,
+		       struct crypto_sync_skcipher *cipher,
 		       s32 seqnum)
 {
 	struct crypto_shash *hmac;
@@ -1069,7 +1073,8 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
 	if (err)
 		goto out_err;
 
-	err = crypto_skcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength);
+	err = crypto_sync_skcipher_setkey(cipher, Kcrypt,
+					  kctx->gk5e->keylength);
 	if (err)
 		goto out_err;
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index f7fe2d2b851f..550fdf18d3b3 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -147,7 +147,7 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
 	size_t blocksize, keybytes, keylength, n;
 	unsigned char *inblockdata, *outblockdata, *rawkey;
 	struct xdr_netobj inblock, outblock;
-	struct crypto_skcipher *cipher;
+	struct crypto_sync_skcipher *cipher;
 	u32 ret = EINVAL;
 
 	blocksize = gk5e->blocksize;
@@ -157,11 +157,10 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
 	if ((inkey->len != keylength) || (outkey->len != keylength))
 		goto err_return;
 
-	cipher = crypto_alloc_skcipher(gk5e->encrypt_name, 0,
-				       CRYPTO_ALG_ASYNC);
+	cipher = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(cipher))
 		goto err_return;
-	if (crypto_skcipher_setkey(cipher, inkey->data, inkey->len))
+	if (crypto_sync_skcipher_setkey(cipher, inkey->data, inkey->len))
 		goto err_return;
 
 	/* allocate and set up buffers */
@@ -238,7 +237,7 @@ err_free_in:
 	memset(inblockdata, 0, blocksize);
 	kfree(inblockdata);
 err_free_cipher:
-	crypto_free_skcipher(cipher);
+	crypto_free_sync_skcipher(cipher);
 err_return:
 	return ret;
 }
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 7bb2514aadd9..7f0424dfa8f6 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -218,7 +218,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
 
 static inline const void *
 get_key(const void *p, const void *end,
-	struct krb5_ctx *ctx, struct crypto_skcipher **res)
+	struct krb5_ctx *ctx, struct crypto_sync_skcipher **res)
 {
 	struct xdr_netobj	key;
 	int			alg;
@@ -246,15 +246,14 @@ get_key(const void *p, const void *end,
 	if (IS_ERR(p))
 		goto out_err;
 
-	*res = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
-							CRYPTO_ALG_ASYNC);
+	*res = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(*res)) {
 		printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
 			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
 		*res = NULL;
 		goto out_err_free_key;
 	}
-	if (crypto_skcipher_setkey(*res, key.data, key.len)) {
+	if (crypto_sync_skcipher_setkey(*res, key.data, key.len)) {
 		printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
 			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
 		goto out_err_free_tfm;
@@ -264,7 +263,7 @@ get_key(const void *p, const void *end,
 	return p;
 
 out_err_free_tfm:
-	crypto_free_skcipher(*res);
+	crypto_free_sync_skcipher(*res);
 out_err_free_key:
 	kfree(key.data);
 	p = ERR_PTR(-EINVAL);
@@ -336,30 +335,30 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
 	return 0;
 
 out_err_free_key2:
-	crypto_free_skcipher(ctx->seq);
+	crypto_free_sync_skcipher(ctx->seq);
 out_err_free_key1:
-	crypto_free_skcipher(ctx->enc);
+	crypto_free_sync_skcipher(ctx->enc);
 out_err_free_mech:
 	kfree(ctx->mech_used.data);
 out_err:
 	return PTR_ERR(p);
 }
 
-static struct crypto_skcipher *
+static struct crypto_sync_skcipher *
 context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
 {
-	struct crypto_skcipher *cp;
+	struct crypto_sync_skcipher *cp;
 
-	cp = crypto_alloc_skcipher(cname, 0, CRYPTO_ALG_ASYNC);
+	cp = crypto_alloc_sync_skcipher(cname, 0, 0);
 	if (IS_ERR(cp)) {
 		dprintk("gss_kerberos_mech: unable to initialize "
 			"crypto algorithm %s\n", cname);
 		return NULL;
 	}
-	if (crypto_skcipher_setkey(cp, key, ctx->gk5e->keylength)) {
+	if (crypto_sync_skcipher_setkey(cp, key, ctx->gk5e->keylength)) {
 		dprintk("gss_kerberos_mech: error setting key for "
 			"crypto algorithm %s\n", cname);
-		crypto_free_skcipher(cp);
+		crypto_free_sync_skcipher(cp);
 		return NULL;
 	}
 	return cp;
@@ -413,9 +412,9 @@ context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
 	return 0;
 
 out_free_enc:
-	crypto_free_skcipher(ctx->enc);
+	crypto_free_sync_skcipher(ctx->enc);
 out_free_seq:
-	crypto_free_skcipher(ctx->seq);
+	crypto_free_sync_skcipher(ctx->seq);
 out_err:
 	return -EINVAL;
 }
@@ -469,17 +468,15 @@ context_derive_keys_rc4(struct krb5_ctx *ctx)
 	/*
 	 * allocate hash, and skciphers for data and seqnum encryption
 	 */
-	ctx->enc = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
-					 CRYPTO_ALG_ASYNC);
+	ctx->enc = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(ctx->enc)) {
 		err = PTR_ERR(ctx->enc);
 		goto out_err_free_hmac;
 	}
 
-	ctx->seq = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
-					 CRYPTO_ALG_ASYNC);
+	ctx->seq = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(ctx->seq)) {
-		crypto_free_skcipher(ctx->enc);
+		crypto_free_sync_skcipher(ctx->enc);
 		err = PTR_ERR(ctx->seq);
 		goto out_err_free_hmac;
 	}
@@ -591,7 +588,7 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
 			context_v2_alloc_cipher(ctx, "cbc(aes)",
 						ctx->acceptor_seal);
 		if (ctx->acceptor_enc_aux == NULL) {
-			crypto_free_skcipher(ctx->initiator_enc_aux);
+			crypto_free_sync_skcipher(ctx->initiator_enc_aux);
 			goto out_free_acceptor_enc;
 		}
 	}
@@ -599,9 +596,9 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
 	return 0;
 
 out_free_acceptor_enc:
-	crypto_free_skcipher(ctx->acceptor_enc);
+	crypto_free_sync_skcipher(ctx->acceptor_enc);
 out_free_initiator_enc:
-	crypto_free_skcipher(ctx->initiator_enc);
+	crypto_free_sync_skcipher(ctx->initiator_enc);
 out_err:
 	return -EINVAL;
 }
@@ -713,12 +710,12 @@ static void
 gss_delete_sec_context_kerberos(void *internal_ctx) {
 	struct krb5_ctx *kctx = internal_ctx;
 
-	crypto_free_skcipher(kctx->seq);
-	crypto_free_skcipher(kctx->enc);
-	crypto_free_skcipher(kctx->acceptor_enc);
-	crypto_free_skcipher(kctx->initiator_enc);
-	crypto_free_skcipher(kctx->acceptor_enc_aux);
-	crypto_free_skcipher(kctx->initiator_enc_aux);
+	crypto_free_sync_skcipher(kctx->seq);
+	crypto_free_sync_skcipher(kctx->enc);
+	crypto_free_sync_skcipher(kctx->acceptor_enc);
+	crypto_free_sync_skcipher(kctx->initiator_enc);
+	crypto_free_sync_skcipher(kctx->acceptor_enc_aux);
+	crypto_free_sync_skcipher(kctx->initiator_enc_aux);
 	kfree(kctx->mech_used.data);
 	kfree(kctx);
 }
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index c8b9082f4a9d..fb6656295204 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -43,13 +43,12 @@ static s32
 krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
 		      unsigned char *cksum, unsigned char *buf)
 {
-	struct crypto_skcipher *cipher;
+	struct crypto_sync_skcipher *cipher;
 	unsigned char plain[8];
 	s32 code;
 
 	dprintk("RPC:       %s:\n", __func__);
-	cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
-				       CRYPTO_ALG_ASYNC);
+	cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
@@ -68,12 +67,12 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
 
 	code = krb5_encrypt(cipher, cksum, plain, buf, 8);
 out:
-	crypto_free_skcipher(cipher);
+	crypto_free_sync_skcipher(cipher);
 	return code;
 }
 s32
 krb5_make_seq_num(struct krb5_ctx *kctx,
-		struct crypto_skcipher *key,
+		struct crypto_sync_skcipher *key,
 		int direction,
 		u32 seqnum,
 		unsigned char *cksum, unsigned char *buf)
@@ -101,13 +100,12 @@ static s32
 krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
 		     unsigned char *buf, int *direction, s32 *seqnum)
 {
-	struct crypto_skcipher *cipher;
+	struct crypto_sync_skcipher *cipher;
 	unsigned char plain[8];
 	s32 code;
 
 	dprintk("RPC:       %s:\n", __func__);
-	cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
-				       CRYPTO_ALG_ASYNC);
+	cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
@@ -130,7 +128,7 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
 	*seqnum = ((plain[0] << 24) | (plain[1] << 16) |
 					(plain[2] << 8) | (plain[3]));
 out:
-	crypto_free_skcipher(cipher);
+	crypto_free_sync_skcipher(cipher);
 	return code;
 }
 
@@ -142,7 +140,7 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
 {
 	s32 code;
 	unsigned char plain[8];
-	struct crypto_skcipher *key = kctx->seq;
+	struct crypto_sync_skcipher *key = kctx->seq;
 
 	dprintk("RPC:       krb5_get_seq_num:\n");
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 39a2e672900b..3d975a4013d2 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -174,7 +174,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 
 	now = get_seconds();
 
-	blocksize = crypto_skcipher_blocksize(kctx->enc);
+	blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
 	gss_krb5_add_padding(buf, offset, blocksize);
 	BUG_ON((buf->len - offset) % blocksize);
 	plainlen = conflen + buf->len - offset;
@@ -239,10 +239,10 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 		return GSS_S_FAILURE;
 
 	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
-		struct crypto_skcipher *cipher;
+		struct crypto_sync_skcipher *cipher;
 		int err;
-		cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
-					       CRYPTO_ALG_ASYNC);
+		cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name,
+						    0, 0);
 		if (IS_ERR(cipher))
 			return GSS_S_FAILURE;
 
@@ -250,7 +250,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 
 		err = gss_encrypt_xdr_buf(cipher, buf,
 					  offset + headlen - conflen, pages);
-		crypto_free_skcipher(cipher);
+		crypto_free_sync_skcipher(cipher);
 		if (err)
 			return GSS_S_FAILURE;
 	} else {
@@ -327,18 +327,18 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 		return GSS_S_BAD_SIG;
 
 	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
-		struct crypto_skcipher *cipher;
+		struct crypto_sync_skcipher *cipher;
 		int err;
 
-		cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
-					       CRYPTO_ALG_ASYNC);
+		cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name,
+						    0, 0);
 		if (IS_ERR(cipher))
 			return GSS_S_FAILURE;
 
 		krb5_rc4_setup_enc_key(kctx, cipher, seqnum);
 
 		err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset);
-		crypto_free_skcipher(cipher);
+		crypto_free_sync_skcipher(cipher);
 		if (err)
 			return GSS_S_DEFECTIVE_TOKEN;
 	} else {
@@ -371,7 +371,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 	/* Copy the data back to the right position.  XXX: Would probably be
 	 * better to copy and encrypt at the same time. */
 
-	blocksize = crypto_skcipher_blocksize(kctx->enc);
+	blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
 	data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
 					conflen;
 	orig_start = buf->head[0].iov_base + offset;
diff --git a/samples/mei/mei-amt-version.c b/samples/mei/mei-amt-version.c
index 57d0d871dcf7..33e67bd1dc34 100644
--- a/samples/mei/mei-amt-version.c
+++ b/samples/mei/mei-amt-version.c
@@ -370,7 +370,7 @@ static uint32_t amt_host_if_call(struct amt_host_if *acmd,
 			unsigned int expected_sz)
 {
 	uint32_t in_buf_sz;
-	uint32_t out_buf_sz;
+	ssize_t out_buf_sz;
 	ssize_t written;
 	uint32_t status;
 	struct amt_host_if_resp_header *msg_hdr;
diff --git a/scripts/Makefile b/scripts/Makefile
index 61affa300d25..ece52ff20171 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -39,8 +39,7 @@ build_unifdef: $(obj)/unifdef
 subdir-$(CONFIG_MODVERSIONS) += genksyms
 subdir-y                     += mod
 subdir-$(CONFIG_SECURITY_SELINUX) += selinux
-subdir-$(CONFIG_DTC)         += dtc
 subdir-$(CONFIG_GDB_SCRIPTS) += gdb
 
 # Let clean descend into subdirs
-subdir-	+= basic kconfig package gcc-plugins
+subdir-	+= basic dtc kconfig package gcc-plugins
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 61e596650ed3..8fe4468f9bda 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -283,7 +283,7 @@ $(obj)/%.dtb.S: $(obj)/%.dtb FORCE
 
 quiet_cmd_dtc = DTC     $@
 cmd_dtc = mkdir -p $(dir ${dtc-tmp}) ; \
-	$(CPP) $(dtc_cpp_flags) -x assembler-with-cpp -o $(dtc-tmp) $< ; \
+	$(HOSTCC) -E $(dtc_cpp_flags) -x assembler-with-cpp -o $(dtc-tmp) $< ; \
 	$(DTC) -O dtb -o $@ -b 0 \
 		$(addprefix -i,$(dir $<) $(DTC_INCLUDE)) $(DTC_FLAGS) \
 		-d $(depfile).dtc.tmp $(dtc-tmp) ; \
diff --git a/scripts/dtc/Makefile b/scripts/dtc/Makefile
index 1c943e03eaf2..056d5da6c477 100644
--- a/scripts/dtc/Makefile
+++ b/scripts/dtc/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # scripts/dtc makefile
 
-hostprogs-y	:= dtc
+hostprogs-$(CONFIG_DTC) := dtc
 always		:= $(hostprogs-y)
 
 dtc-objs	:= dtc.o flattree.o fstree.o data.o livetree.o treesource.o \
@@ -11,6 +11,13 @@ dtc-objs	+= dtc-lexer.lex.o dtc-parser.tab.o
 # Source files need to get at the userspace version of libfdt_env.h to compile
 HOST_EXTRACFLAGS := -I$(src)/libfdt
 
+ifeq ($(wildcard /usr/include/yaml.h),)
+HOST_EXTRACFLAGS += -DNO_YAML
+else
+dtc-objs	+= yamltree.o
+HOSTLDLIBS_dtc	:= -lyaml
+endif
+
 # Generated files need one more search path to include headers in source tree
 HOSTCFLAGS_dtc-lexer.lex.o := -I$(src)
 HOSTCFLAGS_dtc-parser.tab.o := -I$(src)
diff --git a/scripts/dtc/Makefile.dtc b/scripts/dtc/Makefile.dtc
index bece49b35535..d4375630a7f7 100644
--- a/scripts/dtc/Makefile.dtc
+++ b/scripts/dtc/Makefile.dtc
@@ -14,5 +14,9 @@ DTC_SRCS = \
 	treesource.c \
 	util.c
 
+ifneq ($(NO_YAML),1)
+DTC_SRCS += yamltree.c
+endif
+
 DTC_GEN_SRCS = dtc-lexer.lex.c dtc-parser.tab.c
 DTC_OBJS = $(DTC_SRCS:%.c=%.o) $(DTC_GEN_SRCS:%.c=%.o)
diff --git a/scripts/dtc/checks.c b/scripts/dtc/checks.c
index a2cc1036c915..9c9b0c328af6 100644
--- a/scripts/dtc/checks.c
+++ b/scripts/dtc/checks.c
@@ -962,6 +962,143 @@ static void check_simple_bus_reg(struct check *c, struct dt_info *dti, struct no
 }
 WARNING(simple_bus_reg, check_simple_bus_reg, NULL, &reg_format, &simple_bus_bridge);
 
+static const struct bus_type i2c_bus = {
+	.name = "i2c-bus",
+};
+
+static void check_i2c_bus_bridge(struct check *c, struct dt_info *dti, struct node *node)
+{
+	if (strprefixeq(node->name, node->basenamelen, "i2c-bus") ||
+	    strprefixeq(node->name, node->basenamelen, "i2c-arb")) {
+		node->bus = &i2c_bus;
+	} else if (strprefixeq(node->name, node->basenamelen, "i2c")) {
+		struct node *child;
+		for_each_child(node, child) {
+			if (strprefixeq(child->name, node->basenamelen, "i2c-bus"))
+				return;
+		}
+		node->bus = &i2c_bus;
+	} else
+		return;
+
+	if (!node->children)
+		return;
+
+	if (node_addr_cells(node) != 1)
+		FAIL(c, dti, node, "incorrect #address-cells for I2C bus");
+	if (node_size_cells(node) != 0)
+		FAIL(c, dti, node, "incorrect #size-cells for I2C bus");
+
+}
+WARNING(i2c_bus_bridge, check_i2c_bus_bridge, NULL, &addr_size_cells);
+
+static void check_i2c_bus_reg(struct check *c, struct dt_info *dti, struct node *node)
+{
+	struct property *prop;
+	const char *unitname = get_unitname(node);
+	char unit_addr[17];
+	uint32_t reg = 0;
+	int len;
+	cell_t *cells = NULL;
+
+	if (!node->parent || (node->parent->bus != &i2c_bus))
+		return;
+
+	prop = get_property(node, "reg");
+	if (prop)
+		cells = (cell_t *)prop->val.val;
+
+	if (!cells) {
+		FAIL(c, dti, node, "missing or empty reg property");
+		return;
+	}
+
+	reg = fdt32_to_cpu(*cells);
+	snprintf(unit_addr, sizeof(unit_addr), "%x", reg);
+	if (!streq(unitname, unit_addr))
+		FAIL(c, dti, node, "I2C bus unit address format error, expected \"%s\"",
+		     unit_addr);
+
+	for (len = prop->val.len; len > 0; len -= 4) {
+		reg = fdt32_to_cpu(*(cells++));
+		if (reg > 0x3ff)
+			FAIL_PROP(c, dti, node, prop, "I2C address must be less than 10-bits, got \"0x%x\"",
+				  reg);
+
+	}
+}
+WARNING(i2c_bus_reg, check_i2c_bus_reg, NULL, &reg_format, &i2c_bus_bridge);
+
+static const struct bus_type spi_bus = {
+	.name = "spi-bus",
+};
+
+static void check_spi_bus_bridge(struct check *c, struct dt_info *dti, struct node *node)
+{
+
+	if (strprefixeq(node->name, node->basenamelen, "spi")) {
+		node->bus = &spi_bus;
+	} else {
+		/* Try to detect SPI buses which don't have proper node name */
+		struct node *child;
+
+		if (node_addr_cells(node) != 1 || node_size_cells(node) != 0)
+			return;
+
+		for_each_child(node, child) {
+			struct property *prop;
+			for_each_property(child, prop) {
+				if (strprefixeq(prop->name, 4, "spi-")) {
+					node->bus = &spi_bus;
+					break;
+				}
+			}
+			if (node->bus == &spi_bus)
+				break;
+		}
+
+		if (node->bus == &spi_bus && get_property(node, "reg"))
+			FAIL(c, dti, node, "node name for SPI buses should be 'spi'");
+	}
+	if (node->bus != &spi_bus || !node->children)
+		return;
+
+	if (node_addr_cells(node) != 1)
+		FAIL(c, dti, node, "incorrect #address-cells for SPI bus");
+	if (node_size_cells(node) != 0)
+		FAIL(c, dti, node, "incorrect #size-cells for SPI bus");
+
+}
+WARNING(spi_bus_bridge, check_spi_bus_bridge, NULL, &addr_size_cells);
+
+static void check_spi_bus_reg(struct check *c, struct dt_info *dti, struct node *node)
+{
+	struct property *prop;
+	const char *unitname = get_unitname(node);
+	char unit_addr[9];
+	uint32_t reg = 0;
+	cell_t *cells = NULL;
+
+	if (!node->parent || (node->parent->bus != &spi_bus))
+		return;
+
+	prop = get_property(node, "reg");
+	if (prop)
+		cells = (cell_t *)prop->val.val;
+
+	if (!cells) {
+		FAIL(c, dti, node, "missing or empty reg property");
+		return;
+	}
+
+	reg = fdt32_to_cpu(*cells);
+	snprintf(unit_addr, sizeof(unit_addr), "%x", reg);
+	if (!streq(unitname, unit_addr))
+		FAIL(c, dti, node, "SPI bus unit address format error, expected \"%s\"",
+		     unit_addr);
+}
+WARNING(spi_bus_reg, check_spi_bus_reg, NULL, &reg_format, &spi_bus_bridge);
+
 static void check_unit_address_format(struct check *c, struct dt_info *dti,
 				      struct node *node)
 {
@@ -1582,6 +1719,12 @@ static struct check *check_table[] = {
 	&simple_bus_bridge,
 	&simple_bus_reg,
 
+	&i2c_bus_bridge,
+	&i2c_bus_reg,
+
+	&spi_bus_bridge,
+	&spi_bus_reg,
+
 	&avoid_default_addr_size,
 	&avoid_unnecessary_addr_size,
 	&unique_unit_address,
diff --git a/scripts/dtc/data.c b/scripts/dtc/data.c
index aa37a16c8891..4a204145cc7b 100644
--- a/scripts/dtc/data.c
+++ b/scripts/dtc/data.c
@@ -74,7 +74,8 @@ struct data data_copy_escape_string(const char *s, int len)
 	struct data d;
 	char *q;
 
-	d = data_grow_for(empty_data, len + 1);
+	d = data_add_marker(empty_data, TYPE_STRING, NULL);
+	d = data_grow_for(d, len + 1);
 
 	q = d.val;
 	while (i < len) {
@@ -94,6 +95,7 @@ struct data data_copy_file(FILE *f, size_t maxlen)
 {
 	struct data d = empty_data;
 
+	d = data_add_marker(d, TYPE_NONE, NULL);
 	while (!feof(f) && (d.len < maxlen)) {
 		size_t chunksize, ret;
 
diff --git a/scripts/dtc/dtc-parser.y b/scripts/dtc/dtc-parser.y
index 011a5b25539a..dd70ebf386f4 100644
--- a/scripts/dtc/dtc-parser.y
+++ b/scripts/dtc/dtc-parser.y
@@ -287,6 +287,7 @@ propdata:
 		}
 	| propdataprefix DT_REF
 		{
+			$1 = data_add_marker($1, TYPE_STRING, $2);
 			$$ = data_add_marker($1, REF_PATH, $2);
 		}
 	| propdataprefix DT_INCBIN '(' DT_STRING ',' integer_prim ',' integer_prim ')'
@@ -340,22 +341,27 @@ arrayprefix:
 	DT_BITS DT_LITERAL '<'
 		{
 			unsigned long long bits;
+			enum markertype type = TYPE_UINT32;
 
 			bits = $2;
 
-			if ((bits !=  8) && (bits != 16) &&
-			    (bits != 32) && (bits != 64)) {
+			switch (bits) {
+			case 8: type = TYPE_UINT8; break;
+			case 16: type = TYPE_UINT16; break;
+			case 32: type = TYPE_UINT32; break;
+			case 64: type = TYPE_UINT64; break;
+			default:
 				ERROR(&@2, "Array elements must be"
 				      " 8, 16, 32 or 64-bits");
 				bits = 32;
 			}
 
-			$$.data = empty_data;
+			$$.data = data_add_marker(empty_data, type, NULL);
 			$$.bits = bits;
 		}
 	| '<'
 		{
-			$$.data = empty_data;
+			$$.data = data_add_marker(empty_data, TYPE_UINT32, NULL);
 			$$.bits = 32;
 		}
 	| arrayprefix integer_prim
@@ -499,7 +505,7 @@ integer_unary:
 bytestring:
 	  /* empty */
 		{
-			$$ = empty_data;
+			$$ = data_add_marker(empty_data, TYPE_UINT8, NULL);
 		}
 	| bytestring DT_BYTE
 		{
diff --git a/scripts/dtc/dtc.c b/scripts/dtc/dtc.c
index c36994e6eac5..64134aadb997 100644
--- a/scripts/dtc/dtc.c
+++ b/scripts/dtc/dtc.c
@@ -95,6 +95,9 @@ static const char * const usage_opts_help[] = {
 	"\n\tOutput formats are:\n"
 	 "\t\tdts - device tree source text\n"
 	 "\t\tdtb - device tree blob\n"
+#ifndef NO_YAML
+	 "\t\tyaml - device tree encoded as YAML\n"
+#endif
 	 "\t\tasm - assembler source",
 	"\n\tBlob version to produce, defaults to "stringify(DEFAULT_FDT_VERSION)" (for dtb and asm output)",
 	"\n\tOutput dependency file",
@@ -128,6 +131,8 @@ static const char *guess_type_by_name(const char *fname, const char *fallback)
 		return fallback;
 	if (!strcasecmp(s, ".dts"))
 		return "dts";
+	if (!strcasecmp(s, ".yaml"))
+		return "yaml";
 	if (!strcasecmp(s, ".dtb"))
 		return "dtb";
 	return fallback;
@@ -350,6 +355,12 @@ int main(int argc, char *argv[])
 
 	if (streq(outform, "dts")) {
 		dt_to_source(outf, dti);
+#ifndef NO_YAML
+	} else if (streq(outform, "yaml")) {
+		if (!streq(inform, "dts"))
+			die("YAML output format requires dts input format\n");
+		dt_to_yaml(outf, dti);
+#endif
 	} else if (streq(outform, "dtb")) {
 		dt_to_blob(outf, dti, outversion);
 	} else if (streq(outform, "asm")) {
diff --git a/scripts/dtc/dtc.h b/scripts/dtc/dtc.h
index 6d667701ab6a..cbe541525c2c 100644
--- a/scripts/dtc/dtc.h
+++ b/scripts/dtc/dtc.h
@@ -74,10 +74,17 @@ typedef uint32_t cell_t;
 
 /* Data blobs */
 enum markertype {
+	TYPE_NONE,
 	REF_PHANDLE,
 	REF_PATH,
 	LABEL,
+	TYPE_UINT8,
+	TYPE_UINT16,
+	TYPE_UINT32,
+	TYPE_UINT64,
+	TYPE_STRING,
 };
+extern const char *markername(enum markertype markertype);
 
 struct  marker {
 	enum markertype type;
@@ -101,6 +108,8 @@ struct data {
 	for_each_marker(m) \
 		if ((m)->type == (t))
 
+size_t type_marker_length(struct marker *m);
+
 void data_free(struct data d);
 
 struct data data_grow_for(struct data d, int xlen);
@@ -290,6 +299,10 @@ struct dt_info *dt_from_blob(const char *fname);
 void dt_to_source(FILE *f, struct dt_info *dti);
 struct dt_info *dt_from_source(const char *f);
 
+/* YAML source */
+
+void dt_to_yaml(FILE *f, struct dt_info *dti);
+
 /* FS trees */
 
 struct dt_info *dt_from_fs(const char *dirname);
diff --git a/scripts/dtc/flattree.c b/scripts/dtc/flattree.c
index 8d268fb785db..851ea87dbc0f 100644
--- a/scripts/dtc/flattree.c
+++ b/scripts/dtc/flattree.c
@@ -393,7 +393,7 @@ void dt_to_blob(FILE *f, struct dt_info *dti, int version)
 			padlen = 0;
 			if (quiet < 1)
 				fprintf(stderr,
-					"Warning: blob size %d >= minimum size %d\n",
+					"Warning: blob size %"PRIu32" >= minimum size %d\n",
 					fdt32_to_cpu(fdt.totalsize), minsize);
 		}
 	}
diff --git a/scripts/dtc/libfdt/fdt.c b/scripts/dtc/libfdt/fdt.c
index 7855a1787763..ae03b1112961 100644
--- a/scripts/dtc/libfdt/fdt.c
+++ b/scripts/dtc/libfdt/fdt.c
@@ -55,7 +55,12 @@
 
 #include "libfdt_internal.h"
 
-int fdt_check_header(const void *fdt)
+/*
+ * Minimal sanity check for a read-only tree. fdt_ro_probe_() checks
+ * that the given buffer contains what appears to be a flattened
+ * device tree with sane information in its header.
+ */
+int fdt_ro_probe_(const void *fdt)
 {
 	if (fdt_magic(fdt) == FDT_MAGIC) {
 		/* Complete tree */
@@ -74,6 +79,78 @@ int fdt_check_header(const void *fdt)
 	return 0;
 }
 
+static int check_off_(uint32_t hdrsize, uint32_t totalsize, uint32_t off)
+{
+	return (off >= hdrsize) && (off <= totalsize);
+}
+
+static int check_block_(uint32_t hdrsize, uint32_t totalsize,
+			uint32_t base, uint32_t size)
+{
+	if (!check_off_(hdrsize, totalsize, base))
+		return 0; /* block start out of bounds */
+	if ((base + size) < base)
+		return 0; /* overflow */
+	if (!check_off_(hdrsize, totalsize, base + size))
+		return 0; /* block end out of bounds */
+	return 1;
+}
+
+size_t fdt_header_size_(uint32_t version)
+{
+	if (version <= 1)
+		return FDT_V1_SIZE;
+	else if (version <= 2)
+		return FDT_V2_SIZE;
+	else if (version <= 3)
+		return FDT_V3_SIZE;
+	else if (version <= 16)
+		return FDT_V16_SIZE;
+	else
+		return FDT_V17_SIZE;
+}
+
+int fdt_check_header(const void *fdt)
+{
+	size_t hdrsize;
+
+	if (fdt_magic(fdt) != FDT_MAGIC)
+		return -FDT_ERR_BADMAGIC;
+	hdrsize = fdt_header_size(fdt);
+	if ((fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION)
+	    || (fdt_last_comp_version(fdt) > FDT_LAST_SUPPORTED_VERSION))
+		return -FDT_ERR_BADVERSION;
+	if (fdt_version(fdt) < fdt_last_comp_version(fdt))
+		return -FDT_ERR_BADVERSION;
+
+	if ((fdt_totalsize(fdt) < hdrsize)
+	    || (fdt_totalsize(fdt) > INT_MAX))
+		return -FDT_ERR_TRUNCATED;
+
+	/* Bounds check memrsv block */
+	if (!check_off_(hdrsize, fdt_totalsize(fdt), fdt_off_mem_rsvmap(fdt)))
+		return -FDT_ERR_TRUNCATED;
+
+	/* Bounds check structure block */
+	if (fdt_version(fdt) < 17) {
+		if (!check_off_(hdrsize, fdt_totalsize(fdt),
+				fdt_off_dt_struct(fdt)))
+			return -FDT_ERR_TRUNCATED;
+	} else {
+		if (!check_block_(hdrsize, fdt_totalsize(fdt),
+				  fdt_off_dt_struct(fdt),
+				  fdt_size_dt_struct(fdt)))
+			return -FDT_ERR_TRUNCATED;
+	}
+
+	/* Bounds check strings block */
+	if (!check_block_(hdrsize, fdt_totalsize(fdt),
+			  fdt_off_dt_strings(fdt), fdt_size_dt_strings(fdt)))
+		return -FDT_ERR_TRUNCATED;
+
+	return 0;
+}
+
 const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int len)
 {
 	unsigned absoffset = offset + fdt_off_dt_struct(fdt);
@@ -244,7 +321,7 @@ const char *fdt_find_string_(const char *strtab, int tabsize, const char *s)
 
 int fdt_move(const void *fdt, void *buf, int bufsize)
 {
-	FDT_CHECK_HEADER(fdt);
+	FDT_RO_PROBE(fdt);
 
 	if (fdt_totalsize(fdt) > bufsize)
 		return -FDT_ERR_NOSPACE;
diff --git a/scripts/dtc/libfdt/fdt_addresses.c b/scripts/dtc/libfdt/fdt_addresses.c
index eff4dbcc729d..49537b578d03 100644
--- a/scripts/dtc/libfdt/fdt_addresses.c
+++ b/scripts/dtc/libfdt/fdt_addresses.c
@@ -1,6 +1,7 @@
 /*
  * libfdt - Flat Device Tree manipulation
  * Copyright (C) 2014 David Gibson <david@gibson.dropbear.id.au>
+ * Copyright (C) 2018 embedded brains GmbH
  *
  * libfdt is dual licensed: you can use it either under the terms of
  * the GPL, or the BSD license, at your option.
@@ -55,42 +56,32 @@
 
 #include "libfdt_internal.h"
 
-int fdt_address_cells(const void *fdt, int nodeoffset)
+static int fdt_cells(const void *fdt, int nodeoffset, const char *name)
 {
-	const fdt32_t *ac;
+	const fdt32_t *c;
 	int val;
 	int len;
 
-	ac = fdt_getprop(fdt, nodeoffset, "#address-cells", &len);
-	if (!ac)
+	c = fdt_getprop(fdt, nodeoffset, name, &len);
+	if (!c)
 		return 2;
 
-	if (len != sizeof(*ac))
+	if (len != sizeof(*c))
 		return -FDT_ERR_BADNCELLS;
 
-	val = fdt32_to_cpu(*ac);
+	val = fdt32_to_cpu(*c);
 	if ((val <= 0) || (val > FDT_MAX_NCELLS))
 		return -FDT_ERR_BADNCELLS;
 
 	return val;
 }
 
-int fdt_size_cells(const void *fdt, int nodeoffset)
+int fdt_address_cells(const void *fdt, int nodeoffset)
 {
-	const fdt32_t *sc;
-	int val;
-	int len;
-
-	sc = fdt_getprop(fdt, nodeoffset, "#size-cells", &len);
-	if (!sc)
-		return 2;
-
-	if (len != sizeof(*sc))
-		return -FDT_ERR_BADNCELLS;
-
-	val = fdt32_to_cpu(*sc);
-	if ((val < 0) || (val > FDT_MAX_NCELLS))
-		return -FDT_ERR_BADNCELLS;
+	return fdt_cells(fdt, nodeoffset, "#address-cells");
+}
 
-	return val;
+int fdt_size_cells(const void *fdt, int nodeoffset)
+{
+	return fdt_cells(fdt, nodeoffset, "#size-cells");
 }
diff --git a/scripts/dtc/libfdt/fdt_overlay.c b/scripts/dtc/libfdt/fdt_overlay.c
index bf75388ec9a2..5fdab6c6371d 100644
--- a/scripts/dtc/libfdt/fdt_overlay.c
+++ b/scripts/dtc/libfdt/fdt_overlay.c
@@ -697,7 +697,7 @@ static int get_path_len(const void *fdt, int nodeoffset)
 	int len = 0, namelen;
 	const char *name;
 
-	FDT_CHECK_HEADER(fdt);
+	FDT_RO_PROBE(fdt);
 
 	for (;;) {
 		name = fdt_get_name(fdt, nodeoffset, &namelen);
@@ -866,8 +866,8 @@ int fdt_overlay_apply(void *fdt, void *fdto)
 	uint32_t delta = fdt_get_max_phandle(fdt);
 	int ret;
 
-	FDT_CHECK_HEADER(fdt);
-	FDT_CHECK_HEADER(fdto);
+	FDT_RO_PROBE(fdt);
+	FDT_RO_PROBE(fdto);
 
 	ret = overlay_adjust_local_phandles(fdto, delta);
 	if (ret)
diff --git a/scripts/dtc/libfdt/fdt_ro.c b/scripts/dtc/libfdt/fdt_ro.c
index dfb3236da388..eafc14282892 100644
--- a/scripts/dtc/libfdt/fdt_ro.c
+++ b/scripts/dtc/libfdt/fdt_ro.c
@@ -76,17 +76,72 @@ static int fdt_nodename_eq_(const void *fdt, int offset,
 		return 0;
 }
 
+const char *fdt_get_string(const void *fdt, int stroffset, int *lenp)
+{
+	uint32_t absoffset = stroffset + fdt_off_dt_strings(fdt);
+	size_t len;
+	int err;
+	const char *s, *n;
+
+	err = fdt_ro_probe_(fdt);
+	if (err != 0)
+		goto fail;
+
+	err = -FDT_ERR_BADOFFSET;
+	if (absoffset >= fdt_totalsize(fdt))
+		goto fail;
+	len = fdt_totalsize(fdt) - absoffset;
+
+	if (fdt_magic(fdt) == FDT_MAGIC) {
+		if (stroffset < 0)
+			goto fail;
+		if (fdt_version(fdt) >= 17) {
+			if (stroffset >= fdt_size_dt_strings(fdt))
+				goto fail;
+			if ((fdt_size_dt_strings(fdt) - stroffset) < len)
+				len = fdt_size_dt_strings(fdt) - stroffset;
+		}
+	} else if (fdt_magic(fdt) == FDT_SW_MAGIC) {
+		if ((stroffset >= 0)
+		    || (stroffset < -fdt_size_dt_strings(fdt)))
+			goto fail;
+		if ((-stroffset) < len)
+			len = -stroffset;
+	} else {
+		err = -FDT_ERR_INTERNAL;
+		goto fail;
+	}
+
+	s = (const char *)fdt + absoffset;
+	n = memchr(s, '\0', len);
+	if (!n) {
+		/* missing terminating NULL */
+		err = -FDT_ERR_TRUNCATED;
+		goto fail;
+	}
+
+	if (lenp)
+		*lenp = n - s;
+	return s;
+
+fail:
+	if (lenp)
+		*lenp = err;
+	return NULL;
+}
+
 const char *fdt_string(const void *fdt, int stroffset)
 {
-	return (const char *)fdt + fdt_off_dt_strings(fdt) + stroffset;
+	return fdt_get_string(fdt, stroffset, NULL);
 }
 
 static int fdt_string_eq_(const void *fdt, int stroffset,
 			  const char *s, int len)
 {
-	const char *p = fdt_string(fdt, stroffset);
+	int slen;
+	const char *p = fdt_get_string(fdt, stroffset, &slen);
 
-	return (strlen(p) == len) && (memcmp(p, s, len) == 0);
+	return p && (slen == len) && (memcmp(p, s, len) == 0);
 }
 
 uint32_t fdt_get_max_phandle(const void *fdt)
@@ -115,21 +170,42 @@ uint32_t fdt_get_max_phandle(const void *fdt)
 	return 0;
 }
 
+static const struct fdt_reserve_entry *fdt_mem_rsv(const void *fdt, int n)
+{
+	int offset = n * sizeof(struct fdt_reserve_entry);
+	int absoffset = fdt_off_mem_rsvmap(fdt) + offset;
+
+	if (absoffset < fdt_off_mem_rsvmap(fdt))
+		return NULL;
+	if (absoffset > fdt_totalsize(fdt) - sizeof(struct fdt_reserve_entry))
+		return NULL;
+	return fdt_mem_rsv_(fdt, n);
+}
+
 int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size)
 {
-	FDT_CHECK_HEADER(fdt);
-	*address = fdt64_to_cpu(fdt_mem_rsv_(fdt, n)->address);
-	*size = fdt64_to_cpu(fdt_mem_rsv_(fdt, n)->size);
+	const struct fdt_reserve_entry *re;
+
+	FDT_RO_PROBE(fdt);
+	re = fdt_mem_rsv(fdt, n);
+	if (!re)
+		return -FDT_ERR_BADOFFSET;
+
+	*address = fdt64_ld(&re->address);
+	*size = fdt64_ld(&re->size);
 	return 0;
 }
 
 int fdt_num_mem_rsv(const void *fdt)
 {
-	int i = 0;
+	int i;
+	const struct fdt_reserve_entry *re;
 
-	while (fdt64_to_cpu(fdt_mem_rsv_(fdt, i)->size) != 0)
-		i++;
-	return i;
+	for (i = 0; (re = fdt_mem_rsv(fdt, i)) != NULL; i++) {
+		if (fdt64_ld(&re->size) == 0)
+			return i;
+	}
+	return -FDT_ERR_TRUNCATED;
 }
 
 static int nextprop_(const void *fdt, int offset)
@@ -161,7 +237,7 @@ int fdt_subnode_offset_namelen(const void *fdt, int offset,
 {
 	int depth;
 
-	FDT_CHECK_HEADER(fdt);
+	FDT_RO_PROBE(fdt);
 
 	for (depth = 0;
 	     (offset >= 0) && (depth >= 0);
@@ -187,7 +263,7 @@ int fdt_path_offset_namelen(const void *fdt, const char *path, int namelen)
 	const char *p = path;
 	int offset = 0;
 
-	FDT_CHECK_HEADER(fdt);
+	FDT_RO_PROBE(fdt);
 
 	/* see if we have an alias */
 	if (*path != '/') {
@@ -237,7 +313,7 @@ const char *fdt_get_name(const void *fdt, int nodeoffset, int *len)
 	const char *nameptr;
 	int err;
 
-	if (((err = fdt_check_header(fdt)) != 0)
+	if (((err = fdt_ro_probe_(fdt)) != 0)
 	    || ((err = fdt_check_node_offset_(fdt, nodeoffset)) < 0))
 			goto fail;
 
@@ -303,7 +379,7 @@ static const struct fdt_property *fdt_get_property_by_offset_(const void *fdt,
 	prop = fdt_offset_ptr_(fdt, offset);
 
 	if (lenp)
-		*lenp = fdt32_to_cpu(prop->len);
+		*lenp = fdt32_ld(&prop->len);
 
 	return prop;
 }
@@ -340,7 +416,7 @@ static const struct fdt_property *fdt_get_property_namelen_(const void *fdt,
 			offset = -FDT_ERR_INTERNAL;
 			break;
 		}
-		if (fdt_string_eq_(fdt, fdt32_to_cpu(prop->nameoff),
+		if (fdt_string_eq_(fdt, fdt32_ld(&prop->nameoff),
 				   name, namelen)) {
 			if (poffset)
 				*poffset = offset;
@@ -393,7 +469,7 @@ const void *fdt_getprop_namelen(const void *fdt, int nodeoffset,
 
 	/* Handle realignment */
 	if (fdt_version(fdt) < 0x10 && (poffset + sizeof(*prop)) % 8 &&
-	    fdt32_to_cpu(prop->len) >= 8)
+	    fdt32_ld(&prop->len) >= 8)
 		return prop->data + 4;
 	return prop->data;
 }
@@ -406,12 +482,22 @@ const void *fdt_getprop_by_offset(const void *fdt, int offset,
 	prop = fdt_get_property_by_offset_(fdt, offset, lenp);
 	if (!prop)
 		return NULL;
-	if (namep)
-		*namep = fdt_string(fdt, fdt32_to_cpu(prop->nameoff));
+	if (namep) {
+		const char *name;
+		int namelen;
+		name = fdt_get_string(fdt, fdt32_ld(&prop->nameoff),
+				      &namelen);
+		if (!name) {
+			if (lenp)
+				*lenp = namelen;
+			return NULL;
+		}
+		*namep = name;
+	}
 
 	/* Handle realignment */
 	if (fdt_version(fdt) < 0x10 && (offset + sizeof(*prop)) % 8 &&
-	    fdt32_to_cpu(prop->len) >= 8)
+	    fdt32_ld(&prop->len) >= 8)
 		return prop->data + 4;
 	return prop->data;
 }
@@ -436,7 +522,7 @@ uint32_t fdt_get_phandle(const void *fdt, int nodeoffset)
 			return 0;
 	}
 
-	return fdt32_to_cpu(*php);
+	return fdt32_ld(php);
 }
 
 const char *fdt_get_alias_namelen(const void *fdt,
@@ -462,7 +548,7 @@ int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen)
 	int offset, depth, namelen;
 	const char *name;
 
-	FDT_CHECK_HEADER(fdt);
+	FDT_RO_PROBE(fdt);
 
 	if (buflen < 2)
 		return -FDT_ERR_NOSPACE;
@@ -514,7 +600,7 @@ int fdt_supernode_atdepth_offset(const void *fdt, int nodeoffset,
 	int offset, depth;
 	int supernodeoffset = -FDT_ERR_INTERNAL;
 
-	FDT_CHECK_HEADER(fdt);
+	FDT_RO_PROBE(fdt);
 
 	if (supernodedepth < 0)
 		return -FDT_ERR_NOTFOUND;
@@ -573,7 +659,7 @@ int fdt_node_offset_by_prop_value(const void *fdt, int startoffset,
 	const void *val;
 	int len;
 
-	FDT_CHECK_HEADER(fdt);
+	FDT_RO_PROBE(fdt);
 
 	/* FIXME: The algorithm here is pretty horrible: we scan each
 	 * property of a node in fdt_getprop(), then if that didn't
@@ -599,7 +685,7 @@ int fdt_node_offset_by_phandle(const void *fdt, uint32_t phandle)
 	if ((phandle == 0) || (phandle == -1))
 		return -FDT_ERR_BADPHANDLE;
 
-	FDT_CHECK_HEADER(fdt);
+	FDT_RO_PROBE(fdt);
 
 	/* FIXME: The algorithm here is pretty horrible: we
 	 * potentially scan each property of a node in
@@ -752,7 +838,7 @@ int fdt_node_offset_by_compatible(const void *fdt, int startoffset,
 {
 	int offset, err;
 
-	FDT_CHECK_HEADER(fdt);
+	FDT_RO_PROBE(fdt);
 
 	/* FIXME: The algorithm here is pretty horrible: we scan each
 	 * property of a node in fdt_node_check_compatible(), then if
@@ -771,3 +857,66 @@ int fdt_node_offset_by_compatible(const void *fdt, int startoffset,
 
 	return offset; /* error from fdt_next_node() */
 }
+
+int fdt_check_full(const void *fdt, size_t bufsize)
+{
+	int err;
+	int num_memrsv;
+	int offset, nextoffset = 0;
+	uint32_t tag;
+	unsigned depth = 0;
+	const void *prop;
+	const char *propname;
+
+	if (bufsize < FDT_V1_SIZE)
+		return -FDT_ERR_TRUNCATED;
+	err = fdt_check_header(fdt);
+	if (err != 0)
+		return err;
+	if (bufsize < fdt_totalsize(fdt))
+		return -FDT_ERR_TRUNCATED;
+
+	num_memrsv = fdt_num_mem_rsv(fdt);
+	if (num_memrsv < 0)
+		return num_memrsv;
+
+	while (1) {
+		offset = nextoffset;
+		tag = fdt_next_tag(fdt, offset, &nextoffset);
+
+		if (nextoffset < 0)
+			return nextoffset;
+
+		switch (tag) {
+		case FDT_NOP:
+			break;
+
+		case FDT_END:
+			if (depth != 0)
+				return -FDT_ERR_BADSTRUCTURE;
+			return 0;
+
+		case FDT_BEGIN_NODE:
+			depth++;
+			if (depth > INT_MAX)
+				return -FDT_ERR_BADSTRUCTURE;
+			break;
+
+		case FDT_END_NODE:
+			if (depth == 0)
+				return -FDT_ERR_BADSTRUCTURE;
+			depth--;
+			break;
+
+		case FDT_PROP:
+			prop = fdt_getprop_by_offset(fdt, offset, &propname,
+						     &err);
+			if (!prop)
+				return err;
+			break;
+
+		default:
+			return -FDT_ERR_INTERNAL;
+		}
+	}
+}
diff --git a/scripts/dtc/libfdt/fdt_rw.c b/scripts/dtc/libfdt/fdt_rw.c
index 9b829051e444..2e49855d7cf8 100644
--- a/scripts/dtc/libfdt/fdt_rw.c
+++ b/scripts/dtc/libfdt/fdt_rw.c
@@ -67,9 +67,9 @@ static int fdt_blocks_misordered_(const void *fdt,
 		    (fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt)));
 }
 
-static int fdt_rw_check_header_(void *fdt)
+static int fdt_rw_probe_(void *fdt)
 {
-	FDT_CHECK_HEADER(fdt);
+	FDT_RO_PROBE(fdt);
 
 	if (fdt_version(fdt) < 17)
 		return -FDT_ERR_BADVERSION;
@@ -82,10 +82,10 @@ static int fdt_rw_check_header_(void *fdt)
 	return 0;
 }
 
-#define FDT_RW_CHECK_HEADER(fdt) \
+#define FDT_RW_PROBE(fdt) \
 	{ \
 		int err_; \
-		if ((err_ = fdt_rw_check_header_(fdt)) != 0) \
+		if ((err_ = fdt_rw_probe_(fdt)) != 0) \
 			return err_; \
 	}
 
@@ -176,7 +176,7 @@ int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size)
 	struct fdt_reserve_entry *re;
 	int err;
 
-	FDT_RW_CHECK_HEADER(fdt);
+	FDT_RW_PROBE(fdt);
 
 	re = fdt_mem_rsv_w_(fdt, fdt_num_mem_rsv(fdt));
 	err = fdt_splice_mem_rsv_(fdt, re, 0, 1);
@@ -192,7 +192,7 @@ int fdt_del_mem_rsv(void *fdt, int n)
 {
 	struct fdt_reserve_entry *re = fdt_mem_rsv_w_(fdt, n);
 
-	FDT_RW_CHECK_HEADER(fdt);
+	FDT_RW_PROBE(fdt);
 
 	if (n >= fdt_num_mem_rsv(fdt))
 		return -FDT_ERR_NOTFOUND;
@@ -252,7 +252,7 @@ int fdt_set_name(void *fdt, int nodeoffset, const char *name)
 	int oldlen, newlen;
 	int err;
 
-	FDT_RW_CHECK_HEADER(fdt);
+	FDT_RW_PROBE(fdt);
 
 	namep = (char *)(uintptr_t)fdt_get_name(fdt, nodeoffset, &oldlen);
 	if (!namep)
@@ -275,7 +275,7 @@ int fdt_setprop_placeholder(void *fdt, int nodeoffset, const char *name,
 	struct fdt_property *prop;
 	int err;
 
-	FDT_RW_CHECK_HEADER(fdt);
+	FDT_RW_PROBE(fdt);
 
 	err = fdt_resize_property_(fdt, nodeoffset, name, len, &prop);
 	if (err == -FDT_ERR_NOTFOUND)
@@ -308,7 +308,7 @@ int fdt_appendprop(void *fdt, int nodeoffset, const char *name,
 	struct fdt_property *prop;
 	int err, oldlen, newlen;
 
-	FDT_RW_CHECK_HEADER(fdt);
+	FDT_RW_PROBE(fdt);
 
 	prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen);
 	if (prop) {
@@ -334,7 +334,7 @@ int fdt_delprop(void *fdt, int nodeoffset, const char *name)
 	struct fdt_property *prop;
 	int len, proplen;
 
-	FDT_RW_CHECK_HEADER(fdt);
+	FDT_RW_PROBE(fdt);
 
 	prop = fdt_get_property_w(fdt, nodeoffset, name, &len);
 	if (!prop)
@@ -354,7 +354,7 @@ int fdt_add_subnode_namelen(void *fdt, int parentoffset,
 	uint32_t tag;
 	fdt32_t *endtag;
 
-	FDT_RW_CHECK_HEADER(fdt);
+	FDT_RW_PROBE(fdt);
 
 	offset = fdt_subnode_offset_namelen(fdt, parentoffset, name, namelen);
 	if (offset >= 0)
@@ -394,7 +394,7 @@ int fdt_del_node(void *fdt, int nodeoffset)
 {
 	int endoffset;
 
-	FDT_RW_CHECK_HEADER(fdt);
+	FDT_RW_PROBE(fdt);
 
 	endoffset = fdt_node_end_offset_(fdt, nodeoffset);
 	if (endoffset < 0)
@@ -435,7 +435,7 @@ int fdt_open_into(const void *fdt, void *buf, int bufsize)
 	const char *fdtend = fdtstart + fdt_totalsize(fdt);
 	char *tmp;
 
-	FDT_CHECK_HEADER(fdt);
+	FDT_RO_PROBE(fdt);
 
 	mem_rsv_size = (fdt_num_mem_rsv(fdt)+1)
 		* sizeof(struct fdt_reserve_entry);
@@ -494,7 +494,7 @@ int fdt_pack(void *fdt)
 {
 	int mem_rsv_size;
 
-	FDT_RW_CHECK_HEADER(fdt);
+	FDT_RW_PROBE(fdt);
 
 	mem_rsv_size = (fdt_num_mem_rsv(fdt)+1)
 		* sizeof(struct fdt_reserve_entry);
diff --git a/scripts/dtc/libfdt/fdt_sw.c b/scripts/dtc/libfdt/fdt_sw.c
index 6d33cc29d022..9fa4a94d83c3 100644
--- a/scripts/dtc/libfdt/fdt_sw.c
+++ b/scripts/dtc/libfdt/fdt_sw.c
@@ -55,21 +55,77 @@
 
 #include "libfdt_internal.h"
 
-static int fdt_sw_check_header_(void *fdt)
+static int fdt_sw_probe_(void *fdt)
 {
-	if (fdt_magic(fdt) != FDT_SW_MAGIC)
+	if (fdt_magic(fdt) == FDT_MAGIC)
+		return -FDT_ERR_BADSTATE;
+	else if (fdt_magic(fdt) != FDT_SW_MAGIC)
 		return -FDT_ERR_BADMAGIC;
-	/* FIXME: should check more details about the header state */
 	return 0;
 }
 
-#define FDT_SW_CHECK_HEADER(fdt) \
+#define FDT_SW_PROBE(fdt) \
+	{ \
+		int err; \
+		if ((err = fdt_sw_probe_(fdt)) != 0) \
+			return err; \
+	}
+
+/* 'memrsv' state:	Initial state after fdt_create()
+ *
+ * Allowed functions:
+ *	fdt_add_reservmap_entry()
+ *	fdt_finish_reservemap()		[moves to 'struct' state]
+ */
+static int fdt_sw_probe_memrsv_(void *fdt)
+{
+	int err = fdt_sw_probe_(fdt);
+	if (err)
+		return err;
+
+	if (fdt_off_dt_strings(fdt) != 0)
+		return -FDT_ERR_BADSTATE;
+	return 0;
+}
+
+#define FDT_SW_PROBE_MEMRSV(fdt) \
+	{ \
+		int err; \
+		if ((err = fdt_sw_probe_memrsv_(fdt)) != 0) \
+			return err; \
+	}
+
+/* 'struct' state:	Enter this state after fdt_finish_reservemap()
+ *
+ * Allowed functions:
+ *	fdt_begin_node()
+ *	fdt_end_node()
+ *	fdt_property*()
+ *	fdt_finish()			[moves to 'complete' state]
+ */
+static int fdt_sw_probe_struct_(void *fdt)
+{
+	int err = fdt_sw_probe_(fdt);
+	if (err)
+		return err;
+
+	if (fdt_off_dt_strings(fdt) != fdt_totalsize(fdt))
+		return -FDT_ERR_BADSTATE;
+	return 0;
+}
+
+#define FDT_SW_PROBE_STRUCT(fdt) \
 	{ \
 		int err; \
-		if ((err = fdt_sw_check_header_(fdt)) != 0) \
+		if ((err = fdt_sw_probe_struct_(fdt)) != 0) \
 			return err; \
 	}
 
+/* 'complete' state:	Enter this state after fdt_finish()
+ *
+ * Allowed functions: none
+ */
+
 static void *fdt_grab_space_(void *fdt, size_t len)
 {
 	int offset = fdt_size_dt_struct(fdt);
@@ -87,9 +143,11 @@ static void *fdt_grab_space_(void *fdt, size_t len)
 
 int fdt_create(void *buf, int bufsize)
 {
+	const size_t hdrsize = FDT_ALIGN(sizeof(struct fdt_header),
+					 sizeof(struct fdt_reserve_entry));
 	void *fdt = buf;
 
-	if (bufsize < sizeof(struct fdt_header))
+	if (bufsize < hdrsize)
 		return -FDT_ERR_NOSPACE;
 
 	memset(buf, 0, bufsize);
@@ -99,10 +157,9 @@ int fdt_create(void *buf, int bufsize)
 	fdt_set_last_comp_version(fdt, FDT_FIRST_SUPPORTED_VERSION);
 	fdt_set_totalsize(fdt,  bufsize);
 
-	fdt_set_off_mem_rsvmap(fdt, FDT_ALIGN(sizeof(struct fdt_header),
-					      sizeof(struct fdt_reserve_entry)));
+	fdt_set_off_mem_rsvmap(fdt, hdrsize);
 	fdt_set_off_dt_struct(fdt, fdt_off_mem_rsvmap(fdt));
-	fdt_set_off_dt_strings(fdt, bufsize);
+	fdt_set_off_dt_strings(fdt, 0);
 
 	return 0;
 }
@@ -112,11 +169,14 @@ int fdt_resize(void *fdt, void *buf, int bufsize)
 	size_t headsize, tailsize;
 	char *oldtail, *newtail;
 
-	FDT_SW_CHECK_HEADER(fdt);
+	FDT_SW_PROBE(fdt);
 
-	headsize = fdt_off_dt_struct(fdt);
+	headsize = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt);
 	tailsize = fdt_size_dt_strings(fdt);
 
+	if ((headsize + tailsize) > fdt_totalsize(fdt))
+		return -FDT_ERR_INTERNAL;
+
 	if ((headsize + tailsize) > bufsize)
 		return -FDT_ERR_NOSPACE;
 
@@ -133,8 +193,9 @@ int fdt_resize(void *fdt, void *buf, int bufsize)
 		memmove(buf, fdt, headsize);
 	}
 
-	fdt_set_off_dt_strings(buf, bufsize);
 	fdt_set_totalsize(buf, bufsize);
+	if (fdt_off_dt_strings(buf))
+		fdt_set_off_dt_strings(buf, bufsize);
 
 	return 0;
 }
@@ -144,10 +205,7 @@ int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size)
 	struct fdt_reserve_entry *re;
 	int offset;
 
-	FDT_SW_CHECK_HEADER(fdt);
-
-	if (fdt_size_dt_struct(fdt))
-		return -FDT_ERR_BADSTATE;
+	FDT_SW_PROBE_MEMRSV(fdt);
 
 	offset = fdt_off_dt_struct(fdt);
 	if ((offset + sizeof(*re)) > fdt_totalsize(fdt))
@@ -164,16 +222,23 @@ int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size)
 
 int fdt_finish_reservemap(void *fdt)
 {
-	return fdt_add_reservemap_entry(fdt, 0, 0);
+	int err = fdt_add_reservemap_entry(fdt, 0, 0);
+
+	if (err)
+		return err;
+
+	fdt_set_off_dt_strings(fdt, fdt_totalsize(fdt));
+	return 0;
 }
 
 int fdt_begin_node(void *fdt, const char *name)
 {
 	struct fdt_node_header *nh;
-	int namelen = strlen(name) + 1;
+	int namelen;
 
-	FDT_SW_CHECK_HEADER(fdt);
+	FDT_SW_PROBE_STRUCT(fdt);
 
+	namelen = strlen(name) + 1;
 	nh = fdt_grab_space_(fdt, sizeof(*nh) + FDT_TAGALIGN(namelen));
 	if (! nh)
 		return -FDT_ERR_NOSPACE;
@@ -187,7 +252,7 @@ int fdt_end_node(void *fdt)
 {
 	fdt32_t *en;
 
-	FDT_SW_CHECK_HEADER(fdt);
+	FDT_SW_PROBE_STRUCT(fdt);
 
 	en = fdt_grab_space_(fdt, FDT_TAGSIZE);
 	if (! en)
@@ -225,7 +290,7 @@ int fdt_property_placeholder(void *fdt, const char *name, int len, void **valp)
 	struct fdt_property *prop;
 	int nameoff;
 
-	FDT_SW_CHECK_HEADER(fdt);
+	FDT_SW_PROBE_STRUCT(fdt);
 
 	nameoff = fdt_find_add_string_(fdt, name);
 	if (nameoff == 0)
@@ -262,7 +327,7 @@ int fdt_finish(void *fdt)
 	uint32_t tag;
 	int offset, nextoffset;
 
-	FDT_SW_CHECK_HEADER(fdt);
+	FDT_SW_PROBE_STRUCT(fdt);
 
 	/* Add terminator */
 	end = fdt_grab_space_(fdt, sizeof(*end));
diff --git a/scripts/dtc/libfdt/libfdt.h b/scripts/dtc/libfdt/libfdt.h
index 1e27780e1185..2bd151dd355f 100644
--- a/scripts/dtc/libfdt/libfdt.h
+++ b/scripts/dtc/libfdt/libfdt.h
@@ -90,8 +90,9 @@
 
 /* Error codes: codes for bad device tree blobs */
 #define FDT_ERR_TRUNCATED	8
-	/* FDT_ERR_TRUNCATED: Structure block of the given device tree
-	 * ends without an FDT_END tag. */
+	/* FDT_ERR_TRUNCATED: FDT or a sub-block is improperly
+	 * terminated (overflows, goes outside allowed bounds, or
+	 * isn't properly terminated).  */
 #define FDT_ERR_BADMAGIC	9
 	/* FDT_ERR_BADMAGIC: Given "device tree" appears not to be a
 	 * device tree at all - it is missing the flattened device
@@ -153,6 +154,29 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
 
 uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
 
+/*
+ * Alignment helpers:
+ *     These helpers access words from a device tree blob.  They're
+ *     built to work even with unaligned pointers on platforms (ike
+ *     ARM) that don't like unaligned loads and stores
+ */
+
+static inline uint32_t fdt32_ld(const fdt32_t *p)
+{
+	fdt32_t v;
+
+	memcpy(&v, p, sizeof(v));
+	return fdt32_to_cpu(v);
+}
+
+static inline uint64_t fdt64_ld(const fdt64_t *p)
+{
+	fdt64_t v;
+
+	memcpy(&v, p, sizeof(v));
+	return fdt64_to_cpu(v);
+}
+
 /**********************************************************************/
 /* Traversal functions                                                */
 /**********************************************************************/
@@ -213,7 +237,7 @@ int fdt_next_subnode(const void *fdt, int offset);
 /* General functions                                                  */
 /**********************************************************************/
 #define fdt_get_header(fdt, field) \
-	(fdt32_to_cpu(((const struct fdt_header *)(fdt))->field))
+	(fdt32_ld(&((const struct fdt_header *)(fdt))->field))
 #define fdt_magic(fdt)			(fdt_get_header(fdt, magic))
 #define fdt_totalsize(fdt)		(fdt_get_header(fdt, totalsize))
 #define fdt_off_dt_struct(fdt)		(fdt_get_header(fdt, off_dt_struct))
@@ -244,18 +268,31 @@ fdt_set_hdr_(size_dt_struct);
 #undef fdt_set_hdr_
 
 /**
- * fdt_check_header - sanity check a device tree or possible device tree
+ * fdt_header_size - return the size of the tree's header
+ * @fdt: pointer to a flattened device tree
+ */
+size_t fdt_header_size_(uint32_t version);
+static inline size_t fdt_header_size(const void *fdt)
+{
+	return fdt_header_size_(fdt_version(fdt));
+}
+
+/**
+ * fdt_check_header - sanity check a device tree header
+
  * @fdt: pointer to data which might be a flattened device tree
  *
  * fdt_check_header() checks that the given buffer contains what
- * appears to be a flattened device tree with sane information in its
- * header.
+ * appears to be a flattened device tree, and that the header contains
+ * valid information (to the extent that can be determined from the
+ * header alone).
  *
  * returns:
  *     0, if the buffer appears to contain a valid device tree
  *     -FDT_ERR_BADMAGIC,
  *     -FDT_ERR_BADVERSION,
- *     -FDT_ERR_BADSTATE, standard meanings, as above
+ *     -FDT_ERR_BADSTATE,
+ *     -FDT_ERR_TRUNCATED, standard meanings, as above
  */
 int fdt_check_header(const void *fdt);
 
@@ -284,6 +321,24 @@ int fdt_move(const void *fdt, void *buf, int bufsize);
 /* Read-only functions                                                */
 /**********************************************************************/
 
+int fdt_check_full(const void *fdt, size_t bufsize);
+
+/**
+ * fdt_get_string - retrieve a string from the strings block of a device tree
+ * @fdt: pointer to the device tree blob
+ * @stroffset: offset of the string within the strings block (native endian)
+ * @lenp: optional pointer to return the string's length
+ *
+ * fdt_get_string() retrieves a pointer to a single string from the
+ * strings block of the device tree blob at fdt, and optionally also
+ * returns the string's length in *lenp.
+ *
+ * returns:
+ *     a pointer to the string, on success
+ *     NULL, if stroffset is out of bounds, or doesn't point to a valid string
+ */
+const char *fdt_get_string(const void *fdt, int stroffset, int *lenp);
+
 /**
  * fdt_string - retrieve a string from the strings block of a device tree
  * @fdt: pointer to the device tree blob
@@ -294,7 +349,7 @@ int fdt_move(const void *fdt, void *buf, int bufsize);
  *
  * returns:
  *     a pointer to the string, on success
- *     NULL, if stroffset is out of bounds
+ *     NULL, if stroffset is out of bounds, or doesn't point to a valid string
  */
 const char *fdt_string(const void *fdt, int stroffset);
 
@@ -1090,7 +1145,7 @@ int fdt_address_cells(const void *fdt, int nodeoffset);
  *
  * returns:
  *	0 <= n < FDT_MAX_NCELLS, on success
- *      2, if the node has no #address-cells property
+ *      2, if the node has no #size-cells property
  *      -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid
  *		#size-cells property
  *	-FDT_ERR_BADMAGIC,
@@ -1313,10 +1368,13 @@ static inline int fdt_property_u64(void *fdt, const char *name, uint64_t val)
 	fdt64_t tmp = cpu_to_fdt64(val);
 	return fdt_property(fdt, name, &tmp, sizeof(tmp));
 }
+
+#ifndef SWIG /* Not available in Python */
 static inline int fdt_property_cell(void *fdt, const char *name, uint32_t val)
 {
 	return fdt_property_u32(fdt, name, val);
 }
+#endif
 
 /**
  * fdt_property_placeholder - add a new property and return a ptr to its value
diff --git a/scripts/dtc/libfdt/libfdt_env.h b/scripts/dtc/libfdt/libfdt_env.h
index bd2474628775..eb2053845c9c 100644
--- a/scripts/dtc/libfdt/libfdt_env.h
+++ b/scripts/dtc/libfdt/libfdt_env.h
@@ -56,6 +56,7 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
+#include <limits.h>
 
 #ifdef __CHECKER__
 #define FDT_FORCE __attribute__((force))
diff --git a/scripts/dtc/libfdt/libfdt_internal.h b/scripts/dtc/libfdt/libfdt_internal.h
index 7681e192295b..4109f890ae60 100644
--- a/scripts/dtc/libfdt/libfdt_internal.h
+++ b/scripts/dtc/libfdt/libfdt_internal.h
@@ -55,10 +55,11 @@
 #define FDT_ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))
 #define FDT_TAGALIGN(x)		(FDT_ALIGN((x), FDT_TAGSIZE))
 
-#define FDT_CHECK_HEADER(fdt) \
+int fdt_ro_probe_(const void *fdt);
+#define FDT_RO_PROBE(fdt)			\
 	{ \
 		int err_; \
-		if ((err_ = fdt_check_header(fdt)) != 0) \
+		if ((err_ = fdt_ro_probe_(fdt)) != 0)	\
 			return err_; \
 	}
 
diff --git a/scripts/dtc/livetree.c b/scripts/dtc/livetree.c
index 6e4c367f54b3..4ff0679e0062 100644
--- a/scripts/dtc/livetree.c
+++ b/scripts/dtc/livetree.c
@@ -594,6 +594,7 @@ struct node *get_node_by_ref(struct node *tree, const char *ref)
 cell_t get_node_phandle(struct node *root, struct node *node)
 {
 	static cell_t phandle = 1; /* FIXME: ick, static local */
+	struct data d = empty_data;
 
 	if ((node->phandle != 0) && (node->phandle != -1))
 		return node->phandle;
@@ -603,17 +604,16 @@ cell_t get_node_phandle(struct node *root, struct node *node)
 
 	node->phandle = phandle;
 
+	d = data_add_marker(d, TYPE_UINT32, NULL);
+	d = data_append_cell(d, phandle);
+
 	if (!get_property(node, "linux,phandle")
 	    && (phandle_format & PHANDLE_LEGACY))
-		add_property(node,
-			     build_property("linux,phandle",
-					    data_append_cell(empty_data, phandle)));
+		add_property(node, build_property("linux,phandle", d));
 
 	if (!get_property(node, "phandle")
 	    && (phandle_format & PHANDLE_EPAPR))
-		add_property(node,
-			     build_property("phandle",
-					    data_append_cell(empty_data, phandle)));
+		add_property(node, build_property("phandle", d));
 
 	/* If the node *does* have a phandle property, we must
 	 * be dealing with a self-referencing phandle, which will be
diff --git a/scripts/dtc/treesource.c b/scripts/dtc/treesource.c
index 2461a3d068a0..f2874f1d1465 100644
--- a/scripts/dtc/treesource.c
+++ b/scripts/dtc/treesource.c
@@ -61,24 +61,14 @@ static bool isstring(char c)
 		|| strchr("\a\b\t\n\v\f\r", c));
 }
 
-static void write_propval_string(FILE *f, struct data val)
+static void write_propval_string(FILE *f, const char *s, size_t len)
 {
-	const char *str = val.val;
-	int i;
-	struct marker *m = val.markers;
-
-	assert(str[val.len-1] == '\0');
+	const char *end = s + len - 1;
+	assert(*end == '\0');
 
-	while (m && (m->offset == 0)) {
-		if (m->type == LABEL)
-			fprintf(f, "%s: ", m->ref);
-		m = m->next;
-	}
 	fprintf(f, "\"");
-
-	for (i = 0; i < (val.len-1); i++) {
-		char c = str[i];
-
+	while (s < end) {
+		char c = *s++;
 		switch (c) {
 		case '\a':
 			fprintf(f, "\\a");
@@ -108,91 +98,78 @@ static void write_propval_string(FILE *f, struct data val)
 			fprintf(f, "\\\"");
 			break;
 		case '\0':
-			fprintf(f, "\", ");
-			while (m && (m->offset <= (i + 1))) {
-				if (m->type == LABEL) {
-					assert(m->offset == (i+1));
-					fprintf(f, "%s: ", m->ref);
-				}
-				m = m->next;
-			}
-			fprintf(f, "\"");
+			fprintf(f, "\\0");
 			break;
 		default:
 			if (isprint((unsigned char)c))
 				fprintf(f, "%c", c);
 			else
-				fprintf(f, "\\x%02hhx", c);
+				fprintf(f, "\\x%02"PRIx8, c);
 		}
 	}
 	fprintf(f, "\"");
-
-	/* Wrap up any labels at the end of the value */
-	for_each_marker_of_type(m, LABEL) {
-		assert (m->offset == val.len);
-		fprintf(f, " %s:", m->ref);
-	}
 }
 
-static void write_propval_cells(FILE *f, struct data val)
+static void write_propval_int(FILE *f, const char *p, size_t len, size_t width)
 {
-	void *propend = val.val + val.len;
-	fdt32_t *cp = (fdt32_t *)val.val;
-	struct marker *m = val.markers;
-
-	fprintf(f, "<");
-	for (;;) {
-		while (m && (m->offset <= ((char *)cp - val.val))) {
-			if (m->type == LABEL) {
-				assert(m->offset == ((char *)cp - val.val));
-				fprintf(f, "%s: ", m->ref);
-			}
-			m = m->next;
-		}
+	const char *end = p + len;
+	assert(len % width == 0);
 
-		fprintf(f, "0x%x", fdt32_to_cpu(*cp++));
-		if ((void *)cp >= propend)
+	for (; p < end; p += width) {
+		switch (width) {
+		case 1:
+			fprintf(f, " %02"PRIx8, *(const uint8_t*)p);
+			break;
+		case 2:
+			fprintf(f, " 0x%02"PRIx16, fdt16_to_cpu(*(const fdt16_t*)p));
+			break;
+		case 4:
+			fprintf(f, " 0x%02"PRIx32, fdt32_to_cpu(*(const fdt32_t*)p));
+			break;
+		case 8:
+			fprintf(f, " 0x%02"PRIx64, fdt64_to_cpu(*(const fdt64_t*)p));
 			break;
-		fprintf(f, " ");
+		}
 	}
+}
 
-	/* Wrap up any labels at the end of the value */
-	for_each_marker_of_type(m, LABEL) {
-		assert (m->offset == val.len);
-		fprintf(f, " %s:", m->ref);
-	}
-	fprintf(f, ">");
+static bool has_data_type_information(struct marker *m)
+{
+	return m->type >= TYPE_UINT8;
 }
 
-static void write_propval_bytes(FILE *f, struct data val)
+static struct marker *next_type_marker(struct marker *m)
 {
-	void *propend = val.val + val.len;
-	const char *bp = val.val;
-	struct marker *m = val.markers;
-
-	fprintf(f, "[");
-	for (;;) {
-		while (m && (m->offset == (bp-val.val))) {
-			if (m->type == LABEL)
-				fprintf(f, "%s: ", m->ref);
-			m = m->next;
-		}
+	while (m && !has_data_type_information(m))
+		m = m->next;
+	return m;
+}
 
-		fprintf(f, "%02hhx", (unsigned char)(*bp++));
-		if ((const void *)bp >= propend)
-			break;
-		fprintf(f, " ");
-	}
+size_t type_marker_length(struct marker *m)
+{
+	struct marker *next = next_type_marker(m->next);
 
-	/* Wrap up any labels at the end of the value */
-	for_each_marker_of_type(m, LABEL) {
-		assert (m->offset == val.len);
-		fprintf(f, " %s:", m->ref);
-	}
-	fprintf(f, "]");
+	if (next)
+		return next->offset - m->offset;
+	return 0;
 }
 
-static void write_propval(FILE *f, struct property *prop)
+static const char *delim_start[] = {
+	[TYPE_UINT8] = "[",
+	[TYPE_UINT16] = "/bits/ 16 <",
+	[TYPE_UINT32] = "<",
+	[TYPE_UINT64] = "/bits/ 64 <",
+	[TYPE_STRING] = "",
+};
+static const char *delim_end[] = {
+	[TYPE_UINT8] = " ]",
+	[TYPE_UINT16] = " >",
+	[TYPE_UINT32] = " >",
+	[TYPE_UINT64] = " >",
+	[TYPE_STRING] = "",
+};
+
+static enum markertype guess_value_type(struct property *prop)
 {
 	int len = prop->val.len;
 	const char *p = prop->val.val;
@@ -201,11 +178,6 @@ static void write_propval(FILE *f, struct property *prop)
 	int nnotstringlbl = 0, nnotcelllbl = 0;
 	int i;
 
-	if (len == 0) {
-		fprintf(f, ";\n");
-		return;
-	}
-
 	for (i = 0; i < len; i++) {
 		if (! isstring(p[i]))
 			nnotstring++;
@@ -220,17 +192,91 @@ static void write_propval(FILE *f, struct property *prop)
 			nnotcelllbl++;
 	}
 
-	fprintf(f, " = ");
 	if ((p[len-1] == '\0') && (nnotstring == 0) && (nnul < (len-nnul))
 	    && (nnotstringlbl == 0)) {
-		write_propval_string(f, prop->val);
+		return TYPE_STRING;
 	} else if (((len % sizeof(cell_t)) == 0) && (nnotcelllbl == 0)) {
-		write_propval_cells(f, prop->val);
-	} else {
-		write_propval_bytes(f, prop->val);
+		return TYPE_UINT32;
 	}
 
-	fprintf(f, ";\n");
+	return TYPE_UINT8;
+}
+
+static void write_propval(FILE *f, struct property *prop)
+{
+	size_t len = prop->val.len;
+	struct marker *m = prop->val.markers;
+	struct marker dummy_marker;
+	enum markertype emit_type = TYPE_NONE;
+
+	if (len == 0) {
+		fprintf(f, ";\n");
+		return;
+	}
+
+	fprintf(f, " = ");
+
+	if (!next_type_marker(m)) {
+		/* data type information missing, need to guess */
+		dummy_marker.type = guess_value_type(prop);
+		dummy_marker.next = prop->val.markers;
+		dummy_marker.offset = 0;
+		dummy_marker.ref = NULL;
+		m = &dummy_marker;
+	}
+
+	struct marker *m_label = prop->val.markers;
+	for_each_marker(m) {
+		size_t chunk_len;
+		const char *p = &prop->val.val[m->offset];
+
+		if (!has_data_type_information(m))
+			continue;
+
+		chunk_len = type_marker_length(m);
+		if (!chunk_len)
+			chunk_len = len - m->offset;
+
+		if (emit_type != TYPE_NONE)
+			fprintf(f, "%s, ", delim_end[emit_type]);
+		emit_type = m->type;
+
+		for_each_marker_of_type(m_label, LABEL) {
+			if (m_label->offset > m->offset)
+				break;
+			fprintf(f, "%s: ", m_label->ref);
+		}
+
+		fprintf(f, "%s", delim_start[emit_type]);
+
+		if (chunk_len <= 0)
+			continue;
+
+		switch(emit_type) {
+		case TYPE_UINT16:
+			write_propval_int(f, p, chunk_len, 2);
+			break;
+		case TYPE_UINT32:
+			write_propval_int(f, p, chunk_len, 4);
+			break;
+		case TYPE_UINT64:
+			write_propval_int(f, p, chunk_len, 8);
+			break;
+		case TYPE_STRING:
+			write_propval_string(f, p, chunk_len);
+			break;
+		default:
+			write_propval_int(f, p, chunk_len, 1);
+		}
+	}
+
+	/* Wrap up any labels at the end of the value */
+	for_each_marker_of_type(m_label, LABEL) {
+		assert (m_label->offset == len);
+		fprintf(f, " %s:", m_label->ref);
+	}
+
+	fprintf(f, "%s;\n", delim_end[emit_type] ? : "");
 }
 
 static void write_tree_source_node(FILE *f, struct node *tree, int level)
@@ -281,4 +327,3 @@ void dt_to_source(FILE *f, struct dt_info *dti)
 
 	write_tree_source_node(f, dti->dt, 0);
 }
-
diff --git a/scripts/dtc/update-dtc-source.sh b/scripts/dtc/update-dtc-source.sh
index 1a009fd195d0..7dd29a0362b8 100755
--- a/scripts/dtc/update-dtc-source.sh
+++ b/scripts/dtc/update-dtc-source.sh
@@ -32,7 +32,7 @@ DTC_UPSTREAM_PATH=`pwd`/../dtc
 DTC_LINUX_PATH=`pwd`/scripts/dtc
 
 DTC_SOURCE="checks.c data.c dtc.c dtc.h flattree.c fstree.c livetree.c srcpos.c \
-		srcpos.h treesource.c util.c util.h version_gen.h Makefile.dtc \
+		srcpos.h treesource.c util.c util.h version_gen.h yamltree.c Makefile.dtc \
 		dtc-lexer.l dtc-parser.y"
 LIBFDT_SOURCE="Makefile.libfdt fdt.c fdt.h fdt_addresses.c fdt_empty_tree.c \
 		fdt_overlay.c fdt_ro.c fdt_rw.c fdt_strerror.c fdt_sw.c \
diff --git a/scripts/dtc/util.c b/scripts/dtc/util.c
index 9953c32a0244..a69b7a13463d 100644
--- a/scripts/dtc/util.c
+++ b/scripts/dtc/util.c
@@ -227,11 +227,11 @@ char get_escape_char(const char *s, int *i)
 	return val;
 }
 
-int utilfdt_read_err_len(const char *filename, char **buffp, off_t *len)
+int utilfdt_read_err(const char *filename, char **buffp, size_t *len)
 {
 	int fd = 0;	/* assume stdin */
 	char *buf = NULL;
-	off_t bufsize = 1024, offset = 0;
+	size_t bufsize = 1024, offset = 0;
 	int ret = 0;
 
 	*buffp = NULL;
@@ -264,20 +264,15 @@ int utilfdt_read_err_len(const char *filename, char **buffp, off_t *len)
 		free(buf);
 	else
 		*buffp = buf;
-	*len = bufsize;
+	if (len)
+		*len = bufsize;
 	return ret;
 }
 
-int utilfdt_read_err(const char *filename, char **buffp)
-{
-	off_t len;
-	return utilfdt_read_err_len(filename, buffp, &len);
-}
-
-char *utilfdt_read_len(const char *filename, off_t *len)
+char *utilfdt_read(const char *filename, size_t *len)
 {
 	char *buff;
-	int ret = utilfdt_read_err_len(filename, &buff, len);
+	int ret = utilfdt_read_err(filename, &buff, len);
 
 	if (ret) {
 		fprintf(stderr, "Couldn't open blob from '%s': %s\n", filename,
@@ -288,12 +283,6 @@ char *utilfdt_read_len(const char *filename, off_t *len)
 	return buff;
 }
 
-char *utilfdt_read(const char *filename)
-{
-	off_t len;
-	return utilfdt_read_len(filename, &len);
-}
-
 int utilfdt_write_err(const char *filename, const void *blob)
 {
 	int fd = 1;	/* assume stdout */
diff --git a/scripts/dtc/util.h b/scripts/dtc/util.h
index 66fba8ea709b..f6cea8274174 100644
--- a/scripts/dtc/util.h
+++ b/scripts/dtc/util.h
@@ -98,16 +98,10 @@ char get_escape_char(const char *s, int *i);
  * stderr.
  *
  * @param filename	The filename to read, or - for stdin
- * @return Pointer to allocated buffer containing fdt, or NULL on error
- */
-char *utilfdt_read(const char *filename);
-
-/**
- * Like utilfdt_read(), but also passes back the size of the file read.
- *
  * @param len		If non-NULL, the amount of data we managed to read
+ * @return Pointer to allocated buffer containing fdt, or NULL on error
  */
-char *utilfdt_read_len(const char *filename, off_t *len);
+char *utilfdt_read(const char *filename, size_t *len);
 
 /**
  * Read a device tree file into a buffer. Does not report errors, but only
@@ -116,16 +110,10 @@ char *utilfdt_read_len(const char *filename, off_t *len);
  *
  * @param filename	The filename to read, or - for stdin
  * @param buffp		Returns pointer to buffer containing fdt
- * @return 0 if ok, else an errno value representing the error
- */
-int utilfdt_read_err(const char *filename, char **buffp);
-
-/**
- * Like utilfdt_read_err(), but also passes back the size of the file read.
- *
  * @param len		If non-NULL, the amount of data we managed to read
+ * @return 0 if ok, else an errno value representing the error
  */
-int utilfdt_read_err_len(const char *filename, char **buffp, off_t *len);
+int utilfdt_read_err(const char *filename, char **buffp, size_t *len);
 
 /**
  * Write a device tree buffer to a file. This will report any errors on
diff --git a/scripts/dtc/version_gen.h b/scripts/dtc/version_gen.h
index b00f14ff7a17..6d23fd095f16 100644
--- a/scripts/dtc/version_gen.h
+++ b/scripts/dtc/version_gen.h
@@ -1 +1 @@
-#define DTC_VERSION "DTC 1.4.6-g84e414b0"
+#define DTC_VERSION "DTC 1.4.7-gc86da84d"
diff --git a/scripts/dtc/yamltree.c b/scripts/dtc/yamltree.c
new file mode 100644
index 000000000000..a00285a5a9ec
--- /dev/null
+++ b/scripts/dtc/yamltree.c
@@ -0,0 +1,247 @@
+/*
+ * (C) Copyright Linaro, Ltd. 2018
+ * (C) Copyright Arm Holdings.  2017
+ * (C) Copyright David Gibson <dwg@au1.ibm.com>, IBM Corporation.  2005.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ *                                                                   USA
+ */
+
+#include <stdlib.h>
+#include <yaml.h>
+#include "dtc.h"
+#include "srcpos.h"
+
+char *yaml_error_name[] = {
+	[YAML_NO_ERROR] = "no error",
+	[YAML_MEMORY_ERROR] = "memory error",
+	[YAML_READER_ERROR] = "reader error",
+	[YAML_SCANNER_ERROR] = "scanner error",
+	[YAML_PARSER_ERROR] = "parser error",
+	[YAML_COMPOSER_ERROR] = "composer error",
+	[YAML_WRITER_ERROR] = "writer error",
+	[YAML_EMITTER_ERROR] = "emitter error",
+};
+
+#define yaml_emitter_emit_or_die(emitter, event) (			\
+{									\
+	if (!yaml_emitter_emit(emitter, event))				\
+		die("yaml '%s': %s in %s, line %i\n",			\
+		    yaml_error_name[(emitter)->error], 			\
+		    (emitter)->problem, __func__, __LINE__);		\
+})
+
+static void yaml_propval_int(yaml_emitter_t *emitter, struct marker *markers, char *data, int len, int width)
+{
+	yaml_event_t event;
+	void *tag;
+	int off, start_offset = markers->offset;
+
+	switch(width) {
+		case 1: tag = "!u8"; break;
+		case 2: tag = "!u16"; break;
+		case 4: tag = "!u32"; break;
+		case 8: tag = "!u64"; break;
+		default:
+			die("Invalid width %i", width);
+	}
+	assert(len % width == 0);
+
+	yaml_sequence_start_event_initialize(&event, NULL,
+		(yaml_char_t *)tag, width == 4, YAML_FLOW_SEQUENCE_STYLE);
+	yaml_emitter_emit_or_die(emitter, &event);
+
+	for (off = 0; off < len; off += width) {
+		char buf[32];
+		struct marker *m;
+		bool is_phandle = false;
+
+		switch(width) {
+		case 1:
+			sprintf(buf, "0x%"PRIx8, *(uint8_t*)(data + off));
+			break;
+		case 2:
+			sprintf(buf, "0x%"PRIx16, fdt16_to_cpu(*(fdt16_t*)(data + off)));
+			break;
+		case 4:
+			sprintf(buf, "0x%"PRIx32, fdt32_to_cpu(*(fdt32_t*)(data + off)));
+			m = markers;
+			is_phandle = false;
+			for_each_marker_of_type(m, REF_PHANDLE) {
+				if (m->offset == (start_offset + off)) {
+					is_phandle = true;
+					break;
+				}
+			}
+			break;
+		case 8:
+			sprintf(buf, "0x%"PRIx64, fdt64_to_cpu(*(fdt64_t*)(data + off)));
+			break;
+		}
+
+		if (is_phandle)
+			yaml_scalar_event_initialize(&event, NULL,
+				(yaml_char_t*)"!phandle", (yaml_char_t *)buf,
+				strlen(buf), 0, 0, YAML_PLAIN_SCALAR_STYLE);
+		else
+			yaml_scalar_event_initialize(&event, NULL,
+				(yaml_char_t*)YAML_INT_TAG, (yaml_char_t *)buf,
+				strlen(buf), 1, 1, YAML_PLAIN_SCALAR_STYLE);
+		yaml_emitter_emit_or_die(emitter, &event);
+	}
+
+	yaml_sequence_end_event_initialize(&event);
+	yaml_emitter_emit_or_die(emitter, &event);
+}
+
+static void yaml_propval_string(yaml_emitter_t *emitter, char *str, int len)
+{
+	yaml_event_t event;
+	int i;
+
+	assert(str[len-1] == '\0');
+
+	/* Make sure the entire string is in the lower 7-bit ascii range */
+	for (i = 0; i < len; i++)
+		assert(isascii(str[i]));
+
+	yaml_scalar_event_initialize(&event, NULL,
+		(yaml_char_t *)YAML_STR_TAG, (yaml_char_t*)str,
+		len-1, 0, 1, YAML_DOUBLE_QUOTED_SCALAR_STYLE);
+	yaml_emitter_emit_or_die(emitter, &event);
+}
+
+static void yaml_propval(yaml_emitter_t *emitter, struct property *prop)
+{
+	yaml_event_t event;
+	int len = prop->val.len;
+	struct marker *m = prop->val.markers;
+
+	/* Emit the property name */
+	yaml_scalar_event_initialize(&event, NULL,
+		(yaml_char_t *)YAML_STR_TAG, (yaml_char_t*)prop->name,
+		strlen(prop->name), 1, 1, YAML_PLAIN_SCALAR_STYLE);
+	yaml_emitter_emit_or_die(emitter, &event);
+
+	/* Boolean properties are easiest to deal with. Length is zero, so just emit 'true' */
+	if (len == 0) {
+		yaml_scalar_event_initialize(&event, NULL,
+			(yaml_char_t *)YAML_BOOL_TAG,
+			(yaml_char_t*)"true",
+			strlen("true"), 1, 0, YAML_PLAIN_SCALAR_STYLE);
+		yaml_emitter_emit_or_die(emitter, &event);
+		return;
+	}
+
+	if (!m)
+		die("No markers present in property '%s' value\n", prop->name);
+
+	yaml_sequence_start_event_initialize(&event, NULL,
+		(yaml_char_t *)YAML_SEQ_TAG, 1, YAML_FLOW_SEQUENCE_STYLE);
+	yaml_emitter_emit_or_die(emitter, &event);
+
+	for_each_marker(m) {
+		int chunk_len;
+		char *data = &prop->val.val[m->offset];
+
+		if (m->type < TYPE_UINT8)
+			continue;
+
+		chunk_len = type_marker_length(m) ? : len;
+		assert(chunk_len > 0);
+		len -= chunk_len;
+
+		switch(m->type) {
+		case TYPE_UINT16:
+			yaml_propval_int(emitter, m, data, chunk_len, 2);
+			break;
+		case TYPE_UINT32:
+			yaml_propval_int(emitter, m, data, chunk_len, 4);
+			break;
+		case TYPE_UINT64:
+			yaml_propval_int(emitter, m, data, chunk_len, 8);
+			break;
+		case TYPE_STRING:
+			yaml_propval_string(emitter, data, chunk_len);
+			break;
+		default:
+			yaml_propval_int(emitter, m, data, chunk_len, 1);
+			break;
+		}
+	}
+
+	yaml_sequence_end_event_initialize(&event);
+	yaml_emitter_emit_or_die(emitter, &event);
+}
+
+
+static void yaml_tree(struct node *tree, yaml_emitter_t *emitter)
+{
+	struct property *prop;
+	struct node *child;
+	yaml_event_t event;
+
+	if (tree->deleted)
+		return;
+
+	yaml_mapping_start_event_initialize(&event, NULL,
+		(yaml_char_t *)YAML_MAP_TAG, 1, YAML_ANY_MAPPING_STYLE);
+	yaml_emitter_emit_or_die(emitter, &event);
+
+	for_each_property(tree, prop)
+		yaml_propval(emitter, prop);
+
+	/* Loop over all the children, emitting them into the map */
+	for_each_child(tree, child) {
+		yaml_scalar_event_initialize(&event, NULL,
+			(yaml_char_t *)YAML_STR_TAG, (yaml_char_t*)child->name,
+			strlen(child->name), 1, 0, YAML_PLAIN_SCALAR_STYLE);
+		yaml_emitter_emit_or_die(emitter, &event);
+		yaml_tree(child, emitter);
+	}
+
+	yaml_mapping_end_event_initialize(&event);
+	yaml_emitter_emit_or_die(emitter, &event);
+}
+
+void dt_to_yaml(FILE *f, struct dt_info *dti)
+{
+	yaml_emitter_t emitter;
+	yaml_event_t event;
+
+	yaml_emitter_initialize(&emitter);
+	yaml_emitter_set_output_file(&emitter, f);
+	yaml_stream_start_event_initialize(&event, YAML_UTF8_ENCODING);
+	yaml_emitter_emit_or_die(&emitter, &event);
+
+	yaml_document_start_event_initialize(&event, NULL, NULL, NULL, 0);
+	yaml_emitter_emit_or_die(&emitter, &event);
+
+	yaml_sequence_start_event_initialize(&event, NULL, (yaml_char_t *)YAML_SEQ_TAG, 1, YAML_ANY_SEQUENCE_STYLE);
+	yaml_emitter_emit_or_die(&emitter, &event);
+
+	yaml_tree(dti->dt, &emitter);
+
+	yaml_sequence_end_event_initialize(&event);
+	yaml_emitter_emit_or_die(&emitter, &event);
+
+	yaml_document_end_event_initialize(&event, 0);
+	yaml_emitter_emit_or_die(&emitter, &event);
+
+	yaml_stream_end_event_initialize(&event);
+	yaml_emitter_emit_or_die(&emitter, &event);
+
+	yaml_emitter_delete(&emitter);
+}
diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index 9bb0a7f2863e..5eacba858e4b 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -26,7 +26,7 @@
 
 static struct key *keyring[INTEGRITY_KEYRING_MAX];
 
-static const char *keyring_name[INTEGRITY_KEYRING_MAX] = {
+static const char * const keyring_name[INTEGRITY_KEYRING_MAX] = {
 #ifndef CONFIG_INTEGRITY_TRUSTED_KEYRING
 	"_evm",
 	"_ima",
@@ -37,12 +37,6 @@ static const char *keyring_name[INTEGRITY_KEYRING_MAX] = {
 	"_module",
 };
 
-#ifdef CONFIG_INTEGRITY_TRUSTED_KEYRING
-static bool init_keyring __initdata = true;
-#else
-static bool init_keyring __initdata;
-#endif
-
 #ifdef CONFIG_IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY
 #define restrict_link_to_ima restrict_link_by_builtin_and_secondary_trusted
 #else
@@ -85,7 +79,7 @@ int __init integrity_init_keyring(const unsigned int id)
 	struct key_restriction *restriction;
 	int err = 0;
 
-	if (!init_keyring)
+	if (!IS_ENABLED(CONFIG_INTEGRITY_TRUSTED_KEYRING))
 		return 0;
 
 	restriction = kzalloc(sizeof(struct key_restriction), GFP_KERNEL);
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index 8a3905bb02c7..8c25f949ebdb 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -27,7 +27,7 @@
 #define EVMKEY "evm-key"
 #define MAX_KEY_SIZE 128
 static unsigned char evmkey[MAX_KEY_SIZE];
-static int evmkey_len = MAX_KEY_SIZE;
+static const int evmkey_len = MAX_KEY_SIZE;
 
 struct crypto_shash *hmac_tfm;
 static struct crypto_shash *evm_tfm[HASH_ALGO__LAST];
@@ -38,7 +38,7 @@ static DEFINE_MUTEX(mutex);
 
 static unsigned long evm_set_key_flags;
 
-static char * const evm_hmac = "hmac(sha1)";
+static const char evm_hmac[] = "hmac(sha1)";
 
 /**
  * evm_set_key() - set EVM HMAC key from the kernel
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 67db9d9454ca..cc12f3449a72 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -88,7 +88,7 @@ struct ima_template_desc {
 	char *name;
 	char *fmt;
 	int num_fields;
-	struct ima_template_field **fields;
+	const struct ima_template_field **fields;
 };
 
 struct ima_template_entry {
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index a02c5acfd403..99dd1d53fc35 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -51,7 +51,8 @@ int ima_alloc_init_template(struct ima_event_data *event_data,
 
 	(*entry)->template_desc = template_desc;
 	for (i = 0; i < template_desc->num_fields; i++) {
-		struct ima_template_field *field = template_desc->fields[i];
+		const struct ima_template_field *field =
+			template_desc->fields[i];
 		u32 len;
 
 		result = field->field_init(event_data,
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 7e7e7e7c250a..d9e7728027c6 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -210,7 +210,7 @@ static int ima_calc_file_hash_atfm(struct file *file,
 {
 	loff_t i_size, offset;
 	char *rbuf[2] = { NULL, };
-	int rc, read = 0, rbuf_len, active = 0, ahash_rc = 0;
+	int rc, rbuf_len, active = 0, ahash_rc = 0;
 	struct ahash_request *req;
 	struct scatterlist sg[1];
 	struct crypto_wait wait;
@@ -257,11 +257,6 @@ static int ima_calc_file_hash_atfm(struct file *file,
 					  &rbuf_size[1], 0);
 	}
 
-	if (!(file->f_mode & FMODE_READ)) {
-		file->f_mode |= FMODE_READ;
-		read = 1;
-	}
-
 	for (offset = 0; offset < i_size; offset += rbuf_len) {
 		if (!rbuf[1] && offset) {
 			/* Not using two buffers, and it is not the first
@@ -300,8 +295,6 @@ static int ima_calc_file_hash_atfm(struct file *file,
 	/* wait for the last update request to complete */
 	rc = ahash_wait(ahash_rc, &wait);
 out3:
-	if (read)
-		file->f_mode &= ~FMODE_READ;
 	ima_free_pages(rbuf[0], rbuf_size[0]);
 	ima_free_pages(rbuf[1], rbuf_size[1]);
 out2:
@@ -336,7 +329,7 @@ static int ima_calc_file_hash_tfm(struct file *file,
 {
 	loff_t i_size, offset = 0;
 	char *rbuf;
-	int rc, read = 0;
+	int rc;
 	SHASH_DESC_ON_STACK(shash, tfm);
 
 	shash->tfm = tfm;
@@ -357,11 +350,6 @@ static int ima_calc_file_hash_tfm(struct file *file,
 	if (!rbuf)
 		return -ENOMEM;
 
-	if (!(file->f_mode & FMODE_READ)) {
-		file->f_mode |= FMODE_READ;
-		read = 1;
-	}
-
 	while (offset < i_size) {
 		int rbuf_len;
 
@@ -378,8 +366,6 @@ static int ima_calc_file_hash_tfm(struct file *file,
 		if (rc)
 			break;
 	}
-	if (read)
-		file->f_mode &= ~FMODE_READ;
 	kfree(rbuf);
 out:
 	if (!rc)
@@ -420,6 +406,8 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
 {
 	loff_t i_size;
 	int rc;
+	struct file *f = file;
+	bool new_file_instance = false, modified_flags = false;
 
 	/*
 	 * For consistency, fail file's opened with the O_DIRECT flag on
@@ -431,15 +419,41 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
 		return -EINVAL;
 	}
 
-	i_size = i_size_read(file_inode(file));
+	/* Open a new file instance in O_RDONLY if we cannot read */
+	if (!(file->f_mode & FMODE_READ)) {
+		int flags = file->f_flags & ~(O_WRONLY | O_APPEND |
+				O_TRUNC | O_CREAT | O_NOCTTY | O_EXCL);
+		flags |= O_RDONLY;
+		f = dentry_open(&file->f_path, flags, file->f_cred);
+		if (IS_ERR(f)) {
+			/*
+			 * Cannot open the file again, lets modify f_flags
+			 * of original and continue
+			 */
+			pr_info_ratelimited("Unable to reopen file for reading.\n");
+			f = file;
+			f->f_flags |= FMODE_READ;
+			modified_flags = true;
+		} else {
+			new_file_instance = true;
+		}
+	}
+
+	i_size = i_size_read(file_inode(f));
 
 	if (ima_ahash_minsize && i_size >= ima_ahash_minsize) {
-		rc = ima_calc_file_ahash(file, hash);
+		rc = ima_calc_file_ahash(f, hash);
 		if (!rc)
-			return 0;
+			goto out;
 	}
 
-	return ima_calc_file_shash(file, hash);
+	rc = ima_calc_file_shash(f, hash);
+out:
+	if (new_file_instance)
+		fput(f);
+	else if (modified_flags)
+		f->f_flags &= ~FMODE_READ;
+	return rc;
 }
 
 /*
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index ae9d5c766a3c..3183cc23d0f8 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -42,14 +42,14 @@ static int __init default_canonical_fmt_setup(char *str)
 __setup("ima_canonical_fmt", default_canonical_fmt_setup);
 
 static int valid_policy = 1;
-#define TMPBUFLEN 12
+
 static ssize_t ima_show_htable_value(char __user *buf, size_t count,
 				     loff_t *ppos, atomic_long_t *val)
 {
-	char tmpbuf[TMPBUFLEN];
+	char tmpbuf[32];	/* greater than largest 'long' string value */
 	ssize_t len;
 
-	len = scnprintf(tmpbuf, TMPBUFLEN, "%li\n", atomic_long_read(val));
+	len = scnprintf(tmpbuf, sizeof(tmpbuf), "%li\n", atomic_long_read(val));
 	return simple_read_from_buffer(buf, count, ppos, tmpbuf, len);
 }
 
@@ -179,7 +179,8 @@ int ima_measurements_show(struct seq_file *m, void *v)
 	/* 6th:  template specific data */
 	for (i = 0; i < e->template_desc->num_fields; i++) {
 		enum ima_show_type show = IMA_SHOW_BINARY;
-		struct ima_template_field *field = e->template_desc->fields[i];
+		const struct ima_template_field *field =
+			e->template_desc->fields[i];
 
 		if (is_ima_template && strcmp(field->field_id, "d") == 0)
 			show = IMA_SHOW_BINARY_NO_FIELD_LEN;
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index faac9ecaa0ae..59d834219cd6 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -25,7 +25,7 @@
 #include "ima.h"
 
 /* name for boot aggregate entry */
-static const char *boot_aggregate_name = "boot_aggregate";
+static const char boot_aggregate_name[] = "boot_aggregate";
 struct tpm_chip *ima_tpm_chip;
 
 /* Add the boot aggregate to the IMA measurement list and extend
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 2d31921fbda4..1b88d58e1325 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -440,7 +440,7 @@ int ima_read_file(struct file *file, enum kernel_read_file_id read_id)
 	return 0;
 }
 
-static int read_idmap[READING_MAX_ID] = {
+static const int read_idmap[READING_MAX_ID] = {
 	[READING_FIRMWARE] = FIRMWARE_CHECK,
 	[READING_FIRMWARE_PREALLOC_BUFFER] = FIRMWARE_CHECK,
 	[READING_MODULE] = MODULE_CHECK,
diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c
index 30db39b23804..b631b8bc7624 100644
--- a/security/integrity/ima/ima_template.c
+++ b/security/integrity/ima/ima_template.c
@@ -32,7 +32,7 @@ static struct ima_template_desc builtin_templates[] = {
 static LIST_HEAD(defined_templates);
 static DEFINE_SPINLOCK(template_list);
 
-static struct ima_template_field supported_fields[] = {
+static const struct ima_template_field supported_fields[] = {
 	{.field_id = "d", .field_init = ima_eventdigest_init,
 	 .field_show = ima_show_template_digest},
 	{.field_id = "n", .field_init = ima_eventname_init,
@@ -49,7 +49,7 @@ static struct ima_template_field supported_fields[] = {
 static struct ima_template_desc *ima_template;
 static struct ima_template_desc *lookup_template_desc(const char *name);
 static int template_desc_init_fields(const char *template_fmt,
-				     struct ima_template_field ***fields,
+				     const struct ima_template_field ***fields,
 				     int *num_fields);
 
 static int __init ima_template_setup(char *str)
@@ -125,7 +125,8 @@ static struct ima_template_desc *lookup_template_desc(const char *name)
 	return found ? template_desc : NULL;
 }
 
-static struct ima_template_field *lookup_template_field(const char *field_id)
+static const struct ima_template_field *
+lookup_template_field(const char *field_id)
 {
 	int i;
 
@@ -153,11 +154,11 @@ static int template_fmt_size(const char *template_fmt)
 }
 
 static int template_desc_init_fields(const char *template_fmt,
-				     struct ima_template_field ***fields,
+				     const struct ima_template_field ***fields,
 				     int *num_fields)
 {
 	const char *template_fmt_ptr;
-	struct ima_template_field *found_fields[IMA_TEMPLATE_NUM_FIELDS_MAX];
+	const struct ima_template_field *found_fields[IMA_TEMPLATE_NUM_FIELDS_MAX];
 	int template_num_fields;
 	int i, len;
 
diff --git a/security/loadpin/Kconfig b/security/loadpin/Kconfig
index dd01aa91e521..a0d70d82b98e 100644
--- a/security/loadpin/Kconfig
+++ b/security/loadpin/Kconfig
@@ -10,10 +10,10 @@ config SECURITY_LOADPIN
 	  have a root filesystem backed by a read-only device such as
 	  dm-verity or a CDROM.
 
-config SECURITY_LOADPIN_ENABLED
+config SECURITY_LOADPIN_ENFORCE
 	bool "Enforce LoadPin at boot"
 	depends on SECURITY_LOADPIN
 	help
 	  If selected, LoadPin will enforce pinning at boot. If not
 	  selected, it can be enabled at boot with the kernel parameter
-	  "loadpin.enabled=1".
+	  "loadpin.enforce=1".
diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c
index 0716af28808a..48f39631b370 100644
--- a/security/loadpin/loadpin.c
+++ b/security/loadpin/loadpin.c
@@ -44,7 +44,7 @@ static void report_load(const char *origin, struct file *file, char *operation)
 	kfree(pathname);
 }
 
-static int enabled = IS_ENABLED(CONFIG_SECURITY_LOADPIN_ENABLED);
+static int enforce = IS_ENABLED(CONFIG_SECURITY_LOADPIN_ENFORCE);
 static struct super_block *pinned_root;
 static DEFINE_SPINLOCK(pinned_root_spinlock);
 
@@ -60,8 +60,8 @@ static struct ctl_path loadpin_sysctl_path[] = {
 
 static struct ctl_table loadpin_sysctl_table[] = {
 	{
-		.procname       = "enabled",
-		.data           = &enabled,
+		.procname       = "enforce",
+		.data           = &enforce,
 		.maxlen         = sizeof(int),
 		.mode           = 0644,
 		.proc_handler   = proc_dointvec_minmax,
@@ -84,8 +84,11 @@ static void check_pinning_enforcement(struct super_block *mnt_sb)
 	 * device, allow sysctl to change modes for testing.
 	 */
 	if (mnt_sb->s_bdev) {
+		char bdev[BDEVNAME_SIZE];
+
 		ro = bdev_read_only(mnt_sb->s_bdev);
-		pr_info("dev(%u,%u): %s\n",
+		bdevname(mnt_sb->s_bdev, bdev);
+		pr_info("%s (%u:%u): %s\n", bdev,
 			MAJOR(mnt_sb->s_bdev->bd_dev),
 			MINOR(mnt_sb->s_bdev->bd_dev),
 			ro ? "read-only" : "writable");
@@ -97,7 +100,7 @@ static void check_pinning_enforcement(struct super_block *mnt_sb)
 					   loadpin_sysctl_table))
 			pr_notice("sysctl registration failed!\n");
 		else
-			pr_info("load pinning can be disabled.\n");
+			pr_info("enforcement can be disabled.\n");
 	} else
 		pr_info("load pinning engaged.\n");
 }
@@ -128,7 +131,7 @@ static int loadpin_read_file(struct file *file, enum kernel_read_file_id id)
 
 	/* This handles the older init_module API that has a NULL file. */
 	if (!file) {
-		if (!enabled) {
+		if (!enforce) {
 			report_load(origin, NULL, "old-api-pinning-ignored");
 			return 0;
 		}
@@ -151,7 +154,7 @@ static int loadpin_read_file(struct file *file, enum kernel_read_file_id id)
 		 * Unlock now since it's only pinned_root we care about.
 		 * In the worst case, we will (correctly) report pinning
 		 * failures before we have announced that pinning is
-		 * enabled. This would be purely cosmetic.
+		 * enforcing. This would be purely cosmetic.
 		 */
 		spin_unlock(&pinned_root_spinlock);
 		check_pinning_enforcement(pinned_root);
@@ -161,7 +164,7 @@ static int loadpin_read_file(struct file *file, enum kernel_read_file_id id)
 	}
 
 	if (IS_ERR_OR_NULL(pinned_root) || load_root != pinned_root) {
-		if (unlikely(!enabled)) {
+		if (unlikely(!enforce)) {
 			report_load(origin, file, "pinning-ignored");
 			return 0;
 		}
@@ -186,10 +189,11 @@ static struct security_hook_list loadpin_hooks[] __lsm_ro_after_init = {
 
 void __init loadpin_add_hooks(void)
 {
-	pr_info("ready to pin (currently %sabled)", enabled ? "en" : "dis");
+	pr_info("ready to pin (currently %senforcing)\n",
+		enforce ? "" : "not ");
 	security_add_hooks(loadpin_hooks, ARRAY_SIZE(loadpin_hooks), "loadpin");
 }
 
 /* Should not be mutable after boot, so not listed in sysfs (perm == 0). */
-module_param(enabled, int, 0);
-MODULE_PARM_DESC(enabled, "Pin module/firmware loading (default: true)");
+module_param(enforce, int, 0);
+MODULE_PARM_DESC(enforce, "Enforce module/firmware pinning");
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 934dabe150fa..81fb4c1631e9 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -421,6 +421,7 @@ static int smk_ptrace_rule_check(struct task_struct *tracer,
 	struct smk_audit_info ad, *saip = NULL;
 	struct task_smack *tsp;
 	struct smack_known *tracer_known;
+	const struct cred *tracercred;
 
 	if ((mode & PTRACE_MODE_NOAUDIT) == 0) {
 		smk_ad_init(&ad, func, LSM_AUDIT_DATA_TASK);
@@ -429,7 +430,8 @@ static int smk_ptrace_rule_check(struct task_struct *tracer,
 	}
 
 	rcu_read_lock();
-	tsp = __task_cred(tracer)->security;
+	tracercred = __task_cred(tracer);
+	tsp = tracercred->security;
 	tracer_known = smk_of_task(tsp);
 
 	if ((mode & PTRACE_MODE_ATTACH) &&
@@ -439,7 +441,7 @@ static int smk_ptrace_rule_check(struct task_struct *tracer,
 			rc = 0;
 		else if (smack_ptrace_rule == SMACK_PTRACE_DRACONIAN)
 			rc = -EACCES;
-		else if (capable(CAP_SYS_PTRACE))
+		else if (smack_privileged_cred(CAP_SYS_PTRACE, tracercred))
 			rc = 0;
 		else
 			rc = -EACCES;
@@ -1841,6 +1843,7 @@ static int smack_file_send_sigiotask(struct task_struct *tsk,
 {
 	struct smack_known *skp;
 	struct smack_known *tkp = smk_of_task(tsk->cred->security);
+	const struct cred *tcred;
 	struct file *file;
 	int rc;
 	struct smk_audit_info ad;
@@ -1854,8 +1857,12 @@ static int smack_file_send_sigiotask(struct task_struct *tsk,
 	skp = file->f_security;
 	rc = smk_access(skp, tkp, MAY_DELIVER, NULL);
 	rc = smk_bu_note("sigiotask", skp, tkp, MAY_DELIVER, rc);
-	if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE))
+
+	rcu_read_lock();
+	tcred = __task_cred(tsk);
+	if (rc != 0 && smack_privileged_cred(CAP_MAC_OVERRIDE, tcred))
 		rc = 0;
+	rcu_read_unlock();
 
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
 	smk_ad_setfield_u_tsk(&ad, tsk);
@@ -3467,7 +3474,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
 		 */
 		final = &smack_known_star;
 		/*
-		 * No break.
+		 * Fall through.
 		 *
 		 * If a smack value has been set we want to use it,
 		 * but since tmpfs isn't giving us the opportunity
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index f6482e53d55a..06b517075ec0 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -2853,7 +2853,6 @@ static const struct file_operations smk_ptrace_ops = {
 static int smk_fill_super(struct super_block *sb, void *data, int silent)
 {
 	int rc;
-	struct inode *root_inode;
 
 	static const struct tree_descr smack_files[] = {
 		[SMK_LOAD] = {
@@ -2917,8 +2916,6 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent)
 		return rc;
 	}
 
-	root_inode = d_inode(sb->s_root);
-
 	return 0;
 }
 
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index fd23d5778ea1..8a6eff9c27f3 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -288,6 +288,7 @@ struct kvm_reinject_control {
 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR	0x00000002
 #define KVM_VCPUEVENT_VALID_SHADOW	0x00000004
 #define KVM_VCPUEVENT_VALID_SMM		0x00000008
+#define KVM_VCPUEVENT_VALID_PAYLOAD	0x00000010
 
 /* Interrupt shadow states */
 #define KVM_X86_SHADOW_INT_MOV_SS	0x01
@@ -299,7 +300,10 @@ struct kvm_vcpu_events {
 		__u8 injected;
 		__u8 nr;
 		__u8 has_error_code;
-		__u8 pad;
+		union {
+			__u8 pad;
+			__u8 pending;
+		};
 		__u32 error_code;
 	} exception;
 	struct {
@@ -322,7 +326,9 @@ struct kvm_vcpu_events {
 		__u8 smm_inside_nmi;
 		__u8 latched_init;
 	} smi;
-	__u32 reserved[9];
+	__u8 reserved[27];
+	__u8 exception_has_payload;
+	__u64 exception_payload;
 };
 
 /* for KVM_GET/SET_DEBUGREGS */
diff --git a/tools/crypto/getstat.c b/tools/crypto/getstat.c
new file mode 100644
index 000000000000..24115173a483
--- /dev/null
+++ b/tools/crypto/getstat.c
@@ -0,0 +1,294 @@
+/* Heavily copied from libkcapi 2015 - 2017, Stephan Mueller <smueller@chronox.de> */
+#include <errno.h>
+#include <linux/cryptouser.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#define CR_RTA(x)  ((struct rtattr *)(((char *)(x)) + NLMSG_ALIGN(sizeof(struct crypto_user_alg))))
+
+static int get_stat(const char *drivername)
+{
+	struct {
+		struct nlmsghdr n;
+		struct crypto_user_alg cru;
+	} req;
+	struct sockaddr_nl nl;
+	int sd = 0, ret;
+	socklen_t addr_len;
+	struct iovec iov;
+	struct msghdr msg;
+	char buf[4096];
+	struct nlmsghdr *res_n = (struct nlmsghdr *)buf;
+	struct crypto_user_alg *cru_res = NULL;
+	int res_len = 0;
+	struct rtattr *tb[CRYPTOCFGA_MAX + 1];
+	struct rtattr *rta;
+	struct nlmsgerr *errmsg;
+
+	memset(&req, 0, sizeof(req));
+	memset(&buf, 0, sizeof(buf));
+	memset(&msg, 0, sizeof(msg));
+
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.cru));
+	req.n.nlmsg_flags = NLM_F_REQUEST;
+	req.n.nlmsg_type = CRYPTO_MSG_GETSTAT;
+	req.n.nlmsg_seq = time(NULL);
+
+	strncpy(req.cru.cru_driver_name, drivername, strlen(drivername));
+
+	sd =  socket(AF_NETLINK, SOCK_RAW, NETLINK_CRYPTO);
+	if (sd < 0) {
+		fprintf(stderr, "Netlink error: cannot open netlink socket");
+		return -errno;
+	}
+	memset(&nl, 0, sizeof(nl));
+	nl.nl_family = AF_NETLINK;
+	if (bind(sd, (struct sockaddr *)&nl, sizeof(nl)) < 0) {
+		ret = -errno;
+		fprintf(stderr, "Netlink error: cannot bind netlink socket");
+		goto out;
+	}
+
+	/* sanity check that netlink socket was successfully opened */
+	addr_len = sizeof(nl);
+	if (getsockname(sd, (struct sockaddr *)&nl, &addr_len) < 0) {
+		ret = -errno;
+		printf("Netlink error: cannot getsockname");
+		goto out;
+	}
+	if (addr_len != sizeof(nl)) {
+		ret = -errno;
+		printf("Netlink error: wrong address length %d", addr_len);
+		goto out;
+	}
+	if (nl.nl_family != AF_NETLINK) {
+		ret = -errno;
+		printf("Netlink error: wrong address family %d",
+				nl.nl_family);
+		goto out;
+	}
+
+	memset(&nl, 0, sizeof(nl));
+	nl.nl_family = AF_NETLINK;
+	iov.iov_base = (void *)&req.n;
+	iov.iov_len = req.n.nlmsg_len;
+	msg.msg_name = &nl;
+	msg.msg_namelen = sizeof(nl);
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	if (sendmsg(sd, &msg, 0) < 0) {
+		ret = -errno;
+		printf("Netlink error: sendmsg failed");
+		goto out;
+	}
+	memset(buf, 0, sizeof(buf));
+	iov.iov_base = buf;
+	while (1) {
+		iov.iov_len = sizeof(buf);
+		ret = recvmsg(sd, &msg, 0);
+		if (ret < 0) {
+			if (errno == EINTR || errno == EAGAIN)
+				continue;
+			ret = -errno;
+			printf("Netlink error: netlink receive error");
+			goto out;
+		}
+		if (ret == 0) {
+			ret = -errno;
+			printf("Netlink error: no data");
+			goto out;
+		}
+		if (ret > sizeof(buf)) {
+			ret = -errno;
+			printf("Netlink error: received too much data");
+			goto out;
+		}
+		break;
+	}
+
+	ret = -EFAULT;
+	res_len = res_n->nlmsg_len;
+	if (res_n->nlmsg_type == NLMSG_ERROR) {
+		errmsg = NLMSG_DATA(res_n);
+		fprintf(stderr, "Fail with %d\n", errmsg->error);
+		ret = errmsg->error;
+		goto out;
+	}
+
+	if (res_n->nlmsg_type == CRYPTO_MSG_GETSTAT) {
+		cru_res = NLMSG_DATA(res_n);
+		res_len -= NLMSG_SPACE(sizeof(*cru_res));
+	}
+	if (res_len < 0) {
+		printf("Netlink error: nlmsg len %d\n", res_len);
+		goto out;
+	}
+
+	if (!cru_res) {
+		ret = -EFAULT;
+		printf("Netlink error: no cru_res\n");
+		goto out;
+	}
+
+	rta = CR_RTA(cru_res);
+	memset(tb, 0, sizeof(struct rtattr *) * (CRYPTOCFGA_MAX + 1));
+	while (RTA_OK(rta, res_len)) {
+		if ((rta->rta_type <= CRYPTOCFGA_MAX) && (!tb[rta->rta_type]))
+			tb[rta->rta_type] = rta;
+		rta = RTA_NEXT(rta, res_len);
+	}
+	if (res_len) {
+		printf("Netlink error: unprocessed data %d",
+				res_len);
+		goto out;
+	}
+
+	if (tb[CRYPTOCFGA_STAT_HASH]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_HASH];
+		struct crypto_stat *rhash =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tHash\n\tHash: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rhash->stat_hash_cnt, rhash->stat_hash_tlen,
+			rhash->stat_hash_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_COMPRESS]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_COMPRESS];
+		struct crypto_stat *rblk =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tCompress\n\tCompress: %u bytes: %llu\n\tDecompress: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rblk->stat_compress_cnt, rblk->stat_compress_tlen,
+			rblk->stat_decompress_cnt, rblk->stat_decompress_tlen,
+			rblk->stat_compress_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_ACOMP]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_ACOMP];
+		struct crypto_stat *rcomp =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tACompress\n\tCompress: %u bytes: %llu\n\tDecompress: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rcomp->stat_compress_cnt, rcomp->stat_compress_tlen,
+			rcomp->stat_decompress_cnt, rcomp->stat_decompress_tlen,
+			rcomp->stat_compress_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_AEAD]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_AEAD];
+		struct crypto_stat *raead =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tAEAD\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			raead->stat_encrypt_cnt, raead->stat_encrypt_tlen,
+			raead->stat_decrypt_cnt, raead->stat_decrypt_tlen,
+			raead->stat_aead_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_BLKCIPHER]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_BLKCIPHER];
+		struct crypto_stat *rblk =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tCipher\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen,
+			rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen,
+			rblk->stat_cipher_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_AKCIPHER]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_AKCIPHER];
+		struct crypto_stat *rblk =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tAkcipher\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tSign: %u\n\tVerify: %u\n\tErrors: %u\n",
+			drivername,
+			rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen,
+			rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen,
+			rblk->stat_sign_cnt, rblk->stat_verify_cnt,
+			rblk->stat_akcipher_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_CIPHER]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_CIPHER];
+		struct crypto_stat *rblk =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tcipher\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen,
+			rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen,
+			rblk->stat_cipher_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_RNG]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_RNG];
+		struct crypto_stat *rrng =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tRNG\n\tSeed: %u\n\tGenerate: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rrng->stat_seed_cnt,
+			rrng->stat_generate_cnt, rrng->stat_generate_tlen,
+			rrng->stat_rng_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_KPP]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_KPP];
+		struct crypto_stat *rkpp =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tKPP\n\tSetsecret: %u\n\tGenerate public key: %u\n\tCompute_shared_secret: %u\n\tErrors: %u\n",
+			drivername,
+			rkpp->stat_setsecret_cnt,
+			rkpp->stat_generate_public_key_cnt,
+			rkpp->stat_compute_shared_secret_cnt,
+			rkpp->stat_kpp_err_cnt);
+	} else {
+		fprintf(stderr, "%s is of an unknown algorithm\n", drivername);
+	}
+	ret = 0;
+out:
+	close(sd);
+	return ret;
+}
+
+int main(int argc, const char *argv[])
+{
+	char buf[4096];
+	FILE *procfd;
+	int i, lastspace;
+	int ret;
+
+	procfd = fopen("/proc/crypto", "r");
+	if (!procfd) {
+		ret = errno;
+		fprintf(stderr, "Cannot open /proc/crypto %s\n", strerror(errno));
+		return ret;
+	}
+	if (argc > 1) {
+		if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
+			printf("Usage: %s [-h|--help] display this help\n", argv[0]);
+			printf("Usage: %s display all crypto statistics\n", argv[0]);
+			printf("Usage: %s drivername1 drivername2 ... = display crypto statistics about drivername1 ...\n", argv[0]);
+			return 0;
+		}
+		for (i = 1; i < argc; i++) {
+			ret = get_stat(argv[i]);
+			if (ret) {
+				fprintf(stderr, "Failed with %s\n", strerror(-ret));
+				return ret;
+			}
+		}
+		return 0;
+	}
+
+	while (fgets(buf, sizeof(buf), procfd)) {
+		if (!strncmp(buf, "driver", 6)) {
+			lastspace = 0;
+			i = 0;
+			while (i < strlen(buf)) {
+				i++;
+				if (buf[i] == ' ')
+					lastspace = i;
+			}
+			buf[strlen(buf) - 1] = '\0';
+			ret = get_stat(buf + lastspace + 1);
+			if (ret) {
+				fprintf(stderr, "Failed with %s\n", strerror(-ret));
+				goto out;
+			}
+		}
+	}
+out:
+	fclose(procfd);
+	return ret;
+}
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 251be353f950..2875ce85b322 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -719,6 +719,7 @@ struct kvm_ppc_one_seg_page_size {
 
 #define KVM_PPC_PAGE_SIZES_REAL		0x00000001
 #define KVM_PPC_1T_SEGMENTS		0x00000002
+#define KVM_PPC_NO_HASH			0x00000004
 
 struct kvm_ppc_smmu_info {
 	__u64 flags;
@@ -953,6 +954,10 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_NESTED_STATE 157
 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158
 #define KVM_CAP_MSR_PLATFORM_INFO 159
+#define KVM_CAP_PPC_NESTED_HV 160
+#define KVM_CAP_HYPERV_SEND_IPI 161
+#define KVM_CAP_COALESCED_PIO 162
+#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
index 853b95d1e139..2011376c7ab5 100644
--- a/tools/perf/arch/powerpc/util/book3s_hv_exits.h
+++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
@@ -15,7 +15,6 @@
 	{0x400, "INST_STORAGE"}, \
 	{0x480, "INST_SEGMENT"}, \
 	{0x500, "EXTERNAL"}, \
-	{0x501, "EXTERNAL_LEVEL"}, \
 	{0x502, "EXTERNAL_HV"}, \
 	{0x600, "ALIGNMENT"}, \
 	{0x700, "PROGRAM"}, \
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 5c34752e1cff..6210ba41c29e 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,6 +1,8 @@
-cr4_cpuid_sync_test
-platform_info_test
-set_sregs_test
-sync_regs_test
-vmx_tsc_adjust_test
-state_test
+/x86_64/cr4_cpuid_sync_test
+/x86_64/evmcs_test
+/x86_64/platform_info_test
+/x86_64/set_sregs_test
+/x86_64/sync_regs_test
+/x86_64/vmx_tsc_adjust_test
+/x86_64/state_test
+/dirty_log_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index ec32dad3c3f0..01a219229238 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -1,26 +1,30 @@
 all:
 
-top_srcdir = ../../../../
+top_srcdir = ../../../..
 UNAME_M := $(shell uname -m)
 
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
-LIBKVM_x86_64 = lib/x86.c lib/vmx.c
-
-TEST_GEN_PROGS_x86_64 = platform_info_test
-TEST_GEN_PROGS_x86_64 += set_sregs_test
-TEST_GEN_PROGS_x86_64 += sync_regs_test
-TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
-TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test
-TEST_GEN_PROGS_x86_64 += state_test
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c
+LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c
+LIBKVM_aarch64 = lib/aarch64/processor.c
+
+TEST_GEN_PROGS_x86_64 = x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
+TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 += x86_64/state_test
+TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 
+TEST_GEN_PROGS_aarch64 += dirty_log_test
+
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
 LIBKVM += $(LIBKVM_$(UNAME_M))
 
 INSTALL_HDR_PATH = $(top_srcdir)/usr
 LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
-LINUX_TOOL_INCLUDE = $(top_srcdir)tools/include
-CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
+LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
+CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I..
 LDFLAGS += -pthread
 
 # After inclusion, $(OUTPUT) is defined and
@@ -29,7 +33,7 @@ include ../lib.mk
 
 STATIC_LIBS := $(OUTPUT)/libkvm.a
 LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
-EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS)
+EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.*
 
 x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
 $(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
@@ -41,3 +45,12 @@ $(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
 all: $(STATIC_LIBS)
 $(TEST_GEN_PROGS): $(STATIC_LIBS)
 $(STATIC_LIBS):| khdr
+
+cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
+cscope:
+	$(RM) cscope.*
+	(find $(include_paths) -name '*.h' \
+		-exec realpath --relative-base=$(PWD) {} \;; \
+	find . -name '*.c' \
+		-exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files
+	cscope -b
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 0c2cdc105f96..d59820cc2d39 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -5,6 +5,8 @@
  * Copyright (C) 2018, Red Hat, Inc.
  */
 
+#define _GNU_SOURCE /* for program_invocation_name */
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -15,76 +17,78 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
+#include "processor.h"
+
+#define DEBUG printf
 
-#define  DEBUG                 printf
+#define VCPU_ID				1
 
-#define  VCPU_ID                        1
 /* The memory slot index to track dirty pages */
-#define  TEST_MEM_SLOT_INDEX            1
-/*
- * GPA offset of the testing memory slot. Must be bigger than the
- * default vm mem slot, which is DEFAULT_GUEST_PHY_PAGES.
- */
-#define  TEST_MEM_OFFSET                (1ULL << 30) /* 1G */
-/* Size of the testing memory slot */
-#define  TEST_MEM_PAGES                 (1ULL << 18) /* 1G for 4K pages */
+#define TEST_MEM_SLOT_INDEX		1
+
+/* Default guest test memory offset, 1G */
+#define DEFAULT_GUEST_TEST_MEM		0x40000000
+
 /* How many pages to dirty for each guest loop */
-#define  TEST_PAGES_PER_LOOP            1024
+#define TEST_PAGES_PER_LOOP		1024
+
 /* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
-#define  TEST_HOST_LOOP_N               32
+#define TEST_HOST_LOOP_N		32
+
 /* Interval for each host loop (ms) */
-#define  TEST_HOST_LOOP_INTERVAL        10
+#define TEST_HOST_LOOP_INTERVAL		10
+
+/*
+ * Guest/Host shared variables. Ensure addr_gva2hva() and/or
+ * sync_global_to/from_guest() are used when accessing from
+ * the host. READ/WRITE_ONCE() should also be used with anything
+ * that may change.
+ */
+static uint64_t host_page_size;
+static uint64_t guest_page_size;
+static uint64_t guest_num_pages;
+static uint64_t random_array[TEST_PAGES_PER_LOOP];
+static uint64_t iteration;
 
 /*
- * Guest variables.  We use these variables to share data between host
- * and guest.  There are two copies of the variables, one in host memory
- * (which is unused) and one in guest memory.  When the host wants to
- * access these variables, it needs to call addr_gva2hva() to access the
- * guest copy.
+ * GPA offset of the testing memory slot. Must be bigger than
+ * DEFAULT_GUEST_PHY_PAGES.
  */
-uint64_t guest_random_array[TEST_PAGES_PER_LOOP];
-uint64_t guest_iteration;
-uint64_t guest_page_size;
+static uint64_t guest_test_mem = DEFAULT_GUEST_TEST_MEM;
 
 /*
- * Writes to the first byte of a random page within the testing memory
- * region continuously.
+ * Continuously write to the first 8 bytes of a random pages within
+ * the testing memory region.
  */
-void guest_code(void)
+static void guest_code(void)
 {
-	int i = 0;
-	uint64_t volatile *array = guest_random_array;
-	uint64_t volatile *guest_addr;
+	int i;
 
 	while (true) {
 		for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
-			/*
-			 * Write to the first 8 bytes of a random page
-			 * on the testing memory region.
-			 */
-			guest_addr = (uint64_t *)
-			    (TEST_MEM_OFFSET +
-			     (array[i] % TEST_MEM_PAGES) * guest_page_size);
-			*guest_addr = guest_iteration;
+			uint64_t addr = guest_test_mem;
+			addr += (READ_ONCE(random_array[i]) % guest_num_pages)
+				* guest_page_size;
+			addr &= ~(host_page_size - 1);
+			*(uint64_t *)addr = READ_ONCE(iteration);
 		}
+
 		/* Tell the host that we need more random numbers */
 		GUEST_SYNC(1);
 	}
 }
 
-/*
- * Host variables.  These variables should only be used by the host
- * rather than the guest.
- */
-bool host_quit;
+/* Host variables */
+static bool host_quit;
 
 /* Points to the test VM memory region on which we track dirty logs */
-void *host_test_mem;
+static void *host_test_mem;
+static uint64_t host_num_pages;
 
 /* For statistics only */
-uint64_t host_dirty_count;
-uint64_t host_clear_count;
-uint64_t host_track_next_count;
+static uint64_t host_dirty_count;
+static uint64_t host_clear_count;
+static uint64_t host_track_next_count;
 
 /*
  * We use this bitmap to track some pages that should have its dirty
@@ -93,40 +97,34 @@ uint64_t host_track_next_count;
  * page bit is cleared in the latest bitmap, then the system must
  * report that write in the next get dirty log call.
  */
-unsigned long *host_bmap_track;
+static unsigned long *host_bmap_track;
 
-void generate_random_array(uint64_t *guest_array, uint64_t size)
+static void generate_random_array(uint64_t *guest_array, uint64_t size)
 {
 	uint64_t i;
 
-	for (i = 0; i < size; i++) {
+	for (i = 0; i < size; i++)
 		guest_array[i] = random();
-	}
 }
 
-void *vcpu_worker(void *data)
+static void *vcpu_worker(void *data)
 {
 	int ret;
-	uint64_t loops, *guest_array, pages_count = 0;
 	struct kvm_vm *vm = data;
+	uint64_t *guest_array;
+	uint64_t pages_count = 0;
 	struct kvm_run *run;
-	struct guest_args args;
+	struct ucall uc;
 
 	run = vcpu_state(vm, VCPU_ID);
 
-	/* Retrieve the guest random array pointer and cache it */
-	guest_array = addr_gva2hva(vm, (vm_vaddr_t)guest_random_array);
-
-	DEBUG("VCPU starts\n");
-
+	guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
 	generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
 
 	while (!READ_ONCE(host_quit)) {
-		/* Let the guest to dirty these random pages */
+		/* Let the guest dirty the random pages */
 		ret = _vcpu_run(vm, VCPU_ID);
-		guest_args_read(vm, VCPU_ID, &args);
-		if (run->exit_reason == KVM_EXIT_IO &&
-		    args.port == GUEST_PORT_SYNC) {
+		if (get_ucall(vm, VCPU_ID, &uc) == UCALL_SYNC) {
 			pages_count += TEST_PAGES_PER_LOOP;
 			generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
 		} else {
@@ -137,18 +135,20 @@ void *vcpu_worker(void *data)
 		}
 	}
 
-	DEBUG("VCPU exits, dirtied %"PRIu64" pages\n", pages_count);
+	DEBUG("Dirtied %"PRIu64" pages\n", pages_count);
 
 	return NULL;
 }
 
-void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration)
+static void vm_dirty_log_verify(unsigned long *bmap)
 {
 	uint64_t page;
-	uint64_t volatile *value_ptr;
+	uint64_t *value_ptr;
+	uint64_t step = host_page_size >= guest_page_size ? 1 :
+				guest_page_size / host_page_size;
 
-	for (page = 0; page < TEST_MEM_PAGES; page++) {
-		value_ptr = host_test_mem + page * getpagesize();
+	for (page = 0; page < host_num_pages; page += step) {
+		value_ptr = host_test_mem + page * host_page_size;
 
 		/* If this is a special page that we were tracking... */
 		if (test_and_clear_bit(page, host_bmap_track)) {
@@ -208,88 +208,117 @@ void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration)
 	}
 }
 
-void help(char *name)
+static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
+				uint64_t extra_mem_pages, void *guest_code)
 {
-	puts("");
-	printf("usage: %s [-i iterations] [-I interval] [-h]\n", name);
-	puts("");
-	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
-	       TEST_HOST_LOOP_N);
-	printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
-	       TEST_HOST_LOOP_INTERVAL);
-	puts("");
-	exit(0);
+	struct kvm_vm *vm;
+	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
+
+	vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+#ifdef __x86_64__
+	vm_create_irqchip(vm);
+#endif
+	vm_vcpu_add_default(vm, vcpuid, guest_code);
+	return vm;
 }
 
-int main(int argc, char *argv[])
+static void run_test(enum vm_guest_mode mode, unsigned long iterations,
+		     unsigned long interval, bool top_offset)
 {
+	unsigned int guest_pa_bits, guest_page_shift;
 	pthread_t vcpu_thread;
 	struct kvm_vm *vm;
-	uint64_t volatile *psize, *iteration;
-	unsigned long *bmap, iterations = TEST_HOST_LOOP_N,
-	    interval = TEST_HOST_LOOP_INTERVAL;
-	int opt;
-
-	while ((opt = getopt(argc, argv, "hi:I:")) != -1) {
-		switch (opt) {
-		case 'i':
-			iterations = strtol(optarg, NULL, 10);
-			break;
-		case 'I':
-			interval = strtol(optarg, NULL, 10);
-			break;
-		case 'h':
-		default:
-			help(argv[0]);
-			break;
-		}
+	uint64_t max_gfn;
+	unsigned long *bmap;
+
+	switch (mode) {
+	case VM_MODE_P52V48_4K:
+		guest_pa_bits = 52;
+		guest_page_shift = 12;
+		break;
+	case VM_MODE_P52V48_64K:
+		guest_pa_bits = 52;
+		guest_page_shift = 16;
+		break;
+	case VM_MODE_P40V48_4K:
+		guest_pa_bits = 40;
+		guest_page_shift = 12;
+		break;
+	case VM_MODE_P40V48_64K:
+		guest_pa_bits = 40;
+		guest_page_shift = 16;
+		break;
+	default:
+		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
 	}
 
-	TEST_ASSERT(iterations > 2, "Iteration must be bigger than zero\n");
-	TEST_ASSERT(interval > 0, "Interval must be bigger than zero");
+	DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
 
-	DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
-	      iterations, interval);
+	max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1;
+	guest_page_size = (1ul << guest_page_shift);
+	/* 1G of guest page sized pages */
+	guest_num_pages = (1ul << (30 - guest_page_shift));
+	host_page_size = getpagesize();
+	host_num_pages = (guest_num_pages * guest_page_size) / host_page_size +
+			 !!((guest_num_pages * guest_page_size) % host_page_size);
 
-	srandom(time(0));
+	if (top_offset) {
+		guest_test_mem = (max_gfn - guest_num_pages) * guest_page_size;
+		guest_test_mem &= ~(host_page_size - 1);
+	}
 
-	bmap = bitmap_alloc(TEST_MEM_PAGES);
-	host_bmap_track = bitmap_alloc(TEST_MEM_PAGES);
+	DEBUG("guest test mem offset: 0x%lx\n", guest_test_mem);
 
-	vm = vm_create_default(VCPU_ID, TEST_MEM_PAGES, guest_code);
+	bmap = bitmap_alloc(host_num_pages);
+	host_bmap_track = bitmap_alloc(host_num_pages);
+
+	vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code);
 
 	/* Add an extra memory slot for testing dirty logging */
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-				    TEST_MEM_OFFSET,
+				    guest_test_mem,
 				    TEST_MEM_SLOT_INDEX,
-				    TEST_MEM_PAGES,
+				    guest_num_pages,
 				    KVM_MEM_LOG_DIRTY_PAGES);
-	/* Cache the HVA pointer of the region */
-	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)TEST_MEM_OFFSET);
 
 	/* Do 1:1 mapping for the dirty track memory slot */
-	virt_map(vm, TEST_MEM_OFFSET, TEST_MEM_OFFSET,
-		 TEST_MEM_PAGES * getpagesize(), 0);
+	virt_map(vm, guest_test_mem, guest_test_mem,
+		 guest_num_pages * guest_page_size, 0);
+
+	/* Cache the HVA pointer of the region */
+	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_mem);
 
+#ifdef __x86_64__
 	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+#endif
+#ifdef __aarch64__
+	ucall_init(vm, UCALL_MMIO, NULL);
+#endif
 
-	/* Tell the guest about the page size on the system */
-	psize = addr_gva2hva(vm, (vm_vaddr_t)&guest_page_size);
-	*psize = getpagesize();
+	/* Export the shared variables to the guest */
+	sync_global_to_guest(vm, host_page_size);
+	sync_global_to_guest(vm, guest_page_size);
+	sync_global_to_guest(vm, guest_test_mem);
+	sync_global_to_guest(vm, guest_num_pages);
 
 	/* Start the iterations */
-	iteration = addr_gva2hva(vm, (vm_vaddr_t)&guest_iteration);
-	*iteration = 1;
+	iteration = 1;
+	sync_global_to_guest(vm, iteration);
+	host_quit = false;
+	host_dirty_count = 0;
+	host_clear_count = 0;
+	host_track_next_count = 0;
 
-	/* Start dirtying pages */
 	pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
 
-	while (*iteration < iterations) {
+	while (iteration < iterations) {
 		/* Give the vcpu thread some time to dirty some pages */
 		usleep(interval * 1000);
 		kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
-		vm_dirty_log_verify(bmap, *iteration);
-		(*iteration)++;
+		vm_dirty_log_verify(bmap);
+		iteration++;
+		sync_global_to_guest(vm, iteration);
 	}
 
 	/* Tell the vcpu thread to quit */
@@ -302,7 +331,118 @@ int main(int argc, char *argv[])
 
 	free(bmap);
 	free(host_bmap_track);
+	ucall_uninit(vm);
 	kvm_vm_free(vm);
+}
+
+static struct vm_guest_modes {
+	enum vm_guest_mode mode;
+	bool supported;
+	bool enabled;
+} vm_guest_modes[NUM_VM_MODES] = {
+#if defined(__x86_64__)
+	{ VM_MODE_P52V48_4K,	1, 1, },
+	{ VM_MODE_P52V48_64K,	0, 0, },
+	{ VM_MODE_P40V48_4K,	0, 0, },
+	{ VM_MODE_P40V48_64K,	0, 0, },
+#elif defined(__aarch64__)
+	{ VM_MODE_P52V48_4K,	0, 0, },
+	{ VM_MODE_P52V48_64K,	0, 0, },
+	{ VM_MODE_P40V48_4K,	1, 1, },
+	{ VM_MODE_P40V48_64K,	1, 1, },
+#endif
+};
+
+static void help(char *name)
+{
+	int i;
+
+	puts("");
+	printf("usage: %s [-h] [-i iterations] [-I interval] "
+	       "[-o offset] [-t] [-m mode]\n", name);
+	puts("");
+	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
+	       TEST_HOST_LOOP_N);
+	printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
+	       TEST_HOST_LOOP_INTERVAL);
+	printf(" -o: guest test memory offset (default: 0x%lx)\n",
+	       DEFAULT_GUEST_TEST_MEM);
+	printf(" -t: map guest test memory at the top of the allowed "
+	       "physical address range\n");
+	printf(" -m: specify the guest mode ID to test "
+	       "(default: test all supported modes)\n"
+	       "     This option may be used multiple times.\n"
+	       "     Guest mode IDs:\n");
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		printf("         %d:    %s%s\n",
+		       vm_guest_modes[i].mode,
+		       vm_guest_mode_string(vm_guest_modes[i].mode),
+		       vm_guest_modes[i].supported ? " (supported)" : "");
+	}
+	puts("");
+	exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	unsigned long iterations = TEST_HOST_LOOP_N;
+	unsigned long interval = TEST_HOST_LOOP_INTERVAL;
+	bool mode_selected = false;
+	bool top_offset = false;
+	unsigned int mode;
+	int opt, i;
+
+	while ((opt = getopt(argc, argv, "hi:I:o:tm:")) != -1) {
+		switch (opt) {
+		case 'i':
+			iterations = strtol(optarg, NULL, 10);
+			break;
+		case 'I':
+			interval = strtol(optarg, NULL, 10);
+			break;
+		case 'o':
+			guest_test_mem = strtoull(optarg, NULL, 0);
+			break;
+		case 't':
+			top_offset = true;
+			break;
+		case 'm':
+			if (!mode_selected) {
+				for (i = 0; i < NUM_VM_MODES; ++i)
+					vm_guest_modes[i].enabled = 0;
+				mode_selected = true;
+			}
+			mode = strtoul(optarg, NULL, 10);
+			TEST_ASSERT(mode < NUM_VM_MODES,
+				    "Guest mode ID %d too big", mode);
+			vm_guest_modes[mode].enabled = 1;
+			break;
+		case 'h':
+		default:
+			help(argv[0]);
+			break;
+		}
+	}
+
+	TEST_ASSERT(iterations > 2, "Iterations must be greater than two");
+	TEST_ASSERT(interval > 0, "Interval must be greater than zero");
+	TEST_ASSERT(!top_offset || guest_test_mem == DEFAULT_GUEST_TEST_MEM,
+		    "Cannot use both -o [offset] and -t at the same time");
+
+	DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
+	      iterations, interval);
+
+	srandom(time(0));
+
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		if (!vm_guest_modes[i].enabled)
+			continue;
+		TEST_ASSERT(vm_guest_modes[i].supported,
+			    "Guest mode ID %d (%s) not supported.",
+			    vm_guest_modes[i].mode,
+			    vm_guest_mode_string(vm_guest_modes[i].mode));
+		run_test(vm_guest_modes[i].mode, iterations, interval, top_offset);
+	}
 
 	return 0;
 }
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
new file mode 100644
index 000000000000..9ef2ab1a0c08
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AArch64 processor specific defines
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include "kvm_util.h"
+
+
+#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+			   KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
+
+#define CPACR_EL1	3, 0,  1, 0, 2
+#define TCR_EL1		3, 0,  2, 0, 2
+#define MAIR_EL1	3, 0, 10, 2, 0
+#define TTBR0_EL1	3, 0,  2, 0, 0
+#define SCTLR_EL1	3, 0,  1, 0, 0
+
+/*
+ * Default MAIR
+ *                  index   attribute
+ * DEVICE_nGnRnE      0     0000:0000
+ * DEVICE_nGnRE       1     0000:0100
+ * DEVICE_GRE         2     0000:1100
+ * NORMAL_NC          3     0100:0100
+ * NORMAL             4     1111:1111
+ * NORMAL_WT          5     1011:1011
+ */
+#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \
+			  (0x04ul << (1 * 8)) | \
+			  (0x0cul << (2 * 8)) | \
+			  (0x44ul << (3 * 8)) | \
+			  (0xfful << (4 * 8)) | \
+			  (0xbbul << (5 * 8)))
+
+static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t *addr)
+{
+	struct kvm_one_reg reg;
+	reg.id = id;
+	reg.addr = (uint64_t)addr;
+	vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, &reg);
+}
+
+static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t val)
+{
+	struct kvm_one_reg reg;
+	reg.id = id;
+	reg.addr = (uint64_t)&val;
+	vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, &reg);
+}
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/evmcs.h
new file mode 100644
index 000000000000..4059014d93ea
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/evmcs.h
@@ -0,0 +1,1098 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tools/testing/selftests/kvm/include/vmx.h
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ */
+
+#ifndef SELFTEST_KVM_EVMCS_H
+#define SELFTEST_KVM_EVMCS_H
+
+#include <stdint.h>
+#include "vmx.h"
+
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+extern bool enable_evmcs;
+
+struct hv_vp_assist_page {
+	__u32 apic_assist;
+	__u32 reserved;
+	__u64 vtl_control[2];
+	__u64 nested_enlightenments_control[2];
+	__u32 enlighten_vmentry;
+	__u64 current_nested_vmcs;
+};
+
+struct hv_enlightened_vmcs {
+	u32 revision_id;
+	u32 abort;
+
+	u16 host_es_selector;
+	u16 host_cs_selector;
+	u16 host_ss_selector;
+	u16 host_ds_selector;
+	u16 host_fs_selector;
+	u16 host_gs_selector;
+	u16 host_tr_selector;
+
+	u64 host_ia32_pat;
+	u64 host_ia32_efer;
+
+	u64 host_cr0;
+	u64 host_cr3;
+	u64 host_cr4;
+
+	u64 host_ia32_sysenter_esp;
+	u64 host_ia32_sysenter_eip;
+	u64 host_rip;
+	u32 host_ia32_sysenter_cs;
+
+	u32 pin_based_vm_exec_control;
+	u32 vm_exit_controls;
+	u32 secondary_vm_exec_control;
+
+	u64 io_bitmap_a;
+	u64 io_bitmap_b;
+	u64 msr_bitmap;
+
+	u16 guest_es_selector;
+	u16 guest_cs_selector;
+	u16 guest_ss_selector;
+	u16 guest_ds_selector;
+	u16 guest_fs_selector;
+	u16 guest_gs_selector;
+	u16 guest_ldtr_selector;
+	u16 guest_tr_selector;
+
+	u32 guest_es_limit;
+	u32 guest_cs_limit;
+	u32 guest_ss_limit;
+	u32 guest_ds_limit;
+	u32 guest_fs_limit;
+	u32 guest_gs_limit;
+	u32 guest_ldtr_limit;
+	u32 guest_tr_limit;
+	u32 guest_gdtr_limit;
+	u32 guest_idtr_limit;
+
+	u32 guest_es_ar_bytes;
+	u32 guest_cs_ar_bytes;
+	u32 guest_ss_ar_bytes;
+	u32 guest_ds_ar_bytes;
+	u32 guest_fs_ar_bytes;
+	u32 guest_gs_ar_bytes;
+	u32 guest_ldtr_ar_bytes;
+	u32 guest_tr_ar_bytes;
+
+	u64 guest_es_base;
+	u64 guest_cs_base;
+	u64 guest_ss_base;
+	u64 guest_ds_base;
+	u64 guest_fs_base;
+	u64 guest_gs_base;
+	u64 guest_ldtr_base;
+	u64 guest_tr_base;
+	u64 guest_gdtr_base;
+	u64 guest_idtr_base;
+
+	u64 padding64_1[3];
+
+	u64 vm_exit_msr_store_addr;
+	u64 vm_exit_msr_load_addr;
+	u64 vm_entry_msr_load_addr;
+
+	u64 cr3_target_value0;
+	u64 cr3_target_value1;
+	u64 cr3_target_value2;
+	u64 cr3_target_value3;
+
+	u32 page_fault_error_code_mask;
+	u32 page_fault_error_code_match;
+
+	u32 cr3_target_count;
+	u32 vm_exit_msr_store_count;
+	u32 vm_exit_msr_load_count;
+	u32 vm_entry_msr_load_count;
+
+	u64 tsc_offset;
+	u64 virtual_apic_page_addr;
+	u64 vmcs_link_pointer;
+
+	u64 guest_ia32_debugctl;
+	u64 guest_ia32_pat;
+	u64 guest_ia32_efer;
+
+	u64 guest_pdptr0;
+	u64 guest_pdptr1;
+	u64 guest_pdptr2;
+	u64 guest_pdptr3;
+
+	u64 guest_pending_dbg_exceptions;
+	u64 guest_sysenter_esp;
+	u64 guest_sysenter_eip;
+
+	u32 guest_activity_state;
+	u32 guest_sysenter_cs;
+
+	u64 cr0_guest_host_mask;
+	u64 cr4_guest_host_mask;
+	u64 cr0_read_shadow;
+	u64 cr4_read_shadow;
+	u64 guest_cr0;
+	u64 guest_cr3;
+	u64 guest_cr4;
+	u64 guest_dr7;
+
+	u64 host_fs_base;
+	u64 host_gs_base;
+	u64 host_tr_base;
+	u64 host_gdtr_base;
+	u64 host_idtr_base;
+	u64 host_rsp;
+
+	u64 ept_pointer;
+
+	u16 virtual_processor_id;
+	u16 padding16[3];
+
+	u64 padding64_2[5];
+	u64 guest_physical_address;
+
+	u32 vm_instruction_error;
+	u32 vm_exit_reason;
+	u32 vm_exit_intr_info;
+	u32 vm_exit_intr_error_code;
+	u32 idt_vectoring_info_field;
+	u32 idt_vectoring_error_code;
+	u32 vm_exit_instruction_len;
+	u32 vmx_instruction_info;
+
+	u64 exit_qualification;
+	u64 exit_io_instruction_ecx;
+	u64 exit_io_instruction_esi;
+	u64 exit_io_instruction_edi;
+	u64 exit_io_instruction_eip;
+
+	u64 guest_linear_address;
+	u64 guest_rsp;
+	u64 guest_rflags;
+
+	u32 guest_interruptibility_info;
+	u32 cpu_based_vm_exec_control;
+	u32 exception_bitmap;
+	u32 vm_entry_controls;
+	u32 vm_entry_intr_info_field;
+	u32 vm_entry_exception_error_code;
+	u32 vm_entry_instruction_len;
+	u32 tpr_threshold;
+
+	u64 guest_rip;
+
+	u32 hv_clean_fields;
+	u32 hv_padding_32;
+	u32 hv_synthetic_controls;
+	struct {
+		u32 nested_flush_hypercall:1;
+		u32 msr_bitmap:1;
+		u32 reserved:30;
+	} hv_enlightenments_control;
+	u32 hv_vp_id;
+
+	u64 hv_vm_id;
+	u64 partition_assist_page;
+	u64 padding64_4[4];
+	u64 guest_bndcfgs;
+	u64 padding64_5[7];
+	u64 xss_exit_bitmap;
+	u64 padding64_6[7];
+};
+
+#define HV_X64_MSR_VP_ASSIST_PAGE		0x40000073
+#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE	0x00000001
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK	\
+		(~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+struct hv_enlightened_vmcs *current_evmcs;
+struct hv_vp_assist_page *current_vp_assist;
+
+static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+{
+	u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
+		HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+	wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
+
+	current_vp_assist = vp_assist;
+
+	enable_evmcs = true;
+
+	return 0;
+}
+
+static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
+{
+	current_vp_assist->current_nested_vmcs = vmcs_pa;
+	current_vp_assist->enlighten_vmentry = 1;
+
+	current_evmcs = vmcs;
+
+	return 0;
+}
+
+static inline int evmcs_vmptrst(uint64_t *value)
+{
+	*value = current_vp_assist->current_nested_vmcs &
+		~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+	return 0;
+}
+
+static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
+{
+	switch (encoding) {
+	case GUEST_RIP:
+		*value = current_evmcs->guest_rip;
+		break;
+	case GUEST_RSP:
+		*value = current_evmcs->guest_rsp;
+		break;
+	case GUEST_RFLAGS:
+		*value = current_evmcs->guest_rflags;
+		break;
+	case HOST_IA32_PAT:
+		*value = current_evmcs->host_ia32_pat;
+		break;
+	case HOST_IA32_EFER:
+		*value = current_evmcs->host_ia32_efer;
+		break;
+	case HOST_CR0:
+		*value = current_evmcs->host_cr0;
+		break;
+	case HOST_CR3:
+		*value = current_evmcs->host_cr3;
+		break;
+	case HOST_CR4:
+		*value = current_evmcs->host_cr4;
+		break;
+	case HOST_IA32_SYSENTER_ESP:
+		*value = current_evmcs->host_ia32_sysenter_esp;
+		break;
+	case HOST_IA32_SYSENTER_EIP:
+		*value = current_evmcs->host_ia32_sysenter_eip;
+		break;
+	case HOST_RIP:
+		*value = current_evmcs->host_rip;
+		break;
+	case IO_BITMAP_A:
+		*value = current_evmcs->io_bitmap_a;
+		break;
+	case IO_BITMAP_B:
+		*value = current_evmcs->io_bitmap_b;
+		break;
+	case MSR_BITMAP:
+		*value = current_evmcs->msr_bitmap;
+		break;
+	case GUEST_ES_BASE:
+		*value = current_evmcs->guest_es_base;
+		break;
+	case GUEST_CS_BASE:
+		*value = current_evmcs->guest_cs_base;
+		break;
+	case GUEST_SS_BASE:
+		*value = current_evmcs->guest_ss_base;
+		break;
+	case GUEST_DS_BASE:
+		*value = current_evmcs->guest_ds_base;
+		break;
+	case GUEST_FS_BASE:
+		*value = current_evmcs->guest_fs_base;
+		break;
+	case GUEST_GS_BASE:
+		*value = current_evmcs->guest_gs_base;
+		break;
+	case GUEST_LDTR_BASE:
+		*value = current_evmcs->guest_ldtr_base;
+		break;
+	case GUEST_TR_BASE:
+		*value = current_evmcs->guest_tr_base;
+		break;
+	case GUEST_GDTR_BASE:
+		*value = current_evmcs->guest_gdtr_base;
+		break;
+	case GUEST_IDTR_BASE:
+		*value = current_evmcs->guest_idtr_base;
+		break;
+	case TSC_OFFSET:
+		*value = current_evmcs->tsc_offset;
+		break;
+	case VIRTUAL_APIC_PAGE_ADDR:
+		*value = current_evmcs->virtual_apic_page_addr;
+		break;
+	case VMCS_LINK_POINTER:
+		*value = current_evmcs->vmcs_link_pointer;
+		break;
+	case GUEST_IA32_DEBUGCTL:
+		*value = current_evmcs->guest_ia32_debugctl;
+		break;
+	case GUEST_IA32_PAT:
+		*value = current_evmcs->guest_ia32_pat;
+		break;
+	case GUEST_IA32_EFER:
+		*value = current_evmcs->guest_ia32_efer;
+		break;
+	case GUEST_PDPTR0:
+		*value = current_evmcs->guest_pdptr0;
+		break;
+	case GUEST_PDPTR1:
+		*value = current_evmcs->guest_pdptr1;
+		break;
+	case GUEST_PDPTR2:
+		*value = current_evmcs->guest_pdptr2;
+		break;
+	case GUEST_PDPTR3:
+		*value = current_evmcs->guest_pdptr3;
+		break;
+	case GUEST_PENDING_DBG_EXCEPTIONS:
+		*value = current_evmcs->guest_pending_dbg_exceptions;
+		break;
+	case GUEST_SYSENTER_ESP:
+		*value = current_evmcs->guest_sysenter_esp;
+		break;
+	case GUEST_SYSENTER_EIP:
+		*value = current_evmcs->guest_sysenter_eip;
+		break;
+	case CR0_GUEST_HOST_MASK:
+		*value = current_evmcs->cr0_guest_host_mask;
+		break;
+	case CR4_GUEST_HOST_MASK:
+		*value = current_evmcs->cr4_guest_host_mask;
+		break;
+	case CR0_READ_SHADOW:
+		*value = current_evmcs->cr0_read_shadow;
+		break;
+	case CR4_READ_SHADOW:
+		*value = current_evmcs->cr4_read_shadow;
+		break;
+	case GUEST_CR0:
+		*value = current_evmcs->guest_cr0;
+		break;
+	case GUEST_CR3:
+		*value = current_evmcs->guest_cr3;
+		break;
+	case GUEST_CR4:
+		*value = current_evmcs->guest_cr4;
+		break;
+	case GUEST_DR7:
+		*value = current_evmcs->guest_dr7;
+		break;
+	case HOST_FS_BASE:
+		*value = current_evmcs->host_fs_base;
+		break;
+	case HOST_GS_BASE:
+		*value = current_evmcs->host_gs_base;
+		break;
+	case HOST_TR_BASE:
+		*value = current_evmcs->host_tr_base;
+		break;
+	case HOST_GDTR_BASE:
+		*value = current_evmcs->host_gdtr_base;
+		break;
+	case HOST_IDTR_BASE:
+		*value = current_evmcs->host_idtr_base;
+		break;
+	case HOST_RSP:
+		*value = current_evmcs->host_rsp;
+		break;
+	case EPT_POINTER:
+		*value = current_evmcs->ept_pointer;
+		break;
+	case GUEST_BNDCFGS:
+		*value = current_evmcs->guest_bndcfgs;
+		break;
+	case XSS_EXIT_BITMAP:
+		*value = current_evmcs->xss_exit_bitmap;
+		break;
+	case GUEST_PHYSICAL_ADDRESS:
+		*value = current_evmcs->guest_physical_address;
+		break;
+	case EXIT_QUALIFICATION:
+		*value = current_evmcs->exit_qualification;
+		break;
+	case GUEST_LINEAR_ADDRESS:
+		*value = current_evmcs->guest_linear_address;
+		break;
+	case VM_EXIT_MSR_STORE_ADDR:
+		*value = current_evmcs->vm_exit_msr_store_addr;
+		break;
+	case VM_EXIT_MSR_LOAD_ADDR:
+		*value = current_evmcs->vm_exit_msr_load_addr;
+		break;
+	case VM_ENTRY_MSR_LOAD_ADDR:
+		*value = current_evmcs->vm_entry_msr_load_addr;
+		break;
+	case CR3_TARGET_VALUE0:
+		*value = current_evmcs->cr3_target_value0;
+		break;
+	case CR3_TARGET_VALUE1:
+		*value = current_evmcs->cr3_target_value1;
+		break;
+	case CR3_TARGET_VALUE2:
+		*value = current_evmcs->cr3_target_value2;
+		break;
+	case CR3_TARGET_VALUE3:
+		*value = current_evmcs->cr3_target_value3;
+		break;
+	case TPR_THRESHOLD:
+		*value = current_evmcs->tpr_threshold;
+		break;
+	case GUEST_INTERRUPTIBILITY_INFO:
+		*value = current_evmcs->guest_interruptibility_info;
+		break;
+	case CPU_BASED_VM_EXEC_CONTROL:
+		*value = current_evmcs->cpu_based_vm_exec_control;
+		break;
+	case EXCEPTION_BITMAP:
+		*value = current_evmcs->exception_bitmap;
+		break;
+	case VM_ENTRY_CONTROLS:
+		*value = current_evmcs->vm_entry_controls;
+		break;
+	case VM_ENTRY_INTR_INFO_FIELD:
+		*value = current_evmcs->vm_entry_intr_info_field;
+		break;
+	case VM_ENTRY_EXCEPTION_ERROR_CODE:
+		*value = current_evmcs->vm_entry_exception_error_code;
+		break;
+	case VM_ENTRY_INSTRUCTION_LEN:
+		*value = current_evmcs->vm_entry_instruction_len;
+		break;
+	case HOST_IA32_SYSENTER_CS:
+		*value = current_evmcs->host_ia32_sysenter_cs;
+		break;
+	case PIN_BASED_VM_EXEC_CONTROL:
+		*value = current_evmcs->pin_based_vm_exec_control;
+		break;
+	case VM_EXIT_CONTROLS:
+		*value = current_evmcs->vm_exit_controls;
+		break;
+	case SECONDARY_VM_EXEC_CONTROL:
+		*value = current_evmcs->secondary_vm_exec_control;
+		break;
+	case GUEST_ES_LIMIT:
+		*value = current_evmcs->guest_es_limit;
+		break;
+	case GUEST_CS_LIMIT:
+		*value = current_evmcs->guest_cs_limit;
+		break;
+	case GUEST_SS_LIMIT:
+		*value = current_evmcs->guest_ss_limit;
+		break;
+	case GUEST_DS_LIMIT:
+		*value = current_evmcs->guest_ds_limit;
+		break;
+	case GUEST_FS_LIMIT:
+		*value = current_evmcs->guest_fs_limit;
+		break;
+	case GUEST_GS_LIMIT:
+		*value = current_evmcs->guest_gs_limit;
+		break;
+	case GUEST_LDTR_LIMIT:
+		*value = current_evmcs->guest_ldtr_limit;
+		break;
+	case GUEST_TR_LIMIT:
+		*value = current_evmcs->guest_tr_limit;
+		break;
+	case GUEST_GDTR_LIMIT:
+		*value = current_evmcs->guest_gdtr_limit;
+		break;
+	case GUEST_IDTR_LIMIT:
+		*value = current_evmcs->guest_idtr_limit;
+		break;
+	case GUEST_ES_AR_BYTES:
+		*value = current_evmcs->guest_es_ar_bytes;
+		break;
+	case GUEST_CS_AR_BYTES:
+		*value = current_evmcs->guest_cs_ar_bytes;
+		break;
+	case GUEST_SS_AR_BYTES:
+		*value = current_evmcs->guest_ss_ar_bytes;
+		break;
+	case GUEST_DS_AR_BYTES:
+		*value = current_evmcs->guest_ds_ar_bytes;
+		break;
+	case GUEST_FS_AR_BYTES:
+		*value = current_evmcs->guest_fs_ar_bytes;
+		break;
+	case GUEST_GS_AR_BYTES:
+		*value = current_evmcs->guest_gs_ar_bytes;
+		break;
+	case GUEST_LDTR_AR_BYTES:
+		*value = current_evmcs->guest_ldtr_ar_bytes;
+		break;
+	case GUEST_TR_AR_BYTES:
+		*value = current_evmcs->guest_tr_ar_bytes;
+		break;
+	case GUEST_ACTIVITY_STATE:
+		*value = current_evmcs->guest_activity_state;
+		break;
+	case GUEST_SYSENTER_CS:
+		*value = current_evmcs->guest_sysenter_cs;
+		break;
+	case VM_INSTRUCTION_ERROR:
+		*value = current_evmcs->vm_instruction_error;
+		break;
+	case VM_EXIT_REASON:
+		*value = current_evmcs->vm_exit_reason;
+		break;
+	case VM_EXIT_INTR_INFO:
+		*value = current_evmcs->vm_exit_intr_info;
+		break;
+	case VM_EXIT_INTR_ERROR_CODE:
+		*value = current_evmcs->vm_exit_intr_error_code;
+		break;
+	case IDT_VECTORING_INFO_FIELD:
+		*value = current_evmcs->idt_vectoring_info_field;
+		break;
+	case IDT_VECTORING_ERROR_CODE:
+		*value = current_evmcs->idt_vectoring_error_code;
+		break;
+	case VM_EXIT_INSTRUCTION_LEN:
+		*value = current_evmcs->vm_exit_instruction_len;
+		break;
+	case VMX_INSTRUCTION_INFO:
+		*value = current_evmcs->vmx_instruction_info;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MASK:
+		*value = current_evmcs->page_fault_error_code_mask;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MATCH:
+		*value = current_evmcs->page_fault_error_code_match;
+		break;
+	case CR3_TARGET_COUNT:
+		*value = current_evmcs->cr3_target_count;
+		break;
+	case VM_EXIT_MSR_STORE_COUNT:
+		*value = current_evmcs->vm_exit_msr_store_count;
+		break;
+	case VM_EXIT_MSR_LOAD_COUNT:
+		*value = current_evmcs->vm_exit_msr_load_count;
+		break;
+	case VM_ENTRY_MSR_LOAD_COUNT:
+		*value = current_evmcs->vm_entry_msr_load_count;
+		break;
+	case HOST_ES_SELECTOR:
+		*value = current_evmcs->host_es_selector;
+		break;
+	case HOST_CS_SELECTOR:
+		*value = current_evmcs->host_cs_selector;
+		break;
+	case HOST_SS_SELECTOR:
+		*value = current_evmcs->host_ss_selector;
+		break;
+	case HOST_DS_SELECTOR:
+		*value = current_evmcs->host_ds_selector;
+		break;
+	case HOST_FS_SELECTOR:
+		*value = current_evmcs->host_fs_selector;
+		break;
+	case HOST_GS_SELECTOR:
+		*value = current_evmcs->host_gs_selector;
+		break;
+	case HOST_TR_SELECTOR:
+		*value = current_evmcs->host_tr_selector;
+		break;
+	case GUEST_ES_SELECTOR:
+		*value = current_evmcs->guest_es_selector;
+		break;
+	case GUEST_CS_SELECTOR:
+		*value = current_evmcs->guest_cs_selector;
+		break;
+	case GUEST_SS_SELECTOR:
+		*value = current_evmcs->guest_ss_selector;
+		break;
+	case GUEST_DS_SELECTOR:
+		*value = current_evmcs->guest_ds_selector;
+		break;
+	case GUEST_FS_SELECTOR:
+		*value = current_evmcs->guest_fs_selector;
+		break;
+	case GUEST_GS_SELECTOR:
+		*value = current_evmcs->guest_gs_selector;
+		break;
+	case GUEST_LDTR_SELECTOR:
+		*value = current_evmcs->guest_ldtr_selector;
+		break;
+	case GUEST_TR_SELECTOR:
+		*value = current_evmcs->guest_tr_selector;
+		break;
+	case VIRTUAL_PROCESSOR_ID:
+		*value = current_evmcs->virtual_processor_id;
+		break;
+	default: return 1;
+	}
+
+	return 0;
+}
+
+static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
+{
+	switch (encoding) {
+	case GUEST_RIP:
+		current_evmcs->guest_rip = value;
+		break;
+	case GUEST_RSP:
+		current_evmcs->guest_rsp = value;
+		break;
+	case GUEST_RFLAGS:
+		current_evmcs->guest_rflags = value;
+		break;
+	case HOST_IA32_PAT:
+		current_evmcs->host_ia32_pat = value;
+		break;
+	case HOST_IA32_EFER:
+		current_evmcs->host_ia32_efer = value;
+		break;
+	case HOST_CR0:
+		current_evmcs->host_cr0 = value;
+		break;
+	case HOST_CR3:
+		current_evmcs->host_cr3 = value;
+		break;
+	case HOST_CR4:
+		current_evmcs->host_cr4 = value;
+		break;
+	case HOST_IA32_SYSENTER_ESP:
+		current_evmcs->host_ia32_sysenter_esp = value;
+		break;
+	case HOST_IA32_SYSENTER_EIP:
+		current_evmcs->host_ia32_sysenter_eip = value;
+		break;
+	case HOST_RIP:
+		current_evmcs->host_rip = value;
+		break;
+	case IO_BITMAP_A:
+		current_evmcs->io_bitmap_a = value;
+		break;
+	case IO_BITMAP_B:
+		current_evmcs->io_bitmap_b = value;
+		break;
+	case MSR_BITMAP:
+		current_evmcs->msr_bitmap = value;
+		break;
+	case GUEST_ES_BASE:
+		current_evmcs->guest_es_base = value;
+		break;
+	case GUEST_CS_BASE:
+		current_evmcs->guest_cs_base = value;
+		break;
+	case GUEST_SS_BASE:
+		current_evmcs->guest_ss_base = value;
+		break;
+	case GUEST_DS_BASE:
+		current_evmcs->guest_ds_base = value;
+		break;
+	case GUEST_FS_BASE:
+		current_evmcs->guest_fs_base = value;
+		break;
+	case GUEST_GS_BASE:
+		current_evmcs->guest_gs_base = value;
+		break;
+	case GUEST_LDTR_BASE:
+		current_evmcs->guest_ldtr_base = value;
+		break;
+	case GUEST_TR_BASE:
+		current_evmcs->guest_tr_base = value;
+		break;
+	case GUEST_GDTR_BASE:
+		current_evmcs->guest_gdtr_base = value;
+		break;
+	case GUEST_IDTR_BASE:
+		current_evmcs->guest_idtr_base = value;
+		break;
+	case TSC_OFFSET:
+		current_evmcs->tsc_offset = value;
+		break;
+	case VIRTUAL_APIC_PAGE_ADDR:
+		current_evmcs->virtual_apic_page_addr = value;
+		break;
+	case VMCS_LINK_POINTER:
+		current_evmcs->vmcs_link_pointer = value;
+		break;
+	case GUEST_IA32_DEBUGCTL:
+		current_evmcs->guest_ia32_debugctl = value;
+		break;
+	case GUEST_IA32_PAT:
+		current_evmcs->guest_ia32_pat = value;
+		break;
+	case GUEST_IA32_EFER:
+		current_evmcs->guest_ia32_efer = value;
+		break;
+	case GUEST_PDPTR0:
+		current_evmcs->guest_pdptr0 = value;
+		break;
+	case GUEST_PDPTR1:
+		current_evmcs->guest_pdptr1 = value;
+		break;
+	case GUEST_PDPTR2:
+		current_evmcs->guest_pdptr2 = value;
+		break;
+	case GUEST_PDPTR3:
+		current_evmcs->guest_pdptr3 = value;
+		break;
+	case GUEST_PENDING_DBG_EXCEPTIONS:
+		current_evmcs->guest_pending_dbg_exceptions = value;
+		break;
+	case GUEST_SYSENTER_ESP:
+		current_evmcs->guest_sysenter_esp = value;
+		break;
+	case GUEST_SYSENTER_EIP:
+		current_evmcs->guest_sysenter_eip = value;
+		break;
+	case CR0_GUEST_HOST_MASK:
+		current_evmcs->cr0_guest_host_mask = value;
+		break;
+	case CR4_GUEST_HOST_MASK:
+		current_evmcs->cr4_guest_host_mask = value;
+		break;
+	case CR0_READ_SHADOW:
+		current_evmcs->cr0_read_shadow = value;
+		break;
+	case CR4_READ_SHADOW:
+		current_evmcs->cr4_read_shadow = value;
+		break;
+	case GUEST_CR0:
+		current_evmcs->guest_cr0 = value;
+		break;
+	case GUEST_CR3:
+		current_evmcs->guest_cr3 = value;
+		break;
+	case GUEST_CR4:
+		current_evmcs->guest_cr4 = value;
+		break;
+	case GUEST_DR7:
+		current_evmcs->guest_dr7 = value;
+		break;
+	case HOST_FS_BASE:
+		current_evmcs->host_fs_base = value;
+		break;
+	case HOST_GS_BASE:
+		current_evmcs->host_gs_base = value;
+		break;
+	case HOST_TR_BASE:
+		current_evmcs->host_tr_base = value;
+		break;
+	case HOST_GDTR_BASE:
+		current_evmcs->host_gdtr_base = value;
+		break;
+	case HOST_IDTR_BASE:
+		current_evmcs->host_idtr_base = value;
+		break;
+	case HOST_RSP:
+		current_evmcs->host_rsp = value;
+		break;
+	case EPT_POINTER:
+		current_evmcs->ept_pointer = value;
+		break;
+	case GUEST_BNDCFGS:
+		current_evmcs->guest_bndcfgs = value;
+		break;
+	case XSS_EXIT_BITMAP:
+		current_evmcs->xss_exit_bitmap = value;
+		break;
+	case GUEST_PHYSICAL_ADDRESS:
+		current_evmcs->guest_physical_address = value;
+		break;
+	case EXIT_QUALIFICATION:
+		current_evmcs->exit_qualification = value;
+		break;
+	case GUEST_LINEAR_ADDRESS:
+		current_evmcs->guest_linear_address = value;
+		break;
+	case VM_EXIT_MSR_STORE_ADDR:
+		current_evmcs->vm_exit_msr_store_addr = value;
+		break;
+	case VM_EXIT_MSR_LOAD_ADDR:
+		current_evmcs->vm_exit_msr_load_addr = value;
+		break;
+	case VM_ENTRY_MSR_LOAD_ADDR:
+		current_evmcs->vm_entry_msr_load_addr = value;
+		break;
+	case CR3_TARGET_VALUE0:
+		current_evmcs->cr3_target_value0 = value;
+		break;
+	case CR3_TARGET_VALUE1:
+		current_evmcs->cr3_target_value1 = value;
+		break;
+	case CR3_TARGET_VALUE2:
+		current_evmcs->cr3_target_value2 = value;
+		break;
+	case CR3_TARGET_VALUE3:
+		current_evmcs->cr3_target_value3 = value;
+		break;
+	case TPR_THRESHOLD:
+		current_evmcs->tpr_threshold = value;
+		break;
+	case GUEST_INTERRUPTIBILITY_INFO:
+		current_evmcs->guest_interruptibility_info = value;
+		break;
+	case CPU_BASED_VM_EXEC_CONTROL:
+		current_evmcs->cpu_based_vm_exec_control = value;
+		break;
+	case EXCEPTION_BITMAP:
+		current_evmcs->exception_bitmap = value;
+		break;
+	case VM_ENTRY_CONTROLS:
+		current_evmcs->vm_entry_controls = value;
+		break;
+	case VM_ENTRY_INTR_INFO_FIELD:
+		current_evmcs->vm_entry_intr_info_field = value;
+		break;
+	case VM_ENTRY_EXCEPTION_ERROR_CODE:
+		current_evmcs->vm_entry_exception_error_code = value;
+		break;
+	case VM_ENTRY_INSTRUCTION_LEN:
+		current_evmcs->vm_entry_instruction_len = value;
+		break;
+	case HOST_IA32_SYSENTER_CS:
+		current_evmcs->host_ia32_sysenter_cs = value;
+		break;
+	case PIN_BASED_VM_EXEC_CONTROL:
+		current_evmcs->pin_based_vm_exec_control = value;
+		break;
+	case VM_EXIT_CONTROLS:
+		current_evmcs->vm_exit_controls = value;
+		break;
+	case SECONDARY_VM_EXEC_CONTROL:
+		current_evmcs->secondary_vm_exec_control = value;
+		break;
+	case GUEST_ES_LIMIT:
+		current_evmcs->guest_es_limit = value;
+		break;
+	case GUEST_CS_LIMIT:
+		current_evmcs->guest_cs_limit = value;
+		break;
+	case GUEST_SS_LIMIT:
+		current_evmcs->guest_ss_limit = value;
+		break;
+	case GUEST_DS_LIMIT:
+		current_evmcs->guest_ds_limit = value;
+		break;
+	case GUEST_FS_LIMIT:
+		current_evmcs->guest_fs_limit = value;
+		break;
+	case GUEST_GS_LIMIT:
+		current_evmcs->guest_gs_limit = value;
+		break;
+	case GUEST_LDTR_LIMIT:
+		current_evmcs->guest_ldtr_limit = value;
+		break;
+	case GUEST_TR_LIMIT:
+		current_evmcs->guest_tr_limit = value;
+		break;
+	case GUEST_GDTR_LIMIT:
+		current_evmcs->guest_gdtr_limit = value;
+		break;
+	case GUEST_IDTR_LIMIT:
+		current_evmcs->guest_idtr_limit = value;
+		break;
+	case GUEST_ES_AR_BYTES:
+		current_evmcs->guest_es_ar_bytes = value;
+		break;
+	case GUEST_CS_AR_BYTES:
+		current_evmcs->guest_cs_ar_bytes = value;
+		break;
+	case GUEST_SS_AR_BYTES:
+		current_evmcs->guest_ss_ar_bytes = value;
+		break;
+	case GUEST_DS_AR_BYTES:
+		current_evmcs->guest_ds_ar_bytes = value;
+		break;
+	case GUEST_FS_AR_BYTES:
+		current_evmcs->guest_fs_ar_bytes = value;
+		break;
+	case GUEST_GS_AR_BYTES:
+		current_evmcs->guest_gs_ar_bytes = value;
+		break;
+	case GUEST_LDTR_AR_BYTES:
+		current_evmcs->guest_ldtr_ar_bytes = value;
+		break;
+	case GUEST_TR_AR_BYTES:
+		current_evmcs->guest_tr_ar_bytes = value;
+		break;
+	case GUEST_ACTIVITY_STATE:
+		current_evmcs->guest_activity_state = value;
+		break;
+	case GUEST_SYSENTER_CS:
+		current_evmcs->guest_sysenter_cs = value;
+		break;
+	case VM_INSTRUCTION_ERROR:
+		current_evmcs->vm_instruction_error = value;
+		break;
+	case VM_EXIT_REASON:
+		current_evmcs->vm_exit_reason = value;
+		break;
+	case VM_EXIT_INTR_INFO:
+		current_evmcs->vm_exit_intr_info = value;
+		break;
+	case VM_EXIT_INTR_ERROR_CODE:
+		current_evmcs->vm_exit_intr_error_code = value;
+		break;
+	case IDT_VECTORING_INFO_FIELD:
+		current_evmcs->idt_vectoring_info_field = value;
+		break;
+	case IDT_VECTORING_ERROR_CODE:
+		current_evmcs->idt_vectoring_error_code = value;
+		break;
+	case VM_EXIT_INSTRUCTION_LEN:
+		current_evmcs->vm_exit_instruction_len = value;
+		break;
+	case VMX_INSTRUCTION_INFO:
+		current_evmcs->vmx_instruction_info = value;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MASK:
+		current_evmcs->page_fault_error_code_mask = value;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MATCH:
+		current_evmcs->page_fault_error_code_match = value;
+		break;
+	case CR3_TARGET_COUNT:
+		current_evmcs->cr3_target_count = value;
+		break;
+	case VM_EXIT_MSR_STORE_COUNT:
+		current_evmcs->vm_exit_msr_store_count = value;
+		break;
+	case VM_EXIT_MSR_LOAD_COUNT:
+		current_evmcs->vm_exit_msr_load_count = value;
+		break;
+	case VM_ENTRY_MSR_LOAD_COUNT:
+		current_evmcs->vm_entry_msr_load_count = value;
+		break;
+	case HOST_ES_SELECTOR:
+		current_evmcs->host_es_selector = value;
+		break;
+	case HOST_CS_SELECTOR:
+		current_evmcs->host_cs_selector = value;
+		break;
+	case HOST_SS_SELECTOR:
+		current_evmcs->host_ss_selector = value;
+		break;
+	case HOST_DS_SELECTOR:
+		current_evmcs->host_ds_selector = value;
+		break;
+	case HOST_FS_SELECTOR:
+		current_evmcs->host_fs_selector = value;
+		break;
+	case HOST_GS_SELECTOR:
+		current_evmcs->host_gs_selector = value;
+		break;
+	case HOST_TR_SELECTOR:
+		current_evmcs->host_tr_selector = value;
+		break;
+	case GUEST_ES_SELECTOR:
+		current_evmcs->guest_es_selector = value;
+		break;
+	case GUEST_CS_SELECTOR:
+		current_evmcs->guest_cs_selector = value;
+		break;
+	case GUEST_SS_SELECTOR:
+		current_evmcs->guest_ss_selector = value;
+		break;
+	case GUEST_DS_SELECTOR:
+		current_evmcs->guest_ds_selector = value;
+		break;
+	case GUEST_FS_SELECTOR:
+		current_evmcs->guest_fs_selector = value;
+		break;
+	case GUEST_GS_SELECTOR:
+		current_evmcs->guest_gs_selector = value;
+		break;
+	case GUEST_LDTR_SELECTOR:
+		current_evmcs->guest_ldtr_selector = value;
+		break;
+	case GUEST_TR_SELECTOR:
+		current_evmcs->guest_tr_selector = value;
+		break;
+	case VIRTUAL_PROCESSOR_ID:
+		current_evmcs->virtual_processor_id = value;
+		break;
+	default: return 1;
+	}
+
+	return 0;
+}
+
+static inline int evmcs_vmlaunch(void)
+{
+	int ret;
+
+	current_evmcs->hv_clean_fields = 0;
+
+	__asm__ __volatile__("push %%rbp;"
+			     "push %%rcx;"
+			     "push %%rdx;"
+			     "push %%rsi;"
+			     "push %%rdi;"
+			     "push $0;"
+			     "mov %%rsp, (%[host_rsp]);"
+			     "lea 1f(%%rip), %%rax;"
+			     "mov %%rax, (%[host_rip]);"
+			     "vmlaunch;"
+			     "incq (%%rsp);"
+			     "1: pop %%rax;"
+			     "pop %%rdi;"
+			     "pop %%rsi;"
+			     "pop %%rdx;"
+			     "pop %%rcx;"
+			     "pop %%rbp;"
+			     : [ret]"=&a"(ret)
+			     : [host_rsp]"r"
+			       ((uint64_t)&current_evmcs->host_rsp),
+			       [host_rip]"r"
+			       ((uint64_t)&current_evmcs->host_rip)
+			     : "memory", "cc", "rbx", "r8", "r9", "r10",
+			       "r11", "r12", "r13", "r14", "r15");
+	return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int evmcs_vmresume(void)
+{
+	int ret;
+
+	current_evmcs->hv_clean_fields = 0;
+
+	__asm__ __volatile__("push %%rbp;"
+			     "push %%rcx;"
+			     "push %%rdx;"
+			     "push %%rsi;"
+			     "push %%rdi;"
+			     "push $0;"
+			     "mov %%rsp, (%[host_rsp]);"
+			     "lea 1f(%%rip), %%rax;"
+			     "mov %%rax, (%[host_rip]);"
+			     "vmresume;"
+			     "incq (%%rsp);"
+			     "1: pop %%rax;"
+			     "pop %%rdi;"
+			     "pop %%rsi;"
+			     "pop %%rdx;"
+			     "pop %%rcx;"
+			     "pop %%rbp;"
+			     : [ret]"=&a"(ret)
+			     : [host_rsp]"r"
+			       ((uint64_t)&current_evmcs->host_rsp),
+			       [host_rip]"r"
+			       ((uint64_t)&current_evmcs->host_rip)
+			     : "memory", "cc", "rbx", "r8", "r9", "r10",
+			       "r11", "r12", "r13", "r14", "r15");
+	return ret;
+}
+
+#endif /* !SELFTEST_KVM_EVMCS_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 3acf9a91704c..a4e59e3b4826 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -7,7 +7,7 @@
  *
  */
 #ifndef SELFTEST_KVM_UTIL_H
-#define SELFTEST_KVM_UTIL_H 1
+#define SELFTEST_KVM_UTIL_H
 
 #include "test_util.h"
 
@@ -17,12 +17,6 @@
 
 #include "sparsebit.h"
 
-/*
- * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be
- * created. Only applies to VMs using EPT.
- */
-#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul
-
 
 /* Callers of kvm_util only have an incomplete/opaque description of the
  * structure kvm_util is using to maintain the state of a VM.
@@ -33,16 +27,23 @@ typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
 typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
 
 /* Minimum allocated guest virtual and physical addresses */
-#define KVM_UTIL_MIN_VADDR 0x2000
+#define KVM_UTIL_MIN_VADDR		0x2000
 
 #define DEFAULT_GUEST_PHY_PAGES		512
 #define DEFAULT_GUEST_STACK_VADDR_MIN	0xab6000
-#define DEFAULT_STACK_PGS               5
+#define DEFAULT_STACK_PGS		5
 
 enum vm_guest_mode {
-	VM_MODE_FLAT48PG,
+	VM_MODE_P52V48_4K,
+	VM_MODE_P52V48_64K,
+	VM_MODE_P40V48_4K,
+	VM_MODE_P40V48_64K,
+	NUM_VM_MODES,
 };
 
+#define vm_guest_mode_string(m) vm_guest_mode_string[m]
+extern const char * const vm_guest_mode_string[];
+
 enum vm_mem_backing_src_type {
 	VM_MEM_SRC_ANONYMOUS,
 	VM_MEM_SRC_ANONYMOUS_THP,
@@ -58,15 +59,15 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm);
 void kvm_vm_release(struct kvm_vm *vmp);
 void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
 
-int kvm_memcmp_hva_gva(void *hva,
-	struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
+		       size_t len);
 
 void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
-	uint32_t data_memslot, uint32_t pgd_memslot);
+		     uint32_t data_memslot, uint32_t pgd_memslot);
 
 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-void vcpu_dump(FILE *stream, struct kvm_vm *vm,
-	uint32_t vcpuid, uint8_t indent);
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
+	       uint8_t indent);
 
 void vm_create_irqchip(struct kvm_vm *vm);
 
@@ -75,13 +76,14 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
 	uint32_t flags);
 
-void vcpu_ioctl(struct kvm_vm *vm,
-	uint32_t vcpuid, unsigned long ioctl, void *arg);
+void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
+		void *arg);
 void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot);
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot,
+		 int gdt_memslot);
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
-	uint32_t data_memslot, uint32_t pgd_memslot);
+			  uint32_t data_memslot, uint32_t pgd_memslot);
 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	      size_t size, uint32_t pgd_memslot);
 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
@@ -93,56 +95,35 @@ struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
 int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
-	struct kvm_mp_state *mp_state);
-void vcpu_regs_get(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_regs *regs);
-void vcpu_regs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_regs *regs);
+		       struct kvm_mp_state *mp_state);
+void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
 void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
-void vcpu_sregs_get(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs);
-void vcpu_sregs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs);
-int _vcpu_sregs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_sregs *sregs);
+void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_sregs *sregs);
+int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_sregs *sregs);
 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
-			  struct kvm_vcpu_events *events);
+		     struct kvm_vcpu_events *events);
 void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
-			  struct kvm_vcpu_events *events);
-uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
-void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
-	uint64_t msr_value);
+		     struct kvm_vcpu_events *events);
 
 const char *exit_reason_str(unsigned int exit_reason);
 
 void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
 void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-	uint32_t pgd_memslot);
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
-	vm_paddr_t paddr_min, uint32_t memslot);
-
-struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
-void vcpu_set_cpuid(
-	struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid);
-
-struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
-
-static inline struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_entry(uint32_t function)
-{
-	return kvm_get_supported_cpuid_index(function, 0);
-}
+		 uint32_t pgd_memslot);
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+			     uint32_t memslot);
+vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+			      vm_paddr_t paddr_min, uint32_t memslot);
 
 struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size,
 				 void *guest_code);
 void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
 
-typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr,
-				 vm_paddr_t vmxon_paddr,
-				 vm_vaddr_t vmcs_vaddr,
-				 vm_paddr_t vmcs_paddr);
-
 struct kvm_userspace_memory_region *
 kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
 				 uint64_t end);
@@ -152,43 +133,49 @@ allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
 
 int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
 
-#define GUEST_PORT_SYNC         0x1000
-#define GUEST_PORT_ABORT        0x1001
-#define GUEST_PORT_DONE         0x1002
-
-static inline void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
-{
-	__asm__ __volatile__("in %[port], %%al"
-			     :
-			     : [port]"d"(port), "D"(arg0), "S"(arg1)
-			     : "rax");
-}
-
-/*
- * Allows to pass three arguments to the host: port is 16bit wide,
- * arg0 & arg1 are 64bit wide
- */
-#define GUEST_SYNC_ARGS(_port, _arg0, _arg1) \
-	__exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
-
-#define GUEST_ASSERT(_condition) do {				\
-		if (!(_condition))				\
-			GUEST_SYNC_ARGS(GUEST_PORT_ABORT,	\
-					"Failed guest assert: "	\
-					#_condition, __LINE__);	\
-	} while (0)
-
-#define GUEST_SYNC(stage)  GUEST_SYNC_ARGS(GUEST_PORT_SYNC, "hello", stage)
+#define sync_global_to_guest(vm, g) ({				\
+	typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g));	\
+	memcpy(_p, &(g), sizeof(g));				\
+})
+
+#define sync_global_from_guest(vm, g) ({			\
+	typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g));	\
+	memcpy(&(g), _p, sizeof(g));				\
+})
+
+/* ucall implementation types */
+typedef enum {
+	UCALL_PIO,
+	UCALL_MMIO,
+} ucall_type_t;
+
+/* Common ucalls */
+enum {
+	UCALL_NONE,
+	UCALL_SYNC,
+	UCALL_ABORT,
+	UCALL_DONE,
+};
 
-#define GUEST_DONE()  GUEST_SYNC_ARGS(GUEST_PORT_DONE, 0, 0)
+#define UCALL_MAX_ARGS 6
 
-struct guest_args {
-	uint64_t arg0;
-	uint64_t arg1;
-	uint16_t port;
-} __attribute__ ((packed));
+struct ucall {
+	uint64_t cmd;
+	uint64_t args[UCALL_MAX_ARGS];
+};
 
-void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
-		     struct guest_args *args);
+void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg);
+void ucall_uninit(struct kvm_vm *vm);
+void ucall(uint64_t cmd, int nargs, ...);
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
+
+#define GUEST_SYNC(stage)	ucall(UCALL_SYNC, 2, "hello", stage)
+#define GUEST_DONE()		ucall(UCALL_DONE, 0)
+#define GUEST_ASSERT(_condition) do {			\
+	if (!(_condition))				\
+		ucall(UCALL_ABORT, 2,			\
+			"Failed guest assert: "		\
+			#_condition, __LINE__);		\
+} while (0)
 
 #endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
index 54cfeb6568d3..31e030915c1f 100644
--- a/tools/testing/selftests/kvm/include/sparsebit.h
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -15,8 +15,8 @@
  * even in the case where most bits are set.
  */
 
-#ifndef _TEST_SPARSEBIT_H_
-#define _TEST_SPARSEBIT_H_
+#ifndef SELFTEST_KVM_SPARSEBIT_H
+#define SELFTEST_KVM_SPARSEBIT_H
 
 #include <stdbool.h>
 #include <stdint.h>
@@ -72,4 +72,4 @@ void sparsebit_validate_internal(struct sparsebit *sbit);
 }
 #endif
 
-#endif /* _TEST_SPARSEBIT_H_ */
+#endif /* SELFTEST_KVM_SPARSEBIT_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 73c3933436ec..c7dafe8bd02c 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -7,8 +7,8 @@
  *
  */
 
-#ifndef TEST_UTIL_H
-#define TEST_UTIL_H 1
+#ifndef SELFTEST_KVM_TEST_UTIL_H
+#define SELFTEST_KVM_TEST_UTIL_H
 
 #include <stdlib.h>
 #include <stdarg.h>
@@ -41,4 +41,4 @@ void test_assert(bool exp, const char *exp_str,
 		    #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
 } while (0)
 
-#endif /* TEST_UTIL_H */
+#endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 42c3596815b8..e2884c2b81ff 100644
--- a/tools/testing/selftests/kvm/include/x86.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -1,5 +1,5 @@
 /*
- * tools/testing/selftests/kvm/include/x86.h
+ * tools/testing/selftests/kvm/include/x86_64/processor.h
  *
  * Copyright (C) 2018, Google LLC.
  *
@@ -7,8 +7,8 @@
  *
  */
 
-#ifndef SELFTEST_KVM_X86_H
-#define SELFTEST_KVM_X86_H
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
 
 #include <assert.h>
 #include <stdint.h>
@@ -305,7 +305,25 @@ static inline unsigned long get_xmm(int n)
 
 struct kvm_x86_state;
 struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state);
+void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
+		     struct kvm_x86_state *state);
+
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_cpuid2 *cpuid);
+
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
+
+static inline struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_entry(uint32_t function)
+{
+	return kvm_get_supported_cpuid_index(function, 0);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+	  	  uint64_t msr_value);
 
 /*
  * Basic CPU control in CR0
@@ -1044,4 +1062,4 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
 #define MSR_VM_IGNNE                    0xc0010115
 #define MSR_VM_HSAVE_PA                 0xc0010117
 
-#endif /* !SELFTEST_KVM_X86_H */
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index b9ffe1024d3a..c9bd935b939c 100644
--- a/tools/testing/selftests/kvm/include/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -1,5 +1,5 @@
 /*
- * tools/testing/selftests/kvm/include/vmx.h
+ * tools/testing/selftests/kvm/include/x86_64/vmx.h
  *
  * Copyright (C) 2018, Google LLC.
  *
@@ -11,7 +11,7 @@
 #define SELFTEST_KVM_VMX_H
 
 #include <stdint.h>
-#include "x86.h"
+#include "processor.h"
 
 #define CPUID_VMX_BIT				5
 
@@ -339,6 +339,8 @@ struct vmx_msr_entry {
 	uint64_t value;
 } __attribute__ ((aligned(16)));
 
+#include "evmcs.h"
+
 static inline int vmxon(uint64_t phys)
 {
 	uint8_t ret;
@@ -372,6 +374,9 @@ static inline int vmptrld(uint64_t vmcs_pa)
 {
 	uint8_t ret;
 
+	if (enable_evmcs)
+		return -1;
+
 	__asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
 		: [ret]"=rm"(ret)
 		: [pa]"m"(vmcs_pa)
@@ -385,6 +390,9 @@ static inline int vmptrst(uint64_t *value)
 	uint64_t tmp;
 	uint8_t ret;
 
+	if (enable_evmcs)
+		return evmcs_vmptrst(value);
+
 	__asm__ __volatile__("vmptrst %[value]; setna %[ret]"
 		: [value]"=m"(tmp), [ret]"=rm"(ret)
 		: : "cc", "memory");
@@ -411,6 +419,9 @@ static inline int vmlaunch(void)
 {
 	int ret;
 
+	if (enable_evmcs)
+		return evmcs_vmlaunch();
+
 	__asm__ __volatile__("push %%rbp;"
 			     "push %%rcx;"
 			     "push %%rdx;"
@@ -443,6 +454,9 @@ static inline int vmresume(void)
 {
 	int ret;
 
+	if (enable_evmcs)
+		return evmcs_vmresume();
+
 	__asm__ __volatile__("push %%rbp;"
 			     "push %%rcx;"
 			     "push %%rdx;"
@@ -482,6 +496,9 @@ static inline int vmread(uint64_t encoding, uint64_t *value)
 	uint64_t tmp;
 	uint8_t ret;
 
+	if (enable_evmcs)
+		return evmcs_vmread(encoding, value);
+
 	__asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
 		: [value]"=rm"(tmp), [ret]"=rm"(ret)
 		: [encoding]"r"(encoding)
@@ -506,6 +523,9 @@ static inline int vmwrite(uint64_t encoding, uint64_t value)
 {
 	uint8_t ret;
 
+	if (enable_evmcs)
+		return evmcs_vmwrite(encoding, value);
+
 	__asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
 		: [ret]"=rm"(ret)
 		: [value]"rm"(value), [encoding]"r"(encoding)
@@ -543,10 +563,19 @@ struct vmx_pages {
 	void *vmwrite_hva;
 	uint64_t vmwrite_gpa;
 	void *vmwrite;
+
+	void *vp_assist_hva;
+	uint64_t vp_assist_gpa;
+	void *vp_assist;
+
+	void *enlightened_vmcs_hva;
+	uint64_t enlightened_vmcs_gpa;
+	void *enlightened_vmcs;
 };
 
 struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
 bool prepare_for_vmx_operation(struct vmx_pages *vmx);
 void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
+bool load_vmcs(struct vmx_pages *vmx);
 
-#endif /* !SELFTEST_KVM_VMX_H */
+#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
new file mode 100644
index 000000000000..b6022e2f116e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AArch64 code
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR		0x180000
+#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN	0xac0000
+
+static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+{
+	return (v + vm->page_size) & ~(vm->page_size - 1);
+}
+
+static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+	uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;
+
+	return (gva >> shift) & mask;
+}
+
+static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
+	uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+	TEST_ASSERT(vm->pgtable_levels == 4,
+		"Mode %d does not have 4 page table levels", vm->mode);
+
+	return (gva >> shift) & mask;
+}
+
+static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
+	uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+	TEST_ASSERT(vm->pgtable_levels >= 3,
+		"Mode %d does not have >= 3 page table levels", vm->mode);
+
+	return (gva >> shift) & mask;
+}
+
+static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+	return (gva >> vm->page_shift) & mask;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+	uint64_t mask = ((1UL << (vm->va_bits - vm->page_shift)) - 1) << vm->page_shift;
+	return entry & mask;
+}
+
+static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
+{
+	unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+	return 1 << (vm->va_bits - shift);
+}
+
+static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+{
+	return 1 << (vm->page_shift - 3);
+}
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+{
+	int rc;
+
+	if (!vm->pgd_created) {
+		vm_paddr_t paddr = vm_phy_pages_alloc(vm,
+			page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+		vm->pgd = paddr;
+		vm->pgd_created = true;
+	}
+}
+
+void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+		  uint32_t pgd_memslot, uint64_t flags)
+{
+	uint8_t attr_idx = flags & 7;
+	uint64_t *ptep;
+
+	TEST_ASSERT((vaddr % vm->page_size) == 0,
+		"Virtual address not on page boundary,\n"
+		"  vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+		(vaddr >> vm->page_shift)),
+		"Invalid virtual address, vaddr: 0x%lx", vaddr);
+	TEST_ASSERT((paddr % vm->page_size) == 0,
+		"Physical address not on page boundary,\n"
+		"  paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+		"Physical address beyond beyond maximum supported,\n"
+		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+		paddr, vm->max_gfn, vm->page_size);
+
+	ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
+	if (!*ptep) {
+		*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+		*ptep |= 3;
+	}
+
+	switch (vm->pgtable_levels) {
+	case 4:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
+		if (!*ptep) {
+			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+			*ptep |= 3;
+		}
+		/* fall through */
+	case 3:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
+		if (!*ptep) {
+			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+			*ptep |= 3;
+		}
+		/* fall through */
+	case 2:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
+		break;
+	default:
+		TEST_ASSERT(false, "Page table levels must be 2, 3, or 4");
+	}
+
+	*ptep = paddr | 3;
+	*ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
+}
+
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+		 uint32_t pgd_memslot)
+{
+	uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
+
+	_virt_pg_map(vm, vaddr, paddr, pgd_memslot, attr_idx);
+}
+
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	uint64_t *ptep;
+
+	if (!vm->pgd_created)
+		goto unmapped_gva;
+
+	ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
+	if (!ptep)
+		goto unmapped_gva;
+
+	switch (vm->pgtable_levels) {
+	case 4:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
+		if (!ptep)
+			goto unmapped_gva;
+		/* fall through */
+	case 3:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
+		if (!ptep)
+			goto unmapped_gva;
+		/* fall through */
+	case 2:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
+		if (!ptep)
+			goto unmapped_gva;
+		break;
+	default:
+		TEST_ASSERT(false, "Page table levels must be 2, 3, or 4");
+	}
+
+	return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+
+unmapped_gva:
+	TEST_ASSERT(false, "No mapping for vm virtual address, "
+		    "gva: 0x%lx", gva);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
+{
+#ifdef DEBUG_VM
+	static const char * const type[] = { "", "pud", "pmd", "pte" };
+	uint64_t pte, *ptep;
+
+	if (level == 4)
+		return;
+
+	for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+		ptep = addr_gpa2hva(vm, pte);
+		if (!*ptep)
+			continue;
+		printf("%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
+		pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1);
+	}
+#endif
+}
+
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+	int level = 4 - (vm->pgtable_levels - 1);
+	uint64_t pgd, *ptep;
+
+	if (!vm->pgd_created)
+		return;
+
+	for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
+		ptep = addr_gpa2hva(vm, pgd);
+		if (!*ptep)
+			continue;
+		printf("%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
+		pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level);
+	}
+}
+
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+				 void *guest_code)
+{
+	uint64_t ptrs_per_4k_pte = 512;
+	uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2;
+	struct kvm_vm *vm;
+
+	vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+	vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+	return vm;
+}
+
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+	size_t stack_size = vm->page_size == 4096 ?
+					DEFAULT_STACK_PGS * vm->page_size :
+					vm->page_size;
+	uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
+					DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);
+
+	vm_vcpu_add(vm, vcpuid, 0, 0);
+
+	set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
+	set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+}
+
+void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
+{
+	struct kvm_vcpu_init init;
+	uint64_t sctlr_el1, tcr_el1;
+
+	memset(&init, 0, sizeof(init));
+	init.target = KVM_ARM_TARGET_GENERIC_V8;
+	vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, &init);
+
+	/*
+	 * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
+	 * registers, which the variable argument list macros do.
+	 */
+	set_reg(vm, vcpuid, ARM64_SYS_REG(CPACR_EL1), 3 << 20);
+
+	get_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), &sctlr_el1);
+	get_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), &tcr_el1);
+
+	switch (vm->mode) {
+	case VM_MODE_P52V48_4K:
+		tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+		tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+		break;
+	case VM_MODE_P52V48_64K:
+		tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+		tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+		break;
+	case VM_MODE_P40V48_4K:
+		tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+		tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+		break;
+	case VM_MODE_P40V48_64K:
+		tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+		tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+		break;
+	default:
+		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
+	}
+
+	sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
+	/* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
+	tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
+	tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
+
+	set_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), sctlr_el1);
+	set_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), tcr_el1);
+	set_reg(vm, vcpuid, ARM64_SYS_REG(MAIR_EL1), DEFAULT_MAIR_EL1);
+	set_reg(vm, vcpuid, ARM64_SYS_REG(TTBR0_EL1), vm->pgd);
+}
+
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+	uint64_t pstate, pc;
+
+	get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pstate), &pstate);
+	get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &pc);
+
+        fprintf(stream, "%*spstate: 0x%.16llx pc: 0x%.16llx\n",
+                indent, "", pstate, pc);
+
+}
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index cd01144d27c8..6398efe67885 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -13,7 +13,7 @@
 #include <execinfo.h>
 #include <sys/syscall.h>
 
-#include "../../kselftest.h"
+#include "kselftest.h"
 
 /* Dumps the current stack trace to stderr. */
 static void __attribute__((noinline)) test_dump_stack(void);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 6fd8c089cafc..8c06da4f03db 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -16,10 +16,8 @@
 #include <sys/stat.h>
 #include <linux/kernel.h>
 
-#define KVM_DEV_PATH "/dev/kvm"
-
 #define KVM_UTIL_PGS_PER_HUGEPG 512
-#define KVM_UTIL_MIN_PADDR      0x2000
+#define KVM_UTIL_MIN_PFN	2
 
 /* Aligns x up to the next multiple of size. Size must be a power of 2. */
 static void *align(void *x, size_t size)
@@ -30,7 +28,8 @@ static void *align(void *x, size_t size)
 	return (void *) (((size_t) x + mask) & ~mask);
 }
 
-/* Capability
+/*
+ * Capability
  *
  * Input Args:
  *   cap - Capability
@@ -92,16 +91,23 @@ static void vm_open(struct kvm_vm *vm, int perm)
 	if (vm->kvm_fd < 0)
 		exit(KSFT_SKIP);
 
-	/* Create VM. */
 	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, NULL);
 	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
 		"rc: %i errno: %i", vm->fd, errno);
 }
 
-/* VM Create
+const char * const vm_guest_mode_string[] = {
+	"PA-bits:52, VA-bits:48, 4K pages",
+	"PA-bits:52, VA-bits:48, 64K pages",
+	"PA-bits:40, VA-bits:48, 4K pages",
+	"PA-bits:40, VA-bits:48, 64K pages",
+};
+
+/*
+ * VM Create
  *
  * Input Args:
- *   mode - VM Mode (e.g. VM_MODE_FLAT48PG)
+ *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
  *   phy_pages - Physical memory pages
  *   perm - permission
  *
@@ -110,7 +116,7 @@ static void vm_open(struct kvm_vm *vm, int perm)
  * Return:
  *   Pointer to opaque structure that describes the created VM.
  *
- * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG).
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
  * When phy_pages is non-zero, a memory region of phy_pages physical pages
  * is created and mapped starting at guest physical address 0.  The file
  * descriptor to control the created VM is created with the permissions
@@ -121,7 +127,6 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	struct kvm_vm *vm;
 	int kvm_fd;
 
-	/* Allocate memory. */
 	vm = calloc(1, sizeof(*vm));
 	TEST_ASSERT(vm != NULL, "Insufficent Memory");
 
@@ -130,26 +135,48 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 
 	/* Setup mode specific traits. */
 	switch (vm->mode) {
-	case VM_MODE_FLAT48PG:
+	case VM_MODE_P52V48_4K:
+		vm->pgtable_levels = 4;
 		vm->page_size = 0x1000;
 		vm->page_shift = 12;
-
-		/* Limit to 48-bit canonical virtual addresses. */
-		vm->vpages_valid = sparsebit_alloc();
-		sparsebit_set_num(vm->vpages_valid,
-			0, (1ULL << (48 - 1)) >> vm->page_shift);
-		sparsebit_set_num(vm->vpages_valid,
-			(~((1ULL << (48 - 1)) - 1)) >> vm->page_shift,
-			(1ULL << (48 - 1)) >> vm->page_shift);
-
-		/* Limit physical addresses to 52-bits. */
-		vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1;
+		vm->va_bits = 48;
+		break;
+	case VM_MODE_P52V48_64K:
+		vm->pgtable_levels = 3;
+		vm->pa_bits = 52;
+		vm->page_size = 0x10000;
+		vm->page_shift = 16;
+		vm->va_bits = 48;
+		break;
+	case VM_MODE_P40V48_4K:
+		vm->pgtable_levels = 4;
+		vm->pa_bits = 40;
+		vm->va_bits = 48;
+		vm->page_size = 0x1000;
+		vm->page_shift = 12;
+		break;
+	case VM_MODE_P40V48_64K:
+		vm->pgtable_levels = 3;
+		vm->pa_bits = 40;
+		vm->va_bits = 48;
+		vm->page_size = 0x10000;
+		vm->page_shift = 16;
 		break;
-
 	default:
 		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
 	}
 
+	/* Limit to VA-bit canonical virtual addresses. */
+	vm->vpages_valid = sparsebit_alloc();
+	sparsebit_set_num(vm->vpages_valid,
+		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+	sparsebit_set_num(vm->vpages_valid,
+		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
+		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+
+	/* Limit physical addresses to PA-bits. */
+	vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+
 	/* Allocate and setup memory for guest. */
 	vm->vpages_mapped = sparsebit_alloc();
 	if (phy_pages != 0)
@@ -159,7 +186,8 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	return vm;
 }
 
-/* VM Restart
+/*
+ * VM Restart
  *
  * Input Args:
  *   vm - VM that has been released before
@@ -186,7 +214,8 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
 			    "  rc: %i errno: %i\n"
 			    "  slot: %u flags: 0x%x\n"
 			    "  guest_phys_addr: 0x%lx size: 0x%lx",
-			    ret, errno, region->region.slot, region->region.flags,
+			    ret, errno, region->region.slot,
+			    region->region.flags,
 			    region->region.guest_phys_addr,
 			    region->region.memory_size);
 	}
@@ -202,7 +231,8 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
 		    strerror(-ret));
 }
 
-/* Userspace Memory Region Find
+/*
+ * Userspace Memory Region Find
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -220,8 +250,8 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
  * of the regions is returned.  Null is returned only when no overlapping
  * region exists.
  */
-static struct userspace_mem_region *userspace_mem_region_find(
-	struct kvm_vm *vm, uint64_t start, uint64_t end)
+static struct userspace_mem_region *
+userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
 {
 	struct userspace_mem_region *region;
 
@@ -237,7 +267,8 @@ static struct userspace_mem_region *userspace_mem_region_find(
 	return NULL;
 }
 
-/* KVM Userspace Memory Region Find
+/*
+ * KVM Userspace Memory Region Find
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -265,7 +296,8 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
 	return &region->region;
 }
 
-/* VCPU Find
+/*
+ * VCPU Find
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -280,8 +312,7 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
  * returns a pointer to it.  Returns NULL if the VM doesn't contain a VCPU
  * for the specified vcpuid.
  */
-struct vcpu *vcpu_find(struct kvm_vm *vm,
-	uint32_t vcpuid)
+struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
 {
 	struct vcpu *vcpup;
 
@@ -293,7 +324,8 @@ struct vcpu *vcpu_find(struct kvm_vm *vm,
 	return NULL;
 }
 
-/* VM VCPU Remove
+/*
+ * VM VCPU Remove
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -330,11 +362,9 @@ void kvm_vm_release(struct kvm_vm *vmp)
 {
 	int ret;
 
-	/* Free VCPUs. */
 	while (vmp->vcpu_head)
 		vm_vcpu_rm(vmp, vmp->vcpu_head->id);
 
-	/* Close file descriptor for the VM. */
 	ret = close(vmp->fd);
 	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
 		"  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
@@ -344,7 +374,8 @@ void kvm_vm_release(struct kvm_vm *vmp)
 		"  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
 }
 
-/* Destroys and frees the VM pointed to by vmp.
+/*
+ * Destroys and frees the VM pointed to by vmp.
  */
 void kvm_vm_free(struct kvm_vm *vmp)
 {
@@ -383,7 +414,8 @@ void kvm_vm_free(struct kvm_vm *vmp)
 	free(vmp);
 }
 
-/* Memory Compare, host virtual to guest virtual
+/*
+ * Memory Compare, host virtual to guest virtual
  *
  * Input Args:
  *   hva - Starting host virtual address
@@ -405,23 +437,25 @@ void kvm_vm_free(struct kvm_vm *vmp)
  * a length of len, to the guest bytes starting at the guest virtual
  * address given by gva.
  */
-int kvm_memcmp_hva_gva(void *hva,
-	struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
 {
 	size_t amt;
 
-	/* Compare a batch of bytes until either a match is found
+	/*
+	 * Compare a batch of bytes until either a match is found
 	 * or all the bytes have been compared.
 	 */
 	for (uintptr_t offset = 0; offset < len; offset += amt) {
 		uintptr_t ptr1 = (uintptr_t)hva + offset;
 
-		/* Determine host address for guest virtual address
+		/*
+		 * Determine host address for guest virtual address
 		 * at offset.
 		 */
 		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
 
-		/* Determine amount to compare on this pass.
+		/*
+		 * Determine amount to compare on this pass.
 		 * Don't allow the comparsion to cross a page boundary.
 		 */
 		amt = len - offset;
@@ -433,7 +467,8 @@ int kvm_memcmp_hva_gva(void *hva,
 		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
 		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
 
-		/* Perform the comparison.  If there is a difference
+		/*
+		 * Perform the comparison.  If there is a difference
 		 * return that result to the caller, otherwise need
 		 * to continue on looking for a mismatch.
 		 */
@@ -442,109 +477,15 @@ int kvm_memcmp_hva_gva(void *hva,
 			return ret;
 	}
 
-	/* No mismatch found.  Let the caller know the two memory
+	/*
+	 * No mismatch found.  Let the caller know the two memory
 	 * areas are equal.
 	 */
 	return 0;
 }
 
-/* Allocate an instance of struct kvm_cpuid2
- *
- * Input Args: None
- *
- * Output Args: None
- *
- * Return: A pointer to the allocated struct. The caller is responsible
- * for freeing this struct.
- *
- * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
- * array to be decided at allocation time, allocation is slightly
- * complicated. This function uses a reasonable default length for
- * the array and performs the appropriate allocation.
- */
-static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
-{
-	struct kvm_cpuid2 *cpuid;
-	int nent = 100;
-	size_t size;
-
-	size = sizeof(*cpuid);
-	size += nent * sizeof(struct kvm_cpuid_entry2);
-	cpuid = malloc(size);
-	if (!cpuid) {
-		perror("malloc");
-		abort();
-	}
-
-	cpuid->nent = nent;
-
-	return cpuid;
-}
-
-/* KVM Supported CPUID Get
- *
- * Input Args: None
- *
- * Output Args:
- *
- * Return: The supported KVM CPUID
- *
- * Get the guest CPUID supported by KVM.
- */
-struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
-{
-	static struct kvm_cpuid2 *cpuid;
-	int ret;
-	int kvm_fd;
-
-	if (cpuid)
-		return cpuid;
-
-	cpuid = allocate_kvm_cpuid2();
-	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-	if (kvm_fd < 0)
-		exit(KSFT_SKIP);
-
-	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
-	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
-		    ret, errno);
-
-	close(kvm_fd);
-	return cpuid;
-}
-
-/* Locate a cpuid entry.
- *
- * Input Args:
- *   cpuid: The cpuid.
- *   function: The function of the cpuid entry to find.
- *
- * Output Args: None
- *
- * Return: A pointer to the cpuid entry. Never returns NULL.
- */
-struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
-{
-	struct kvm_cpuid2 *cpuid;
-	struct kvm_cpuid_entry2 *entry = NULL;
-	int i;
-
-	cpuid = kvm_get_supported_cpuid();
-	for (i = 0; i < cpuid->nent; i++) {
-		if (cpuid->entries[i].function == function &&
-		    cpuid->entries[i].index == index) {
-			entry = &cpuid->entries[i];
-			break;
-		}
-	}
-
-	TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
-		    function, index);
-	return entry;
-}
-
-/* VM Userspace Memory Region Add
+/*
+ * VM Userspace Memory Region Add
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -586,7 +527,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
 		guest_paddr, npages, vm->max_gfn, vm->page_size);
 
-	/* Confirm a mem region with an overlapping address doesn't
+	/*
+	 * Confirm a mem region with an overlapping address doesn't
 	 * already exist.
 	 */
 	region = (struct userspace_mem_region *) userspace_mem_region_find(
@@ -677,7 +619,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	vm->userspace_mem_region_head = region;
 }
 
-/* Memslot to region
+/*
+ * Memslot to region
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -691,8 +634,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
  *   on error (e.g. currently no memory region using memslot as a KVM
  *   memory slot ID).
  */
-static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
-	uint32_t memslot)
+static struct userspace_mem_region *
+memslot2region(struct kvm_vm *vm, uint32_t memslot)
 {
 	struct userspace_mem_region *region;
 
@@ -712,7 +655,8 @@ static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
 	return region;
 }
 
-/* VM Memory Region Flags Set
+/*
+ * VM Memory Region Flags Set
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -730,7 +674,6 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
 	int ret;
 	struct userspace_mem_region *region;
 
-	/* Locate memory region. */
 	region = memslot2region(vm, slot);
 
 	region->region.flags = flags;
@@ -742,7 +685,8 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
 		ret, errno, slot, flags);
 }
 
-/* VCPU mmap Size
+/*
+ * VCPU mmap Size
  *
  * Input Args: None
  *
@@ -772,7 +716,8 @@ static int vcpu_mmap_sz(void)
 	return ret;
 }
 
-/* VM VCPU Add
+/*
+ * VM VCPU Add
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -785,7 +730,8 @@ static int vcpu_mmap_sz(void)
  * Creates and adds to the VM specified by vm and virtual CPU with
  * the ID given by vcpuid.
  */
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot)
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot,
+		 int gdt_memslot)
 {
 	struct vcpu *vcpu;
 
@@ -823,7 +769,8 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_me
 	vcpu_setup(vm, vcpuid, pgd_memslot, gdt_memslot);
 }
 
-/* VM Virtual Address Unused Gap
+/*
+ * VM Virtual Address Unused Gap
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -843,14 +790,14 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_me
  * sz unallocated bytes >= vaddr_min is available.
  */
 static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
-	vm_vaddr_t vaddr_min)
+				      vm_vaddr_t vaddr_min)
 {
 	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
 
 	/* Determine lowest permitted virtual page index. */
 	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
 	if ((pgidx_start * vm->page_size) < vaddr_min)
-			goto no_va_found;
+		goto no_va_found;
 
 	/* Loop over section with enough valid virtual page indexes. */
 	if (!sparsebit_is_set_num(vm->vpages_valid,
@@ -909,7 +856,8 @@ va_found:
 	return pgidx_start * vm->page_size;
 }
 
-/* VM Virtual Address Allocate
+/*
+ * VM Virtual Address Allocate
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -930,13 +878,14 @@ va_found:
  * a page.
  */
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
-	uint32_t data_memslot, uint32_t pgd_memslot)
+			  uint32_t data_memslot, uint32_t pgd_memslot)
 {
 	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
 
 	virt_pgd_alloc(vm, pgd_memslot);
 
-	/* Find an unused range of virtual page addresses of at least
+	/*
+	 * Find an unused range of virtual page addresses of at least
 	 * pages in length.
 	 */
 	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
@@ -946,7 +895,8 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
 		pages--, vaddr += vm->page_size) {
 		vm_paddr_t paddr;
 
-		paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot);
+		paddr = vm_phy_page_alloc(vm,
+				KVM_UTIL_MIN_PFN * vm->page_size, data_memslot);
 
 		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
 
@@ -990,7 +940,8 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	}
 }
 
-/* Address VM Physical to Host Virtual
+/*
+ * Address VM Physical to Host Virtual
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1022,7 +973,8 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
 	return NULL;
 }
 
-/* Address Host Virtual to VM Physical
+/*
+ * Address Host Virtual to VM Physical
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1056,7 +1008,8 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
 	return -1;
 }
 
-/* VM Create IRQ Chip
+/*
+ * VM Create IRQ Chip
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1078,7 +1031,8 @@ void vm_create_irqchip(struct kvm_vm *vm)
 	vm->has_irqchip = true;
 }
 
-/* VM VCPU State
+/*
+ * VM VCPU State
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1100,7 +1054,8 @@ struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
 	return vcpu->state;
 }
 
-/* VM VCPU Run
+/*
+ * VM VCPU Run
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1126,13 +1081,14 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
 	int rc;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-        do {
+	do {
 		rc = ioctl(vcpu->fd, KVM_RUN, NULL);
 	} while (rc == -1 && errno == EINTR);
 	return rc;
 }
 
-/* VM VCPU Set MP State
+/*
+ * VM VCPU Set MP State
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1147,7 +1103,7 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
  * by mp_state.
  */
 void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
-	struct kvm_mp_state *mp_state)
+		       struct kvm_mp_state *mp_state)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
@@ -1159,7 +1115,8 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
 		"rc: %i errno: %i", ret, errno);
 }
 
-/* VM VCPU Regs Get
+/*
+ * VM VCPU Regs Get
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1173,21 +1130,20 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
  * Obtains the current register state for the VCPU specified by vcpuid
  * and stores it at the location given by regs.
  */
-void vcpu_regs_get(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_regs *regs)
+void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Get the regs. */
 	ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
 	TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
 		ret, errno);
 }
 
-/* VM VCPU Regs Set
+/*
+ * VM VCPU Regs Set
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1201,165 +1157,46 @@ void vcpu_regs_get(struct kvm_vm *vm,
  * Sets the regs of the VCPU specified by vcpuid to the values
  * given by regs.
  */
-void vcpu_regs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_regs *regs)
+void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Set the regs. */
 	ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
 	TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
 		ret, errno);
 }
 
 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
-			  struct kvm_vcpu_events *events)
+		     struct kvm_vcpu_events *events)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Get the regs. */
 	ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
 	TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
 		ret, errno);
 }
 
 void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
-			  struct kvm_vcpu_events *events)
+		     struct kvm_vcpu_events *events)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Set the regs. */
 	ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
 	TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
 		ret, errno);
 }
 
-/* VCPU Get MSR
- *
- * Input Args:
- *   vm - Virtual Machine
- *   vcpuid - VCPU ID
- *   msr_index - Index of MSR
- *
- * Output Args: None
- *
- * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
- *
- * Get value of MSR for VCPU.
- */
-uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
-{
-	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
-	struct {
-		struct kvm_msrs header;
-		struct kvm_msr_entry entry;
-	} buffer = {};
-	int r;
-
-	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-	buffer.header.nmsrs = 1;
-	buffer.entry.index = msr_index;
-	r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
-	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
-		"  rc: %i errno: %i", r, errno);
-
-	return buffer.entry.data;
-}
-
-/* VCPU Set MSR
- *
- * Input Args:
- *   vm - Virtual Machine
- *   vcpuid - VCPU ID
- *   msr_index - Index of MSR
- *   msr_value - New value of MSR
- *
- * Output Args: None
- *
- * Return: On success, nothing. On failure a TEST_ASSERT is produced.
- *
- * Set value of MSR for VCPU.
- */
-void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
-	uint64_t msr_value)
-{
-	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
-	struct {
-		struct kvm_msrs header;
-		struct kvm_msr_entry entry;
-	} buffer = {};
-	int r;
-
-	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-	memset(&buffer, 0, sizeof(buffer));
-	buffer.header.nmsrs = 1;
-	buffer.entry.index = msr_index;
-	buffer.entry.data = msr_value;
-	r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
-	TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
-		"  rc: %i errno: %i", r, errno);
-}
-
-/* VM VCPU Args Set
- *
- * Input Args:
- *   vm - Virtual Machine
- *   vcpuid - VCPU ID
- *   num - number of arguments
- *   ... - arguments, each of type uint64_t
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the first num function input arguments to the values
- * given as variable args.  Each of the variable args is expected to
- * be of type uint64_t.
- */
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
-{
-	va_list ap;
-	struct kvm_regs regs;
-
-	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
-		    "  num: %u\n",
-		    num);
-
-	va_start(ap, num);
-	vcpu_regs_get(vm, vcpuid, &regs);
-
-	if (num >= 1)
-		regs.rdi = va_arg(ap, uint64_t);
-
-	if (num >= 2)
-		regs.rsi = va_arg(ap, uint64_t);
-
-	if (num >= 3)
-		regs.rdx = va_arg(ap, uint64_t);
-
-	if (num >= 4)
-		regs.rcx = va_arg(ap, uint64_t);
-
-	if (num >= 5)
-		regs.r8 = va_arg(ap, uint64_t);
-
-	if (num >= 6)
-		regs.r9 = va_arg(ap, uint64_t);
-
-	vcpu_regs_set(vm, vcpuid, &regs);
-	va_end(ap);
-}
-
-/* VM VCPU System Regs Get
+/*
+ * VM VCPU System Regs Get
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1373,22 +1210,20 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
  * Obtains the current system register state for the VCPU specified by
  * vcpuid and stores it at the location given by sregs.
  */
-void vcpu_sregs_get(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs)
+void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Get the regs. */
-	/* Get the regs. */
 	ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
 	TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
 		ret, errno);
 }
 
-/* VM VCPU System Regs Set
+/*
+ * VM VCPU System Regs Set
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1402,27 +1237,25 @@ void vcpu_sregs_get(struct kvm_vm *vm,
  * Sets the system regs of the VCPU specified by vcpuid to the values
  * given by sregs.
  */
-void vcpu_sregs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs)
+void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
 {
 	int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
 	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
 		"rc: %i errno: %i", ret, errno);
 }
 
-int _vcpu_sregs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs)
+int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Get the regs. */
 	return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
 }
 
-/* VCPU Ioctl
+/*
+ * VCPU Ioctl
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1434,8 +1267,8 @@ int _vcpu_sregs_set(struct kvm_vm *vm,
  *
  * Issues an arbitrary ioctl on a VCPU fd.
  */
-void vcpu_ioctl(struct kvm_vm *vm,
-	uint32_t vcpuid, unsigned long cmd, void *arg)
+void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
+		unsigned long cmd, void *arg)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
@@ -1447,7 +1280,8 @@ void vcpu_ioctl(struct kvm_vm *vm,
 		cmd, ret, errno, strerror(errno));
 }
 
-/* VM Ioctl
+/*
+ * VM Ioctl
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1467,7 +1301,8 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
 		cmd, ret, errno, strerror(errno));
 }
 
-/* VM Dump
+/*
+ * VM Dump
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1514,38 +1349,6 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 		vcpu_dump(stream, vm, vcpu->id, indent + 2);
 }
 
-/* VM VCPU Dump
- *
- * Input Args:
- *   vm - Virtual Machine
- *   vcpuid - VCPU ID
- *   indent - Left margin indent amount
- *
- * Output Args:
- *   stream - Output FILE stream
- *
- * Return: None
- *
- * Dumps the current state of the VCPU specified by vcpuid, within the VM
- * given by vm, to the FILE stream given by stream.
- */
-void vcpu_dump(FILE *stream, struct kvm_vm *vm,
-	uint32_t vcpuid, uint8_t indent)
-{
-		struct kvm_regs regs;
-		struct kvm_sregs sregs;
-
-		fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
-
-		fprintf(stream, "%*sregs:\n", indent + 2, "");
-		vcpu_regs_get(vm, vcpuid, &regs);
-		regs_dump(stream, &regs, indent + 4);
-
-		fprintf(stream, "%*ssregs:\n", indent + 2, "");
-		vcpu_sregs_get(vm, vcpuid, &sregs);
-		sregs_dump(stream, &sregs, indent + 4);
-}
-
 /* Known KVM exit reasons */
 static struct exit_reason {
 	unsigned int reason;
@@ -1576,7 +1379,8 @@ static struct exit_reason {
 #endif
 };
 
-/* Exit Reason String
+/*
+ * Exit Reason String
  *
  * Input Args:
  *   exit_reason - Exit reason
@@ -1602,10 +1406,12 @@ const char *exit_reason_str(unsigned int exit_reason)
 	return "Unknown";
 }
 
-/* Physical Page Allocate
+/*
+ * Physical Contiguous Page Allocator
  *
  * Input Args:
  *   vm - Virtual Machine
+ *   num - number of pages
  *   paddr_min - Physical address minimum
  *   memslot - Memory region to allocate page from
  *
@@ -1614,47 +1420,59 @@ const char *exit_reason_str(unsigned int exit_reason)
  * Return:
  *   Starting physical address
  *
- * Within the VM specified by vm, locates an available physical page
- * at or above paddr_min.  If found, the page is marked as in use
- * and its address is returned.  A TEST_ASSERT failure occurs if no
- * page is available at or above paddr_min.
+ * Within the VM specified by vm, locates a range of available physical
+ * pages at or above paddr_min. If found, the pages are marked as in use
+ * and thier base address is returned. A TEST_ASSERT failure occurs if
+ * not enough pages are available at or above paddr_min.
  */
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
-	vm_paddr_t paddr_min, uint32_t memslot)
+vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+			      vm_paddr_t paddr_min, uint32_t memslot)
 {
 	struct userspace_mem_region *region;
-	sparsebit_idx_t pg;
+	sparsebit_idx_t pg, base;
+
+	TEST_ASSERT(num > 0, "Must allocate at least one page");
 
 	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
 		"not divisible by page size.\n"
 		"  paddr_min: 0x%lx page_size: 0x%x",
 		paddr_min, vm->page_size);
 
-	/* Locate memory region. */
 	region = memslot2region(vm, memslot);
+	base = pg = paddr_min >> vm->page_shift;
 
-	/* Locate next available physical page at or above paddr_min. */
-	pg = paddr_min >> vm->page_shift;
-
-	if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
-		pg = sparsebit_next_set(region->unused_phy_pages, pg);
-		if (pg == 0) {
-			fprintf(stderr, "No guest physical page available, "
-				"paddr_min: 0x%lx page_size: 0x%x memslot: %u",
-				paddr_min, vm->page_size, memslot);
-			fputs("---- vm dump ----\n", stderr);
-			vm_dump(stderr, vm, 2);
-			abort();
+	do {
+		for (; pg < base + num; ++pg) {
+			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
+				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
+				break;
+			}
 		}
+	} while (pg && pg != base + num);
+
+	if (pg == 0) {
+		fprintf(stderr, "No guest physical page available, "
+			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
+			paddr_min, vm->page_size, memslot);
+		fputs("---- vm dump ----\n", stderr);
+		vm_dump(stderr, vm, 2);
+		abort();
 	}
 
-	/* Specify page as in use and return its address. */
-	sparsebit_clear(region->unused_phy_pages, pg);
+	for (pg = base; pg < base + num; ++pg)
+		sparsebit_clear(region->unused_phy_pages, pg);
+
+	return base * vm->page_size;
+}
 
-	return pg * vm->page_size;
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+			     uint32_t memslot)
+{
+	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
 }
 
-/* Address Guest Virtual to Host Virtual
+/*
+ * Address Guest Virtual to Host Virtual
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1669,17 +1487,3 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
 {
 	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
 }
-
-void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
-		     struct guest_args *args)
-{
-	struct kvm_run *run = vcpu_state(vm, vcpu_id);
-	struct kvm_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-	vcpu_regs_get(vm, vcpu_id, &regs);
-
-	args->port = run->io.port;
-	args->arg0 = regs.rdi;
-	args->arg1 = regs.rsi;
-}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index 542ed606b338..52701db0f253 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -1,28 +1,29 @@
 /*
- * tools/testing/selftests/kvm/lib/kvm_util.c
+ * tools/testing/selftests/kvm/lib/kvm_util_internal.h
  *
  * Copyright (C) 2018, Google LLC.
  *
  * This work is licensed under the terms of the GNU GPL, version 2.
  */
 
-#ifndef KVM_UTIL_INTERNAL_H
-#define KVM_UTIL_INTERNAL_H 1
+#ifndef SELFTEST_KVM_UTIL_INTERNAL_H
+#define SELFTEST_KVM_UTIL_INTERNAL_H
 
 #include "sparsebit.h"
 
+#define KVM_DEV_PATH		"/dev/kvm"
+
 #ifndef BITS_PER_BYTE
-#define BITS_PER_BYTE           8
+#define BITS_PER_BYTE		8
 #endif
 
 #ifndef BITS_PER_LONG
-#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
+#define BITS_PER_LONG		(BITS_PER_BYTE * sizeof(long))
 #endif
 
 #define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
-#define BITS_TO_LONGS(nr)       DIV_ROUND_UP(nr, BITS_PER_LONG)
+#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_LONG)
 
-/* Concrete definition of struct kvm_vm. */
 struct userspace_mem_region {
 	struct userspace_mem_region *next, *prev;
 	struct kvm_userspace_memory_region region;
@@ -45,14 +46,16 @@ struct kvm_vm {
 	int mode;
 	int kvm_fd;
 	int fd;
+	unsigned int pgtable_levels;
 	unsigned int page_size;
 	unsigned int page_shift;
+	unsigned int pa_bits;
+	unsigned int va_bits;
 	uint64_t max_gfn;
 	struct vcpu *vcpu_head;
 	struct userspace_mem_region *userspace_mem_region_head;
 	struct sparsebit *vpages_valid;
 	struct sparsebit *vpages_mapped;
-
 	bool has_irqchip;
 	bool pgd_created;
 	vm_paddr_t pgd;
@@ -60,13 +63,11 @@ struct kvm_vm {
 	vm_vaddr_t tss;
 };
 
-struct vcpu *vcpu_find(struct kvm_vm *vm,
-	uint32_t vcpuid);
-void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot);
+struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot,
+		int gdt_memslot);
 void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-void regs_dump(FILE *stream, struct kvm_regs *regs,
-	uint8_t indent);
-void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
-	uint8_t indent);
+void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);
 
-#endif
+#endif /* SELFTEST_KVM_UTIL_INTERNAL_H */
diff --git a/tools/testing/selftests/kvm/lib/ucall.c b/tools/testing/selftests/kvm/lib/ucall.c
new file mode 100644
index 000000000000..4777f9bb5194
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/ucall.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+static ucall_type_t ucall_type;
+static vm_vaddr_t *ucall_exit_mmio_addr;
+
+static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+	if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
+		return false;
+
+	virt_pg_map(vm, gpa, gpa, 0);
+
+	ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
+	sync_global_to_guest(vm, ucall_exit_mmio_addr);
+
+	return true;
+}
+
+void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg)
+{
+	ucall_type = type;
+	sync_global_to_guest(vm, ucall_type);
+
+	if (type == UCALL_PIO)
+		return;
+
+	if (type == UCALL_MMIO) {
+		vm_paddr_t gpa, start, end, step;
+		bool ret;
+
+		if (arg) {
+			gpa = (vm_paddr_t)arg;
+			ret = ucall_mmio_init(vm, gpa);
+			TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa);
+			return;
+		}
+
+		/*
+		 * Find an address within the allowed virtual address space,
+		 * that does _not_ have a KVM memory region associated with it.
+		 * Identity mapping an address like this allows the guest to
+		 * access it, but as KVM doesn't know what to do with it, it
+		 * will assume it's something userspace handles and exit with
+		 * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64.
+		 * Here we start with a guess that the addresses around two
+		 * thirds of the VA space are unmapped and then work both down
+		 * and up from there in 1/6 VA space sized steps.
+		 */
+		start = 1ul << (vm->va_bits * 2 / 3);
+		end = 1ul << vm->va_bits;
+		step = 1ul << (vm->va_bits / 6);
+		for (gpa = start; gpa >= 0; gpa -= step) {
+			if (ucall_mmio_init(vm, gpa & ~(vm->page_size - 1)))
+				return;
+		}
+		for (gpa = start + step; gpa < end; gpa += step) {
+			if (ucall_mmio_init(vm, gpa & ~(vm->page_size - 1)))
+				return;
+		}
+		TEST_ASSERT(false, "Can't find a ucall mmio address");
+	}
+}
+
+void ucall_uninit(struct kvm_vm *vm)
+{
+	ucall_type = 0;
+	sync_global_to_guest(vm, ucall_type);
+	ucall_exit_mmio_addr = 0;
+	sync_global_to_guest(vm, ucall_exit_mmio_addr);
+}
+
+static void ucall_pio_exit(struct ucall *uc)
+{
+#ifdef __x86_64__
+	asm volatile("in %[port], %%al"
+		: : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax");
+#endif
+}
+
+static void ucall_mmio_exit(struct ucall *uc)
+{
+	*ucall_exit_mmio_addr = (vm_vaddr_t)uc;
+}
+
+void ucall(uint64_t cmd, int nargs, ...)
+{
+	struct ucall uc = {
+		.cmd = cmd,
+	};
+	va_list va;
+	int i;
+
+	nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+
+	va_start(va, nargs);
+	for (i = 0; i < nargs; ++i)
+		uc.args[i] = va_arg(va, uint64_t);
+	va_end(va);
+
+	switch (ucall_type) {
+	case UCALL_PIO:
+		ucall_pio_exit(&uc);
+		break;
+	case UCALL_MMIO:
+		ucall_mmio_exit(&uc);
+		break;
+	};
+}
+
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+{
+	struct kvm_run *run = vcpu_state(vm, vcpu_id);
+
+	memset(uc, 0, sizeof(*uc));
+
+#ifdef __x86_64__
+	if (ucall_type == UCALL_PIO && run->exit_reason == KVM_EXIT_IO &&
+	    run->io.port == UCALL_PIO_PORT) {
+		struct kvm_regs regs;
+		vcpu_regs_get(vm, vcpu_id, &regs);
+		memcpy(uc, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(*uc));
+		return uc->cmd;
+	}
+#endif
+	if (ucall_type == UCALL_MMIO && run->exit_reason == KVM_EXIT_MMIO &&
+	    run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
+		vm_vaddr_t gva;
+		TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
+			    "Unexpected ucall exit mmio address access");
+		gva = *(vm_vaddr_t *)run->mmio.data;
+		memcpy(uc, addr_gva2hva(vm, gva), sizeof(*uc));
+	}
+
+	return uc->cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index a3122f1949a8..f28127f4a3af 100644
--- a/tools/testing/selftests/kvm/lib/x86.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1,5 +1,5 @@
 /*
- * tools/testing/selftests/kvm/lib/x86.c
+ * tools/testing/selftests/kvm/lib/x86_64/processor.c
  *
  * Copyright (C) 2018, Google LLC.
  *
@@ -10,8 +10,8 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
-#include "kvm_util_internal.h"
-#include "x86.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
 
 /* Minimum physical address used for virtual translation tables. */
 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
@@ -231,7 +231,7 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
 {
 	int rc;
 
-	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
 	/* If needed, create page map l4 table. */
@@ -264,7 +264,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	uint16_t index[4];
 	struct pageMapL4Entry *pml4e;
 
-	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
 	TEST_ASSERT((vaddr % vm->page_size) == 0,
@@ -551,7 +551,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 	struct pageTableEntry *pte;
 	void *hva;
 
-	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
 	index[0] = (gva >> 12) & 0x1ffu;
@@ -624,9 +624,9 @@ void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
 	kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
 
 	switch (vm->mode) {
-	case VM_MODE_FLAT48PG:
+	case VM_MODE_P52V48_4K:
 		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
-		sregs.cr4 |= X86_CR4_PAE;
+		sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
 		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
 
 		kvm_seg_set_unusable(&sregs.ldt);
@@ -672,6 +672,102 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 	vcpu_set_mp_state(vm, vcpuid, &mp_state);
 }
 
+/* Allocate an instance of struct kvm_cpuid2
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the allocated struct. The caller is responsible
+ * for freeing this struct.
+ *
+ * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
+ * array to be decided at allocation time, allocation is slightly
+ * complicated. This function uses a reasonable default length for
+ * the array and performs the appropriate allocation.
+ */
+static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
+{
+	struct kvm_cpuid2 *cpuid;
+	int nent = 100;
+	size_t size;
+
+	size = sizeof(*cpuid);
+	size += nent * sizeof(struct kvm_cpuid_entry2);
+	cpuid = malloc(size);
+	if (!cpuid) {
+		perror("malloc");
+		abort();
+	}
+
+	cpuid->nent = nent;
+
+	return cpuid;
+}
+
+/* KVM Supported CPUID Get
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ *
+ * Return: The supported KVM CPUID
+ *
+ * Get the guest CPUID supported by KVM.
+ */
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+{
+	static struct kvm_cpuid2 *cpuid;
+	int ret;
+	int kvm_fd;
+
+	if (cpuid)
+		return cpuid;
+
+	cpuid = allocate_kvm_cpuid2();
+	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+	if (kvm_fd < 0)
+		exit(KSFT_SKIP);
+
+	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
+	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
+		    ret, errno);
+
+	close(kvm_fd);
+	return cpuid;
+}
+
+/* Locate a cpuid entry.
+ *
+ * Input Args:
+ *   cpuid: The cpuid.
+ *   function: The function of the cpuid entry to find.
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the cpuid entry. Never returns NULL.
+ */
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
+{
+	struct kvm_cpuid2 *cpuid;
+	struct kvm_cpuid_entry2 *entry = NULL;
+	int i;
+
+	cpuid = kvm_get_supported_cpuid();
+	for (i = 0; i < cpuid->nent; i++) {
+		if (cpuid->entries[i].function == function &&
+		    cpuid->entries[i].index == index) {
+			entry = &cpuid->entries[i];
+			break;
+		}
+	}
+
+	TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
+		    function, index);
+	return entry;
+}
+
 /* VM VCPU CPUID Set
  *
  * Input Args:
@@ -698,6 +794,7 @@ void vcpu_set_cpuid(struct kvm_vm *vm,
 		    rc, errno);
 
 }
+
 /* Create a VM with reasonable defaults
  *
  * Input Args:
@@ -726,7 +823,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
 
 	/* Create VM */
-	vm = vm_create(VM_MODE_FLAT48PG,
+	vm = vm_create(VM_MODE_P52V48_4K,
 		       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
 		       O_RDWR);
 
@@ -742,6 +839,154 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 	return vm;
 }
 
+/* VCPU Get MSR
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   msr_index - Index of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
+ *
+ * Get value of MSR for VCPU.
+ */
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	struct {
+		struct kvm_msrs header;
+		struct kvm_msr_entry entry;
+	} buffer = {};
+	int r;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+	buffer.header.nmsrs = 1;
+	buffer.entry.index = msr_index;
+	r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
+	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
+		"  rc: %i errno: %i", r, errno);
+
+	return buffer.entry.data;
+}
+
+/* VCPU Set MSR
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   msr_index - Index of MSR
+ *   msr_value - New value of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, nothing. On failure a TEST_ASSERT is produced.
+ *
+ * Set value of MSR for VCPU.
+ */
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+	uint64_t msr_value)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	struct {
+		struct kvm_msrs header;
+		struct kvm_msr_entry entry;
+	} buffer = {};
+	int r;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+	memset(&buffer, 0, sizeof(buffer));
+	buffer.header.nmsrs = 1;
+	buffer.entry.index = msr_index;
+	buffer.entry.data = msr_value;
+	r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
+	TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
+		"  rc: %i errno: %i", r, errno);
+}
+
+/* VM VCPU Args Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   num - number of arguments
+ *   ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first num function input arguments to the values
+ * given as variable args.  Each of the variable args is expected to
+ * be of type uint64_t.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+	va_list ap;
+	struct kvm_regs regs;
+
+	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
+		    "  num: %u\n",
+		    num);
+
+	va_start(ap, num);
+	vcpu_regs_get(vm, vcpuid, &regs);
+
+	if (num >= 1)
+		regs.rdi = va_arg(ap, uint64_t);
+
+	if (num >= 2)
+		regs.rsi = va_arg(ap, uint64_t);
+
+	if (num >= 3)
+		regs.rdx = va_arg(ap, uint64_t);
+
+	if (num >= 4)
+		regs.rcx = va_arg(ap, uint64_t);
+
+	if (num >= 5)
+		regs.r8 = va_arg(ap, uint64_t);
+
+	if (num >= 6)
+		regs.r9 = va_arg(ap, uint64_t);
+
+	vcpu_regs_set(vm, vcpuid, &regs);
+	va_end(ap);
+}
+
+/*
+ * VM VCPU Dump
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   indent - Left margin indent amount
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by vcpuid, within the VM
+ * given by vm, to the FILE stream given by stream.
+ */
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+	struct kvm_regs regs;
+	struct kvm_sregs sregs;
+
+	fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
+
+	fprintf(stream, "%*sregs:\n", indent + 2, "");
+	vcpu_regs_get(vm, vcpuid, &regs);
+	regs_dump(stream, &regs, indent + 4);
+
+	fprintf(stream, "%*ssregs:\n", indent + 2, "");
+	vcpu_sregs_get(vm, vcpuid, &sregs);
+	sregs_dump(stream, &sregs, indent + 4);
+}
+
 struct kvm_x86_state {
 	struct kvm_vcpu_events events;
 	struct kvm_mp_state mp_state;
diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index b987c3c970eb..771ba6bf751c 100644
--- a/tools/testing/selftests/kvm/lib/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -1,5 +1,5 @@
 /*
- * tools/testing/selftests/kvm/lib/x86.c
+ * tools/testing/selftests/kvm/lib/x86_64/vmx.c
  *
  * Copyright (C) 2018, Google LLC.
  *
@@ -10,9 +10,11 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 #include "vmx.h"
 
+bool enable_evmcs;
+
 /* Allocate memory regions for nested VMX tests.
  *
  * Input Args:
@@ -62,6 +64,20 @@ vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
 	vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
 	memset(vmx->vmwrite_hva, 0, getpagesize());
 
+	/* Setup of a region of guest memory for the VP Assist page. */
+	vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(),
+						0x10000, 0, 0);
+	vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist);
+	vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist);
+
+	/* Setup of a region of guest memory for the enlightened VMCS. */
+	vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(),
+						       0x10000, 0, 0);
+	vmx->enlightened_vmcs_hva =
+		addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs);
+	vmx->enlightened_vmcs_gpa =
+		addr_gva2gpa(vm, (uintptr_t)vmx->enlightened_vmcs);
+
 	*p_vmx_gva = vmx_gva;
 	return vmx;
 }
@@ -107,18 +123,31 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx)
 	if (vmxon(vmx->vmxon_gpa))
 		return false;
 
-	/* Load a VMCS. */
-	*(uint32_t *)(vmx->vmcs) = vmcs_revision();
-	if (vmclear(vmx->vmcs_gpa))
-		return false;
-
-	if (vmptrld(vmx->vmcs_gpa))
-		return false;
+	return true;
+}
 
-	/* Setup shadow VMCS, do not load it yet. */
-	*(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
-	if (vmclear(vmx->shadow_vmcs_gpa))
-		return false;
+bool load_vmcs(struct vmx_pages *vmx)
+{
+	if (!enable_evmcs) {
+		/* Load a VMCS. */
+		*(uint32_t *)(vmx->vmcs) = vmcs_revision();
+		if (vmclear(vmx->vmcs_gpa))
+			return false;
+
+		if (vmptrld(vmx->vmcs_gpa))
+			return false;
+
+		/* Setup shadow VMCS, do not load it yet. */
+		*(uint32_t *)(vmx->shadow_vmcs) =
+			vmcs_revision() | 0x80000000ul;
+		if (vmclear(vmx->shadow_vmcs_gpa))
+			return false;
+	} else {
+		if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa,
+				  vmx->enlightened_vmcs))
+			return false;
+		current_evmcs->revision_id = vmcs_revision();
+	}
 
 	return true;
 }
diff --git a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
index 11ec358bf969..d503a51fad30 100644
--- a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -17,7 +17,7 @@
 #include "test_util.h"
 
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 
 #define X86_FEATURE_XSAVE	(1<<26)
 #define X86_FEATURE_OSXSAVE	(1<<27)
@@ -67,6 +67,7 @@ int main(int argc, char *argv[])
 	struct kvm_vm *vm;
 	struct kvm_sregs sregs;
 	struct kvm_cpuid_entry2 *entry;
+	struct ucall uc;
 	int rc;
 
 	entry = kvm_get_supported_cpuid_entry(1);
@@ -87,21 +88,20 @@ int main(int argc, char *argv[])
 		rc = _vcpu_run(vm, VCPU_ID);
 
 		if (run->exit_reason == KVM_EXIT_IO) {
-			switch (run->io.port) {
-			case GUEST_PORT_SYNC:
+			switch (get_ucall(vm, VCPU_ID, &uc)) {
+			case UCALL_SYNC:
 				/* emulate hypervisor clearing CR4.OSXSAVE */
 				vcpu_sregs_get(vm, VCPU_ID, &sregs);
 				sregs.cr4 &= ~X86_CR4_OSXSAVE;
 				vcpu_sregs_set(vm, VCPU_ID, &sregs);
 				break;
-			case GUEST_PORT_ABORT:
+			case UCALL_ABORT:
 				TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
 				break;
-			case GUEST_PORT_DONE:
+			case UCALL_DONE:
 				goto done;
 			default:
-				TEST_ASSERT(false, "Unknown port 0x%x.",
-					    run->io.port);
+				TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
 			}
 		}
 	}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
new file mode 100644
index 000000000000..92c2cfd1b182
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for Enlightened VMCS, including nested guest state.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "vmx.h"
+
+#define VCPU_ID		5
+
+static bool have_nested_state;
+
+void l2_guest_code(void)
+{
+	GUEST_SYNC(6);
+
+	GUEST_SYNC(7);
+
+	/* Done, exit to L1 and never come back.  */
+	vmcall();
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
+
+	GUEST_ASSERT(vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_SYNC(3);
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+
+	GUEST_SYNC(4);
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_SYNC(5);
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+	GUEST_SYNC(8);
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	GUEST_SYNC(9);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+	GUEST_SYNC(1);
+	GUEST_SYNC(2);
+
+	if (vmx_pages)
+		l1_guest_code(vmx_pages);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	struct vmx_pages *vmx_pages = NULL;
+	vm_vaddr_t vmx_pages_gva = 0;
+
+	struct kvm_regs regs1, regs2;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_x86_state *state;
+	struct ucall uc;
+	int stage;
+	uint16_t evmcs_ver;
+	struct kvm_enable_cap enable_evmcs_cap = {
+		.cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
+		 .args[0] = (unsigned long)&evmcs_ver
+	};
+
+	struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	if (!kvm_check_cap(KVM_CAP_NESTED_STATE) ||
+	    !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+		printf("capabilities not available, skipping test\n");
+		exit(KSFT_SKIP);
+	}
+
+	vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+	vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+
+	for (stage = 1;; stage++) {
+		_vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Unexpected exit reason: %u (%s),\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		memset(&regs1, 0, sizeof(regs1));
+		vcpu_regs_get(vm, VCPU_ID, &regs1);
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
+				    __FILE__, uc.args[1]);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+		}
+
+		/* UCALL_SYNC is handled here.  */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
+			    stage, (ulong)uc.args[1]);
+
+		state = vcpu_save_state(vm, VCPU_ID);
+		kvm_vm_release(vm);
+
+		/* Restore state in a new VM.  */
+		kvm_vm_restart(vm, O_RDWR);
+		vm_vcpu_add(vm, VCPU_ID, 0, 0);
+		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+		vcpu_load_state(vm, VCPU_ID, state);
+		run = vcpu_state(vm, VCPU_ID);
+		free(state);
+
+		memset(&regs2, 0, sizeof(regs2));
+		vcpu_regs_get(vm, VCPU_ID, &regs2);
+		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+	}
+
+done:
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index 3764e7121265..eb3e7a838cb4 100644
--- a/tools/testing/selftests/kvm/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -19,7 +19,7 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 
 #define VCPU_ID 0
 #define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
@@ -48,7 +48,7 @@ static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable)
 static void test_msr_platform_info_enabled(struct kvm_vm *vm)
 {
 	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
-	struct guest_args args;
+	struct ucall uc;
 
 	set_msr_platform_info_enabled(vm, true);
 	vcpu_run(vm, VCPU_ID);
@@ -56,11 +56,11 @@ static void test_msr_platform_info_enabled(struct kvm_vm *vm)
 			"Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
 			run->exit_reason,
 			exit_reason_str(run->exit_reason));
-	guest_args_read(vm, VCPU_ID, &args);
-	TEST_ASSERT(args.port == GUEST_PORT_SYNC,
-			"Received IO from port other than PORT_HOST_SYNC: %u\n",
-			run->io.port);
-	TEST_ASSERT((args.arg1 & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
+	get_ucall(vm, VCPU_ID, &uc);
+	TEST_ASSERT(uc.cmd == UCALL_SYNC,
+			"Received ucall other than UCALL_SYNC: %u\n",
+			ucall);
+	TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
 		MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
 		"Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
 		MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
index 881419d5746e..35640e8e95bc 100644
--- a/tools/testing/selftests/kvm/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
@@ -22,7 +22,7 @@
 #include "test_util.h"
 
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 
 #define VCPU_ID                  5
 
diff --git a/tools/testing/selftests/kvm/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 900e3e9dfb9f..03da41f0f736 100644
--- a/tools/testing/selftests/kvm/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -17,7 +17,7 @@
 #include "test_util.h"
 
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 #include "vmx.h"
 
 #define VCPU_ID		5
@@ -26,20 +26,20 @@ static bool have_nested_state;
 
 void l2_guest_code(void)
 {
-	GUEST_SYNC(5);
+	GUEST_SYNC(6);
 
         /* Exit to L1 */
 	vmcall();
 
 	/* L1 has now set up a shadow VMCS for us.  */
 	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
-	GUEST_SYNC(9);
+	GUEST_SYNC(10);
 	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
 	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
-	GUEST_SYNC(10);
+	GUEST_SYNC(11);
 	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
 	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
-	GUEST_SYNC(11);
+	GUEST_SYNC(12);
 
 	/* Done, exit to L1 and never come back.  */
 	vmcall();
@@ -52,15 +52,17 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 
 	GUEST_ASSERT(vmx_pages->vmcs_gpa);
 	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_SYNC(3);
+	GUEST_ASSERT(load_vmcs(vmx_pages));
 	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
 
-	GUEST_SYNC(3);
+	GUEST_SYNC(4);
 	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
 
 	prepare_vmcs(vmx_pages, l2_guest_code,
 		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
 
-	GUEST_SYNC(4);
+	GUEST_SYNC(5);
 	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
 	GUEST_ASSERT(!vmlaunch());
 	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
@@ -72,7 +74,7 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 	GUEST_ASSERT(!vmresume());
 	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
 
-	GUEST_SYNC(6);
+	GUEST_SYNC(7);
 	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
 
 	GUEST_ASSERT(!vmresume());
@@ -85,12 +87,12 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 
 	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
 	GUEST_ASSERT(vmlaunch());
-	GUEST_SYNC(7);
+	GUEST_SYNC(8);
 	GUEST_ASSERT(vmlaunch());
 	GUEST_ASSERT(vmresume());
 
 	vmwrite(GUEST_RIP, 0xc0ffee);
-	GUEST_SYNC(8);
+	GUEST_SYNC(9);
 	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
 
 	GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
@@ -101,7 +103,7 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
 	GUEST_ASSERT(vmlaunch());
 	GUEST_ASSERT(vmresume());
-	GUEST_SYNC(12);
+	GUEST_SYNC(13);
 	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
 	GUEST_ASSERT(vmlaunch());
 	GUEST_ASSERT(vmresume());
@@ -127,6 +129,7 @@ int main(int argc, char *argv[])
 	struct kvm_vm *vm;
 	struct kvm_run *run;
 	struct kvm_x86_state *state;
+	struct ucall uc;
 	int stage;
 
 	struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
@@ -155,23 +158,23 @@ int main(int argc, char *argv[])
 
 		memset(&regs1, 0, sizeof(regs1));
 		vcpu_regs_get(vm, VCPU_ID, &regs1);
-		switch (run->io.port) {
-		case GUEST_PORT_ABORT:
-			TEST_ASSERT(false, "%s at %s:%d", (const char *) regs1.rdi,
-				    __FILE__, regs1.rsi);
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
+				    __FILE__, uc.args[1]);
 			/* NOT REACHED */
-		case GUEST_PORT_SYNC:
+		case UCALL_SYNC:
 			break;
-		case GUEST_PORT_DONE:
+		case UCALL_DONE:
 			goto done;
 		default:
-			TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
+			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
 		}
 
-		/* PORT_SYNC is handled here.  */
-		TEST_ASSERT(!strcmp((const char *)regs1.rdi, "hello") &&
-			    regs1.rsi == stage, "Unexpected register values vmexit #%lx, got %lx",
-			    stage, (ulong) regs1.rsi);
+		/* UCALL_SYNC is handled here.  */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
+			    stage, (ulong)uc.args[1]);
 
 		state = vcpu_save_state(vm, VCPU_ID);
 		kvm_vm_release(vm);
diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index 213343e5dff9..c8478ce9ea77 100644
--- a/tools/testing/selftests/kvm/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -19,7 +19,7 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 
 #define VCPU_ID 5
 
diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index 49bcc68b0235..18fa64db0d7a 100644
--- a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -1,5 +1,5 @@
 /*
- * gtests/tests/vmx_tsc_adjust_test.c
+ * vmx_tsc_adjust_test
  *
  * Copyright (C) 2018, Google LLC.
  *
@@ -22,13 +22,13 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 #include "vmx.h"
 
 #include <string.h>
 #include <sys/ioctl.h>
 
-#include "../kselftest.h"
+#include "kselftest.h"
 
 #ifndef MSR_IA32_TSC_ADJUST
 #define MSR_IA32_TSC_ADJUST 0x3b
@@ -94,6 +94,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
 	check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
 
 	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
 
 	/* Prepare the VMCS for L2 execution. */
 	prepare_vmcs(vmx_pages, l2_guest_code,
@@ -146,26 +147,25 @@ int main(int argc, char *argv[])
 
 	for (;;) {
 		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
-		struct guest_args args;
+		struct ucall uc;
 
 		vcpu_run(vm, VCPU_ID);
-		guest_args_read(vm, VCPU_ID, &args);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
 			    run->exit_reason,
 			    exit_reason_str(run->exit_reason));
 
-		switch (args.port) {
-		case GUEST_PORT_ABORT:
-			TEST_ASSERT(false, "%s", (const char *) args.arg0);
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_ASSERT(false, "%s", (const char *)uc.args[0]);
 			/* NOT REACHED */
-		case GUEST_PORT_SYNC:
-			report(args.arg1);
+		case UCALL_SYNC:
+			report(uc.args[1]);
 			break;
-		case GUEST_PORT_DONE:
+		case UCALL_DONE:
 			goto done;
 		default:
-			TEST_ASSERT(false, "Unknown port 0x%x.", args.port);
+			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
 		}
 	}
 
diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c
index dc93fadbee96..d79c7581b175 100644
--- a/tools/usb/usbip/libsrc/usbip_host_common.c
+++ b/tools/usb/usbip/libsrc/usbip_host_common.c
@@ -43,7 +43,7 @@ static int32_t read_attr_usbip_status(struct usbip_usb_device *udev)
 	int size;
 	int fd;
 	int length;
-	char status;
+	char status[2] = { 0 };
 	int value = 0;
 
 	size = snprintf(status_attr_path, sizeof(status_attr_path),
@@ -61,14 +61,14 @@ static int32_t read_attr_usbip_status(struct usbip_usb_device *udev)
 		return -1;
 	}
 
-	length = read(fd, &status, 1);
+	length = read(fd, status, 1);
 	if (length < 0) {
 		err("error reading attribute %s", status_attr_path);
 		close(fd);
 		return -1;
 	}
 
-	value = atoi(&status);
+	value = atoi(status);
 
 	return value;
 }
diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c
index 4204359c9fee..8159fd98680b 100644
--- a/tools/usb/usbip/libsrc/vhci_driver.c
+++ b/tools/usb/usbip/libsrc/vhci_driver.c
@@ -150,7 +150,7 @@ static int get_nports(struct udev_device *hc_device)
 
 static int vhci_hcd_filter(const struct dirent *dirent)
 {
-	return strcmp(dirent->d_name, "vhci_hcd") >= 0;
+	return !strncmp(dirent->d_name, "vhci_hcd.", 9);
 }
 
 static int get_ncontrollers(void)
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 150c8a69cdaf..23774970c9df 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -120,8 +120,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
 	int ret, cpu;
 
-	if (type)
-		return -EINVAL;
+	ret = kvm_arm_setup_stage2(kvm, type);
+	if (ret)
+		return ret;
 
 	kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
 	if (!kvm->arch.last_vcpu_ran)
@@ -212,6 +213,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_READONLY_MEM:
 	case KVM_CAP_MP_STATE:
 	case KVM_CAP_IMMEDIATE_EXIT:
+	case KVM_CAP_VCPU_EVENTS:
 		r = 1;
 		break;
 	case KVM_CAP_ARM_SET_DEVICE_ADDR:
@@ -240,7 +242,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = 1;
 		break;
 	default:
-		r = kvm_arch_dev_ioctl_check_extension(kvm, ext);
+		r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
 		break;
 	}
 	return r;
@@ -544,7 +546,7 @@ static void update_vttbr(struct kvm *kvm)
 
 	/* update vttbr to be used with the new vmid */
 	pgd_phys = virt_to_phys(kvm->arch.pgd);
-	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
+	BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm));
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
 	kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp;
 
@@ -1295,8 +1297,6 @@ static void cpu_init_hyp_mode(void *dummy)
 
 	__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
 	__cpu_init_stage2();
-
-	kvm_arm_init_debug();
 }
 
 static void cpu_hyp_reset(void)
@@ -1309,16 +1309,12 @@ static void cpu_hyp_reinit(void)
 {
 	cpu_hyp_reset();
 
-	if (is_kernel_in_hyp_mode()) {
-		/*
-		 * __cpu_init_stage2() is safe to call even if the PM
-		 * event was cancelled before the CPU was reset.
-		 */
-		__cpu_init_stage2();
+	if (is_kernel_in_hyp_mode())
 		kvm_timer_init_vhe();
-	} else {
+	else
 		cpu_init_hyp_mode(NULL);
-	}
+
+	kvm_arm_init_debug();
 
 	if (vgic_present)
 		kvm_vgic_init_cpu_hardware();
@@ -1412,6 +1408,8 @@ static int init_common_resources(void)
 	kvm_vmid_bits = kvm_get_vmid_bits();
 	kvm_info("%d-bit VMID\n", kvm_vmid_bits);
 
+	kvm_set_ipa_limit();
+
 	return 0;
 }
 
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 1a2c3a1c56ce..5eca48bdb1a6 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -45,7 +45,6 @@ static phys_addr_t hyp_idmap_vector;
 
 static unsigned long io_map_base;
 
-#define S2_PGD_SIZE	(PTRS_PER_S2_PGD * sizeof(pgd_t))
 #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
 
 #define KVM_S2PTE_FLAG_IS_IOMAP		(1UL << 0)
@@ -150,20 +149,20 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 
 static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
 {
-	pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL);
-	stage2_pgd_clear(pgd);
+	pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, pgd, 0UL);
+	stage2_pgd_clear(kvm, pgd);
 	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	stage2_pud_free(pud_table);
+	stage2_pud_free(kvm, pud_table);
 	put_page(virt_to_page(pgd));
 }
 
 static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
-	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0);
-	VM_BUG_ON(stage2_pud_huge(*pud));
-	stage2_pud_clear(pud);
+	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0);
+	VM_BUG_ON(stage2_pud_huge(kvm, *pud));
+	stage2_pud_clear(kvm, pud);
 	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	stage2_pmd_free(pmd_table);
+	stage2_pmd_free(kvm, pmd_table);
 	put_page(virt_to_page(pud));
 }
 
@@ -252,7 +251,7 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
-	if (stage2_pte_table_empty(start_pte))
+	if (stage2_pte_table_empty(kvm, start_pte))
 		clear_stage2_pmd_entry(kvm, pmd, start_addr);
 }
 
@@ -262,9 +261,9 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
 	phys_addr_t next, start_addr = addr;
 	pmd_t *pmd, *start_pmd;
 
-	start_pmd = pmd = stage2_pmd_offset(pud, addr);
+	start_pmd = pmd = stage2_pmd_offset(kvm, pud, addr);
 	do {
-		next = stage2_pmd_addr_end(addr, end);
+		next = stage2_pmd_addr_end(kvm, addr, end);
 		if (!pmd_none(*pmd)) {
 			if (pmd_thp_or_huge(*pmd)) {
 				pmd_t old_pmd = *pmd;
@@ -281,7 +280,7 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
 		}
 	} while (pmd++, addr = next, addr != end);
 
-	if (stage2_pmd_table_empty(start_pmd))
+	if (stage2_pmd_table_empty(kvm, start_pmd))
 		clear_stage2_pud_entry(kvm, pud, start_addr);
 }
 
@@ -291,14 +290,14 @@ static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
 	phys_addr_t next, start_addr = addr;
 	pud_t *pud, *start_pud;
 
-	start_pud = pud = stage2_pud_offset(pgd, addr);
+	start_pud = pud = stage2_pud_offset(kvm, pgd, addr);
 	do {
-		next = stage2_pud_addr_end(addr, end);
-		if (!stage2_pud_none(*pud)) {
-			if (stage2_pud_huge(*pud)) {
+		next = stage2_pud_addr_end(kvm, addr, end);
+		if (!stage2_pud_none(kvm, *pud)) {
+			if (stage2_pud_huge(kvm, *pud)) {
 				pud_t old_pud = *pud;
 
-				stage2_pud_clear(pud);
+				stage2_pud_clear(kvm, pud);
 				kvm_tlb_flush_vmid_ipa(kvm, addr);
 				kvm_flush_dcache_pud(old_pud);
 				put_page(virt_to_page(pud));
@@ -308,7 +307,7 @@ static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
 		}
 	} while (pud++, addr = next, addr != end);
 
-	if (stage2_pud_table_empty(start_pud))
+	if (stage2_pud_table_empty(kvm, start_pud))
 		clear_stage2_pgd_entry(kvm, pgd, start_addr);
 }
 
@@ -332,7 +331,7 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 	assert_spin_locked(&kvm->mmu_lock);
 	WARN_ON(size & ~PAGE_MASK);
 
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
 	do {
 		/*
 		 * Make sure the page table is still active, as another thread
@@ -341,8 +340,8 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 		 */
 		if (!READ_ONCE(kvm->arch.pgd))
 			break;
-		next = stage2_pgd_addr_end(addr, end);
-		if (!stage2_pgd_none(*pgd))
+		next = stage2_pgd_addr_end(kvm, addr, end);
+		if (!stage2_pgd_none(kvm, *pgd))
 			unmap_stage2_puds(kvm, pgd, addr, next);
 		/*
 		 * If the range is too large, release the kvm->mmu_lock
@@ -371,9 +370,9 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
 	pmd_t *pmd;
 	phys_addr_t next;
 
-	pmd = stage2_pmd_offset(pud, addr);
+	pmd = stage2_pmd_offset(kvm, pud, addr);
 	do {
-		next = stage2_pmd_addr_end(addr, end);
+		next = stage2_pmd_addr_end(kvm, addr, end);
 		if (!pmd_none(*pmd)) {
 			if (pmd_thp_or_huge(*pmd))
 				kvm_flush_dcache_pmd(*pmd);
@@ -389,11 +388,11 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
 	pud_t *pud;
 	phys_addr_t next;
 
-	pud = stage2_pud_offset(pgd, addr);
+	pud = stage2_pud_offset(kvm, pgd, addr);
 	do {
-		next = stage2_pud_addr_end(addr, end);
-		if (!stage2_pud_none(*pud)) {
-			if (stage2_pud_huge(*pud))
+		next = stage2_pud_addr_end(kvm, addr, end);
+		if (!stage2_pud_none(kvm, *pud)) {
+			if (stage2_pud_huge(kvm, *pud))
 				kvm_flush_dcache_pud(*pud);
 			else
 				stage2_flush_pmds(kvm, pud, addr, next);
@@ -409,10 +408,11 @@ static void stage2_flush_memslot(struct kvm *kvm,
 	phys_addr_t next;
 	pgd_t *pgd;
 
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
 	do {
-		next = stage2_pgd_addr_end(addr, end);
-		stage2_flush_puds(kvm, pgd, addr, next);
+		next = stage2_pgd_addr_end(kvm, addr, end);
+		if (!stage2_pgd_none(kvm, *pgd))
+			stage2_flush_puds(kvm, pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
 
@@ -897,7 +897,7 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 	}
 
 	/* Allocate the HW PGD, making sure that each page gets its own refcount */
-	pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO);
+	pgd = alloc_pages_exact(stage2_pgd_size(kvm), GFP_KERNEL | __GFP_ZERO);
 	if (!pgd)
 		return -ENOMEM;
 
@@ -986,7 +986,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 
 	spin_lock(&kvm->mmu_lock);
 	if (kvm->arch.pgd) {
-		unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
+		unmap_stage2_range(kvm, 0, kvm_phys_size(kvm));
 		pgd = READ_ONCE(kvm->arch.pgd);
 		kvm->arch.pgd = NULL;
 	}
@@ -994,7 +994,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 
 	/* Free the HW pgd, one page at a time */
 	if (pgd)
-		free_pages_exact(pgd, S2_PGD_SIZE);
+		free_pages_exact(pgd, stage2_pgd_size(kvm));
 }
 
 static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
@@ -1003,16 +1003,16 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	pgd_t *pgd;
 	pud_t *pud;
 
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
-	if (WARN_ON(stage2_pgd_none(*pgd))) {
+	pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
+	if (stage2_pgd_none(kvm, *pgd)) {
 		if (!cache)
 			return NULL;
 		pud = mmu_memory_cache_alloc(cache);
-		stage2_pgd_populate(pgd, pud);
+		stage2_pgd_populate(kvm, pgd, pud);
 		get_page(virt_to_page(pgd));
 	}
 
-	return stage2_pud_offset(pgd, addr);
+	return stage2_pud_offset(kvm, pgd, addr);
 }
 
 static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
@@ -1025,15 +1025,15 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	if (!pud)
 		return NULL;
 
-	if (stage2_pud_none(*pud)) {
+	if (stage2_pud_none(kvm, *pud)) {
 		if (!cache)
 			return NULL;
 		pmd = mmu_memory_cache_alloc(cache);
-		stage2_pud_populate(pud, pmd);
+		stage2_pud_populate(kvm, pud, pmd);
 		get_page(virt_to_page(pud));
 	}
 
-	return stage2_pmd_offset(pud, addr);
+	return stage2_pmd_offset(kvm, pud, addr);
 }
 
 static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
@@ -1207,8 +1207,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 		if (writable)
 			pte = kvm_s2pte_mkwrite(pte);
 
-		ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
-						KVM_NR_MEM_OBJS);
+		ret = mmu_topup_memory_cache(&cache,
+					     kvm_mmu_cache_min_pages(kvm),
+					     KVM_NR_MEM_OBJS);
 		if (ret)
 			goto out;
 		spin_lock(&kvm->mmu_lock);
@@ -1230,8 +1231,14 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
 {
 	kvm_pfn_t pfn = *pfnp;
 	gfn_t gfn = *ipap >> PAGE_SHIFT;
+	struct page *page = pfn_to_page(pfn);
 
-	if (PageTransCompoundMap(pfn_to_page(pfn))) {
+	/*
+	 * PageTransCompoungMap() returns true for THP and
+	 * hugetlbfs. Make sure the adjustment is done only for THP
+	 * pages.
+	 */
+	if (!PageHuge(page) && PageTransCompoundMap(page)) {
 		unsigned long mask;
 		/*
 		 * The address we faulted on is backed by a transparent huge
@@ -1296,19 +1303,21 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
 
 /**
  * stage2_wp_pmds - write protect PUD range
+ * kvm:		kvm instance for the VM
  * @pud:	pointer to pud entry
  * @addr:	range start address
  * @end:	range end address
  */
-static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
+static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
+			   phys_addr_t addr, phys_addr_t end)
 {
 	pmd_t *pmd;
 	phys_addr_t next;
 
-	pmd = stage2_pmd_offset(pud, addr);
+	pmd = stage2_pmd_offset(kvm, pud, addr);
 
 	do {
-		next = stage2_pmd_addr_end(addr, end);
+		next = stage2_pmd_addr_end(kvm, addr, end);
 		if (!pmd_none(*pmd)) {
 			if (pmd_thp_or_huge(*pmd)) {
 				if (!kvm_s2pmd_readonly(pmd))
@@ -1328,18 +1337,19 @@ static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
   *
   * Process PUD entries, for a huge PUD we cause a panic.
   */
-static void  stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
+static void  stage2_wp_puds(struct kvm *kvm, pgd_t *pgd,
+			    phys_addr_t addr, phys_addr_t end)
 {
 	pud_t *pud;
 	phys_addr_t next;
 
-	pud = stage2_pud_offset(pgd, addr);
+	pud = stage2_pud_offset(kvm, pgd, addr);
 	do {
-		next = stage2_pud_addr_end(addr, end);
-		if (!stage2_pud_none(*pud)) {
+		next = stage2_pud_addr_end(kvm, addr, end);
+		if (!stage2_pud_none(kvm, *pud)) {
 			/* TODO:PUD not supported, revisit later if supported */
-			BUG_ON(stage2_pud_huge(*pud));
-			stage2_wp_pmds(pud, addr, next);
+			BUG_ON(stage2_pud_huge(kvm, *pud));
+			stage2_wp_pmds(kvm, pud, addr, next);
 		}
 	} while (pud++, addr = next, addr != end);
 }
@@ -1355,7 +1365,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 	pgd_t *pgd;
 	phys_addr_t next;
 
-	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
 	do {
 		/*
 		 * Release kvm_mmu_lock periodically if the memory region is
@@ -1369,9 +1379,9 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 		cond_resched_lock(&kvm->mmu_lock);
 		if (!READ_ONCE(kvm->arch.pgd))
 			break;
-		next = stage2_pgd_addr_end(addr, end);
-		if (stage2_pgd_present(*pgd))
-			stage2_wp_puds(pgd, addr, next);
+		next = stage2_pgd_addr_end(kvm, addr, end);
+		if (stage2_pgd_present(kvm, *pgd))
+			stage2_wp_puds(kvm, pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
 
@@ -1514,7 +1524,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	up_read(&current->mm->mmap_sem);
 
 	/* We need minimum second+third level pages */
-	ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
+	ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm),
 				     KVM_NR_MEM_OBJS);
 	if (ret)
 		return ret;
@@ -1757,7 +1767,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	}
 
 	/* Userspace should not be able to register out-of-bounds IPAs */
-	VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
+	VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
 
 	if (fault_status == FSC_ACCESS) {
 		handle_access_fault(vcpu, fault_ipa);
@@ -2056,7 +2066,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	 * space addressable by the KVM guest IPA space.
 	 */
 	if (memslot->base_gfn + memslot->npages >=
-	    (KVM_PHYS_SIZE >> PAGE_SHIFT))
+	    (kvm_phys_size(kvm) >> PAGE_SHIFT))
 		return -EFAULT;
 
 	down_read(&current->mm->mmap_sem);
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 12502251727e..eb2a390a6c86 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -241,13 +241,6 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
 	list_for_each_entry(dev, &(its)->device_list, dev_list) \
 		list_for_each_entry(ite, &(dev)->itt_head, ite_list)
 
-/*
- * We only implement 48 bits of PA at the moment, although the ITS
- * supports more. Let's be restrictive here.
- */
-#define BASER_ADDRESS(x)	((x) & GENMASK_ULL(47, 16))
-#define CBASER_ADDRESS(x)	((x) & GENMASK_ULL(47, 12))
-
 #define GIC_LPI_OFFSET 8192
 
 #define VITS_TYPER_IDBITS 16
@@ -759,6 +752,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
 {
 	int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
 	u64 indirect_ptr, type = GITS_BASER_TYPE(baser);
+	phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser);
 	int esz = GITS_BASER_ENTRY_SIZE(baser);
 	int index;
 	gfn_t gfn;
@@ -783,7 +777,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
 		if (id >= (l1_tbl_size / esz))
 			return false;
 
-		addr = BASER_ADDRESS(baser) + id * esz;
+		addr = base + id * esz;
 		gfn = addr >> PAGE_SHIFT;
 
 		if (eaddr)
@@ -798,7 +792,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
 
 	/* Each 1st level entry is represented by a 64-bit value. */
 	if (kvm_read_guest_lock(its->dev->kvm,
-			   BASER_ADDRESS(baser) + index * sizeof(indirect_ptr),
+			   base + index * sizeof(indirect_ptr),
 			   &indirect_ptr, sizeof(indirect_ptr)))
 		return false;
 
@@ -808,11 +802,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
 	if (!(indirect_ptr & BIT_ULL(63)))
 		return false;
 
-	/*
-	 * Mask the guest physical address and calculate the frame number.
-	 * Any address beyond our supported 48 bits of PA will be caught
-	 * by the actual check in the final step.
-	 */
+	/* Mask the guest physical address and calculate the frame number. */
 	indirect_ptr &= GENMASK_ULL(51, 16);
 
 	/* Find the address of the actual entry */
@@ -1304,9 +1294,6 @@ static u64 vgic_sanitise_its_baser(u64 reg)
 				  GITS_BASER_OUTER_CACHEABILITY_SHIFT,
 				  vgic_sanitise_outer_cacheability);
 
-	/* Bits 15:12 contain bits 51:48 of the PA, which we don't support. */
-	reg &= ~GENMASK_ULL(15, 12);
-
 	/* We support only one (ITS) page size: 64K */
 	reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K;
 
@@ -1325,11 +1312,8 @@ static u64 vgic_sanitise_its_cbaser(u64 reg)
 				  GITS_CBASER_OUTER_CACHEABILITY_SHIFT,
 				  vgic_sanitise_outer_cacheability);
 
-	/*
-	 * Sanitise the physical address to be 64k aligned.
-	 * Also limit the physical addresses to 48 bits.
-	 */
-	reg &= ~(GENMASK_ULL(51, 48) | GENMASK_ULL(15, 12));
+	/* Sanitise the physical address to be 64k aligned. */
+	reg &= ~GENMASK_ULL(15, 12);
 
 	return reg;
 }
@@ -1375,7 +1359,7 @@ static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
 	if (!its->enabled)
 		return;
 
-	cbaser = CBASER_ADDRESS(its->cbaser);
+	cbaser = GITS_CBASER_ADDRESS(its->cbaser);
 
 	while (its->cwriter != its->creadr) {
 		int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr,
@@ -2233,7 +2217,7 @@ static int vgic_its_restore_device_tables(struct vgic_its *its)
 	if (!(baser & GITS_BASER_VALID))
 		return 0;
 
-	l1_gpa = BASER_ADDRESS(baser);
+	l1_gpa = GITS_BASER_ADDR_48_to_52(baser);
 
 	if (baser & GITS_BASER_INDIRECT) {
 		l1_esz = GITS_LVL1_ENTRY_SIZE;
@@ -2305,7 +2289,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its)
 {
 	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
 	u64 baser = its->baser_coll_table;
-	gpa_t gpa = BASER_ADDRESS(baser);
+	gpa_t gpa = GITS_BASER_ADDR_48_to_52(baser);
 	struct its_collection *collection;
 	u64 val;
 	size_t max_size, filled = 0;
@@ -2354,7 +2338,7 @@ static int vgic_its_restore_collection_table(struct vgic_its *its)
 	if (!(baser & GITS_BASER_VALID))
 		return 0;
 
-	gpa = BASER_ADDRESS(baser);
+	gpa = GITS_BASER_ADDR_48_to_52(baser);
 
 	max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
 
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
index 6ada2432e37c..114dce9f4bf5 100644
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -25,7 +25,7 @@
 int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
 		      phys_addr_t addr, phys_addr_t alignment)
 {
-	if (addr & ~KVM_PHYS_MASK)
+	if (addr & ~kvm_phys_mask(kvm))
 		return -E2BIG;
 
 	if (!IS_ALIGNED(addr, alignment))
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index a2a175b08b17..b3d1f0985117 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -364,7 +364,6 @@ static u64 vgic_sanitise_pendbaser(u64 reg)
 				  vgic_sanitise_outer_cacheability);
 
 	reg &= ~PENDBASER_RES0_MASK;
-	reg &= ~GENMASK_ULL(51, 48);
 
 	return reg;
 }
@@ -382,7 +381,6 @@ static u64 vgic_sanitise_propbaser(u64 reg)
 				  vgic_sanitise_outer_cacheability);
 
 	reg &= ~PROPBASER_RES0_MASK;
-	reg &= ~GENMASK_ULL(51, 48);
 	return reg;
 }
 
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 9e65feb6fa58..3710342cf6ad 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -83,6 +83,7 @@ static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
 	ring->coalesced_mmio[ring->last].phys_addr = addr;
 	ring->coalesced_mmio[ring->last].len = len;
 	memcpy(ring->coalesced_mmio[ring->last].data, val, len);
+	ring->coalesced_mmio[ring->last].pio = dev->zone.pio;
 	smp_wmb();
 	ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
 	spin_unlock(&dev->kvm->ring_lock);
@@ -140,6 +141,9 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
 	int ret;
 	struct kvm_coalesced_mmio_dev *dev;
 
+	if (zone->pio != 1 && zone->pio != 0)
+		return -EINVAL;
+
 	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
 	if (!dev)
 		return -ENOMEM;
@@ -149,8 +153,9 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
 	dev->zone = *zone;
 
 	mutex_lock(&kvm->slots_lock);
-	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr,
-				      zone->size, &dev->dev);
+	ret = kvm_io_bus_register_dev(kvm,
+				zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS,
+				zone->addr, zone->size, &dev->dev);
 	if (ret < 0)
 		goto out_free_dev;
 	list_add_tail(&dev->list, &kvm->coalesced_zones);
@@ -174,7 +179,8 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
 
 	list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list)
 		if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
-			kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev);
+			kvm_io_bus_unregister_dev(kvm,
+				zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev);
 			kvm_iodevice_destructor(&dev->dev);
 		}
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f986e31fa68c..786ade1843a2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -219,7 +219,7 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
 	me = get_cpu();
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
-		if (!test_bit(i, vcpu_bitmap))
+		if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
 			continue;
 
 		kvm_make_request(req, vcpu);
@@ -243,12 +243,10 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
 	cpumask_var_t cpus;
 	bool called;
-	static unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]
-		= {[0 ... BITS_TO_LONGS(KVM_MAX_VCPUS)-1] = ULONG_MAX};
 
 	zalloc_cpumask_var(&cpus, GFP_ATOMIC);
 
-	called = kvm_make_vcpus_request_mask(kvm, req, vcpu_bitmap, cpus);
+	called = kvm_make_vcpus_request_mask(kvm, req, NULL, cpus);
 
 	free_cpumask_var(cpus);
 	return called;
@@ -807,20 +805,25 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
  * sorted array and known changed memslot position.
  */
 static void update_memslots(struct kvm_memslots *slots,
-			    struct kvm_memory_slot *new)
+			    struct kvm_memory_slot *new,
+			    enum kvm_mr_change change)
 {
 	int id = new->id;
 	int i = slots->id_to_index[id];
 	struct kvm_memory_slot *mslots = slots->memslots;
 
 	WARN_ON(mslots[i].id != id);
-	if (!new->npages) {
-		WARN_ON(!mslots[i].npages);
-		if (mslots[i].npages)
-			slots->used_slots--;
-	} else {
-		if (!mslots[i].npages)
-			slots->used_slots++;
+	switch (change) {
+	case KVM_MR_CREATE:
+		slots->used_slots++;
+		WARN_ON(mslots[i].npages || !new->npages);
+		break;
+	case KVM_MR_DELETE:
+		slots->used_slots--;
+		WARN_ON(new->npages || !mslots[i].npages);
+		break;
+	default:
+		break;
 	}
 
 	while (i < KVM_MEM_SLOTS_NUM - 1 &&
@@ -1056,7 +1059,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		memset(&new.arch, 0, sizeof(new.arch));
 	}
 
-	update_memslots(slots, &new);
+	update_memslots(slots, &new, change);
 	old_memslots = install_new_memslots(kvm, as_id, slots);
 
 	kvm_arch_commit_memory_region(kvm, mem, &old, &new, change);
@@ -1311,8 +1314,12 @@ unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn)
 EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva);
 
 /*
- * If writable is set to false, the hva returned by this function is only
- * allowed to be read.
+ * Return the hva of a @gfn and the R/W attribute if possible.
+ *
+ * @slot: the kvm_memory_slot which contains @gfn
+ * @gfn: the gfn to be translated
+ * @writable: used to return the read/write attribute of the @slot if the hva
+ * is valid and @writable is not NULL
  */
 unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
 				      gfn_t gfn, bool *writable)
@@ -2946,6 +2953,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #ifdef CONFIG_KVM_MMIO
 	case KVM_CAP_COALESCED_MMIO:
 		return KVM_COALESCED_MMIO_PAGE_OFFSET;
+	case KVM_CAP_COALESCED_PIO:
+		return 1;
 #endif
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 	case KVM_CAP_IRQ_ROUTING: