241 files changed, 3709 insertions, 2780 deletions
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index b9a83dd24732..2a7b38c832c7 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -963,11 +963,6 @@ elevator_dispatch_fn*		fills the dispatch queue with ready requests.
 
 elevator_add_req_fn*		called to add a new request into the scheduler
 
-elevator_queue_empty_fn		returns true if the merge queue is empty.
-				Drivers shouldn't use this, but rather check
-				if elv_next_request is NULL (without losing the
-				request if one exists!)
-
 elevator_former_req_fn
 elevator_latter_req_fn		These return the request before or after the
 				one specified in disk sort order. Used by the
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index 4ed7b5ceeed2..465351d4cf85 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -140,7 +140,7 @@ Proportional weight policy files
 	- Specifies per cgroup weight. This is default weight of the group
 	  on all the devices until and unless overridden by per device rule.
 	  (See blkio.weight_device).
-	  Currently allowed range of weights is from 100 to 1000.
+	  Currently allowed range of weights is from 10 to 1000.
 
 - blkio.weight_device
 	- One can specify per cgroup per device rules using this interface.
@@ -343,34 +343,6 @@ Common files among various policies
 
 CFQ sysfs tunable
 =================
-/sys/block/<disk>/queue/iosched/group_isolation
------------------------------------------------
-
-If group_isolation=1, it provides stronger isolation between groups at the
-expense of throughput. By default group_isolation is 0. In general that
-means that if group_isolation=0, expect fairness for sequential workload
-only. Set group_isolation=1 to see fairness for random IO workload also.
-
-Generally CFQ will put random seeky workload in sync-noidle category. CFQ
-will disable idling on these queues and it does a collective idling on group
-of such queues. Generally these are slow moving queues and if there is a
-sync-noidle service tree in each group, that group gets exclusive access to
-disk for certain period. That means it will bring the throughput down if
-group does not have enough IO to drive deeper queue depths and utilize disk
-capacity to the fullest in the slice allocated to it. But the flip side is
-that even a random reader should get better latencies and overall throughput
-if there are lots of sequential readers/sync-idle workload running in the
-system.
-
-If group_isolation=0, then CFQ automatically moves all the random seeky queues
-in the root group. That means there will be no service differentiation for
-that kind of workload. This leads to better throughput as we do collective
-idling on root sync-noidle tree.
-
-By default one should run with group_isolation=0. If that is not sufficient
-and one wants stronger isolation between groups, then set group_isolation=1
-but this will come at cost of reduced throughput.
-
 /sys/block/<disk>/queue/iosched/slice_idle
 ------------------------------------------
 On a faster hardware CFQ can be slow, especially with sequential workload.
diff --git a/Documentation/iostats.txt b/Documentation/iostats.txt
index f6dece5b7014..c76c21d87e85 100644
--- a/Documentation/iostats.txt
+++ b/Documentation/iostats.txt
@@ -1,8 +1,6 @@
 I/O statistics fields
 ---------------
 
-Last modified Sep 30, 2003
-
 Since 2.4.20 (and some versions before, with patches), and 2.5.45,
 more extensive disk statistics have been introduced to help measure disk
 activity. Tools such as sar and iostat typically interpret these and do
@@ -46,11 +44,12 @@ the above example, the first field of statistics would be 446216.
 By contrast, in 2.6 if you look at /sys/block/hda/stat, you'll
 find just the eleven fields, beginning with 446216.  If you look at
 /proc/diskstats, the eleven fields will be preceded by the major and
-minor device numbers, and device name.  Each of these formats provide
+minor device numbers, and device name.  Each of these formats provides
 eleven fields of statistics, each meaning exactly the same things.
 All fields except field 9 are cumulative since boot.  Field 9 should
-go to zero as I/Os complete; all others only increase.  Yes, these are
-32 bit unsigned numbers, and on a very busy or long-lived system they
+go to zero as I/Os complete; all others only increase (unless they
+overflow and wrap).  Yes, these are (32-bit or 64-bit) unsigned long
+(native word size) numbers, and on a very busy or long-lived system they
 may wrap. Applications should be prepared to deal with that; unless
 your observations are measured in large numbers of minutes or hours,
 they should not wrap twice before you notice them.
@@ -96,11 +95,11 @@ introduced when changes collide, so (for instance) adding up all the
 read I/Os issued per partition should equal those made to the disks ...
 but due to the lack of locking it may only be very close.
 
-In 2.6, there are counters for each cpu, which made the lack of locking
-almost a non-issue.  When the statistics are read, the per-cpu counters
-are summed (possibly overflowing the unsigned 32-bit variable they are
+In 2.6, there are counters for each CPU, which make the lack of locking
+almost a non-issue.  When the statistics are read, the per-CPU counters
+are summed (possibly overflowing the unsigned long variable they are
 summed to) and the result given to the user.  There is no convenient
-user interface for accessing the per-cpu counters themselves.
+user interface for accessing the per-CPU counters themselves.
 
 Disks vs Partitions
 -------------------
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index b3b0f0f5053d..e5f6fc428348 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -78,7 +78,7 @@ __tagtable(ATAG_INITRD2, parse_tag_initrd2);
  */
 struct meminfo meminfo;
 
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
 	int free = 0, total = 0, reserved = 0;
 	int shared = 0, cached = 0, slab = 0, i;
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 54bf54059811..9a018cde5d84 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -36,7 +36,7 @@ static unsigned long max_gap;
  * Shows a simple page count of reserved and used pages in the system.
  * For discontig machines, it does this on a per-pgdat basis.
  */
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
 	int i, total_reserved = 0;
 	int total_shared = 0, total_cached = 0;
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 61620323bb60..82ab1bc6afb1 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -614,7 +614,7 @@ void __cpuinit *per_cpu_init(void)
  * Shows a simple page count of reserved and used pages in the system.
  * For discontig machines, it does this on a per-pgdat basis.
  */
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
 	int i, total_reserved = 0;
 	int total_shared = 0, total_cached = 0;
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 10971be43061..d8ab97a73db2 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -3,6 +3,8 @@ config MN10300
 	select HAVE_OPROFILE
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_HARDIRQS_NO_DEPRECATED
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_ARCH_KGDB
 
 config AM33_2
 	def_bool n
@@ -401,9 +403,9 @@ comment "[!] NOTE: A lower number/level indicates a higher priority (0 is highes
 comment "____Non-maskable interrupt levels____"
 comment "The following must be set to a higher priority than local_irq_disable() and on-chip serial"
 
-config GDBSTUB_IRQ_LEVEL
-	int "GDBSTUB interrupt priority"
-	depends on GDBSTUB
+config DEBUGGER_IRQ_LEVEL
+	int "DEBUGGER interrupt priority"
+	depends on KERNEL_DEBUGGER
 	range 0 1 if LINUX_CLI_LEVEL = 2
 	range 0 2 if LINUX_CLI_LEVEL = 3
 	range 0 3 if LINUX_CLI_LEVEL = 4
@@ -437,7 +439,7 @@ config LINUX_CLI_LEVEL
 	  EPSW.IM from 7.  Any interrupt is permitted for which the level is
 	  lower than EPSW.IM.
 
-	  Certain interrupts, such as GDBSTUB and virtual MN10300 on-chip
+	  Certain interrupts, such as DEBUGGER and virtual MN10300 on-chip
 	  serial DMA interrupts are allowed to interrupt normal disabled
 	  sections.
 
diff --git a/arch/mn10300/Kconfig.debug b/arch/mn10300/Kconfig.debug
index ce83c74b3fd7..bdbfd444a9ff 100644
--- a/arch/mn10300/Kconfig.debug
+++ b/arch/mn10300/Kconfig.debug
@@ -36,7 +36,7 @@ config KPROBES
 
 config GDBSTUB
 	bool "Remote GDB kernel debugging"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL && DEPRECATED
 	select DEBUG_INFO
 	select FRAME_POINTER
 	help
@@ -46,6 +46,9 @@ config GDBSTUB
 	  RAM to avoid excessive linking time. This is only useful for kernel
 	  hackers. If unsure, say N.
 
+	  This is deprecated in favour of KGDB and will be removed in a later
+	  version.
+
 config GDBSTUB_IMMEDIATE
 	bool "Break into GDB stub immediately"
 	depends on GDBSTUB
@@ -54,6 +57,14 @@ config GDBSTUB_IMMEDIATE
 	  possible, leaving the program counter at the beginning of
 	  start_kernel() in init/main.c.
 
+config GDBSTUB_ALLOW_SINGLE_STEP
+	bool "Allow software single-stepping in GDB stub"
+	depends on GDBSTUB && !SMP && !PREEMPT
+	help
+	  Allow GDB stub to perform software single-stepping through the
+	  kernel.  This doesn't work very well on SMP or preemptible kernels as
+	  it uses temporary breakpoints to emulate single-stepping.
+
 config GDB_CONSOLE
 	bool "Console output to GDB"
 	depends on GDBSTUB
@@ -142,3 +153,7 @@ config GDBSTUB_ON_TTYSx
 	default y
 
 endmenu
+
+config KERNEL_DEBUGGER
+	def_bool y
+	depends on GDBSTUB || KGDB
diff --git a/arch/mn10300/include/asm/debugger.h b/arch/mn10300/include/asm/debugger.h
new file mode 100644
index 000000000000..e1d3b083696c
--- /dev/null
+++ b/arch/mn10300/include/asm/debugger.h
@@ -0,0 +1,43 @@
+/* Kernel debugger for MN10300
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_DEBUGGER_H
+#define _ASM_DEBUGGER_H
+
+#if defined(CONFIG_KERNEL_DEBUGGER)
+
+extern int debugger_intercept(enum exception_code, int, int, struct pt_regs *);
+extern int at_debugger_breakpoint(struct pt_regs *);
+
+#ifndef CONFIG_MN10300_DEBUGGER_CACHE_NO_FLUSH
+extern void debugger_local_cache_flushinv(void);
+extern void debugger_local_cache_flushinv_one(u8 *);
+#else
+static inline void debugger_local_cache_flushinv(void) {}
+static inline void debugger_local_cache_flushinv_one(u8 *addr) {}
+#endif
+
+#else /* CONFIG_KERNEL_DEBUGGER */
+
+static inline int debugger_intercept(enum exception_code excep,
+				     int signo, int si_code,
+				     struct pt_regs *regs)
+{
+	return 0;
+}
+
+static inline int at_debugger_breakpoint(struct pt_regs *regs)
+{
+	return 0;
+}
+
+#endif /* CONFIG_KERNEL_DEBUGGER */
+#endif /* _ASM_DEBUGGER_H */
diff --git a/arch/mn10300/include/asm/div64.h b/arch/mn10300/include/asm/div64.h
index 34dcb8e68309..503efab2a516 100644
--- a/arch/mn10300/include/asm/div64.h
+++ b/arch/mn10300/include/asm/div64.h
@@ -16,6 +16,19 @@
 extern void ____unhandled_size_in_do_div___(void);
 
 /*
+ * Beginning with gcc 4.6, the MDR register is represented explicitly.  We
+ * must, therefore, at least explicitly clobber the register when we make
+ * changes to it.  The following assembly fragments *could* be rearranged in
+ * order to leave the moves to/from the MDR register to the compiler, but the
+ * gains would be minimal at best.
+ */
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+# define CLOBBER_MDR_CC		"mdr", "cc"
+#else
+# define CLOBBER_MDR_CC		"cc"
+#endif
+
+/*
  * divide n by base, leaving the result in n and returning the remainder
  * - we can do this quite efficiently on the MN10300 by cascading the divides
  *   through the MDR register
@@ -29,7 +42,7 @@ extern void ____unhandled_size_in_do_div___(void);
 		    "mov	mdr,%1	\n"				\
 		    : "+r"(n), "=d"(__rem)				\
 		    : "r"(base), "1"(__rem)				\
-		    : "cc"						\
+		    : CLOBBER_MDR_CC					\
 		    );							\
 	} else if (sizeof(n) <= 8) {					\
 		union {							\
@@ -48,7 +61,7 @@ extern void ____unhandled_size_in_do_div___(void);
 		    : "=d"(__rem), "=r"(__quot.w[1]), "=r"(__quot.w[0])	\
 		    : "r"(base), "0"(__rem), "1"(__quot.w[1]),		\
 		      "2"(__quot.w[0])					\
-		    : "cc"						\
+		    : CLOBBER_MDR_CC					\
 		    );							\
 		n = __quot.l;						\
 	} else {							\
@@ -72,7 +85,7 @@ unsigned __muldiv64u(unsigned val, unsigned mult, unsigned div)
 					 * MDR = MDR:val%div */
 	    : "=r"(result)
 	    : "0"(val), "ir"(mult), "r"(div)
-	    : "cc"
+	    : CLOBBER_MDR_CC
 	    );
 
 	return result;
@@ -93,7 +106,7 @@ signed __muldiv64s(signed val, signed mult, signed div)
 					 * MDR = MDR:val%div */
 	    : "=r"(result)
 	    : "0"(val), "ir"(mult), "r"(div)
-	    : "cc"
+	    : CLOBBER_MDR_CC
 	    );
 
 	return result;
diff --git a/arch/mn10300/include/asm/fpu.h b/arch/mn10300/include/asm/fpu.h
index b7625de8eade..738ff72659d5 100644
--- a/arch/mn10300/include/asm/fpu.h
+++ b/arch/mn10300/include/asm/fpu.h
@@ -55,7 +55,6 @@ static inline void clear_using_fpu(struct task_struct *tsk)
 
 extern asmlinkage void fpu_kill_state(struct task_struct *);
 extern asmlinkage void fpu_exception(struct pt_regs *, enum exception_code);
-extern asmlinkage void fpu_invalid_op(struct pt_regs *, enum exception_code);
 extern asmlinkage void fpu_init_state(void);
 extern asmlinkage void fpu_save(struct fpu_state_struct *);
 extern int fpu_setup_sigcontext(struct fpucontext *buf);
@@ -113,7 +112,6 @@ static inline void flush_fpu(void)
 
 extern asmlinkage
 void unexpected_fpu_exception(struct pt_regs *, enum exception_code);
-#define fpu_invalid_op unexpected_fpu_exception
 #define fpu_exception unexpected_fpu_exception
 
 struct task_struct;
diff --git a/arch/mn10300/include/asm/irqflags.h b/arch/mn10300/include/asm/irqflags.h
index 7a7ae12c7119..678f68d5f37b 100644
--- a/arch/mn10300/include/asm/irqflags.h
+++ b/arch/mn10300/include/asm/irqflags.h
@@ -20,7 +20,7 @@
 /*
  * interrupt control
  * - "disabled": run in IM1/2
- *   - level 0 - GDB stub
+ *   - level 0 - kernel debugger
  *   - level 1 - virtual serial DMA (if present)
  *   - level 5 - normal interrupt priority
  *   - level 6 - timer interrupt
diff --git a/arch/mn10300/include/asm/kgdb.h b/arch/mn10300/include/asm/kgdb.h
new file mode 100644
index 000000000000..eb245f18a708
--- /dev/null
+++ b/arch/mn10300/include/asm/kgdb.h
@@ -0,0 +1,81 @@
+/* Kernel debugger for MN10300
+ *
+ * Copyright (C) 2010 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_KGDB_H
+#define _ASM_KGDB_H
+
+/*
+ * BUFMAX defines the maximum number of characters in inbound/outbound
+ * buffers at least NUMREGBYTES*2 are needed for register packets
+ * Longer buffer is needed to list all threads
+ */
+#define BUFMAX			1024
+
+/*
+ * Note that this register image is in a different order than the register
+ * image that Linux produces at interrupt time.
+ */
+enum regnames {
+	GDB_FR_D0		= 0,
+	GDB_FR_D1		= 1,
+	GDB_FR_D2		= 2,
+	GDB_FR_D3		= 3,
+	GDB_FR_A0		= 4,
+	GDB_FR_A1		= 5,
+	GDB_FR_A2		= 6,
+	GDB_FR_A3		= 7,
+
+	GDB_FR_SP		= 8,
+	GDB_FR_PC		= 9,
+	GDB_FR_MDR		= 10,
+	GDB_FR_EPSW		= 11,
+	GDB_FR_LIR		= 12,
+	GDB_FR_LAR		= 13,
+	GDB_FR_MDRQ		= 14,
+
+	GDB_FR_E0		= 15,
+	GDB_FR_E1		= 16,
+	GDB_FR_E2		= 17,
+	GDB_FR_E3		= 18,
+	GDB_FR_E4		= 19,
+	GDB_FR_E5		= 20,
+	GDB_FR_E6		= 21,
+	GDB_FR_E7		= 22,
+
+	GDB_FR_SSP		= 23,
+	GDB_FR_MSP		= 24,
+	GDB_FR_USP		= 25,
+	GDB_FR_MCRH		= 26,
+	GDB_FR_MCRL		= 27,
+	GDB_FR_MCVF		= 28,
+
+	GDB_FR_FPCR		= 29,
+	GDB_FR_DUMMY0		= 30,
+	GDB_FR_DUMMY1		= 31,
+
+	GDB_FR_FS0		= 32,
+
+	GDB_FR_SIZE		= 64,
+};
+
+#define GDB_ORIG_D0		41
+#define NUMREGBYTES		(GDB_FR_SIZE*4)
+
+static inline void arch_kgdb_breakpoint(void)
+{
+	asm(".globl __arch_kgdb_breakpoint; __arch_kgdb_breakpoint: break");
+}
+extern u8 __arch_kgdb_breakpoint;
+
+#define BREAK_INSTR_SIZE	1
+#define CACHE_FLUSH_IS_SAFE	1
+
+#endif /* _ASM_KGDB_H */
diff --git a/arch/mn10300/include/asm/smp.h b/arch/mn10300/include/asm/smp.h
index a3930e43a958..6745dbe64944 100644
--- a/arch/mn10300/include/asm/smp.h
+++ b/arch/mn10300/include/asm/smp.h
@@ -34,7 +34,7 @@
 #define LOCAL_TIMER_IPI		193
 #define FLUSH_CACHE_IPI		194
 #define CALL_FUNCTION_NMI_IPI	195
-#define GDB_NMI_IPI		196
+#define DEBUGGER_NMI_IPI	196
 
 #define SMP_BOOT_IRQ		195
 
@@ -43,6 +43,7 @@
 #define LOCAL_TIMER_GxICR_LV	GxICR_LEVEL_4
 #define FLUSH_CACHE_GxICR_LV	GxICR_LEVEL_0
 #define SMP_BOOT_GxICR_LV	GxICR_LEVEL_0
+#define DEBUGGER_GxICR_LV	CONFIG_DEBUGGER_IRQ_LEVEL
 
 #define TIME_OUT_COUNT_BOOT_IPI	100
 #define DELAY_TIME_BOOT_IPI	75000
@@ -61,8 +62,9 @@
  * An alternate way of dealing with this could be to use the EPSW.S bits to
  * cache this information for systems with up to four CPUs.
  */
+#define arch_smp_processor_id()	(CPUID)
 #if 0
-#define raw_smp_processor_id()	(CPUID)
+#define raw_smp_processor_id()	(arch_smp_processor_id())
 #else
 #define raw_smp_processor_id()	(current_thread_info()->cpu)
 #endif
diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h
index 8d53f09c878d..87c213002d4c 100644
--- a/arch/mn10300/include/asm/thread_info.h
+++ b/arch/mn10300/include/asm/thread_info.h
@@ -131,7 +131,11 @@ static inline unsigned long current_stack_pointer(void)
 		kmalloc_node(THREAD_SIZE, GFP_KERNEL, node)
 #endif
 
+#ifndef CONFIG_KGDB
 #define free_thread_info(ti)	kfree((ti))
+#else
+extern void free_thread_info(struct thread_info *);
+#endif
 #define get_thread_info(ti)	get_task_struct((ti)->task)
 #define put_thread_info(ti)	put_task_struct((ti)->task)
 
diff --git a/arch/mn10300/kernel/Makefile b/arch/mn10300/kernel/Makefile
index a06a2e10051d..47ed30fe8178 100644
--- a/arch/mn10300/kernel/Makefile
+++ b/arch/mn10300/kernel/Makefile
@@ -21,11 +21,8 @@ obj-$(CONFIG_GDBSTUB) += gdb-stub.o gdb-low.o
 obj-$(CONFIG_GDBSTUB_ON_TTYSx) += gdb-io-serial.o gdb-io-serial-low.o
 obj-$(CONFIG_GDBSTUB_ON_TTYSMx) += gdb-io-ttysm.o gdb-io-ttysm-low.o
 
-ifeq ($(CONFIG_MN10300_CACHE_ENABLED),y)
-obj-$(CONFIG_GDBSTUB) += gdb-cache.o
-endif
-
 obj-$(CONFIG_MN10300_RTC) += rtc.o
 obj-$(CONFIG_PROFILE) += profile.o profile-low.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_KGDB) += kgdb.o
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index f00b9bafcd3e..fb93ad720b82 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -266,7 +266,11 @@ ENTRY(raw_bus_error)
 
 ###############################################################################
 #
-# Miscellaneous exception entry points
+# NMI exception entry points
+#
+# This is used by ordinary interrupt channels that have the GxICR_NMI bit set
+# in addition to the main NMI and Watchdog channels.  SMP NMI IPIs use this
+# facility.
 #
 ###############################################################################
 ENTRY(nmi_handler)
@@ -281,7 +285,7 @@ ENTRY(nmi_handler)
 	and	NMIAGR_GN,d0
 	lsr	0x2,d0
 	cmp	CALL_FUNCTION_NMI_IPI,d0
-	bne	5f			# if not call function, jump
+	bne	nmi_not_smp_callfunc	# if not call function, jump
 
 	# function call nmi ipi
 	add	4,sp			# no need to store TBR
@@ -295,59 +299,38 @@ ENTRY(nmi_handler)
 	call	smp_nmi_call_function_interrupt[],0
 	RESTORE_ALL
 
-5:
-#ifdef CONFIG_GDBSTUB
-	cmp	GDB_NMI_IPI,d0
-	bne	3f			# if not gdb nmi ipi, jump
+nmi_not_smp_callfunc:
+#ifdef CONFIG_KERNEL_DEBUGGER
+	cmp	DEBUGGER_NMI_IPI,d0
+	bne	nmi_not_debugger	# if not kernel debugger NMI IPI, jump
 
-	# gdb nmi ipi
+	# kernel debugger NMI IPI
 	add	4,sp			# no need to store TBR
 	mov	GxICR_DETECT,d0		# clear NMI
-	movbu	d0,(GxICR(GDB_NMI_IPI))
-	movhu	(GxICR(GDB_NMI_IPI)),d0
+	movbu	d0,(GxICR(DEBUGGER_NMI_IPI))
+	movhu	(GxICR(DEBUGGER_NMI_IPI)),d0
 	and	~EPSW_NMID,epsw		# enable NMI
-#ifdef CONFIG_MN10300_CACHE_ENABLED
-	mov	(gdbstub_nmi_opr_type),d0
-	cmp	GDBSTUB_NMI_CACHE_PURGE,d0
-	bne	4f			# if not gdb cache purge, jump
-
-	# gdb cache purge nmi ipi
-	add	-20,sp
-	mov	d1,(4,sp)
-	mov	a0,(8,sp)
-	mov	a1,(12,sp)
-	mov	mdr,d0
-	mov	d0,(16,sp)
-	call	gdbstub_local_purge_cache[],0
-	mov	0x1,d0
-	mov	(CPUID),d1
-	asl	d1,d0
-	mov	gdbstub_nmi_cpumask,a0
-	bclr	d0,(a0)
-	mov	(4,sp),d1
-	mov	(8,sp),a0
-	mov	(12,sp),a1
-	mov	(16,sp),d0
-	mov	d0,mdr
-	add	20,sp
-	mov	(sp),d0
-	add	4,sp
-	rti
-4:
-#endif /* CONFIG_MN10300_CACHE_ENABLED */
-	# gdb wait nmi ipi
+
 	mov     (sp),d0
 	SAVE_ALL
-	call    gdbstub_nmi_wait[],0
+	mov	fp,d0			# arg 0: stacked register file
+	mov	a2,d1			# arg 1: exception number
+	call    debugger_nmi_interrupt[],0
 	RESTORE_ALL
-3:
-#endif /* CONFIG_GDBSTUB */
+
+nmi_not_debugger:
+#endif /* CONFIG_KERNEL_DEBUGGER */
 	mov     (sp),d0                 # restore TBR to d0
 	add     4,sp
 #endif /* CONFIG_SMP */
 
 	bra	__common_exception_nonmi
 
+###############################################################################
+#
+# General exception entry point
+#
+###############################################################################
 ENTRY(__common_exception)
 	add	-4,sp
 	mov	d0,(sp)
diff --git a/arch/mn10300/kernel/fpu.c b/arch/mn10300/kernel/fpu.c
index 5f9c3fa19a85..bb5fa7df6c44 100644
--- a/arch/mn10300/kernel/fpu.c
+++ b/arch/mn10300/kernel/fpu.c
@@ -70,24 +70,6 @@ asmlinkage void fpu_exception(struct pt_regs *regs, enum exception_code code)
 }
 
 /*
- * handle an FPU invalid_op exception
- * - Derived from DO_EINFO() macro in arch/mn10300/kernel/traps.c
- */
-asmlinkage void fpu_invalid_op(struct pt_regs *regs, enum exception_code code)
-{
-	siginfo_t info;
-
-	if (!user_mode(regs))
-		die_if_no_fixup("FPU invalid opcode", regs, code);
-
-	info.si_signo = SIGILL;
-	info.si_errno = 0;
-	info.si_code = ILL_COPROC;
-	info.si_addr = (void *) regs->pc;
-	force_sig_info(info.si_signo, &info, current);
-}
-
-/*
  * save the FPU state to a signal context
  */
 int fpu_setup_sigcontext(struct fpucontext *fpucontext)
diff --git a/arch/mn10300/kernel/gdb-cache.S b/arch/mn10300/kernel/gdb-cache.S
deleted file mode 100644
index 1108badc3d32..000000000000
--- a/arch/mn10300/kernel/gdb-cache.S
+++ /dev/null
@@ -1,105 +0,0 @@
-###############################################################################
-#
-# MN10300 Low-level cache purging routines for gdbstub
-#
-# Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
-# Written by David Howells (dhowells@redhat.com)
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public Licence
-# as published by the Free Software Foundation; either version
-# 2 of the Licence, or (at your option) any later version.
-#
-###############################################################################
-#include <linux/sys.h>
-#include <linux/linkage.h>
-#include <asm/smp.h>
-#include <asm/cache.h>
-#include <asm/cpu-regs.h>
-#include <asm/exceptions.h>
-#include <asm/frame.inc>
-#include <asm/serial-regs.h>
-
-	.text
-
-###############################################################################
-#
-# GDB stub cache purge
-#
-###############################################################################
-	.type	gdbstub_purge_cache,@function
-ENTRY(gdbstub_purge_cache)
-	#######################################################################
-	# read the addresses tagged in the cache's tag RAM and attempt to flush
-	# those addresses specifically
-	# - we rely on the hardware to filter out invalid tag entry addresses
-	mov	DCACHE_TAG(0,0),a0		# dcache tag RAM access address
-	mov	DCACHE_PURGE(0,0),a1		# dcache purge request address
-	mov	L1_CACHE_NWAYS*L1_CACHE_NENTRIES,d1  # total number of entries
-
-mn10300_dcache_flush_loop:
-	mov	(a0),d0
-	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0
-	or	L1_CACHE_TAG_VALID,d0		# retain valid entries in the
-						# cache
-	mov	d0,(a1)				# conditional purge
-
-mn10300_dcache_flush_skip:
-	add	L1_CACHE_BYTES,a0
-	add	L1_CACHE_BYTES,a1
-	add	-1,d1
-	bne	mn10300_dcache_flush_loop
-
-;; 	# unconditionally flush and invalidate the dcache
-;; 	mov	DCACHE_PURGE(0,0),a1		# dcache purge request address
-;; 	mov	L1_CACHE_NWAYS*L1_CACHE_NENTRIES,d1	# total number of
-;;							# entries
-;;
-;; gdbstub_purge_cache__dcache_loop:
-;; 	mov	(a1),d0				# unconditional purge
-;;
-;; 	add	L1_CACHE_BYTES,a1
-;; 	add	-1,d1
-;; 	bne	gdbstub_purge_cache__dcache_loop
-
-	#######################################################################
-	# now invalidate the icache
-	mov	CHCTR,a0
-	movhu	(a0),a1
-
-	mov	epsw,d1
-	and	~EPSW_IE,epsw
-	nop
-	nop
-
-	# disable the icache
-	and	~CHCTR_ICEN,d0
-	movhu	d0,(a0)
-
-	# and wait for it to calm down
-	setlb
-	movhu	(a0),d0
-	btst	CHCTR_ICBUSY,d0
-	lne
-
-	# invalidate
-	or	CHCTR_ICINV,d0
-	movhu	d0,(a0)
-
-	# wait for the cache to finish
-	mov	CHCTR,a0
-	setlb
-	movhu	(a0),d0
-	btst	CHCTR_ICBUSY,d0
-	lne
-
-	# and reenable it
-	movhu	a1,(a0)
-	movhu	(a0),d0			# read back to flush
-					# (SIGILLs all over without this)
-
-	mov	d1,epsw
-
-	ret	[],0
-
-	.size	gdbstub_purge_cache,.-gdbstub_purge_cache
diff --git a/arch/mn10300/kernel/gdb-io-ttysm.c b/arch/mn10300/kernel/gdb-io-ttysm.c
index abdeea153c89..c859cacbb9c3 100644
--- a/arch/mn10300/kernel/gdb-io-ttysm.c
+++ b/arch/mn10300/kernel/gdb-io-ttysm.c
@@ -59,10 +59,10 @@ void __init gdbstub_io_init(void)
 
 	/* we want to get serial receive interrupts */
 	set_intr_level(gdbstub_port->rx_irq,
-		NUM2GxICR_LEVEL(CONFIG_GDBSTUB_IRQ_LEVEL));
+		NUM2GxICR_LEVEL(CONFIG_DEBUGGER_IRQ_LEVEL));
 	set_intr_level(gdbstub_port->tx_irq,
-		NUM2GxICR_LEVEL(CONFIG_GDBSTUB_IRQ_LEVEL));
-	set_intr_stub(NUM2EXCEP_IRQ_LEVEL(CONFIG_GDBSTUB_IRQ_LEVEL),
+		NUM2GxICR_LEVEL(CONFIG_DEBUGGER_IRQ_LEVEL));
+	set_intr_stub(NUM2EXCEP_IRQ_LEVEL(CONFIG_DEBUGGER_IRQ_LEVEL),
 		gdbstub_io_rx_handler);
 
 	*gdbstub_port->rx_icr |= GxICR_ENABLE;
@@ -88,7 +88,7 @@ void __init gdbstub_io_init(void)
 
 	/* permit level 0 IRQs only */
 	arch_local_change_intr_mask_level(
-		NUM2EPSW_IM(CONFIG_GDBSTUB_IRQ_LEVEL + 1));
+		NUM2EPSW_IM(CONFIG_DEBUGGER_IRQ_LEVEL + 1));
 }
 
 /*
diff --git a/arch/mn10300/kernel/gdb-stub.c b/arch/mn10300/kernel/gdb-stub.c
index b169d99d9f20..538266b2c9bc 100644
--- a/arch/mn10300/kernel/gdb-stub.c
+++ b/arch/mn10300/kernel/gdb-stub.c
@@ -133,7 +133,7 @@
 #include <asm/system.h>
 #include <asm/gdb-stub.h>
 #include <asm/exceptions.h>
-#include <asm/cacheflush.h>
+#include <asm/debugger.h>
 #include <asm/serial-regs.h>
 #include <asm/busctl-regs.h>
 #include <unit/leds.h>
@@ -405,6 +405,7 @@ static int hexToInt(char **ptr, int *intValue)
 	return (numChars);
 }
 
+#ifdef CONFIG_GDBSTUB_ALLOW_SINGLE_STEP
 /*
  * We single-step by setting breakpoints. When an exception
  * is handled, we need to restore the instructions hoisted
@@ -729,6 +730,7 @@ static int gdbstub_single_step(struct pt_regs *regs)
 	__gdbstub_restore_bp();
 	return -EFAULT;
 }
+#endif /* CONFIG_GDBSTUB_ALLOW_SINGLE_STEP */
 
 #ifdef CONFIG_GDBSTUB_CONSOLE
 
@@ -1171,7 +1173,7 @@ int gdbstub_clear_breakpoint(u8 *addr, int len)
 
 /*
  * This function does all command processing for interfacing to gdb
- * - returns 1 if the exception should be skipped, 0 otherwise.
+ * - returns 0 if the exception should be skipped, -ERROR otherwise.
  */
 static int gdbstub(struct pt_regs *regs, enum exception_code excep)
 {
@@ -1186,7 +1188,7 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep)
 	int loop;
 
 	if (excep == EXCEP_FPU_DISABLED)
-		return 0;
+		return -ENOTSUPP;
 
 	gdbstub_flush_caches = 0;
 
@@ -1195,7 +1197,7 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep)
 	asm volatile("mov mdr,%0" : "=d"(mdr));
 	local_save_flags(epsw);
 	arch_local_change_intr_mask_level(
-		NUM2EPSW_IM(CONFIG_GDBSTUB_IRQ_LEVEL + 1));
+		NUM2EPSW_IM(CONFIG_DEBUGGER_IRQ_LEVEL + 1));
 
 	gdbstub_store_fpu();
 
@@ -1208,11 +1210,13 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep)
 	/* if we were single stepping, restore the opcodes hoisted for the
 	 * breakpoint[s] */
 	broke = 0;
+#ifdef CONFIG_GDBSTUB_ALLOW_SINGLE_STEP
 	if ((step_bp[0].addr && step_bp[0].addr == (u8 *) regs->pc) ||
 	    (step_bp[1].addr && step_bp[1].addr == (u8 *) regs->pc))
 		broke = 1;
 
 	__gdbstub_restore_bp();
+#endif
 
 	if (gdbstub_rx_unget) {
 		sigval = SIGINT;
@@ -1548,17 +1552,21 @@ packet_waiting:
 			 * Step to next instruction
 			 */
 		case 's':
-			/*
-			 * using the T flag doesn't seem to perform single
+			/* Using the T flag doesn't seem to perform single
 			 * stepping (it seems to wind up being caught by the
 			 * JTAG unit), so we have to use breakpoints and
 			 * continue instead.
 			 */
+#ifdef CONFIG_GDBSTUB_ALLOW_SINGLE_STEP
 			if (gdbstub_single_step(regs) < 0)
 				/* ignore any fault error for now */
 				gdbstub_printk("unable to set single-step"
 					       " bp\n");
 			goto done;
+#else
+			gdbstub_strcpy(output_buffer, "E01");
+			break;
+#endif
 
 			/*
 			 * Set baud rate (bBB)
@@ -1657,7 +1665,7 @@ done:
 	 * NB: We flush both caches, just to be sure...
 	 */
 	if (gdbstub_flush_caches)
-		gdbstub_purge_cache();
+		debugger_local_cache_flushinv();
 
 	gdbstub_load_fpu();
 	mn10300_set_gdbleds(0);
@@ -1667,14 +1675,23 @@ done:
 	touch_softlockup_watchdog();
 
 	local_irq_restore(epsw);
-	return 1;
+	return 0;
+}
+
+/*
+ * Determine if we hit a debugger special breakpoint that needs skipping over
+ * automatically.
+ */
+int at_debugger_breakpoint(struct pt_regs *regs)
+{
+	return 0;
 }
 
 /*
  * handle event interception
  */
-asmlinkage int gdbstub_intercept(struct pt_regs *regs,
-				 enum exception_code excep)
+asmlinkage int debugger_intercept(enum exception_code excep,
+				  int signo, int si_code, struct pt_regs *regs)
 {
 	static u8 notfirst = 1;
 	int ret;
@@ -1688,7 +1705,7 @@ asmlinkage int gdbstub_intercept(struct pt_regs *regs,
 		asm("mov mdr,%0" : "=d"(mdr));
 
 		gdbstub_entry(
-			"--> gdbstub_intercept(%p,%04x) [MDR=%lx PC=%lx]\n",
+			"--> debugger_intercept(%p,%04x) [MDR=%lx PC=%lx]\n",
 			regs, excep, mdr, regs->pc);
 
 		gdbstub_entry(
@@ -1722,7 +1739,7 @@ asmlinkage int gdbstub_intercept(struct pt_regs *regs,
 
 	ret = gdbstub(regs, excep);
 
-	gdbstub_entry("<-- gdbstub_intercept()\n");
+	gdbstub_entry("<-- debugger_intercept()\n");
 	gdbstub_busy = 0;
 	return ret;
 }
diff --git a/arch/mn10300/kernel/internal.h b/arch/mn10300/kernel/internal.h
index ea946613f46d..a5ac755dd69f 100644
--- a/arch/mn10300/kernel/internal.h
+++ b/arch/mn10300/kernel/internal.h
@@ -30,6 +30,13 @@ extern void mn10300_low_ipi_handler(void);
 #endif
 
 /*
+ * smp.c
+ */
+#ifdef CONFIG_SMP
+extern void smp_jump_to_debugger(void);
+#endif
+
+/*
  * time.c
  */
 extern irqreturn_t local_timer_interrupt(void);
diff --git a/arch/mn10300/kernel/irq.c b/arch/mn10300/kernel/irq.c
index f09fed5e6afc..5f7fc3eb45e6 100644
--- a/arch/mn10300/kernel/irq.c
+++ b/arch/mn10300/kernel/irq.c
@@ -153,7 +153,7 @@ mn10300_cpupic_setaffinity(struct irq_data *d, const struct cpumask *mask,
 	case LOCAL_TIMER_IPI:
 	case FLUSH_CACHE_IPI:
 	case CALL_FUNCTION_NMI_IPI:
-	case GDB_NMI_IPI:
+	case DEBUGGER_NMI_IPI:
 #ifdef CONFIG_MN10300_TTYSM0
 	case SC0RXIRQ:
 	case SC0TXIRQ:
diff --git a/arch/mn10300/kernel/kgdb.c b/arch/mn10300/kernel/kgdb.c
new file mode 100644
index 000000000000..f6c981db2a36
--- /dev/null
+++ b/arch/mn10300/kernel/kgdb.c
@@ -0,0 +1,502 @@
+/* kgdb support for MN10300
+ *
+ * Copyright (C) 2010 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/ptrace.h>
+#include <linux/kgdb.h>
+#include <linux/uaccess.h>
+#include <unit/leds.h>
+#include <unit/serial.h>
+#include <asm/debugger.h>
+#include <asm/serial-regs.h>
+#include "internal.h"
+
+/*
+ * Software single-stepping breakpoint save (used by __switch_to())
+ */
+static struct thread_info *kgdb_sstep_thread;
+u8 *kgdb_sstep_bp_addr[2];
+u8 kgdb_sstep_bp[2];
+
+/*
+ * Copy kernel exception frame registers to the GDB register file
+ */
+void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+	unsigned long ssp = (unsigned long) (regs + 1);
+
+	gdb_regs[GDB_FR_D0]	= regs->d0;
+	gdb_regs[GDB_FR_D1]	= regs->d1;
+	gdb_regs[GDB_FR_D2]	= regs->d2;
+	gdb_regs[GDB_FR_D3]	= regs->d3;
+	gdb_regs[GDB_FR_A0]	= regs->a0;
+	gdb_regs[GDB_FR_A1]	= regs->a1;
+	gdb_regs[GDB_FR_A2]	= regs->a2;
+	gdb_regs[GDB_FR_A3]	= regs->a3;
+	gdb_regs[GDB_FR_SP]	= (regs->epsw & EPSW_nSL) ? regs->sp : ssp;
+	gdb_regs[GDB_FR_PC]	= regs->pc;
+	gdb_regs[GDB_FR_MDR]	= regs->mdr;
+	gdb_regs[GDB_FR_EPSW]	= regs->epsw;
+	gdb_regs[GDB_FR_LIR]	= regs->lir;
+	gdb_regs[GDB_FR_LAR]	= regs->lar;
+	gdb_regs[GDB_FR_MDRQ]	= regs->mdrq;
+	gdb_regs[GDB_FR_E0]	= regs->e0;
+	gdb_regs[GDB_FR_E1]	= regs->e1;
+	gdb_regs[GDB_FR_E2]	= regs->e2;
+	gdb_regs[GDB_FR_E3]	= regs->e3;
+	gdb_regs[GDB_FR_E4]	= regs->e4;
+	gdb_regs[GDB_FR_E5]	= regs->e5;
+	gdb_regs[GDB_FR_E6]	= regs->e6;
+	gdb_regs[GDB_FR_E7]	= regs->e7;
+	gdb_regs[GDB_FR_SSP]	= ssp;
+	gdb_regs[GDB_FR_MSP]	= 0;
+	gdb_regs[GDB_FR_USP]	= regs->sp;
+	gdb_regs[GDB_FR_MCRH]	= regs->mcrh;
+	gdb_regs[GDB_FR_MCRL]	= regs->mcrl;
+	gdb_regs[GDB_FR_MCVF]	= regs->mcvf;
+	gdb_regs[GDB_FR_DUMMY0]	= 0;
+	gdb_regs[GDB_FR_DUMMY1]	= 0;
+	gdb_regs[GDB_FR_FS0]	= 0;
+}
+
+/*
+ * Extracts kernel SP/PC values understandable by gdb from the values
+ * saved by switch_to().
+ */
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+	gdb_regs[GDB_FR_SSP]	= p->thread.sp;
+	gdb_regs[GDB_FR_PC]	= p->thread.pc;
+	gdb_regs[GDB_FR_A3]	= p->thread.a3;
+	gdb_regs[GDB_FR_USP]	= p->thread.usp;
+	gdb_regs[GDB_FR_FPCR]	= p->thread.fpu_state.fpcr;
+}
+
+/*
+ * Fill kernel exception frame registers from the GDB register file
+ */
+void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+	regs->d0	= gdb_regs[GDB_FR_D0];
+	regs->d1	= gdb_regs[GDB_FR_D1];
+	regs->d2	= gdb_regs[GDB_FR_D2];
+	regs->d3	= gdb_regs[GDB_FR_D3];
+	regs->a0	= gdb_regs[GDB_FR_A0];
+	regs->a1	= gdb_regs[GDB_FR_A1];
+	regs->a2	= gdb_regs[GDB_FR_A2];
+	regs->a3	= gdb_regs[GDB_FR_A3];
+	regs->sp	= gdb_regs[GDB_FR_SP];
+	regs->pc	= gdb_regs[GDB_FR_PC];
+	regs->mdr	= gdb_regs[GDB_FR_MDR];
+	regs->epsw	= gdb_regs[GDB_FR_EPSW];
+	regs->lir	= gdb_regs[GDB_FR_LIR];
+	regs->lar	= gdb_regs[GDB_FR_LAR];
+	regs->mdrq	= gdb_regs[GDB_FR_MDRQ];
+	regs->e0	= gdb_regs[GDB_FR_E0];
+	regs->e1	= gdb_regs[GDB_FR_E1];
+	regs->e2	= gdb_regs[GDB_FR_E2];
+	regs->e3	= gdb_regs[GDB_FR_E3];
+	regs->e4	= gdb_regs[GDB_FR_E4];
+	regs->e5	= gdb_regs[GDB_FR_E5];
+	regs->e6	= gdb_regs[GDB_FR_E6];
+	regs->e7	= gdb_regs[GDB_FR_E7];
+	regs->sp	= gdb_regs[GDB_FR_SSP];
+	/* gdb_regs[GDB_FR_MSP]; */
+	// regs->usp	= gdb_regs[GDB_FR_USP];
+	regs->mcrh	= gdb_regs[GDB_FR_MCRH];
+	regs->mcrl	= gdb_regs[GDB_FR_MCRL];
+	regs->mcvf	= gdb_regs[GDB_FR_MCVF];
+	/* gdb_regs[GDB_FR_DUMMY0]; */
+	/* gdb_regs[GDB_FR_DUMMY1]; */
+
+	// regs->fpcr	= gdb_regs[GDB_FR_FPCR];
+	// regs->fs0	= gdb_regs[GDB_FR_FS0];
+}
+
+struct kgdb_arch arch_kgdb_ops = {
+	.gdb_bpt_instr	= { 0xff },
+	.flags		= KGDB_HW_BREAKPOINT,
+};
+
+static const unsigned char mn10300_kgdb_insn_sizes[256] =
+{
+	/* 1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+	1, 3, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3,	/* 0 */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 1 */
+	2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, /* 2 */
+	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, /* 3 */
+	1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, /* 4 */
+	1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, /* 5 */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6 */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 7 */
+	2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* 8 */
+	2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* 9 */
+	2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* a */
+	2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* b */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 2, /* c */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* d */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* e */
+	0, 2, 2, 2, 2, 2, 2, 4, 0, 3, 0, 4, 0, 6, 7, 1  /* f */
+};
+
+/*
+ * Attempt to emulate single stepping by means of breakpoint instructions.
+ * Although there is a single-step trace flag in EPSW, its use is not
+ * sufficiently documented and is only intended for use with the JTAG debugger.
+ */
+static int kgdb_arch_do_singlestep(struct pt_regs *regs)
+{
+	unsigned long arg;
+	unsigned size;
+	u8 *pc = (u8 *)regs->pc, *sp = (u8 *)(regs + 1), cur;
+	u8 *x = NULL, *y = NULL;
+	int ret;
+
+	ret = probe_kernel_read(&cur, pc, 1);
+	if (ret < 0)
+		return ret;
+
+	size = mn10300_kgdb_insn_sizes[cur];
+	if (size > 0) {
+		x = pc + size;
+		goto set_x;
+	}
+
+	switch (cur) {
+		/* Bxx (d8,PC) */
+	case 0xc0 ... 0xca:
+		ret = probe_kernel_read(&arg, pc + 1, 1);
+		if (ret < 0)
+			return ret;
+		x = pc + 2;
+		if (arg >= 0 && arg <= 2)
+			goto set_x;
+		y = pc + (s8)arg;
+		goto set_x_and_y;
+
+		/* LXX (d8,PC) */
+	case 0xd0 ... 0xda:
+		x = pc + 1;
+		if (regs->pc == regs->lar)
+			goto set_x;
+		y = (u8 *)regs->lar;
+		goto set_x_and_y;
+
+		/* SETLB - loads the next four bytes into the LIR register
+		 * (which mustn't include a breakpoint instruction) */
+	case 0xdb:
+		x = pc + 5;
+		goto set_x;
+
+		/* JMP (d16,PC) or CALL (d16,PC) */
+	case 0xcc:
+	case 0xcd:
+		ret = probe_kernel_read(&arg, pc + 1, 2);
+		if (ret < 0)
+			return ret;
+		x = pc + (s16)arg;
+		goto set_x;
+
+		/* JMP (d32,PC) or CALL (d32,PC) */
+	case 0xdc:
+	case 0xdd:
+		ret = probe_kernel_read(&arg, pc + 1, 4);
+		if (ret < 0)
+			return ret;
+		x = pc + (s32)arg;
+		goto set_x;
+
+		/* RETF */
+	case 0xde:
+		x = (u8 *)regs->mdr;
+		goto set_x;
+
+		/* RET */
+	case 0xdf:
+		ret = probe_kernel_read(&arg, pc + 2, 1);
+		if (ret < 0)
+			return ret;
+		ret = probe_kernel_read(&x, sp + (s8)arg, 4);
+		if (ret < 0)
+			return ret;
+		goto set_x;
+
+	case 0xf0:
+		ret = probe_kernel_read(&cur, pc + 1, 1);
+		if (ret < 0)
+			return ret;
+
+		if (cur >= 0xf0 && cur <= 0xf7) {
+			/* JMP (An) / CALLS (An) */
+			switch (cur & 3) {
+			case 0: x = (u8 *)regs->a0; break;
+			case 1: x = (u8 *)regs->a1; break;
+			case 2: x = (u8 *)regs->a2; break;
+			case 3: x = (u8 *)regs->a3; break;
+			}
+			goto set_x;
+		} else if (cur == 0xfc) {
+			/* RETS */
+			ret = probe_kernel_read(&x, sp, 4);
+			if (ret < 0)
+				return ret;
+			goto set_x;
+		} else if (cur == 0xfd) {
+			/* RTI */
+			ret = probe_kernel_read(&x, sp + 4, 4);
+			if (ret < 0)
+				return ret;
+			goto set_x;
+		} else {
+			x = pc + 2;
+			goto set_x;
+		}
+		break;
+
+		/* potential 3-byte conditional branches */
+	case 0xf8:
+		ret = probe_kernel_read(&cur, pc + 1, 1);
+		if (ret < 0)
+			return ret;
+		x = pc + 3;
+
+		if (cur >= 0xe8 && cur <= 0xeb) {
+			ret = probe_kernel_read(&arg, pc + 2, 1);
+			if (ret < 0)
+				return ret;
+			if (arg >= 0 && arg <= 3)
+				goto set_x;
+			y = pc + (s8)arg;
+			goto set_x_and_y;
+		}
+		goto set_x;
+
+	case 0xfa:
+		ret = probe_kernel_read(&cur, pc + 1, 1);
+		if (ret < 0)
+			return ret;
+
+		if (cur == 0xff) {
+			/* CALLS (d16,PC) */
+			ret = probe_kernel_read(&arg, pc + 2, 2);
+			if (ret < 0)
+				return ret;
+			x = pc + (s16)arg;
+			goto set_x;
+		}
+
+		x = pc + 4;
+		goto set_x;
+
+	case 0xfc:
+		ret = probe_kernel_read(&cur, pc + 1, 1);
+		if (ret < 0)
+			return ret;
+
+		if (cur == 0xff) {
+			/* CALLS (d32,PC) */
+			ret = probe_kernel_read(&arg, pc + 2, 4);
+			if (ret < 0)
+				return ret;
+			x = pc + (s32)arg;
+			goto set_x;
+		}
+
+		x = pc + 6;
+		goto set_x;
+	}
+
+	return 0;
+
+set_x:
+	kgdb_sstep_bp_addr[0] = x;
+	kgdb_sstep_bp_addr[1] = NULL;
+	ret = probe_kernel_read(&kgdb_sstep_bp[0], x, 1);
+	if (ret < 0)
+		return ret;
+	ret = probe_kernel_write(x, &arch_kgdb_ops.gdb_bpt_instr, 1);
+	if (ret < 0)
+		return ret;
+	kgdb_sstep_thread = current_thread_info();
+	debugger_local_cache_flushinv_one(x);
+	return ret;
+
+set_x_and_y:
+	kgdb_sstep_bp_addr[0] = x;
+	kgdb_sstep_bp_addr[1] = y;
+	ret = probe_kernel_read(&kgdb_sstep_bp[0], x, 1);
+	if (ret < 0)
+		return ret;
+	ret = probe_kernel_read(&kgdb_sstep_bp[1], y, 1);
+	if (ret < 0)
+		return ret;
+	ret = probe_kernel_write(x, &arch_kgdb_ops.gdb_bpt_instr, 1);
+	if (ret < 0)
+		return ret;
+	ret = probe_kernel_write(y, &arch_kgdb_ops.gdb_bpt_instr, 1);
+	if (ret < 0) {
+		probe_kernel_write(kgdb_sstep_bp_addr[0],
+				   &kgdb_sstep_bp[0], 1);
+	} else {
+		kgdb_sstep_thread = current_thread_info();
+	}
+	debugger_local_cache_flushinv_one(x);
+	debugger_local_cache_flushinv_one(y);
+	return ret;
+}
+
+/*
+ * Remove emplaced single-step breakpoints, returning true if we hit one of
+ * them.
+ */
+static bool kgdb_arch_undo_singlestep(struct pt_regs *regs)
+{
+	bool hit = false;
+	u8 *x = kgdb_sstep_bp_addr[0], *y = kgdb_sstep_bp_addr[1];
+	u8 opcode;
+
+	if (kgdb_sstep_thread == current_thread_info()) {
+		if (x) {
+			if (x == (u8 *)regs->pc)
+				hit = true;
+			if (probe_kernel_read(&opcode, x,
+					      1) < 0 ||
+			    opcode != 0xff)
+				BUG();
+			probe_kernel_write(x, &kgdb_sstep_bp[0], 1);
+			debugger_local_cache_flushinv_one(x);
+		}
+		if (y) {
+			if (y == (u8 *)regs->pc)
+				hit = true;
+			if (probe_kernel_read(&opcode, y,
+					      1) < 0 ||
+			    opcode != 0xff)
+				BUG();
+			probe_kernel_write(y, &kgdb_sstep_bp[1], 1);
+			debugger_local_cache_flushinv_one(y);
+		}
+	}
+
+	kgdb_sstep_bp_addr[0] = NULL;
+	kgdb_sstep_bp_addr[1] = NULL;
+	kgdb_sstep_thread = NULL;
+	return hit;
+}
+
+/*
+ * Catch a single-step-pending thread being deleted and make sure the global
+ * single-step state is cleared.  At this point the breakpoints should have
+ * been removed by __switch_to().
+ */
+void free_thread_info(struct thread_info *ti)
+{
+	if (kgdb_sstep_thread == ti) {
+		kgdb_sstep_thread = NULL;
+
+		/* However, we may now be running in degraded mode, with most
+		 * of the CPUs disabled until such a time as KGDB is reentered,
+		 * so force immediate reentry */
+		kgdb_breakpoint();
+	}
+	kfree(ti);
+}
+
+/*
+ * Handle unknown packets and [CcsDk] packets
+ * - at this point breakpoints have been installed
+ */
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+			       char *remcom_in_buffer, char *remcom_out_buffer,
+			       struct pt_regs *regs)
+{
+	long addr;
+	char *ptr;
+
+	switch (remcom_in_buffer[0]) {
+	case 'c':
+	case 's':
+		/* try to read optional parameter, pc unchanged if no parm */
+		ptr = &remcom_in_buffer[1];
+		if (kgdb_hex2long(&ptr, &addr))
+			regs->pc = addr;
+	case 'D':
+	case 'k':
+		atomic_set(&kgdb_cpu_doing_single_step, -1);
+
+		if (remcom_in_buffer[0] == 's') {
+			kgdb_arch_do_singlestep(regs);
+			kgdb_single_step = 1;
+			atomic_set(&kgdb_cpu_doing_single_step,
+				   raw_smp_processor_id());
+		}
+		return 0;
+	}
+	return -1; /* this means that we do not want to exit from the handler */
+}
+
+/*
+ * Handle event interception
+ * - returns 0 if the exception should be skipped, -ERROR otherwise.
+ */
+int debugger_intercept(enum exception_code excep, int signo, int si_code,
+		       struct pt_regs *regs)
+{
+	int ret;
+
+	if (kgdb_arch_undo_singlestep(regs)) {
+		excep = EXCEP_TRAP;
+		signo = SIGTRAP;
+		si_code = TRAP_TRACE;
+	}
+
+	ret = kgdb_handle_exception(excep, signo, si_code, regs);
+
+	debugger_local_cache_flushinv();
+
+	return ret;
+}
+
+/*
+ * Determine if we've hit a debugger special breakpoint
+ */
+int at_debugger_breakpoint(struct pt_regs *regs)
+{
+	return regs->pc == (unsigned long)&__arch_kgdb_breakpoint;
+}
+
+/*
+ * Initialise kgdb
+ */
+int kgdb_arch_init(void)
+{
+	return 0;
+}
+
+/*
+ * Do something, perhaps, but don't know what.
+ */
+void kgdb_arch_exit(void)
+{
+}
+
+#ifdef CONFIG_SMP
+void debugger_nmi_interrupt(struct pt_regs *regs, enum exception_code code)
+{
+	kgdb_nmicallback(arch_smp_processor_id(), regs);
+	debugger_local_cache_flushinv();
+}
+
+void kgdb_roundup_cpus(unsigned long flags)
+{
+	smp_jump_to_debugger();
+}
+#endif
diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c
index 93c53739cfc9..efca426a2ed4 100644
--- a/arch/mn10300/kernel/mn10300-serial.c
+++ b/arch/mn10300/kernel/mn10300-serial.c
@@ -119,6 +119,10 @@ static int mn10300_serial_request_port(struct uart_port *);
 static void mn10300_serial_config_port(struct uart_port *, int);
 static int mn10300_serial_verify_port(struct uart_port *,
 					struct serial_struct *);
+#ifdef CONFIG_CONSOLE_POLL
+static void mn10300_serial_poll_put_char(struct uart_port *, unsigned char);
+static int mn10300_serial_poll_get_char(struct uart_port *);
+#endif
 
 static const struct uart_ops mn10300_serial_ops = {
 	.tx_empty	= mn10300_serial_tx_empty,
@@ -138,6 +142,10 @@ static const struct uart_ops mn10300_serial_ops = {
 	.request_port	= mn10300_serial_request_port,
 	.config_port	= mn10300_serial_config_port,
 	.verify_port	= mn10300_serial_verify_port,
+#ifdef CONFIG_CONSOLE_POLL
+	.poll_put_char	= mn10300_serial_poll_put_char,
+	.poll_get_char	= mn10300_serial_poll_get_char,
+#endif
 };
 
 static irqreturn_t mn10300_serial_interrupt(int irq, void *dev_id);
@@ -1634,3 +1642,70 @@ static int __init mn10300_serial_console_init(void)
 
 console_initcall(mn10300_serial_console_init);
 #endif
+
+#ifdef CONFIG_CONSOLE_POLL
+/*
+ * Polled character reception for the kernel debugger
+ */
+static int mn10300_serial_poll_get_char(struct uart_port *_port)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+	unsigned ix;
+	u8 st, ch;
+
+	_enter("%s", port->name);
+
+	do {
+		/* pull chars out of the hat */
+		ix = port->rx_outp;
+		if (ix == port->rx_inp)
+			return NO_POLL_CHAR;
+
+		ch = port->rx_buffer[ix++];
+		st = port->rx_buffer[ix++];
+		smp_rmb();
+		port->rx_outp = ix & (MNSC_BUFFER_SIZE - 1);
+
+	} while (st & (SC01STR_FEF | SC01STR_PEF | SC01STR_OEF));
+
+	return ch;
+}
+
+
+/*
+ * Polled character transmission for the kernel debugger
+ */
+static void mn10300_serial_poll_put_char(struct uart_port *_port,
+					 unsigned char ch)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+	u8 intr, tmp;
+
+	/* wait for the transmitter to finish anything it might be doing (and
+	 * this includes the virtual DMA handler, so it might take a while) */
+	while (*port->_status & (SC01STR_TBF | SC01STR_TXF))
+		continue;
+
+	/* disable the Tx ready interrupt */
+	intr = *port->_intr;
+	*port->_intr = intr & ~SC01ICR_TI;
+	tmp = *port->_intr;
+
+	if (ch == 0x0a) {
+		*(u8 *) port->_txb = 0x0d;
+		while (*port->_status & SC01STR_TBF)
+			continue;
+	}
+
+	*(u8 *) port->_txb = ch;
+	while (*port->_status & SC01STR_TBF)
+		continue;
+
+	/* restore the Tx interrupt flag */
+	*port->_intr = intr;
+	tmp = *port->_intr;
+}
+
+#endif /* CONFIG_CONSOLE_POLL */
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index e1b14a6ed544..28eec3102535 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -135,7 +135,7 @@ void release_segments(struct mm_struct *mm)
 
 void machine_restart(char *cmd)
 {
-#ifdef CONFIG_GDBSTUB
+#ifdef CONFIG_KERNEL_DEBUGGER
 	gdbstub_exit(0);
 #endif
 
@@ -148,14 +148,14 @@ void machine_restart(char *cmd)
 
 void machine_halt(void)
 {
-#ifdef CONFIG_GDBSTUB
+#ifdef CONFIG_KERNEL_DEBUGGER
 	gdbstub_exit(0);
 #endif
 }
 
 void machine_power_off(void)
 {
-#ifdef CONFIG_GDBSTUB
+#ifdef CONFIG_KERNEL_DEBUGGER
 	gdbstub_exit(0);
 #endif
 }
diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index 1ebb79f1650d..51c02f97dcea 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c
@@ -440,6 +440,22 @@ int smp_nmi_call_function(smp_call_func_t func, void *info, int wait)
 }
 
 /**
+ * smp_jump_to_debugger - Make other CPUs enter the debugger by sending an IPI
+ *
+ * Send a non-maskable request to all other CPUs in the system, instructing
+ * them to jump into the debugger.  The caller is responsible for checking that
+ * the other CPUs responded to the instruction.
+ *
+ * The caller should make sure that this CPU's debugger IPI is disabled.
+ */
+void smp_jump_to_debugger(void)
+{
+	if (num_online_cpus() > 1)
+		/* Send a message to all other CPUs */
+		send_IPI_allbutself(DEBUGGER_NMI_IPI);
+}
+
+/**
  * stop_this_cpu - Callback to stop a CPU.
  * @unused: Callback context (ignored).
  */
@@ -603,7 +619,7 @@ static void __init smp_cpu_init(void)
 /**
  * smp_prepare_cpu_init - Initialise CPU in startup_secondary
  *
- * Set interrupt level 0-6 setting and init ICR of gdbstub.
+ * Set interrupt level 0-6 setting and init ICR of the kernel debugger.
  */
 void smp_prepare_cpu_init(void)
 {
@@ -622,15 +638,15 @@ void smp_prepare_cpu_init(void)
 	for (loop = 0; loop < GxICR_NUM_IRQS; loop++)
 		GxICR(loop) = GxICR_LEVEL_6 | GxICR_DETECT;
 
-#ifdef CONFIG_GDBSTUB
-	/* initialise GDB-stub */
+#ifdef CONFIG_KERNEL_DEBUGGER
+	/* initialise the kernel debugger interrupt */
 	do {
 		unsigned long flags;
 		u16 tmp16;
 
 		flags = arch_local_cli_save();
-		GxICR(GDB_NMI_IPI) = GxICR_NMI | GxICR_ENABLE | GxICR_DETECT;
-		tmp16 = GxICR(GDB_NMI_IPI);
+		GxICR(DEBUGGER_NMI_IPI) = GxICR_NMI | GxICR_ENABLE | GxICR_DETECT;
+		tmp16 = GxICR(DEBUGGER_NMI_IPI);
 		arch_local_irq_restore(flags);
 	} while (0);
 #endif
diff --git a/arch/mn10300/kernel/switch_to.S b/arch/mn10300/kernel/switch_to.S
index 9074d0fb8788..de3e74fc9ea0 100644
--- a/arch/mn10300/kernel/switch_to.S
+++ b/arch/mn10300/kernel/switch_to.S
@@ -39,11 +39,17 @@ ENTRY(__switch_to)
 
 	# save prev context
 	mov	__switch_back,d0
-	mov	d0,(THREAD_PC,a0)
 	mov	sp,a2
 	mov	a2,(THREAD_SP,a0)
 	mov	a3,(THREAD_A3,a0)
 
+#ifdef CONFIG_KGDB
+	btst	0xff,(kgdb_single_step)
+	bne	__switch_to__lift_sstep_bp
+__switch_to__continue:
+#endif
+	mov	d0,(THREAD_PC,a0)
+
 	mov	(THREAD_A3,a1),a3
 	mov	(THREAD_SP,a1),a2
 
@@ -68,3 +74,106 @@ ENTRY(__switch_to)
 __switch_back:
 	and	~EPSW_NMID,epsw
 	ret	[d2,d3,a2,a3,exreg1],32
+
+#ifdef CONFIG_KGDB
+###############################################################################
+#
+# Lift the single-step breakpoints when the task being traced is switched out
+# A0 = prev
+# A1 = next
+#
+###############################################################################
+__switch_to__lift_sstep_bp:
+	add	-12,sp
+	mov	a0,e4
+	mov	a1,e5
+
+	# Clear the single-step flag to prevent us coming this way until we get
+	# switched back in
+	bclr	0xff,(kgdb_single_step)
+
+	# Remove first breakpoint
+	mov	(kgdb_sstep_bp_addr),a2
+	cmp	0,a2
+	beq	1f
+	movbu	(kgdb_sstep_bp),d0
+	movbu	d0,(a2)
+#if defined(CONFIG_MN10300_CACHE_FLUSH_ICACHE) || defined(CONFIG_MN10300_CACHE_INV_ICACHE)
+	mov	a2,d0
+	mov	a2,d1
+	add	1,d1
+	calls	flush_icache_range
+#endif
+1:
+
+	# Remove second breakpoint
+	mov	(kgdb_sstep_bp_addr+4),a2
+	cmp	0,a2
+	beq	2f
+	movbu	(kgdb_sstep_bp+1),d0
+	movbu	d0,(a2)
+#if defined(CONFIG_MN10300_CACHE_FLUSH_ICACHE) || defined(CONFIG_MN10300_CACHE_INV_ICACHE)
+	mov	a2,d0
+	mov	a2,d1
+	add	1,d1
+	calls	flush_icache_range
+#endif
+2:
+
+	# Change the resumption address and return
+	mov	__switch_back__reinstall_sstep_bp,d0
+	mov	e4,a0
+	mov	e5,a1
+	add	12,sp
+	bra	__switch_to__continue
+
+###############################################################################
+#
+# Reinstall the single-step breakpoints when the task being traced is switched
+# back in (A1 points to the new thread_struct).
+#
+###############################################################################
+__switch_back__reinstall_sstep_bp:
+	add	-12,sp
+	mov	a0,e4			# save the return value
+	mov	0xff,d3
+
+	# Reinstall first breakpoint
+	mov	(kgdb_sstep_bp_addr),a2
+	cmp	0,a2
+	beq	1f
+	movbu	(a2),d0
+	movbu	d0,(kgdb_sstep_bp)
+	movbu	d3,(a2)
+#if defined(CONFIG_MN10300_CACHE_FLUSH_ICACHE) || defined(CONFIG_MN10300_CACHE_INV_ICACHE)
+	mov	a2,d0
+	mov	a2,d1
+	add	1,d1
+	calls	flush_icache_range
+#endif
+1:
+
+	# Reinstall second breakpoint
+	mov	(kgdb_sstep_bp_addr+4),a2
+	cmp	0,a2
+	beq	2f
+	movbu	(a2),d0
+	movbu	d0,(kgdb_sstep_bp+1)
+	movbu	d3,(a2)
+#if defined(CONFIG_MN10300_CACHE_FLUSH_ICACHE) || defined(CONFIG_MN10300_CACHE_INV_ICACHE)
+	mov	a2,d0
+	mov	a2,d1
+	add	1,d1
+	calls	flush_icache_range
+#endif
+2:
+
+	mov	d3,(kgdb_single_step)
+
+	# Restore the return value (the previous thread_struct pointer)
+	mov	e4,a0
+	mov	a0,d0
+	add	12,sp
+	bra	__switch_back
+
+#endif /* CONFIG_KGDB */
diff --git a/arch/mn10300/kernel/traps.c b/arch/mn10300/kernel/traps.c
index b90c3f160c77..f03cb278828f 100644
--- a/arch/mn10300/kernel/traps.c
+++ b/arch/mn10300/kernel/traps.c
@@ -38,8 +38,9 @@
 #include <asm/busctl-regs.h>
 #include <unit/leds.h>
 #include <asm/fpu.h>
-#include <asm/gdb-stub.h>
 #include <asm/sections.h>
+#include <asm/debugger.h>
+#include "internal.h"
 
 #if (CONFIG_INTERRUPT_VECTOR_BASE & 0xffffff)
 #error "INTERRUPT_VECTOR_BASE not aligned to 16MiB boundary!"
@@ -49,63 +50,169 @@ int kstack_depth_to_print = 24;
 
 spinlock_t die_lock = __SPIN_LOCK_UNLOCKED(die_lock);
 
-ATOMIC_NOTIFIER_HEAD(mn10300_die_chain);
+struct exception_to_signal_map {
+	u8	signo;
+	u32	si_code;
+};
+
+static const struct exception_to_signal_map exception_to_signal_map[256] = {
+	/* MMU exceptions */
+	[EXCEP_ITLBMISS >> 3]	= { 0, 0 },
+	[EXCEP_DTLBMISS >> 3]	= { 0, 0 },
+	[EXCEP_IAERROR >> 3]	= { 0, 0 },
+	[EXCEP_DAERROR >> 3]	= { 0, 0 },
+
+	/* system exceptions */
+	[EXCEP_TRAP >> 3]	= { SIGTRAP,	TRAP_BRKPT },
+	[EXCEP_ISTEP >> 3]	= { SIGTRAP,	TRAP_TRACE },	/* Monitor */
+	[EXCEP_IBREAK >> 3]	= { SIGTRAP,	TRAP_HWBKPT },	/* Monitor */
+	[EXCEP_OBREAK >> 3]	= { SIGTRAP,	TRAP_HWBKPT },	/* Monitor */
+	[EXCEP_PRIVINS >> 3]	= { SIGILL,	ILL_PRVOPC },
+	[EXCEP_UNIMPINS >> 3]	= { SIGILL,	ILL_ILLOPC },
+	[EXCEP_UNIMPEXINS >> 3]	= { SIGILL,	ILL_ILLOPC },
+	[EXCEP_MEMERR >> 3]	= { SIGSEGV,	SEGV_ACCERR },
+	[EXCEP_MISALIGN >> 3]	= { SIGBUS,	BUS_ADRALN },
+	[EXCEP_BUSERROR >> 3]	= { SIGBUS,	BUS_ADRERR },
+	[EXCEP_ILLINSACC >> 3]	= { SIGSEGV,	SEGV_ACCERR },
+	[EXCEP_ILLDATACC >> 3]	= { SIGSEGV,	SEGV_ACCERR },
+	[EXCEP_IOINSACC >> 3]	= { SIGSEGV,	SEGV_ACCERR },
+	[EXCEP_PRIVINSACC >> 3]	= { SIGSEGV,	SEGV_ACCERR }, /* userspace */
+	[EXCEP_PRIVDATACC >> 3]	= { SIGSEGV,	SEGV_ACCERR }, /* userspace */
+	[EXCEP_DATINSACC >> 3]	= { SIGSEGV,	SEGV_ACCERR },
+	[EXCEP_DOUBLE_FAULT >> 3] = { SIGILL,	ILL_BADSTK },
+
+	/* FPU exceptions */
+	[EXCEP_FPU_DISABLED >> 3] = { SIGILL,	ILL_COPROC },
+	[EXCEP_FPU_UNIMPINS >> 3] = { SIGILL,	ILL_COPROC },
+	[EXCEP_FPU_OPERATION >> 3] = { SIGFPE,	FPE_INTDIV },
+
+	/* interrupts */
+	[EXCEP_WDT >> 3]	= { SIGALRM,	0 },
+	[EXCEP_NMI >> 3]	= { SIGQUIT,	0 },
+	[EXCEP_IRQ_LEVEL0 >> 3]	= { SIGINT,	0 },
+	[EXCEP_IRQ_LEVEL1 >> 3]	= { 0, 0 },
+	[EXCEP_IRQ_LEVEL2 >> 3]	= { 0, 0 },
+	[EXCEP_IRQ_LEVEL3 >> 3]	= { 0, 0 },
+	[EXCEP_IRQ_LEVEL4 >> 3]	= { 0, 0 },
+	[EXCEP_IRQ_LEVEL5 >> 3]	= { 0, 0 },
+	[EXCEP_IRQ_LEVEL6 >> 3]	= { 0, 0 },
+
+	/* system calls */
+	[EXCEP_SYSCALL0 >> 3]	= { 0, 0 },
+	[EXCEP_SYSCALL1 >> 3]	= { SIGILL,	ILL_ILLTRP },
+	[EXCEP_SYSCALL2 >> 3]	= { SIGILL,	ILL_ILLTRP },
+	[EXCEP_SYSCALL3 >> 3]	= { SIGILL,	ILL_ILLTRP },
+	[EXCEP_SYSCALL4 >> 3]	= { SIGILL,	ILL_ILLTRP },
+	[EXCEP_SYSCALL5 >> 3]	= { SIGILL,	ILL_ILLTRP },
+	[EXCEP_SYSCALL6 >> 3]	= { SIGILL,	ILL_ILLTRP },
+	[EXCEP_SYSCALL7 >> 3]	= { SIGILL,	ILL_ILLTRP },
+	[EXCEP_SYSCALL8 >> 3]	= { SIGILL,	ILL_ILLTRP },
+	[EXCEP_SYSCALL9 >> 3]	= { SIGILL,	ILL_ILLTRP },
+	[EXCEP_SYSCALL10 >> 3]	= { SIGILL,	ILL_ILLTRP },
+	[EXCEP_SYSCALL11 >> 3]	= { SIGILL,	ILL_ILLTRP },
+	[EXCEP_SYSCALL12 >> 3]	= { SIGILL,	ILL_ILLTRP },
+	[EXCEP_SYSCALL13 >> 3]	= { SIGILL,	ILL_ILLTRP },
+	[EXCEP_SYSCALL14 >> 3]	= { SIGILL,	ILL_ILLTRP },
+	[EXCEP_SYSCALL15 >> 3]	= { SIGABRT,	0 },
+};
 
 /*
- * These constants are for searching for possible module text
- * segments. MODULE_RANGE is a guess of how much space is likely
- * to be vmalloced.
+ * Handle kernel exceptions.
+ *
+ * See if there's a fixup handler we can force a jump to when an exception
+ * happens due to something kernel code did
  */
-#define MODULE_RANGE (8 * 1024 * 1024)
-
-#define DO_ERROR(signr, prologue, str, name)			\
-asmlinkage void name(struct pt_regs *regs, u32 intcode)		\
-{								\
-	prologue;						\
-	if (die_if_no_fixup(str, regs, intcode))		\
-		return;						\
-	force_sig(signr, current);				\
-}
+int die_if_no_fixup(const char *str, struct pt_regs *regs,
+		    enum exception_code code)
+{
+	u8 opcode;
+	int signo, si_code;
+
+	if (user_mode(regs))
+		return 0;
+
+	peripheral_leds_display_exception(code);
+
+	signo = exception_to_signal_map[code >> 3].signo;
+	si_code = exception_to_signal_map[code >> 3].si_code;
+
+	switch (code) {
+		/* see if we can fixup the kernel accessing memory */
+	case EXCEP_ITLBMISS:
+	case EXCEP_DTLBMISS:
+	case EXCEP_IAERROR:
+	case EXCEP_DAERROR:
+	case EXCEP_MEMERR:
+	case EXCEP_MISALIGN:
+	case EXCEP_BUSERROR:
+	case EXCEP_ILLDATACC:
+	case EXCEP_IOINSACC:
+	case EXCEP_PRIVINSACC:
+	case EXCEP_PRIVDATACC:
+	case EXCEP_DATINSACC:
+		if (fixup_exception(regs))
+			return 1;
+		break;
 
-#define DO_EINFO(signr, prologue, str, name, sicode)			\
-asmlinkage void name(struct pt_regs *regs, u32 intcode)			\
-{									\
-	siginfo_t info;							\
-	prologue;							\
-	if (die_if_no_fixup(str, regs, intcode))			\
-		return;							\
-	info.si_signo = signr;						\
-	if (signr == SIGILL && sicode == ILL_ILLOPC) {			\
-		uint8_t opcode;						\
-		if (get_user(opcode, (uint8_t __user *)regs->pc) == 0)	\
-			if (opcode == 0xff)				\
-				info.si_signo = SIGTRAP;		\
-	}								\
-	info.si_errno = 0;						\
-	info.si_code = sicode;						\
-	info.si_addr = (void *) regs->pc;				\
-	force_sig_info(info.si_signo, &info, current);			\
+	case EXCEP_TRAP:
+	case EXCEP_UNIMPINS:
+		if (get_user(opcode, (uint8_t __user *)regs->pc) != 0)
+			break;
+		if (opcode == 0xff) {
+			if (notify_die(DIE_BREAKPOINT, str, regs, code, 0, 0))
+				return 1;
+			if (at_debugger_breakpoint(regs))
+				regs->pc++;
+			signo = SIGTRAP;
+			si_code = TRAP_BRKPT;
+		}
+		break;
+
+	case EXCEP_SYSCALL1 ... EXCEP_SYSCALL14:
+		/* syscall return addr is _after_ the instruction */
+		regs->pc -= 2;
+		break;
+
+	case EXCEP_SYSCALL15:
+		if (report_bug(regs->pc, regs) == BUG_TRAP_TYPE_WARN)
+			return 1;
+
+		/* syscall return addr is _after_ the instruction */
+		regs->pc -= 2;
+		break;
+
+	default:
+		break;
+	}
+
+	if (debugger_intercept(code, signo, si_code, regs) == 0)
+		return 1;
+
+	if (notify_die(DIE_GPF, str, regs, code, 0, 0))
+		return 1;
+
+	/* make the process die as the last resort */
+	die(str, regs, code);
 }
 
-DO_ERROR(SIGTRAP, {}, "trap",			trap);
-DO_ERROR(SIGSEGV, {}, "ibreak",			ibreak);
-DO_ERROR(SIGSEGV, {}, "obreak",			obreak);
-DO_EINFO(SIGSEGV, {}, "access error",		access_error,	SEGV_ACCERR);
-DO_EINFO(SIGSEGV, {}, "insn access error",	insn_acc_error,	SEGV_ACCERR);
-DO_EINFO(SIGSEGV, {}, "data access error",	data_acc_error,	SEGV_ACCERR);
-DO_EINFO(SIGILL,  {}, "privileged opcode",	priv_op,	ILL_PRVOPC);
-DO_EINFO(SIGILL,  {}, "invalid opcode",		invalid_op,	ILL_ILLOPC);
-DO_EINFO(SIGILL,  {}, "invalid ex opcode",	invalid_exop,	ILL_ILLOPC);
-DO_EINFO(SIGBUS,  {}, "invalid address",	mem_error,	BUS_ADRERR);
-DO_EINFO(SIGBUS,  {}, "bus error",		bus_error,	BUS_ADRERR);
-
-DO_ERROR(SIGTRAP,
-#ifndef CONFIG_MN10300_USING_JTAG
-	 DCR &= ~0x0001,
-#else
-	 {},
-#endif
-	 "single step", istep);
+/*
+ * General exception handler
+ */
+asmlinkage void handle_exception(struct pt_regs *regs, u32 intcode)
+{
+	siginfo_t info;
+
+	/* deal with kernel exceptions here */
+	if (die_if_no_fixup(NULL, regs, intcode))
+		return;
+
+	/* otherwise it's a userspace exception */
+	info.si_signo = exception_to_signal_map[intcode >> 3].signo;
+	info.si_code = exception_to_signal_map[intcode >> 3].si_code;
+	info.si_errno = 0;
+	info.si_addr = (void *) regs->pc;
+	force_sig_info(info.si_signo, &info, current);
+}
 
 /*
  * handle NMI
@@ -113,10 +220,8 @@ DO_ERROR(SIGTRAP,
 asmlinkage void nmi(struct pt_regs *regs, enum exception_code code)
 {
 	/* see if gdbstub wants to deal with it */
-#ifdef CONFIG_GDBSTUB
-	if (gdbstub_intercept(regs, code))
+	if (debugger_intercept(code, SIGQUIT, 0, regs))
 		return;
-#endif
 
 	printk(KERN_WARNING "--- Register Dump ---\n");
 	show_registers(regs);
@@ -128,29 +233,36 @@ asmlinkage void nmi(struct pt_regs *regs, enum exception_code code)
  */
 void show_trace(unsigned long *sp)
 {
-	unsigned long *stack, addr, module_start, module_end;
-	int i;
-
-	printk(KERN_EMERG "\nCall Trace:");
-
-	stack = sp;
-	i = 0;
-	module_start = VMALLOC_START;
-	module_end = VMALLOC_END;
+	unsigned long bottom, stack, addr, fp, raslot;
+
+	printk(KERN_EMERG "\nCall Trace:\n");
+
+	//stack = (unsigned long)sp;
+	asm("mov sp,%0" : "=a"(stack));
+	asm("mov a3,%0" : "=r"(fp));
+
+	raslot = ULONG_MAX;
+	bottom = (stack + THREAD_SIZE) & ~(THREAD_SIZE - 1);
+	for (; stack < bottom; stack += sizeof(addr)) {
+		addr = *(unsigned long *)stack;
+		if (stack == fp) {
+			if (addr > stack && addr < bottom) {
+				fp = addr;
+				raslot = stack + sizeof(addr);
+				continue;
+			}
+			fp = 0;
+			raslot = ULONG_MAX;
+		}
 
-	while (((long) stack & (THREAD_SIZE - 1)) != 0) {
-		addr = *stack++;
 		if (__kernel_text_address(addr)) {
-#if 1
 			printk(" [<%08lx>]", addr);
+			if (stack >= raslot)
+				raslot = ULONG_MAX;
+			else
+				printk(" ?");
 			print_symbol(" %s", addr);
 			printk("\n");
-#else
-			if ((i % 6) == 0)
-				printk(KERN_EMERG "  ");
-			printk("[<%08lx>] ", addr);
-			i++;
-#endif
 		}
 	}
 
@@ -323,86 +435,6 @@ void die(const char *str, struct pt_regs *regs, enum exception_code code)
 }
 
 /*
- * see if there's a fixup handler we can force a jump to when an exception
- * happens due to something kernel code did
- */
-int die_if_no_fixup(const char *str, struct pt_regs *regs,
-		    enum exception_code code)
-{
-	if (user_mode(regs))
-		return 0;
-
-	peripheral_leds_display_exception(code);
-
-	switch (code) {
-		/* see if we can fixup the kernel accessing memory */
-	case EXCEP_ITLBMISS:
-	case EXCEP_DTLBMISS:
-	case EXCEP_IAERROR:
-	case EXCEP_DAERROR:
-	case EXCEP_MEMERR:
-	case EXCEP_MISALIGN:
-	case EXCEP_BUSERROR:
-	case EXCEP_ILLDATACC:
-	case EXCEP_IOINSACC:
-	case EXCEP_PRIVINSACC:
-	case EXCEP_PRIVDATACC:
-	case EXCEP_DATINSACC:
-		if (fixup_exception(regs))
-			return 1;
-	case EXCEP_UNIMPINS:
-		if (regs->pc && *(uint8_t *)regs->pc == 0xff)
-			if (notify_die(DIE_BREAKPOINT, str, regs, code, 0, 0))
-				return 1;
-		break;
-	default:
-		break;
-	}
-
-	/* see if gdbstub wants to deal with it */
-#ifdef CONFIG_GDBSTUB
-	if (gdbstub_intercept(regs, code))
-		return 1;
-#endif
-
-	if (notify_die(DIE_GPF, str, regs, code, 0, 0))
-		return 1;
-
-	/* make the process die as the last resort */
-	die(str, regs, code);
-}
-
-/*
- * handle unsupported syscall instructions (syscall 1-15)
- */
-static asmlinkage void unsupported_syscall(struct pt_regs *regs,
-					   enum exception_code code)
-{
-	struct task_struct *tsk = current;
-	siginfo_t info;
-
-	/* catch a kernel BUG() */
-	if (code == EXCEP_SYSCALL15 && !user_mode(regs)) {
-		if (report_bug(regs->pc, regs) == BUG_TRAP_TYPE_BUG) {
-#ifdef CONFIG_GDBSTUB
-			gdbstub_intercept(regs, code);
-#endif
-		}
-	}
-
-	regs->pc -= 2; /* syscall return addr is _after_ the instruction */
-
-	die_if_no_fixup("An unsupported syscall insn was used by the kernel\n",
-			regs, code);
-
-	info.si_signo	= SIGILL;
-	info.si_errno	= ENOSYS;
-	info.si_code	= ILL_ILLTRP;
-	info.si_addr	= (void *) regs->pc;
-	force_sig_info(SIGILL, &info, tsk);
-}
-
-/*
  * display the register file when the stack pointer gets clobbered
  */
 asmlinkage void do_double_fault(struct pt_regs *regs)
@@ -481,10 +513,8 @@ asmlinkage void uninitialised_exception(struct pt_regs *regs,
 {
 
 	/* see if gdbstub wants to deal with it */
-#ifdef CONFIG_GDBSTUB
-	if (gdbstub_intercept(regs, code))
+	if (debugger_intercept(code, SIGSYS, 0, regs) == 0)
 		return;
-#endif
 
 	peripheral_leds_display_exception(code);
 	printk(KERN_EMERG "Uninitialised Exception 0x%04x\n", code & 0xFFFF);
@@ -549,43 +579,43 @@ void __init set_intr_stub(enum exception_code code, void *handler)
  */
 void __init trap_init(void)
 {
-	set_excp_vector(EXCEP_TRAP,		trap);
-	set_excp_vector(EXCEP_ISTEP,		istep);
-	set_excp_vector(EXCEP_IBREAK,		ibreak);
-	set_excp_vector(EXCEP_OBREAK,		obreak);
-
-	set_excp_vector(EXCEP_PRIVINS,		priv_op);
-	set_excp_vector(EXCEP_UNIMPINS,		invalid_op);
-	set_excp_vector(EXCEP_UNIMPEXINS,	invalid_exop);
-	set_excp_vector(EXCEP_MEMERR,		mem_error);
+	set_excp_vector(EXCEP_TRAP,		handle_exception);
+	set_excp_vector(EXCEP_ISTEP,		handle_exception);
+	set_excp_vector(EXCEP_IBREAK,		handle_exception);
+	set_excp_vector(EXCEP_OBREAK,		handle_exception);
+
+	set_excp_vector(EXCEP_PRIVINS,		handle_exception);
+	set_excp_vector(EXCEP_UNIMPINS,		handle_exception);
+	set_excp_vector(EXCEP_UNIMPEXINS,	handle_exception);
+	set_excp_vector(EXCEP_MEMERR,		handle_exception);
 	set_excp_vector(EXCEP_MISALIGN,		misalignment);
-	set_excp_vector(EXCEP_BUSERROR,		bus_error);
-	set_excp_vector(EXCEP_ILLINSACC,	insn_acc_error);
-	set_excp_vector(EXCEP_ILLDATACC,	data_acc_error);
-	set_excp_vector(EXCEP_IOINSACC,		insn_acc_error);
-	set_excp_vector(EXCEP_PRIVINSACC,	insn_acc_error);
-	set_excp_vector(EXCEP_PRIVDATACC,	data_acc_error);
-	set_excp_vector(EXCEP_DATINSACC,	insn_acc_error);
-	set_excp_vector(EXCEP_FPU_UNIMPINS,	fpu_invalid_op);
+	set_excp_vector(EXCEP_BUSERROR,		handle_exception);
+	set_excp_vector(EXCEP_ILLINSACC,	handle_exception);
+	set_excp_vector(EXCEP_ILLDATACC,	handle_exception);
+	set_excp_vector(EXCEP_IOINSACC,		handle_exception);
+	set_excp_vector(EXCEP_PRIVINSACC,	handle_exception);
+	set_excp_vector(EXCEP_PRIVDATACC,	handle_exception);
+	set_excp_vector(EXCEP_DATINSACC,	handle_exception);
+	set_excp_vector(EXCEP_FPU_UNIMPINS,	handle_exception);
 	set_excp_vector(EXCEP_FPU_OPERATION,	fpu_exception);
 
 	set_excp_vector(EXCEP_NMI,		nmi);
 
-	set_excp_vector(EXCEP_SYSCALL1,		unsupported_syscall);
-	set_excp_vector(EXCEP_SYSCALL2,		unsupported_syscall);
-	set_excp_vector(EXCEP_SYSCALL3,		unsupported_syscall);
-	set_excp_vector(EXCEP_SYSCALL4,		unsupported_syscall);
-	set_excp_vector(EXCEP_SYSCALL5,		unsupported_syscall);
-	set_excp_vector(EXCEP_SYSCALL6,		unsupported_syscall);
-	set_excp_vector(EXCEP_SYSCALL7,		unsupported_syscall);
-	set_excp_vector(EXCEP_SYSCALL8,		unsupported_syscall);
-	set_excp_vector(EXCEP_SYSCALL9,		unsupported_syscall);
-	set_excp_vector(EXCEP_SYSCALL10,	unsupported_syscall);
-	set_excp_vector(EXCEP_SYSCALL11,	unsupported_syscall);
-	set_excp_vector(EXCEP_SYSCALL12,	unsupported_syscall);
-	set_excp_vector(EXCEP_SYSCALL13,	unsupported_syscall);
-	set_excp_vector(EXCEP_SYSCALL14,	unsupported_syscall);
-	set_excp_vector(EXCEP_SYSCALL15,	unsupported_syscall);
+	set_excp_vector(EXCEP_SYSCALL1,		handle_exception);
+	set_excp_vector(EXCEP_SYSCALL2,		handle_exception);
+	set_excp_vector(EXCEP_SYSCALL3,		handle_exception);
+	set_excp_vector(EXCEP_SYSCALL4,		handle_exception);
+	set_excp_vector(EXCEP_SYSCALL5,		handle_exception);
+	set_excp_vector(EXCEP_SYSCALL6,		handle_exception);
+	set_excp_vector(EXCEP_SYSCALL7,		handle_exception);
+	set_excp_vector(EXCEP_SYSCALL8,		handle_exception);
+	set_excp_vector(EXCEP_SYSCALL9,		handle_exception);
+	set_excp_vector(EXCEP_SYSCALL10,	handle_exception);
+	set_excp_vector(EXCEP_SYSCALL11,	handle_exception);
+	set_excp_vector(EXCEP_SYSCALL12,	handle_exception);
+	set_excp_vector(EXCEP_SYSCALL13,	handle_exception);
+	set_excp_vector(EXCEP_SYSCALL14,	handle_exception);
+	set_excp_vector(EXCEP_SYSCALL15,	handle_exception);
 }
 
 /*
diff --git a/arch/mn10300/mm/Kconfig.cache b/arch/mn10300/mm/Kconfig.cache
index c4fd923a55a0..bfbe52691f2c 100644
--- a/arch/mn10300/mm/Kconfig.cache
+++ b/arch/mn10300/mm/Kconfig.cache
@@ -99,3 +99,49 @@ config MN10300_CACHE_INV_ICACHE
 	help
 	  Set if we need the icache to be invalidated, even if the dcache is in
 	  write-through mode and doesn't need flushing.
+
+#
+# The kernel debugger gets its own separate cache flushing functions
+#
+config MN10300_DEBUGGER_CACHE_FLUSH_BY_TAG
+	def_bool y if KERNEL_DEBUGGER && \
+			MN10300_CACHE_WBACK && \
+			!MN10300_CACHE_SNOOP && \
+			MN10300_CACHE_MANAGE_BY_TAG
+	help
+	  Set if the debugger needs to flush the dcache and invalidate the
+	  icache using the cache tag registers to make breakpoints work.
+
+config MN10300_DEBUGGER_CACHE_FLUSH_BY_REG
+	def_bool y if KERNEL_DEBUGGER && \
+			MN10300_CACHE_WBACK && \
+			!MN10300_CACHE_SNOOP && \
+			MN10300_CACHE_MANAGE_BY_REG
+	help
+	  Set if the debugger needs to flush the dcache and invalidate the
+	  icache using automatic purge registers to make breakpoints work.
+
+config MN10300_DEBUGGER_CACHE_INV_BY_TAG
+	def_bool y if KERNEL_DEBUGGER && \
+			MN10300_CACHE_WTHRU && \
+			!MN10300_CACHE_SNOOP && \
+			MN10300_CACHE_MANAGE_BY_TAG
+	help
+	  Set if the debugger needs to invalidate the icache using the cache
+	  tag registers to make breakpoints work.
+
+config MN10300_DEBUGGER_CACHE_INV_BY_REG
+	def_bool y if KERNEL_DEBUGGER && \
+			MN10300_CACHE_WTHRU && \
+			!MN10300_CACHE_SNOOP && \
+			MN10300_CACHE_MANAGE_BY_REG
+	help
+	  Set if the debugger needs to invalidate the icache using automatic
+	  purge registers to make breakpoints work.
+
+config MN10300_DEBUGGER_CACHE_NO_FLUSH
+	def_bool y if KERNEL_DEBUGGER && \
+			(MN10300_CACHE_DISABLED || MN10300_CACHE_SNOOP)
+	help
+	  Set if the debugger does not need to flush the dcache and/or
+	  invalidate the icache to make breakpoints work.
diff --git a/arch/mn10300/mm/Makefile b/arch/mn10300/mm/Makefile
index 203fee23f7d7..11f38466ac28 100644
--- a/arch/mn10300/mm/Makefile
+++ b/arch/mn10300/mm/Makefile
@@ -13,6 +13,15 @@ cacheflush-$(CONFIG_MN10300_CACHE_INV_BY_REG) += cache-inv-by-reg.o
 cacheflush-$(CONFIG_MN10300_CACHE_FLUSH_BY_TAG) += cache-flush-by-tag.o
 cacheflush-$(CONFIG_MN10300_CACHE_FLUSH_BY_REG) += cache-flush-by-reg.o
 
+cacheflush-$(CONFIG_MN10300_DEBUGGER_CACHE_FLUSH_BY_TAG) += \
+	cache-dbg-flush-by-tag.o cache-dbg-inv-by-tag.o
+cacheflush-$(CONFIG_MN10300_DEBUGGER_CACHE_FLUSH_BY_REG) += \
+	cache-dbg-flush-by-reg.o
+cacheflush-$(CONFIG_MN10300_DEBUGGER_CACHE_INV_BY_TAG) += \
+	cache-dbg-inv-by-tag.o cache-dbg-inv.o
+cacheflush-$(CONFIG_MN10300_DEBUGGER_CACHE_INV_BY_REG) += \
+	cache-dbg-inv-by-reg.o cache-dbg-inv.o
+
 cacheflush-$(CONFIG_MN10300_CACHE_DISABLED) := cache-disabled.o
 
 obj-y := \
diff --git a/arch/mn10300/mm/cache-dbg-flush-by-reg.S b/arch/mn10300/mm/cache-dbg-flush-by-reg.S
new file mode 100644
index 000000000000..665919f2ab62
--- /dev/null
+++ b/arch/mn10300/mm/cache-dbg-flush-by-reg.S
@@ -0,0 +1,160 @@
+/* MN10300 CPU cache invalidation routines, using automatic purge registers
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/irqflags.h>
+#include <asm/cacheflush.h>
+#include "cache.inc"
+
+	.am33_2
+
+###############################################################################
+#
+# void debugger_local_cache_flushinv(void)
+# Flush the entire data cache back to RAM and invalidate the icache
+#
+###############################################################################
+	ALIGN
+	.globl	debugger_local_cache_flushinv
+        .type	debugger_local_cache_flushinv,@function
+debugger_local_cache_flushinv:
+	#
+	# firstly flush the dcache
+	#
+	movhu	(CHCTR),d0
+	btst	CHCTR_DCEN|CHCTR_ICEN,d0
+	beq	debugger_local_cache_flushinv_end
+
+	mov	DCPGCR,a0
+
+	mov	epsw,d1
+	and	~EPSW_IE,epsw
+	or	EPSW_NMID,epsw
+	nop
+
+	btst	CHCTR_DCEN,d0
+	beq	debugger_local_cache_flushinv_no_dcache
+
+	# wait for busy bit of area purge
+	setlb
+	mov	(a0),d0
+	btst	DCPGCR_DCPGBSY,d0
+	lne
+
+	# set mask
+	clr	d0
+	mov	d0,(DCPGMR)
+
+	# area purge
+	#
+	# DCPGCR = DCPGCR_DCP
+	#
+	mov	DCPGCR_DCP,d0
+	mov	d0,(a0)
+
+	# wait for busy bit of area purge
+	setlb
+	mov	(a0),d0
+	btst	DCPGCR_DCPGBSY,d0
+	lne
+
+debugger_local_cache_flushinv_no_dcache:
+	#
+	# secondly, invalidate the icache if it is enabled
+	#
+	mov	CHCTR,a0
+	movhu	(a0),d0
+	btst	CHCTR_ICEN,d0
+	beq	debugger_local_cache_flushinv_done
+
+	invalidate_icache 0
+
+debugger_local_cache_flushinv_done:
+	mov	d1,epsw
+
+debugger_local_cache_flushinv_end:
+	ret	[],0
+	.size	debugger_local_cache_flushinv,.-debugger_local_cache_flushinv
+
+###############################################################################
+#
+# void debugger_local_cache_flushinv_one(u8 *addr)
+#
+# Invalidate one particular cacheline if it's in the icache
+#
+###############################################################################
+	ALIGN
+	.globl	debugger_local_cache_flushinv_one
+	.type	debugger_local_cache_flushinv_one,@function
+debugger_local_cache_flushinv_one:
+	movhu	(CHCTR),d1
+	btst	CHCTR_DCEN|CHCTR_ICEN,d1
+	beq	debugger_local_cache_flushinv_one_end
+	btst	CHCTR_DCEN,d1
+	beq	debugger_local_cache_flushinv_one_no_dcache
+
+	# round cacheline addr down
+	and	L1_CACHE_TAG_MASK,d0
+	mov	d0,a1
+	mov	d0,d1
+
+	# determine the dcache purge control reg address
+	mov	DCACHE_PURGE(0,0),a0
+	and	L1_CACHE_TAG_ENTRY,d0
+	add	d0,a0
+
+	# retain valid entries in the cache
+	or	L1_CACHE_TAG_VALID,d1
+
+	# conditionally purge this line in all ways
+	mov	d1,(L1_CACHE_WAYDISP*0,a0)
+
+debugger_local_cache_flushinv_no_dcache:
+	#
+	# now try to flush the icache
+	#
+	mov	CHCTR,a0
+	movhu	(a0),d0
+	btst	CHCTR_ICEN,d0
+	beq	mn10300_local_icache_inv_range_reg_end
+
+	LOCAL_CLI_SAVE(d1)
+
+	mov	ICIVCR,a0
+
+	# wait for the invalidator to quiesce
+	setlb
+	mov	(a0),d0
+	btst	ICIVCR_ICIVBSY,d0
+	lne
+
+	# set the mask
+	mov	L1_CACHE_TAG_MASK,d0
+	mov	d0,(ICIVMR)
+
+	# invalidate the cache line at the given address
+	or	ICIVCR_ICI,a1
+	mov	a1,(a0)
+
+	# wait for the invalidator to quiesce again
+	setlb
+	mov	(a0),d0
+	btst	ICIVCR_ICIVBSY,d0
+	lne
+
+	LOCAL_IRQ_RESTORE(d1)
+
+debugger_local_cache_flushinv_one_end:
+	ret	[],0
+	.size	debugger_local_cache_flushinv_one,.-debugger_local_cache_flushinv_one
diff --git a/arch/mn10300/mm/cache-dbg-flush-by-tag.S b/arch/mn10300/mm/cache-dbg-flush-by-tag.S
new file mode 100644
index 000000000000..bf56930e6e70
--- /dev/null
+++ b/arch/mn10300/mm/cache-dbg-flush-by-tag.S
@@ -0,0 +1,114 @@
+/* MN10300 CPU cache invalidation routines, using direct tag flushing
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/irqflags.h>
+#include <asm/cacheflush.h>
+#include "cache.inc"
+
+	.am33_2
+
+###############################################################################
+#
+# void debugger_local_cache_flushinv(void)
+#
+# Flush the entire data cache back to RAM and invalidate the icache
+#
+###############################################################################
+	ALIGN
+	.globl	debugger_local_cache_flushinv
+        .type	debugger_local_cache_flushinv,@function
+debugger_local_cache_flushinv:
+	#
+	# firstly flush the dcache
+	#
+	movhu	(CHCTR),d0
+	btst	CHCTR_DCEN|CHCTR_ICEN,d0
+	beq	debugger_local_cache_flushinv_end
+
+	btst	CHCTR_DCEN,d0
+	beq	debugger_local_cache_flushinv_no_dcache
+
+	# read the addresses tagged in the cache's tag RAM and attempt to flush
+	# those addresses specifically
+	# - we rely on the hardware to filter out invalid tag entry addresses
+	mov	DCACHE_TAG(0,0),a0		# dcache tag RAM access address
+	mov	DCACHE_PURGE(0,0),a1		# dcache purge request address
+	mov	L1_CACHE_NWAYS*L1_CACHE_NENTRIES,e0  # total number of entries
+
+mn10300_local_dcache_flush_loop:
+	mov	(a0),d0
+	and	L1_CACHE_TAG_MASK,d0
+	or	L1_CACHE_TAG_VALID,d0		# retain valid entries in the
+						# cache
+	mov	d0,(a1)				# conditional purge
+
+	add	L1_CACHE_BYTES,a0
+	add	L1_CACHE_BYTES,a1
+	add	-1,e0
+	bne	mn10300_local_dcache_flush_loop
+
+debugger_local_cache_flushinv_no_dcache:
+	#
+	# secondly, invalidate the icache if it is enabled
+	#
+	mov	CHCTR,a0
+	movhu	(a0),d0
+	btst	CHCTR_ICEN,d0
+	beq	debugger_local_cache_flushinv_end
+
+	invalidate_icache 1
+
+debugger_local_cache_flushinv_end:
+	ret	[],0
+	.size	debugger_local_cache_flushinv,.-debugger_local_cache_flushinv
+
+###############################################################################
+#
+# void debugger_local_cache_flushinv_one(u8 *addr)
+#
+# Invalidate one particular cacheline if it's in the icache
+#
+###############################################################################
+	ALIGN
+	.globl	debugger_local_cache_flushinv_one
+	.type	debugger_local_cache_flushinv_one,@function
+debugger_local_cache_flushinv_one:
+	movhu	(CHCTR),d1
+	btst	CHCTR_DCEN|CHCTR_ICEN,d1
+	beq	debugger_local_cache_flushinv_one_end
+	btst	CHCTR_DCEN,d1
+	beq	debugger_local_cache_flushinv_one_icache
+
+	# round cacheline addr down
+	and	L1_CACHE_TAG_MASK,d0
+	mov	d0,a1
+
+	# determine the dcache purge control reg address
+	mov	DCACHE_PURGE(0,0),a0
+	and	L1_CACHE_TAG_ENTRY,d0
+	add	d0,a0
+
+	# retain valid entries in the cache
+	or	L1_CACHE_TAG_VALID,a1
+
+	# conditionally purge this line in all ways
+	mov	a1,(L1_CACHE_WAYDISP*0,a0)
+
+	# now go and do the icache
+	bra	debugger_local_cache_flushinv_one_icache
+
+debugger_local_cache_flushinv_one_end:
+	ret	[],0
+	.size	debugger_local_cache_flushinv_one,.-debugger_local_cache_flushinv_one
diff --git a/arch/mn10300/mm/cache-dbg-inv-by-reg.S b/arch/mn10300/mm/cache-dbg-inv-by-reg.S
new file mode 100644
index 000000000000..c4e6252941b1
--- /dev/null
+++ b/arch/mn10300/mm/cache-dbg-inv-by-reg.S
@@ -0,0 +1,69 @@
+/* MN10300 CPU cache invalidation routines, using automatic purge registers
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/cache.h>
+#include <asm/irqflags.h>
+#include <asm/cacheflush.h>
+#include "cache.inc"
+
+	.am33_2
+
+	.globl	debugger_local_cache_flushinv_one
+
+###############################################################################
+#
+# void debugger_local_cache_flushinv_one(u8 *addr)
+#
+# Invalidate one particular cacheline if it's in the icache
+#
+###############################################################################
+	ALIGN
+	.globl	debugger_local_cache_flushinv_one
+	.type	debugger_local_cache_flushinv_one,@function
+debugger_local_cache_flushinv_one:
+	mov	d0,a1
+
+	mov	CHCTR,a0
+	movhu	(a0),d0
+	btst	CHCTR_ICEN,d0
+	beq	mn10300_local_icache_inv_range_reg_end
+
+	LOCAL_CLI_SAVE(d1)
+
+	mov	ICIVCR,a0
+
+	# wait for the invalidator to quiesce
+	setlb
+	mov	(a0),d0
+	btst	ICIVCR_ICIVBSY,d0
+	lne
+
+	# set the mask
+	mov	~L1_CACHE_TAG_MASK,d0
+	mov	d0,(ICIVMR)
+
+	# invalidate the cache line at the given address
+	and	~L1_CACHE_TAG_MASK,a1
+	or	ICIVCR_ICI,a1
+	mov	a1,(a0)
+
+	# wait for the invalidator to quiesce again
+	setlb
+	mov	(a0),d0
+	btst	ICIVCR_ICIVBSY,d0
+	lne
+
+	LOCAL_IRQ_RESTORE(d1)
+
+mn10300_local_icache_inv_range_reg_end:
+	ret	[],0
+	.size	debugger_local_cache_flushinv_one,.-debugger_local_cache_flushinv_one
diff --git a/arch/mn10300/mm/cache-dbg-inv-by-tag.S b/arch/mn10300/mm/cache-dbg-inv-by-tag.S
new file mode 100644
index 000000000000..d8ec821e5f88
--- /dev/null
+++ b/arch/mn10300/mm/cache-dbg-inv-by-tag.S
@@ -0,0 +1,120 @@
+/* MN10300 CPU cache invalidation routines, using direct tag flushing
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/irqflags.h>
+#include <asm/cacheflush.h>
+#include "cache.inc"
+
+	.am33_2
+
+	.globl	debugger_local_cache_flushinv_one_icache
+
+###############################################################################
+#
+# void debugger_local_cache_flushinv_one(u8 *addr)
+#
+# Invalidate one particular cacheline if it's in the icache
+#
+###############################################################################
+	ALIGN
+	.globl	debugger_local_cache_flushinv_one_icache
+	.type	debugger_local_cache_flushinv_one_icache,@function
+debugger_local_cache_flushinv_one_icache:
+	movm	[d3,a2],(sp)
+
+	mov	CHCTR,a2
+	movhu	(a2),d0
+	btst	CHCTR_ICEN,d0
+	beq	debugger_local_cache_flushinv_one_icache_end
+
+	mov	d0,a1
+	and	L1_CACHE_TAG_MASK,a1
+
+	# read the tags from the tag RAM, and if they indicate a matching valid
+	# cache line then we invalidate that line
+	mov	ICACHE_TAG(0,0),a0
+	mov	a1,d0
+	and	L1_CACHE_TAG_ENTRY,d0
+	add	d0,a0				# starting icache tag RAM
+						# access address
+
+	and	~(L1_CACHE_DISPARITY-1),a1	# determine comparator base
+	or	L1_CACHE_TAG_VALID,a1
+	mov	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_VALID,d1
+
+	LOCAL_CLI_SAVE(d3)
+
+	# disable the icache
+	movhu	(a2),d0
+	and	~CHCTR_ICEN,d0
+	movhu	d0,(a2)
+
+	# and wait for it to calm down
+	setlb
+	movhu	(a2),d0
+	btst	CHCTR_ICBUSY,d0
+	lne
+
+	# check all the way tags for this cache entry
+	mov	(a0),d0				# read the tag in the way 0 slot
+	xor	a1,d0
+	and	d1,d0
+	beq	debugger_local_icache_kill	# jump if matched
+
+	add	L1_CACHE_WAYDISP,a0
+	mov	(a0),d0				# read the tag in the way 1 slot
+	xor	a1,d0
+	and	d1,d0
+	beq	debugger_local_icache_kill	# jump if matched
+
+	add	L1_CACHE_WAYDISP,a0
+	mov	(a0),d0				# read the tag in the way 2 slot
+	xor	a1,d0
+	and	d1,d0
+	beq	debugger_local_icache_kill	# jump if matched
+
+	add	L1_CACHE_WAYDISP,a0
+	mov	(a0),d0				# read the tag in the way 3 slot
+	xor	a1,d0
+	and	d1,d0
+	bne	debugger_local_icache_finish	# jump if not matched
+
+debugger_local_icache_kill:
+	mov	d0,(a0)				# kill the tag (D0 is 0 at this point)
+
+debugger_local_icache_finish:
+	# wait for the cache to finish what it's doing
+	setlb
+	movhu	(a2),d0
+	btst	CHCTR_ICBUSY,d0
+	lne
+
+	# and reenable it
+	or	CHCTR_ICEN,d0
+	movhu	d0,(a2)
+	movhu	(a2),d0
+
+	# re-enable interrupts
+	LOCAL_IRQ_RESTORE(d3)
+
+debugger_local_cache_flushinv_one_icache_end:
+	ret	[d3,a2],8
+	.size	debugger_local_cache_flushinv_one_icache,.-debugger_local_cache_flushinv_one_icache
+
+#ifdef CONFIG_MN10300_DEBUGGER_CACHE_INV_BY_TAG
+	.globl	debugger_local_cache_flushinv_one
+	.type	debugger_local_cache_flushinv_one,@function
+debugger_local_cache_flushinv_one = debugger_local_cache_flushinv_one_icache
+#endif
diff --git a/arch/mn10300/mm/cache-dbg-inv.S b/arch/mn10300/mm/cache-dbg-inv.S
new file mode 100644
index 000000000000..eba2d6dca066
--- /dev/null
+++ b/arch/mn10300/mm/cache-dbg-inv.S
@@ -0,0 +1,47 @@
+/* MN10300 CPU cache invalidation routines
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/irqflags.h>
+#include <asm/cacheflush.h>
+#include "cache.inc"
+
+	.am33_2
+
+	.globl	debugger_local_cache_flushinv
+
+###############################################################################
+#
+# void debugger_local_cache_flushinv(void)
+#
+# Invalidate the entire icache
+#
+###############################################################################
+	ALIGN
+	.globl	debugger_local_cache_flushinv
+        .type	debugger_local_cache_flushinv,@function
+debugger_local_cache_flushinv:
+	#
+	# we only need to invalidate the icache in this cache mode
+	#
+	mov	CHCTR,a0
+	movhu	(a0),d0
+	btst	CHCTR_ICEN,d0
+	beq	debugger_local_cache_flushinv_end
+
+	invalidate_icache 1
+
+debugger_local_cache_flushinv_end:
+	ret	[],0
+	.size	debugger_local_cache_flushinv,.-debugger_local_cache_flushinv
diff --git a/arch/mn10300/mm/cache-flush-by-tag.S b/arch/mn10300/mm/cache-flush-by-tag.S
index 5cd6a27dd63e..1ddc06849242 100644
--- a/arch/mn10300/mm/cache-flush-by-tag.S
+++ b/arch/mn10300/mm/cache-flush-by-tag.S
@@ -62,7 +62,7 @@ mn10300_local_dcache_flush:
 
 mn10300_local_dcache_flush_loop:
 	mov	(a0),d0
-	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0
+	and	L1_CACHE_TAG_MASK,d0
 	or	L1_CACHE_TAG_VALID,d0		# retain valid entries in the
 						# cache
 	mov	d0,(a1)				# conditional purge
@@ -112,11 +112,11 @@ mn10300_local_dcache_flush_range:
 1:
 
 	# round start addr down
-	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0
+	and	L1_CACHE_TAG_MASK,d0
 	mov	d0,a1
 
 	add	L1_CACHE_BYTES,d1			# round end addr up
-	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d1
+	and	L1_CACHE_TAG_MASK,d1
 
 	# write a request to flush all instances of an address from the cache
 	mov	DCACHE_PURGE(0,0),a0
@@ -215,12 +215,11 @@ mn10300_local_dcache_flush_inv_range:
 	bra	mn10300_local_dcache_flush_inv
 1:
 
-	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0	# round start
-								# addr down
+	and	L1_CACHE_TAG_MASK,d0		# round start addr down
 	mov	d0,a1
 
-	add	L1_CACHE_BYTES,d1			# round end addr up
-	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d1
+	add	L1_CACHE_BYTES,d1		# round end addr up
+	and	L1_CACHE_TAG_MASK,d1
 
 	# write a request to flush and invalidate all instances of an address
 	# from the cache
diff --git a/arch/mn10300/mm/cache-inv-by-reg.S b/arch/mn10300/mm/cache-inv-by-reg.S
index c8950861ed77..a60825b91e77 100644
--- a/arch/mn10300/mm/cache-inv-by-reg.S
+++ b/arch/mn10300/mm/cache-inv-by-reg.S
@@ -15,6 +15,7 @@
 #include <asm/cache.h>
 #include <asm/irqflags.h>
 #include <asm/cacheflush.h>
+#include "cache.inc"
 
 #define mn10300_local_dcache_inv_range_intr_interval \
 	+((1 << MN10300_DCACHE_INV_RANGE_INTR_LOG2_INTERVAL) - 1)
@@ -62,10 +63,7 @@ mn10300_local_icache_inv:
 	btst	CHCTR_ICEN,d0
 	beq	mn10300_local_icache_inv_end
 
-	# invalidate
-	or	CHCTR_ICINV,d0
-	movhu	d0,(a0)
-	movhu	(a0),d0
+	invalidate_icache 1
 
 mn10300_local_icache_inv_end:
 	ret	[],0
@@ -87,11 +85,8 @@ mn10300_local_dcache_inv:
 	btst	CHCTR_DCEN,d0
 	beq	mn10300_local_dcache_inv_end
 
-	# invalidate
-	or	CHCTR_DCINV,d0
-	movhu	d0,(a0)
-	movhu	(a0),d0
-
+	invalidate_dcache 1
+	
 mn10300_local_dcache_inv_end:
 	ret	[],0
 	.size	mn10300_local_dcache_inv,.-mn10300_local_dcache_inv
@@ -121,9 +116,9 @@ mn10300_local_dcache_inv_range:
 	# and if they're not cacheline-aligned, we must flush any bits outside
 	# the range that share cachelines with stuff inside the range
 #ifdef CONFIG_MN10300_CACHE_WBACK
-	btst	~(L1_CACHE_BYTES-1),d0
+	btst	~L1_CACHE_TAG_MASK,d0
 	bne	1f
-	btst	~(L1_CACHE_BYTES-1),d1
+	btst	~L1_CACHE_TAG_MASK,d1
 	beq	2f
 1:
 	bra	mn10300_local_dcache_flush_inv_range
@@ -141,12 +136,11 @@ mn10300_local_dcache_inv_range:
 	# writeback mode, in which case we would be in flush and invalidate by
 	# now
 #ifndef CONFIG_MN10300_CACHE_WBACK
-	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0	# round start
-								# addr down
+	and	L1_CACHE_TAG_MASK,d0	# round start addr down
 
 	mov	L1_CACHE_BYTES-1,d2
 	add	d2,d1
-	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d1	# round end addr up
+	and	L1_CACHE_TAG_MASK,d1	# round end addr up
 #endif /* !CONFIG_MN10300_CACHE_WBACK */
 
 	sub	d0,d1,d2		# calculate the total size
diff --git a/arch/mn10300/mm/cache-inv-by-tag.S b/arch/mn10300/mm/cache-inv-by-tag.S
index e9713b40c0ff..ccedce9c144d 100644
--- a/arch/mn10300/mm/cache-inv-by-tag.S
+++ b/arch/mn10300/mm/cache-inv-by-tag.S
@@ -15,6 +15,7 @@
 #include <asm/cache.h>
 #include <asm/irqflags.h>
 #include <asm/cacheflush.h>
+#include "cache.inc"
 
 #define mn10300_local_dcache_inv_range_intr_interval \
 	+((1 << MN10300_DCACHE_INV_RANGE_INTR_LOG2_INTERVAL) - 1)
@@ -70,43 +71,7 @@ mn10300_local_icache_inv:
 	btst	CHCTR_ICEN,d0
 	beq	mn10300_local_icache_inv_end
 
-#if defined(CONFIG_AM33_2) || defined(CONFIG_AM33_3)
-	LOCAL_CLI_SAVE(d1)
-
-	# disable the icache
-	and	~CHCTR_ICEN,d0
-	movhu	d0,(a0)
-
-	# and wait for it to calm down
-	setlb
-	movhu	(a0),d0
-	btst	CHCTR_ICBUSY,d0
-	lne
-
-	# invalidate
-	or	CHCTR_ICINV,d0
-	movhu	d0,(a0)
-
-	# wait for the cache to finish
-	mov	CHCTR,a0
-	setlb
-	movhu	(a0),d0
-	btst	CHCTR_ICBUSY,d0
-	lne
-
-	# and reenable it
-	and	~CHCTR_ICINV,d0
-	or	CHCTR_ICEN,d0
-	movhu	d0,(a0)
-	movhu	(a0),d0
-
-	LOCAL_IRQ_RESTORE(d1)
-#else /* CONFIG_AM33_2 || CONFIG_AM33_3 */
-	# invalidate
-	or	CHCTR_ICINV,d0
-	movhu	d0,(a0)
-	movhu	(a0),d0
-#endif /* CONFIG_AM33_2 || CONFIG_AM33_3 */
+	invalidate_icache 1
 
 mn10300_local_icache_inv_end:
 	ret	[],0
@@ -128,43 +93,7 @@ mn10300_local_dcache_inv:
 	btst	CHCTR_DCEN,d0
 	beq	mn10300_local_dcache_inv_end
 
-#if defined(CONFIG_AM33_2) || defined(CONFIG_AM33_3)
-	LOCAL_CLI_SAVE(d1)
-
-	# disable the dcache
-	and	~CHCTR_DCEN,d0
-	movhu	d0,(a0)
-
-	# and wait for it to calm down
-	setlb
-	movhu	(a0),d0
-	btst	CHCTR_DCBUSY,d0
-	lne
-
-	# invalidate
-	or	CHCTR_DCINV,d0
-	movhu	d0,(a0)
-
-	# wait for the cache to finish
-	mov	CHCTR,a0
-	setlb
-	movhu	(a0),d0
-	btst	CHCTR_DCBUSY,d0
-	lne
-
-	# and reenable it
-	and	~CHCTR_DCINV,d0
-	or	CHCTR_DCEN,d0
-	movhu	d0,(a0)
-	movhu	(a0),d0
-
-	LOCAL_IRQ_RESTORE(d1)
-#else /* CONFIG_AM33_2 || CONFIG_AM33_3 */
-	# invalidate
-	or	CHCTR_DCINV,d0
-	movhu	d0,(a0)
-	movhu	(a0),d0
-#endif /* CONFIG_AM33_2 || CONFIG_AM33_3 */
+	invalidate_dcache 1
 
 mn10300_local_dcache_inv_end:
 	ret	[],0
@@ -195,9 +124,9 @@ mn10300_local_dcache_inv_range:
 	# and if they're not cacheline-aligned, we must flush any bits outside
 	# the range that share cachelines with stuff inside the range
 #ifdef CONFIG_MN10300_CACHE_WBACK
-	btst	~(L1_CACHE_BYTES-1),d0
+	btst	~L1_CACHE_TAG_MASK,d0
 	bne	1f
-	btst	~(L1_CACHE_BYTES-1),d1
+	btst	~L1_CACHE_TAG_MASK,d1
 	beq	2f
 1:
 	bra	mn10300_local_dcache_flush_inv_range
@@ -212,11 +141,10 @@ mn10300_local_dcache_inv_range:
 	beq	mn10300_local_dcache_inv_range_end
 
 #ifndef CONFIG_MN10300_CACHE_WBACK
-	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0	# round start
-								# addr down
+	and	L1_CACHE_TAG_MASK,d0		# round start addr down
 
 	add	L1_CACHE_BYTES,d1		# round end addr up
-	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d1
+	and	L1_CACHE_TAG_MASK,d1
 #endif /* !CONFIG_MN10300_CACHE_WBACK */
 	mov	d0,a1
 
diff --git a/arch/mn10300/mm/cache.inc b/arch/mn10300/mm/cache.inc
new file mode 100644
index 000000000000..394a119b9c73
--- /dev/null
+++ b/arch/mn10300/mm/cache.inc
@@ -0,0 +1,133 @@
+/* MN10300 CPU core caching macros -*- asm -*-
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+
+###############################################################################
+#
+# Invalidate the instruction cache.
+#	A0: Should hold CHCTR
+#	D0: Should have been read from CHCTR
+#	D1: Will be clobbered
+#
+# On some cores it is necessary to disable the icache whilst we do this.
+#
+###############################################################################
+	.macro invalidate_icache,disable_irq
+
+#if defined(CONFIG_AM33_2) || defined(CONFIG_AM33_3)
+	.if \disable_irq
+	# don't want an interrupt routine seeing a disabled cache
+	mov	epsw,d1
+	and	~EPSW_IE,epsw
+	or	EPSW_NMID,epsw
+	nop
+	nop
+	.endif
+
+	# disable the icache
+	and	~CHCTR_ICEN,d0
+	movhu	d0,(a0)
+
+	# and wait for it to calm down
+	setlb
+	movhu	(a0),d0
+	btst	CHCTR_ICBUSY,d0
+	lne
+
+	# invalidate
+	or	CHCTR_ICINV,d0
+	movhu	d0,(a0)
+
+	# wait for the cache to finish
+	setlb
+	movhu	(a0),d0
+	btst	CHCTR_ICBUSY,d0
+	lne
+
+	# and reenable it
+	or	CHCTR_ICEN,d0
+	movhu	d0,(a0)
+	movhu	(a0),d0
+
+	.if \disable_irq
+	LOCAL_IRQ_RESTORE(d1)
+	.endif
+
+#else /* CONFIG_AM33_2 || CONFIG_AM33_3 */
+
+	# invalidate
+	or	CHCTR_ICINV,d0
+	movhu	d0,(a0)
+	movhu	(a0),d0
+
+#endif /* CONFIG_AM33_2 || CONFIG_AM33_3 */
+	.endm
+
+###############################################################################
+#
+# Invalidate the data cache.
+#	A0: Should hold CHCTR
+#	D0: Should have been read from CHCTR
+#	D1: Will be clobbered
+#
+# On some cores it is necessary to disable the dcache whilst we do this.
+#
+###############################################################################
+	.macro invalidate_dcache,disable_irq
+
+#if defined(CONFIG_AM33_2) || defined(CONFIG_AM33_3)
+	.if \disable_irq
+	# don't want an interrupt routine seeing a disabled cache
+	mov	epsw,d1
+	and	~EPSW_IE,epsw
+	or	EPSW_NMID,epsw
+	nop
+	nop
+	.endif
+	
+	# disable the dcache
+	and	~CHCTR_DCEN,d0
+	movhu	d0,(a0)
+
+	# and wait for it to calm down
+	setlb
+	movhu	(a0),d0
+	btst	CHCTR_DCBUSY,d0
+	lne
+
+	# invalidate
+	or	CHCTR_DCINV,d0
+	movhu	d0,(a0)
+
+	# wait for the cache to finish
+	setlb
+	movhu	(a0),d0
+	btst	CHCTR_DCBUSY,d0
+	lne
+
+	# and reenable it
+	or	CHCTR_DCEN,d0
+	movhu	d0,(a0)
+	movhu	(a0),d0
+
+	.if \disable_irq
+	LOCAL_IRQ_RESTORE(d1)
+	.endif
+
+#else /* CONFIG_AM33_2 || CONFIG_AM33_3 */
+
+	# invalidate
+	or	CHCTR_DCINV,d0
+	movhu	d0,(a0)
+	movhu	(a0),d0
+
+#endif /* CONFIG_AM33_2 || CONFIG_AM33_3 */
+	.endm
diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c
index 59c3da49d9d9..0945409a8022 100644
--- a/arch/mn10300/mm/fault.c
+++ b/arch/mn10300/mm/fault.c
@@ -28,8 +28,9 @@
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/hardirq.h>
-#include <asm/gdb-stub.h>
 #include <asm/cpu-regs.h>
+#include <asm/debugger.h>
+#include <asm/gdb-stub.h>
 
 /*
  * Unlock any spinlocks which will prevent us from getting the
@@ -306,10 +307,8 @@ no_context:
 	printk(" printing pc:\n");
 	printk(KERN_ALERT "%08lx\n", regs->pc);
 
-#ifdef CONFIG_GDBSTUB
-	gdbstub_intercept(
-		regs, fault_code & 0x00010000 ? EXCEP_IAERROR : EXCEP_DAERROR);
-#endif
+	debugger_intercept(fault_code & 0x00010000 ? EXCEP_IAERROR : EXCEP_DAERROR,
+			   SIGSEGV, SEGV_ACCERR, regs);
 
 	page = PTBR;
 	page = ((unsigned long *) __va(page))[address >> 22];
diff --git a/arch/mn10300/proc-mn103e010/include/proc/cache.h b/arch/mn10300/proc-mn103e010/include/proc/cache.h
index c1528004163c..967d144f307e 100644
--- a/arch/mn10300/proc-mn103e010/include/proc/cache.h
+++ b/arch/mn10300/proc-mn103e010/include/proc/cache.h
@@ -23,6 +23,7 @@
 #define L1_CACHE_TAG_DIRTY	0x00000008	/* data cache tag dirty bit */
 #define L1_CACHE_TAG_ENTRY	0x00000ff0	/* cache tag entry address mask */
 #define L1_CACHE_TAG_ADDRESS	0xfffff000	/* cache tag line address mask */
+#define L1_CACHE_TAG_MASK	+(L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY)
 
 /*
  * specification of the interval between interrupt checking intervals whilst
diff --git a/arch/mn10300/proc-mn2ws0050/include/proc/cache.h b/arch/mn10300/proc-mn2ws0050/include/proc/cache.h
index cafd7b5b55b4..bcb5df2d892f 100644
--- a/arch/mn10300/proc-mn2ws0050/include/proc/cache.h
+++ b/arch/mn10300/proc-mn2ws0050/include/proc/cache.h
@@ -29,6 +29,7 @@
 #define L1_CACHE_TAG_DIRTY	0x00000008	/* data cache tag dirty bit */
 #define L1_CACHE_TAG_ENTRY	0x00000fe0	/* cache tag entry address mask */
 #define L1_CACHE_TAG_ADDRESS	0xfffff000	/* cache tag line address mask */
+#define L1_CACHE_TAG_MASK	+(L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY)
 
 /*
  * specification of the interval between interrupt checking intervals whilst
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index f4f4d700833a..b7ed8d7a9b33 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -544,7 +544,7 @@ void __init mem_init(void)
 unsigned long *empty_zero_page __read_mostly;
 EXPORT_SYMBOL(empty_zero_page);
 
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
 	int i,free = 0,total = 0,reserved = 0;
 	int shared = 0, cached = 0;
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index d17d04cfb2cd..33794c1d92c3 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -821,7 +821,7 @@ cmds(struct pt_regs *excp)
 				memzcan();
 				break;
 			case 'i':
-				show_mem();
+				show_mem(0);
 				break;
 			default:
 				termch = cmd;
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 6d0e02c4fe09..4c31e2b6e71b 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -75,7 +75,7 @@ void __init kmap_init(void)
 	kmap_prot = __pgprot(SRMMU_ET_PTE | SRMMU_PRIV | SRMMU_CACHE);
 }
 
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
 	printk("Mem-info:\n");
 	show_free_areas();
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 1a2b36f8866d..de7d8e21e01d 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -41,7 +41,7 @@
  * The normal show_free_areas() is too verbose on Tile, with dozens
  * of processors and often four NUMA zones each with high and lowmem.
  */
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
 	struct zone *zone;
 
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 3dbe3709b69d..1fc02633f700 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -55,7 +55,7 @@ early_param("initrd", early_initrd);
  */
 struct meminfo meminfo;
 
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
 	int free = 0, total = 0, reserved = 0;
 	int shared = 0, cached = 0, slab = 0, i;
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 455768a3eb9e..2bef5705ce24 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -371,12 +371,14 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
 
-void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
+void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time,
+				unsigned long unaccounted_time)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&blkg->stats_lock, flags);
 	blkg->stats.time += time;
+	blkg->stats.unaccounted_time += unaccounted_time;
 	spin_unlock_irqrestore(&blkg->stats_lock, flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
@@ -604,6 +606,9 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
 		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
 					blkg->stats.sectors, cb, dev);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
+	if (type == BLKIO_STAT_UNACCOUNTED_TIME)
+		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+					blkg->stats.unaccounted_time, cb, dev);
 	if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
 		uint64_t sum = blkg->stats.avg_queue_size_sum;
 		uint64_t samples = blkg->stats.avg_queue_size_samples;
@@ -1125,6 +1130,9 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
 			return blkio_read_blkg_stats(blkcg, cft, cb,
 						BLKIO_STAT_QUEUED, 1);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
+		case BLKIO_PROP_unaccounted_time:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_UNACCOUNTED_TIME, 0);
 		case BLKIO_PROP_dequeue:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
 						BLKIO_STAT_DEQUEUE, 0);
@@ -1382,6 +1390,12 @@ struct cftype blkio_files[] = {
 				BLKIO_PROP_dequeue),
 		.read_map = blkiocg_file_read_map,
 	},
+	{
+		.name = "unaccounted_time",
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_unaccounted_time),
+		.read_map = blkiocg_file_read_map,
+	},
 #endif
 };
 
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index ea4861bdd549..10919fae2d3a 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -49,6 +49,8 @@ enum stat_type {
 	/* All the single valued stats go below this */
 	BLKIO_STAT_TIME,
 	BLKIO_STAT_SECTORS,
+	/* Time not charged to this cgroup */
+	BLKIO_STAT_UNACCOUNTED_TIME,
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 	BLKIO_STAT_AVG_QUEUE_SIZE,
 	BLKIO_STAT_IDLE_TIME,
@@ -81,6 +83,7 @@ enum blkcg_file_name_prop {
 	BLKIO_PROP_io_serviced,
 	BLKIO_PROP_time,
 	BLKIO_PROP_sectors,
+	BLKIO_PROP_unaccounted_time,
 	BLKIO_PROP_io_service_time,
 	BLKIO_PROP_io_wait_time,
 	BLKIO_PROP_io_merged,
@@ -114,6 +117,8 @@ struct blkio_group_stats {
 	/* total disk time and nr sectors dispatched by this group */
 	uint64_t time;
 	uint64_t sectors;
+	/* Time not charged to this cgroup */
+	uint64_t unaccounted_time;
 	uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL];
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 	/* Sum of number of IOs queued across all samples */
@@ -240,7 +245,7 @@ static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
 
 #endif
 
-#define BLKIO_WEIGHT_MIN	100
+#define BLKIO_WEIGHT_MIN	10
 #define BLKIO_WEIGHT_MAX	1000
 #define BLKIO_WEIGHT_DEFAULT	500
 
@@ -293,7 +298,8 @@ extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
 extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
 						void *key);
 void blkiocg_update_timeslice_used(struct blkio_group *blkg,
-					unsigned long time);
+					unsigned long time,
+					unsigned long unaccounted_time);
 void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes,
 						bool direction, bool sync);
 void blkiocg_update_completion_stats(struct blkio_group *blkg,
@@ -319,7 +325,9 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
 static inline struct blkio_group *
 blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
 static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
-						unsigned long time) {}
+						unsigned long time,
+						unsigned long unaccounted_time)
+{}
 static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
 				uint64_t bytes, bool direction, bool sync) {}
 static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
diff --git a/block/blk-core.c b/block/blk-core.c
index a63336d49f30..59b5c00c0126 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -27,6 +27,7 @@
 #include <linux/writeback.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/fault-inject.h>
+#include <linux/list_sort.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -149,39 +150,29 @@ EXPORT_SYMBOL(blk_rq_init);
 static void req_bio_endio(struct request *rq, struct bio *bio,
 			  unsigned int nbytes, int error)
 {
-	struct request_queue *q = rq->q;
-
-	if (&q->flush_rq != rq) {
-		if (error)
-			clear_bit(BIO_UPTODATE, &bio->bi_flags);
-		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-			error = -EIO;
+	if (error)
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+		error = -EIO;
 
-		if (unlikely(nbytes > bio->bi_size)) {
-			printk(KERN_ERR "%s: want %u bytes done, %u left\n",
-			       __func__, nbytes, bio->bi_size);
-			nbytes = bio->bi_size;
-		}
+	if (unlikely(nbytes > bio->bi_size)) {
+		printk(KERN_ERR "%s: want %u bytes done, %u left\n",
+		       __func__, nbytes, bio->bi_size);
+		nbytes = bio->bi_size;
+	}
 
-		if (unlikely(rq->cmd_flags & REQ_QUIET))
-			set_bit(BIO_QUIET, &bio->bi_flags);
+	if (unlikely(rq->cmd_flags & REQ_QUIET))
+		set_bit(BIO_QUIET, &bio->bi_flags);
 
-		bio->bi_size -= nbytes;
-		bio->bi_sector += (nbytes >> 9);
+	bio->bi_size -= nbytes;
+	bio->bi_sector += (nbytes >> 9);
 
-		if (bio_integrity(bio))
-			bio_integrity_advance(bio, nbytes);
+	if (bio_integrity(bio))
+		bio_integrity_advance(bio, nbytes);
 
-		if (bio->bi_size == 0)
-			bio_endio(bio, error);
-	} else {
-		/*
-		 * Okay, this is the sequenced flush request in
-		 * progress, just record the error;
-		 */
-		if (error && !q->flush_err)
-			q->flush_err = error;
-	}
+	/* don't actually finish bio if it's part of flush sequence */
+	if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
+		bio_endio(bio, error);
 }
 
 void blk_dump_rq_flags(struct request *rq, char *msg)
@@ -208,135 +199,43 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 EXPORT_SYMBOL(blk_dump_rq_flags);
 
 /*
- * "plug" the device if there are no outstanding requests: this will
- * force the transfer to start only after we have put all the requests
- * on the list.
- *
- * This is called with interrupts off and no requests on the queue and
- * with the queue lock held.
- */
-void blk_plug_device(struct request_queue *q)
+ * Make sure that plugs that were pending when this function was entered,
+ * are now complete and requests pushed to the queue.
+*/
+static inline void queue_sync_plugs(struct request_queue *q)
 {
-	WARN_ON(!irqs_disabled());
-
 	/*
-	 * don't plug a stopped queue, it must be paired with blk_start_queue()
-	 * which will restart the queueing
+	 * If the current process is plugged and has barriers submitted,
+	 * we will livelock if we don't unplug first.
 	 */
-	if (blk_queue_stopped(q))
-		return;
-
-	if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
-		mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
-		trace_block_plug(q);
-	}
-}
-EXPORT_SYMBOL(blk_plug_device);
-
-/**
- * blk_plug_device_unlocked - plug a device without queue lock held
- * @q:    The &struct request_queue to plug
- *
- * Description:
- *   Like @blk_plug_device(), but grabs the queue lock and disables
- *   interrupts.
- **/
-void blk_plug_device_unlocked(struct request_queue *q)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(q->queue_lock, flags);
-	blk_plug_device(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
-}
-EXPORT_SYMBOL(blk_plug_device_unlocked);
-
-/*
- * remove the queue from the plugged list, if present. called with
- * queue lock held and interrupts disabled.
- */
-int blk_remove_plug(struct request_queue *q)
-{
-	WARN_ON(!irqs_disabled());
-
-	if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
-		return 0;
-
-	del_timer(&q->unplug_timer);
-	return 1;
+	blk_flush_plug(current);
 }
-EXPORT_SYMBOL(blk_remove_plug);
 
-/*
- * remove the plug and let it rip..
- */
-void __generic_unplug_device(struct request_queue *q)
+static void blk_delay_work(struct work_struct *work)
 {
-	if (unlikely(blk_queue_stopped(q)))
-		return;
-	if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
-		return;
+	struct request_queue *q;
 
-	q->request_fn(q);
+	q = container_of(work, struct request_queue, delay_work.work);
+	spin_lock_irq(q->queue_lock);
+	__blk_run_queue(q, false);
+	spin_unlock_irq(q->queue_lock);
 }
 
 /**
- * generic_unplug_device - fire a request queue
- * @q:    The &struct request_queue in question
+ * blk_delay_queue - restart queueing after defined interval
+ * @q:		The &struct request_queue in question
+ * @msecs:	Delay in msecs
  *
  * Description:
- *   Linux uses plugging to build bigger requests queues before letting
- *   the device have at them. If a queue is plugged, the I/O scheduler
- *   is still adding and merging requests on the queue. Once the queue
- *   gets unplugged, the request_fn defined for the queue is invoked and
- *   transfers started.
- **/
-void generic_unplug_device(struct request_queue *q)
-{
-	if (blk_queue_plugged(q)) {
-		spin_lock_irq(q->queue_lock);
-		__generic_unplug_device(q);
-		spin_unlock_irq(q->queue_lock);
-	}
-}
-EXPORT_SYMBOL(generic_unplug_device);
-
-static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
-				   struct page *page)
-{
-	struct request_queue *q = bdi->unplug_io_data;
-
-	blk_unplug(q);
-}
-
-void blk_unplug_work(struct work_struct *work)
-{
-	struct request_queue *q =
-		container_of(work, struct request_queue, unplug_work);
-
-	trace_block_unplug_io(q);
-	q->unplug_fn(q);
-}
-
-void blk_unplug_timeout(unsigned long data)
-{
-	struct request_queue *q = (struct request_queue *)data;
-
-	trace_block_unplug_timer(q);
-	kblockd_schedule_work(q, &q->unplug_work);
-}
-
-void blk_unplug(struct request_queue *q)
+ *   Sometimes queueing needs to be postponed for a little while, to allow
+ *   resources to come back. This function will make sure that queueing is
+ *   restarted around the specified time.
+ */
+void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 {
-	/*
-	 * devices don't necessarily have an ->unplug_fn defined
-	 */
-	if (q->unplug_fn) {
-		trace_block_unplug_io(q);
-		q->unplug_fn(q);
-	}
+	schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));
 }
-EXPORT_SYMBOL(blk_unplug);
+EXPORT_SYMBOL(blk_delay_queue);
 
 /**
  * blk_start_queue - restart a previously stopped queue
@@ -372,7 +271,7 @@ EXPORT_SYMBOL(blk_start_queue);
  **/
 void blk_stop_queue(struct request_queue *q)
 {
-	blk_remove_plug(q);
+	cancel_delayed_work(&q->delay_work);
 	queue_flag_set(QUEUE_FLAG_STOPPED, q);
 }
 EXPORT_SYMBOL(blk_stop_queue);
@@ -390,13 +289,16 @@ EXPORT_SYMBOL(blk_stop_queue);
  *     that its ->make_request_fn will not re-add plugging prior to calling
  *     this function.
  *
+ *     This function does not cancel any asynchronous activity arising
+ *     out of elevator or throttling code. That would require elevaotor_exit()
+ *     and blk_throtl_exit() to be called with queue lock initialized.
+ *
  */
 void blk_sync_queue(struct request_queue *q)
 {
-	del_timer_sync(&q->unplug_timer);
 	del_timer_sync(&q->timeout);
-	cancel_work_sync(&q->unplug_work);
-	throtl_shutdown_timer_wq(q);
+	cancel_delayed_work_sync(&q->delay_work);
+	queue_sync_plugs(q);
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
@@ -412,14 +314,9 @@ EXPORT_SYMBOL(blk_sync_queue);
  */
 void __blk_run_queue(struct request_queue *q, bool force_kblockd)
 {
-	blk_remove_plug(q);
-
 	if (unlikely(blk_queue_stopped(q)))
 		return;
 
-	if (elv_queue_empty(q))
-		return;
-
 	/*
 	 * Only recurse once to avoid overrunning the stack, let the unplug
 	 * handling reinvoke the handler shortly if we already got there.
@@ -427,10 +324,8 @@ void __blk_run_queue(struct request_queue *q, bool force_kblockd)
 	if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
 		q->request_fn(q);
 		queue_flag_clear(QUEUE_FLAG_REENTER, q);
-	} else {
-		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
-		kblockd_schedule_work(q, &q->unplug_work);
-	}
+	} else
+		queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -457,6 +352,11 @@ void blk_put_queue(struct request_queue *q)
 	kobject_put(&q->kobj);
 }
 
+/*
+ * Note: If a driver supplied the queue lock, it should not zap that lock
+ * unexpectedly as some queue cleanup components like elevator_exit() and
+ * blk_throtl_exit() need queue lock.
+ */
 void blk_cleanup_queue(struct request_queue *q)
 {
 	/*
@@ -475,6 +375,8 @@ void blk_cleanup_queue(struct request_queue *q)
 	if (q->elevator)
 		elevator_exit(q->elevator);
 
+	blk_throtl_exit(q);
+
 	blk_put_queue(q);
 }
 EXPORT_SYMBOL(blk_cleanup_queue);
@@ -517,8 +419,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (!q)
 		return NULL;
 
-	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
-	q->backing_dev_info.unplug_io_data = q;
 	q->backing_dev_info.ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
@@ -538,17 +438,24 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 
 	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
 		    laptop_mode_timer_fn, (unsigned long) q);
-	init_timer(&q->unplug_timer);
 	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
 	INIT_LIST_HEAD(&q->timeout_list);
-	INIT_LIST_HEAD(&q->pending_flushes);
-	INIT_WORK(&q->unplug_work, blk_unplug_work);
+	INIT_LIST_HEAD(&q->flush_queue[0]);
+	INIT_LIST_HEAD(&q->flush_queue[1]);
+	INIT_LIST_HEAD(&q->flush_data_in_flight);
+	INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
 
 	mutex_init(&q->sysfs_lock);
 	spin_lock_init(&q->__queue_lock);
 
+	/*
+	 * By default initialize queue_lock to internal lock and driver can
+	 * override it later if need be.
+	 */
+	q->queue_lock = &q->__queue_lock;
+
 	return q;
 }
 EXPORT_SYMBOL(blk_alloc_queue_node);
@@ -631,9 +538,11 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
 	q->request_fn		= rfn;
 	q->prep_rq_fn		= NULL;
 	q->unprep_rq_fn		= NULL;
-	q->unplug_fn		= generic_unplug_device;
 	q->queue_flags		= QUEUE_FLAG_DEFAULT;
-	q->queue_lock		= lock;
+
+	/* Override internal queue lock with supplied lock pointer */
+	if (lock)
+		q->queue_lock		= lock;
 
 	/*
 	 * This also sets hw/phys segments, boundary and size
@@ -666,6 +575,8 @@ int blk_get_queue(struct request_queue *q)
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
+	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
 	if (rq->cmd_flags & REQ_ELVPRIV)
 		elv_put_request(q, rq);
 	mempool_free(rq, q->rq.rq_pool);
@@ -762,6 +673,25 @@ static void freed_request(struct request_queue *q, int sync, int priv)
 }
 
 /*
+ * Determine if elevator data should be initialized when allocating the
+ * request associated with @bio.
+ */
+static bool blk_rq_should_init_elevator(struct bio *bio)
+{
+	if (!bio)
+		return true;
+
+	/*
+	 * Flush requests do not use the elevator so skip initialization.
+	 * This allows a request to share the flush and elevator data.
+	 */
+	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))
+		return false;
+
+	return true;
+}
+
+/*
  * Get a free request, queue_lock must be held.
  * Returns NULL on failure, with queue_lock held.
  * Returns !NULL on success, with queue_lock *not held*.
@@ -773,7 +703,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 	struct request_list *rl = &q->rq;
 	struct io_context *ioc = NULL;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
-	int may_queue, priv;
+	int may_queue, priv = 0;
 
 	may_queue = elv_may_queue(q, rw_flags);
 	if (may_queue == ELV_MQUEUE_NO)
@@ -817,9 +747,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 	rl->count[is_sync]++;
 	rl->starved[is_sync] = 0;
 
-	priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
-	if (priv)
-		rl->elvpriv++;
+	if (blk_rq_should_init_elevator(bio)) {
+		priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+		if (priv)
+			rl->elvpriv++;
+	}
 
 	if (blk_queue_io_stat(q))
 		rw_flags |= REQ_IO_STAT;
@@ -866,8 +798,8 @@ out:
 }
 
 /*
- * No available requests for this queue, unplug the device and wait for some
- * requests to become available.
+ * No available requests for this queue, wait for some requests to become
+ * available.
  *
  * Called with q->queue_lock held, and returns with it unlocked.
  */
@@ -888,7 +820,6 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 
 		trace_block_sleeprq(q, bio, rw_flags & 1);
 
-		__generic_unplug_device(q);
 		spin_unlock_irq(q->queue_lock);
 		io_schedule();
 
@@ -1010,6 +941,13 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
 }
 EXPORT_SYMBOL(blk_requeue_request);
 
+static void add_acct_request(struct request_queue *q, struct request *rq,
+			     int where)
+{
+	drive_stat_acct(rq, 1);
+	__elv_add_request(q, rq, where);
+}
+
 /**
  * blk_insert_request - insert a special request into a request queue
  * @q:		request queue where request should be inserted
@@ -1052,8 +990,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
 	if (blk_rq_tagged(rq))
 		blk_queue_end_tag(q, rq);
 
-	drive_stat_acct(rq, 1);
-	__elv_add_request(q, rq, where, 0);
+	add_acct_request(q, rq, where);
 	__blk_run_queue(q, false);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
@@ -1174,6 +1111,113 @@ void blk_add_request_payload(struct request *rq, struct page *page,
 }
 EXPORT_SYMBOL_GPL(blk_add_request_payload);
 
+static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
+				   struct bio *bio)
+{
+	const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+
+	/*
+	 * Debug stuff, kill later
+	 */
+	if (!rq_mergeable(req)) {
+		blk_dump_rq_flags(req, "back");
+		return false;
+	}
+
+	if (!ll_back_merge_fn(q, req, bio))
+		return false;
+
+	trace_block_bio_backmerge(q, bio);
+
+	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
+		blk_rq_set_mixed_merge(req);
+
+	req->biotail->bi_next = bio;
+	req->biotail = bio;
+	req->__data_len += bio->bi_size;
+	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
+
+	drive_stat_acct(req, 0);
+	return true;
+}
+
+static bool bio_attempt_front_merge(struct request_queue *q,
+				    struct request *req, struct bio *bio)
+{
+	const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+	sector_t sector;
+
+	/*
+	 * Debug stuff, kill later
+	 */
+	if (!rq_mergeable(req)) {
+		blk_dump_rq_flags(req, "front");
+		return false;
+	}
+
+	if (!ll_front_merge_fn(q, req, bio))
+		return false;
+
+	trace_block_bio_frontmerge(q, bio);
+
+	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
+		blk_rq_set_mixed_merge(req);
+
+	sector = bio->bi_sector;
+
+	bio->bi_next = req->bio;
+	req->bio = bio;
+
+	/*
+	 * may not be valid. if the low level driver said
+	 * it didn't need a bounce buffer then it better
+	 * not touch req->buffer either...
+	 */
+	req->buffer = bio_data(bio);
+	req->__sector = bio->bi_sector;
+	req->__data_len += bio->bi_size;
+	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
+
+	drive_stat_acct(req, 0);
+	return true;
+}
+
+/*
+ * Attempts to merge with the plugged list in the current process. Returns
+ * true if merge was succesful, otherwise false.
+ */
+static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
+			       struct bio *bio)
+{
+	struct blk_plug *plug;
+	struct request *rq;
+	bool ret = false;
+
+	plug = tsk->plug;
+	if (!plug)
+		goto out;
+
+	list_for_each_entry_reverse(rq, &plug->list, queuelist) {
+		int el_ret;
+
+		if (rq->q != q)
+			continue;
+
+		el_ret = elv_try_merge(rq, bio);
+		if (el_ret == ELEVATOR_BACK_MERGE) {
+			ret = bio_attempt_back_merge(q, rq, bio);
+			if (ret)
+				break;
+		} else if (el_ret == ELEVATOR_FRONT_MERGE) {
+			ret = bio_attempt_front_merge(q, rq, bio);
+			if (ret)
+				break;
+		}
+	}
+out:
+	return ret;
+}
+
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
 	req->cpu = bio->bi_comp_cpu;
@@ -1189,26 +1233,12 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 	blk_rq_bio_prep(req->q, req, bio);
 }
 
-/*
- * Only disabling plugging for non-rotational devices if it does tagging
- * as well, otherwise we do need the proper merging
- */
-static inline bool queue_should_plug(struct request_queue *q)
-{
-	return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
-}
-
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
-	struct request *req;
-	int el_ret;
-	unsigned int bytes = bio->bi_size;
-	const unsigned short prio = bio_prio(bio);
 	const bool sync = !!(bio->bi_rw & REQ_SYNC);
-	const bool unplug = !!(bio->bi_rw & REQ_UNPLUG);
-	const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK;
-	int where = ELEVATOR_INSERT_SORT;
-	int rw_flags;
+	struct blk_plug *plug;
+	int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
+	struct request *req;
 
 	/*
 	 * low level driver can indicate that it wants pages above a
@@ -1217,78 +1247,36 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	 */
 	blk_queue_bounce(q, &bio);
 
-	spin_lock_irq(q->queue_lock);
-
 	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
-		where = ELEVATOR_INSERT_FRONT;
+		spin_lock_irq(q->queue_lock);
+		where = ELEVATOR_INSERT_FLUSH;
 		goto get_rq;
 	}
 
-	if (elv_queue_empty(q))
-		goto get_rq;
-
-	el_ret = elv_merge(q, &req, bio);
-	switch (el_ret) {
-	case ELEVATOR_BACK_MERGE:
-		BUG_ON(!rq_mergeable(req));
-
-		if (!ll_back_merge_fn(q, req, bio))
-			break;
-
-		trace_block_bio_backmerge(q, bio);
-
-		if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
-			blk_rq_set_mixed_merge(req);
-
-		req->biotail->bi_next = bio;
-		req->biotail = bio;
-		req->__data_len += bytes;
-		req->ioprio = ioprio_best(req->ioprio, prio);
-		if (!blk_rq_cpu_valid(req))
-			req->cpu = bio->bi_comp_cpu;
-		drive_stat_acct(req, 0);
-		elv_bio_merged(q, req, bio);
-		if (!attempt_back_merge(q, req))
-			elv_merged_request(q, req, el_ret);
+	/*
+	 * Check if we can merge with the plugged list before grabbing
+	 * any locks.
+	 */
+	if (attempt_plug_merge(current, q, bio))
 		goto out;
 
-	case ELEVATOR_FRONT_MERGE:
-		BUG_ON(!rq_mergeable(req));
-
-		if (!ll_front_merge_fn(q, req, bio))
-			break;
-
-		trace_block_bio_frontmerge(q, bio);
+	spin_lock_irq(q->queue_lock);
 
-		if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
-			blk_rq_set_mixed_merge(req);
-			req->cmd_flags &= ~REQ_FAILFAST_MASK;
-			req->cmd_flags |= ff;
+	el_ret = elv_merge(q, &req, bio);
+	if (el_ret == ELEVATOR_BACK_MERGE) {
+		BUG_ON(req->cmd_flags & REQ_ON_PLUG);
+		if (bio_attempt_back_merge(q, req, bio)) {
+			if (!attempt_back_merge(q, req))
+				elv_merged_request(q, req, el_ret);
+			goto out_unlock;
+		}
+	} else if (el_ret == ELEVATOR_FRONT_MERGE) {
+		BUG_ON(req->cmd_flags & REQ_ON_PLUG);
+		if (bio_attempt_front_merge(q, req, bio)) {
+			if (!attempt_front_merge(q, req))
+				elv_merged_request(q, req, el_ret);
+			goto out_unlock;
 		}
-
-		bio->bi_next = req->bio;
-		req->bio = bio;
-
-		/*
-		 * may not be valid. if the low level driver said
-		 * it didn't need a bounce buffer then it better
-		 * not touch req->buffer either...
-		 */
-		req->buffer = bio_data(bio);
-		req->__sector = bio->bi_sector;
-		req->__data_len += bytes;
-		req->ioprio = ioprio_best(req->ioprio, prio);
-		if (!blk_rq_cpu_valid(req))
-			req->cpu = bio->bi_comp_cpu;
-		drive_stat_acct(req, 0);
-		elv_bio_merged(q, req, bio);
-		if (!attempt_front_merge(q, req))
-			elv_merged_request(q, req, el_ret);
-		goto out;
-
-	/* ELV_NO_MERGE: elevator says don't/can't merge. */
-	default:
-		;
 	}
 
 get_rq:
@@ -1315,20 +1303,35 @@ get_rq:
 	 */
 	init_request_from_bio(req, bio);
 
-	spin_lock_irq(q->queue_lock);
 	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
-	    bio_flagged(bio, BIO_CPU_AFFINE))
-		req->cpu = blk_cpu_to_group(smp_processor_id());
-	if (queue_should_plug(q) && elv_queue_empty(q))
-		blk_plug_device(q);
-
-	/* insert the request into the elevator */
-	drive_stat_acct(req, 1);
-	__elv_add_request(q, req, where, 0);
+	    bio_flagged(bio, BIO_CPU_AFFINE)) {
+		req->cpu = blk_cpu_to_group(get_cpu());
+		put_cpu();
+	}
+
+	plug = current->plug;
+	if (plug) {
+		if (!plug->should_sort && !list_empty(&plug->list)) {
+			struct request *__rq;
+
+			__rq = list_entry_rq(plug->list.prev);
+			if (__rq->q != q)
+				plug->should_sort = 1;
+		}
+		/*
+		 * Debug flag, kill later
+		 */
+		req->cmd_flags |= REQ_ON_PLUG;
+		list_add_tail(&req->queuelist, &plug->list);
+		drive_stat_acct(req, 1);
+	} else {
+		spin_lock_irq(q->queue_lock);
+		add_acct_request(q, req, where);
+		__blk_run_queue(q, false);
+out_unlock:
+		spin_unlock_irq(q->queue_lock);
+	}
 out:
-	if (unplug || !queue_should_plug(q))
-		__generic_unplug_device(q);
-	spin_unlock_irq(q->queue_lock);
 	return 0;
 }
 
@@ -1731,9 +1734,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 	 */
 	BUG_ON(blk_queued_rq(rq));
 
-	drive_stat_acct(rq, 1);
-	__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
-
+	add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
 	return 0;
@@ -1805,7 +1806,7 @@ static void blk_account_io_done(struct request *req)
 	 * normal IO on queueing nor completion.  Accounting the
 	 * containing request is enough.
 	 */
-	if (blk_do_io_stat(req) && req != &req->q->flush_rq) {
+	if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {
 		unsigned long duration = jiffies - req->start_time;
 		const int rw = rq_data_dir(req);
 		struct hd_struct *part;
@@ -2628,6 +2629,113 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 }
 EXPORT_SYMBOL(kblockd_schedule_work);
 
+int kblockd_schedule_delayed_work(struct request_queue *q,
+			struct delayed_work *dwork, unsigned long delay)
+{
+	return queue_delayed_work(kblockd_workqueue, dwork, delay);
+}
+EXPORT_SYMBOL(kblockd_schedule_delayed_work);
+
+#define PLUG_MAGIC	0x91827364
+
+void blk_start_plug(struct blk_plug *plug)
+{
+	struct task_struct *tsk = current;
+
+	plug->magic = PLUG_MAGIC;
+	INIT_LIST_HEAD(&plug->list);
+	plug->should_sort = 0;
+
+	/*
+	 * If this is a nested plug, don't actually assign it. It will be
+	 * flushed on its own.
+	 */
+	if (!tsk->plug) {
+		/*
+		 * Store ordering should not be needed here, since a potential
+		 * preempt will imply a full memory barrier
+		 */
+		tsk->plug = plug;
+	}
+}
+EXPORT_SYMBOL(blk_start_plug);
+
+static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct request *rqa = container_of(a, struct request, queuelist);
+	struct request *rqb = container_of(b, struct request, queuelist);
+
+	return !(rqa->q == rqb->q);
+}
+
+static void flush_plug_list(struct blk_plug *plug)
+{
+	struct request_queue *q;
+	unsigned long flags;
+	struct request *rq;
+
+	BUG_ON(plug->magic != PLUG_MAGIC);
+
+	if (list_empty(&plug->list))
+		return;
+
+	if (plug->should_sort)
+		list_sort(NULL, &plug->list, plug_rq_cmp);
+
+	q = NULL;
+	local_irq_save(flags);
+	while (!list_empty(&plug->list)) {
+		rq = list_entry_rq(plug->list.next);
+		list_del_init(&rq->queuelist);
+		BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
+		BUG_ON(!rq->q);
+		if (rq->q != q) {
+			if (q) {
+				__blk_run_queue(q, false);
+				spin_unlock(q->queue_lock);
+			}
+			q = rq->q;
+			spin_lock(q->queue_lock);
+		}
+		rq->cmd_flags &= ~REQ_ON_PLUG;
+
+		/*
+		 * rq is already accounted, so use raw insert
+		 */
+		__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
+	}
+
+	if (q) {
+		__blk_run_queue(q, false);
+		spin_unlock(q->queue_lock);
+	}
+
+	BUG_ON(!list_empty(&plug->list));
+	local_irq_restore(flags);
+}
+
+static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug)
+{
+	flush_plug_list(plug);
+
+	if (plug == tsk->plug)
+		tsk->plug = NULL;
+}
+
+void blk_finish_plug(struct blk_plug *plug)
+{
+	if (plug)
+		__blk_finish_plug(current, plug);
+}
+EXPORT_SYMBOL(blk_finish_plug);
+
+void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug)
+{
+	__blk_finish_plug(tsk, plug);
+	tsk->plug = plug;
+}
+EXPORT_SYMBOL(__blk_flush_plug);
+
 int __init blk_dev_init(void)
 {
 	BUILD_BUG_ON(__REQ_NR_BITS > 8 *
diff --git a/block/blk-exec.c b/block/blk-exec.c
index cf1456a02acd..7482b7fa863b 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -54,8 +54,8 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	rq->end_io = done;
 	WARN_ON(irqs_disabled());
 	spin_lock_irq(q->queue_lock);
-	__elv_add_request(q, rq, where, 1);
-	__generic_unplug_device(q);
+	__elv_add_request(q, rq, where);
+	__blk_run_queue(q, false);
 	/* the queue is stopped so it won't be plugged+unplugged */
 	if (rq->cmd_type == REQ_TYPE_PM_RESUME)
 		q->request_fn(q);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index b27d0208611b..93d5fd8e51eb 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -1,6 +1,69 @@
 /*
  * Functions to sequence FLUSH and FUA writes.
+ *
+ * Copyright (C) 2011		Max Planck Institute for Gravitational Physics
+ * Copyright (C) 2011		Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ * REQ_{FLUSH|FUA} requests are decomposed to sequences consisted of three
+ * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request
+ * properties and hardware capability.
+ *
+ * If a request doesn't have data, only REQ_FLUSH makes sense, which
+ * indicates a simple flush request.  If there is data, REQ_FLUSH indicates
+ * that the device cache should be flushed before the data is executed, and
+ * REQ_FUA means that the data must be on non-volatile media on request
+ * completion.
+ *
+ * If the device doesn't have writeback cache, FLUSH and FUA don't make any
+ * difference.  The requests are either completed immediately if there's no
+ * data or executed as normal requests otherwise.
+ *
+ * If the device has writeback cache and supports FUA, REQ_FLUSH is
+ * translated to PREFLUSH but REQ_FUA is passed down directly with DATA.
+ *
+ * If the device has writeback cache and doesn't support FUA, REQ_FLUSH is
+ * translated to PREFLUSH and REQ_FUA to POSTFLUSH.
+ *
+ * The actual execution of flush is double buffered.  Whenever a request
+ * needs to execute PRE or POSTFLUSH, it queues at
+ * q->flush_queue[q->flush_pending_idx].  Once certain criteria are met, a
+ * flush is issued and the pending_idx is toggled.  When the flush
+ * completes, all the requests which were pending are proceeded to the next
+ * step.  This allows arbitrary merging of different types of FLUSH/FUA
+ * requests.
+ *
+ * Currently, the following conditions are used to determine when to issue
+ * flush.
+ *
+ * C1. At any given time, only one flush shall be in progress.  This makes
+ *     double buffering sufficient.
+ *
+ * C2. Flush is deferred if any request is executing DATA of its sequence.
+ *     This avoids issuing separate POSTFLUSHes for requests which shared
+ *     PREFLUSH.
+ *
+ * C3. The second condition is ignored if there is a request which has
+ *     waited longer than FLUSH_PENDING_TIMEOUT.  This is to avoid
+ *     starvation in the unlikely case where there are continuous stream of
+ *     FUA (without FLUSH) requests.
+ *
+ * For devices which support FUA, it isn't clear whether C2 (and thus C3)
+ * is beneficial.
+ *
+ * Note that a sequenced FLUSH/FUA request with DATA is completed twice.
+ * Once while executing DATA and again after the whole sequence is
+ * complete.  The first completion updates the contained bio but doesn't
+ * finish it so that the bio submitter is notified only after the whole
+ * sequence is complete.  This is implemented by testing REQ_FLUSH_SEQ in
+ * req_bio_endio().
+ *
+ * The above peculiarity requires that each FLUSH/FUA request has only one
+ * bio attached to it, which is guaranteed as they aren't allowed to be
+ * merged in the usual way.
  */
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/bio.h>
@@ -11,58 +74,142 @@
 
 /* FLUSH/FUA sequences */
 enum {
-	QUEUE_FSEQ_STARTED	= (1 << 0), /* flushing in progress */
-	QUEUE_FSEQ_PREFLUSH	= (1 << 1), /* pre-flushing in progress */
-	QUEUE_FSEQ_DATA		= (1 << 2), /* data write in progress */
-	QUEUE_FSEQ_POSTFLUSH	= (1 << 3), /* post-flushing in progress */
-	QUEUE_FSEQ_DONE		= (1 << 4),
+	REQ_FSEQ_PREFLUSH	= (1 << 0), /* pre-flushing in progress */
+	REQ_FSEQ_DATA		= (1 << 1), /* data write in progress */
+	REQ_FSEQ_POSTFLUSH	= (1 << 2), /* post-flushing in progress */
+	REQ_FSEQ_DONE		= (1 << 3),
+
+	REQ_FSEQ_ACTIONS	= REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA |
+				  REQ_FSEQ_POSTFLUSH,
+
+	/*
+	 * If flush has been pending longer than the following timeout,
+	 * it's issued even if flush_data requests are still in flight.
+	 */
+	FLUSH_PENDING_TIMEOUT	= 5 * HZ,
 };
 
-static struct request *queue_next_fseq(struct request_queue *q);
+static bool blk_kick_flush(struct request_queue *q);
 
-unsigned blk_flush_cur_seq(struct request_queue *q)
+static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
 {
-	if (!q->flush_seq)
-		return 0;
-	return 1 << ffz(q->flush_seq);
+	unsigned int policy = 0;
+
+	if (fflags & REQ_FLUSH) {
+		if (rq->cmd_flags & REQ_FLUSH)
+			policy |= REQ_FSEQ_PREFLUSH;
+		if (blk_rq_sectors(rq))
+			policy |= REQ_FSEQ_DATA;
+		if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
+			policy |= REQ_FSEQ_POSTFLUSH;
+	}
+	return policy;
 }
 
-static struct request *blk_flush_complete_seq(struct request_queue *q,
-					      unsigned seq, int error)
+static unsigned int blk_flush_cur_seq(struct request *rq)
 {
-	struct request *next_rq = NULL;
-
-	if (error && !q->flush_err)
-		q->flush_err = error;
-
-	BUG_ON(q->flush_seq & seq);
-	q->flush_seq |= seq;
-
-	if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) {
-		/* not complete yet, queue the next flush sequence */
-		next_rq = queue_next_fseq(q);
-	} else {
-		/* complete this flush request */
-		__blk_end_request_all(q->orig_flush_rq, q->flush_err);
-		q->orig_flush_rq = NULL;
-		q->flush_seq = 0;
-
-		/* dispatch the next flush if there's one */
-		if (!list_empty(&q->pending_flushes)) {
-			next_rq = list_entry_rq(q->pending_flushes.next);
-			list_move(&next_rq->queuelist, &q->queue_head);
-		}
+	return 1 << ffz(rq->flush.seq);
+}
+
+static void blk_flush_restore_request(struct request *rq)
+{
+	/*
+	 * After flush data completion, @rq->bio is %NULL but we need to
+	 * complete the bio again.  @rq->biotail is guaranteed to equal the
+	 * original @rq->bio.  Restore it.
+	 */
+	rq->bio = rq->biotail;
+
+	/* make @rq a normal request */
+	rq->cmd_flags &= ~REQ_FLUSH_SEQ;
+	rq->end_io = NULL;
+}
+
+/**
+ * blk_flush_complete_seq - complete flush sequence
+ * @rq: FLUSH/FUA request being sequenced
+ * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero)
+ * @error: whether an error occurred
+ *
+ * @rq just completed @seq part of its flush sequence, record the
+ * completion and trigger the next step.
+ *
+ * CONTEXT:
+ * spin_lock_irq(q->queue_lock)
+ *
+ * RETURNS:
+ * %true if requests were added to the dispatch queue, %false otherwise.
+ */
+static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
+				   int error)
+{
+	struct request_queue *q = rq->q;
+	struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
+	bool queued = false;
+
+	BUG_ON(rq->flush.seq & seq);
+	rq->flush.seq |= seq;
+
+	if (likely(!error))
+		seq = blk_flush_cur_seq(rq);
+	else
+		seq = REQ_FSEQ_DONE;
+
+	switch (seq) {
+	case REQ_FSEQ_PREFLUSH:
+	case REQ_FSEQ_POSTFLUSH:
+		/* queue for flush */
+		if (list_empty(pending))
+			q->flush_pending_since = jiffies;
+		list_move_tail(&rq->flush.list, pending);
+		break;
+
+	case REQ_FSEQ_DATA:
+		list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
+		list_add(&rq->queuelist, &q->queue_head);
+		queued = true;
+		break;
+
+	case REQ_FSEQ_DONE:
+		/*
+		 * @rq was previously adjusted by blk_flush_issue() for
+		 * flush sequencing and may already have gone through the
+		 * flush data request completion path.  Restore @rq for
+		 * normal completion and end it.
+		 */
+		BUG_ON(!list_empty(&rq->queuelist));
+		list_del_init(&rq->flush.list);
+		blk_flush_restore_request(rq);
+		__blk_end_request_all(rq, error);
+		break;
+
+	default:
+		BUG();
 	}
-	return next_rq;
+
+	return blk_kick_flush(q) | queued;
 }
 
-static void blk_flush_complete_seq_end_io(struct request_queue *q,
-					  unsigned seq, int error)
+static void flush_end_io(struct request *flush_rq, int error)
 {
-	bool was_empty = elv_queue_empty(q);
-	struct request *next_rq;
+	struct request_queue *q = flush_rq->q;
+	struct list_head *running = &q->flush_queue[q->flush_running_idx];
+	bool queued = false;
+	struct request *rq, *n;
 
-	next_rq = blk_flush_complete_seq(q, seq, error);
+	BUG_ON(q->flush_pending_idx == q->flush_running_idx);
+
+	/* account completion of the flush request */
+	q->flush_running_idx ^= 1;
+	elv_completed_request(q, flush_rq);
+
+	/* and push the waiting requests to the next stage */
+	list_for_each_entry_safe(rq, n, running, flush.list) {
+		unsigned int seq = blk_flush_cur_seq(rq);
+
+		BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
+		queued |= blk_flush_complete_seq(rq, seq, error);
+	}
 
 	/*
 	 * Moving a request silently to empty queue_head may stall the
@@ -70,127 +217,153 @@ static void blk_flush_complete_seq_end_io(struct request_queue *q,
 	 * from request completion path and calling directly into
 	 * request_fn may confuse the driver.  Always use kblockd.
 	 */
-	if (was_empty && next_rq)
+	if (queued)
 		__blk_run_queue(q, true);
 }
 
-static void pre_flush_end_io(struct request *rq, int error)
+/**
+ * blk_kick_flush - consider issuing flush request
+ * @q: request_queue being kicked
+ *
+ * Flush related states of @q have changed, consider issuing flush request.
+ * Please read the comment at the top of this file for more info.
+ *
+ * CONTEXT:
+ * spin_lock_irq(q->queue_lock)
+ *
+ * RETURNS:
+ * %true if flush was issued, %false otherwise.
+ */
+static bool blk_kick_flush(struct request_queue *q)
 {
-	elv_completed_request(rq->q, rq);
-	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error);
+	struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
+	struct request *first_rq =
+		list_first_entry(pending, struct request, flush.list);
+
+	/* C1 described at the top of this file */
+	if (q->flush_pending_idx != q->flush_running_idx || list_empty(pending))
+		return false;
+
+	/* C2 and C3 */
+	if (!list_empty(&q->flush_data_in_flight) &&
+	    time_before(jiffies,
+			q->flush_pending_since + FLUSH_PENDING_TIMEOUT))
+		return false;
+
+	/*
+	 * Issue flush and toggle pending_idx.  This makes pending_idx
+	 * different from running_idx, which means flush is in flight.
+	 */
+	blk_rq_init(q, &q->flush_rq);
+	q->flush_rq.cmd_type = REQ_TYPE_FS;
+	q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
+	q->flush_rq.rq_disk = first_rq->rq_disk;
+	q->flush_rq.end_io = flush_end_io;
+
+	q->flush_pending_idx ^= 1;
+	elv_insert(q, &q->flush_rq, ELEVATOR_INSERT_REQUEUE);
+	return true;
 }
 
 static void flush_data_end_io(struct request *rq, int error)
 {
-	elv_completed_request(rq->q, rq);
-	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error);
-}
+	struct request_queue *q = rq->q;
 
-static void post_flush_end_io(struct request *rq, int error)
-{
-	elv_completed_request(rq->q, rq);
-	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
+	/*
+	 * After populating an empty queue, kick it to avoid stall.  Read
+	 * the comment in flush_end_io().
+	 */
+	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
+		__blk_run_queue(q, true);
 }
 
-static void init_flush_request(struct request *rq, struct gendisk *disk)
+/**
+ * blk_insert_flush - insert a new FLUSH/FUA request
+ * @rq: request to insert
+ *
+ * To be called from elv_insert() for %ELEVATOR_INSERT_FLUSH insertions.
+ * @rq is being submitted.  Analyze what needs to be done and put it on the
+ * right queue.
+ *
+ * CONTEXT:
+ * spin_lock_irq(q->queue_lock)
+ */
+void blk_insert_flush(struct request *rq)
 {
-	rq->cmd_type = REQ_TYPE_FS;
-	rq->cmd_flags = WRITE_FLUSH;
-	rq->rq_disk = disk;
-}
+	struct request_queue *q = rq->q;
+	unsigned int fflags = q->flush_flags;	/* may change, cache */
+	unsigned int policy = blk_flush_policy(fflags, rq);
 
-static struct request *queue_next_fseq(struct request_queue *q)
-{
-	struct request *orig_rq = q->orig_flush_rq;
-	struct request *rq = &q->flush_rq;
+	BUG_ON(rq->end_io);
+	BUG_ON(!rq->bio || rq->bio != rq->biotail);
 
-	blk_rq_init(q, rq);
+	/*
+	 * @policy now records what operations need to be done.  Adjust
+	 * REQ_FLUSH and FUA for the driver.
+	 */
+	rq->cmd_flags &= ~REQ_FLUSH;
+	if (!(fflags & REQ_FUA))
+		rq->cmd_flags &= ~REQ_FUA;
 
-	switch (blk_flush_cur_seq(q)) {
-	case QUEUE_FSEQ_PREFLUSH:
-		init_flush_request(rq, orig_rq->rq_disk);
-		rq->end_io = pre_flush_end_io;
-		break;
-	case QUEUE_FSEQ_DATA:
-		init_request_from_bio(rq, orig_rq->bio);
-		/*
-		 * orig_rq->rq_disk may be different from
-		 * bio->bi_bdev->bd_disk if orig_rq got here through
-		 * remapping drivers.  Make sure rq->rq_disk points
-		 * to the same one as orig_rq.
-		 */
-		rq->rq_disk = orig_rq->rq_disk;
-		rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
-		rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
-		rq->end_io = flush_data_end_io;
-		break;
-	case QUEUE_FSEQ_POSTFLUSH:
-		init_flush_request(rq, orig_rq->rq_disk);
-		rq->end_io = post_flush_end_io;
-		break;
-	default:
-		BUG();
+	/*
+	 * If there's data but flush is not necessary, the request can be
+	 * processed directly without going through flush machinery.  Queue
+	 * for normal execution.
+	 */
+	if ((policy & REQ_FSEQ_DATA) &&
+	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
+		list_add(&rq->queuelist, &q->queue_head);
+		return;
 	}
 
-	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
-	return rq;
+	/*
+	 * @rq should go through flush machinery.  Mark it part of flush
+	 * sequence and submit for further processing.
+	 */
+	memset(&rq->flush, 0, sizeof(rq->flush));
+	INIT_LIST_HEAD(&rq->flush.list);
+	rq->cmd_flags |= REQ_FLUSH_SEQ;
+	rq->end_io = flush_data_end_io;
+
+	blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
 }
 
-struct request *blk_do_flush(struct request_queue *q, struct request *rq)
+/**
+ * blk_abort_flushes - @q is being aborted, abort flush requests
+ * @q: request_queue being aborted
+ *
+ * To be called from elv_abort_queue().  @q is being aborted.  Prepare all
+ * FLUSH/FUA requests for abortion.
+ *
+ * CONTEXT:
+ * spin_lock_irq(q->queue_lock)
+ */
+void blk_abort_flushes(struct request_queue *q)
 {
-	unsigned int fflags = q->flush_flags; /* may change, cache it */
-	bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA;
-	bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH);
-	bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA);
-	unsigned skip = 0;
+	struct request *rq, *n;
+	int i;
 
 	/*
-	 * Special case.  If there's data but flush is not necessary,
-	 * the request can be issued directly.
-	 *
-	 * Flush w/o data should be able to be issued directly too but
-	 * currently some drivers assume that rq->bio contains
-	 * non-zero data if it isn't NULL and empty FLUSH requests
-	 * getting here usually have bio's without data.
+	 * Requests in flight for data are already owned by the dispatch
+	 * queue or the device driver.  Just restore for normal completion.
 	 */
-	if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) {
-		rq->cmd_flags &= ~REQ_FLUSH;
-		if (!has_fua)
-			rq->cmd_flags &= ~REQ_FUA;
-		return rq;
+	list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) {
+		list_del_init(&rq->flush.list);
+		blk_flush_restore_request(rq);
 	}
 
 	/*
-	 * Sequenced flushes can't be processed in parallel.  If
-	 * another one is already in progress, queue for later
-	 * processing.
+	 * We need to give away requests on flush queues.  Restore for
+	 * normal completion and put them on the dispatch queue.
 	 */
-	if (q->flush_seq) {
-		list_move_tail(&rq->queuelist, &q->pending_flushes);
-		return NULL;
+	for (i = 0; i < ARRAY_SIZE(q->flush_queue); i++) {
+		list_for_each_entry_safe(rq, n, &q->flush_queue[i],
+					 flush.list) {
+			list_del_init(&rq->flush.list);
+			blk_flush_restore_request(rq);
+			list_add_tail(&rq->queuelist, &q->queue_head);
+		}
 	}
-
-	/*
-	 * Start a new flush sequence
-	 */
-	q->flush_err = 0;
-	q->flush_seq |= QUEUE_FSEQ_STARTED;
-
-	/* adjust FLUSH/FUA of the original request and stash it away */
-	rq->cmd_flags &= ~REQ_FLUSH;
-	if (!has_fua)
-		rq->cmd_flags &= ~REQ_FUA;
-	blk_dequeue_request(rq);
-	q->orig_flush_rq = rq;
-
-	/* skip unneded sequences and return the first one */
-	if (!do_preflush)
-		skip |= QUEUE_FSEQ_PREFLUSH;
-	if (!blk_rq_sectors(rq))
-		skip |= QUEUE_FSEQ_DATA;
-	if (!do_postflush)
-		skip |= QUEUE_FSEQ_POSTFLUSH;
-	return blk_flush_complete_seq(q, skip, 0);
 }
 
 static void bio_end_flush(struct bio *bio, int err)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index bd3e8df4d5e2..25de73e4759b 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -136,8 +136,6 @@ static void bio_batch_end_io(struct bio *bio, int err)
  *
  * Description:
  *  Generate and issue number of bios with zerofiled pages.
- *  Send barrier at the beginning and at the end if requested. This guarantie
- *  correct request ordering. Empty barrier allow us to avoid post queue flush.
  */
 
 int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
diff --git a/block/blk-merge.c b/block/blk-merge.c
index ea85e20d5e94..cfcc37cb222b 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -465,3 +465,9 @@ int attempt_front_merge(struct request_queue *q, struct request *rq)
 
 	return 0;
 }
+
+int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
+			  struct request *next)
+{
+	return attempt_merge(q, rq, next);
+}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 36c8c1f2af18..1fa769293597 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -164,25 +164,10 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	blk_queue_congestion_threshold(q);
 	q->nr_batching = BLK_BATCH_REQ;
 
-	q->unplug_thresh = 4;		/* hmm */
-	q->unplug_delay = msecs_to_jiffies(3);	/* 3 milliseconds */
-	if (q->unplug_delay == 0)
-		q->unplug_delay = 1;
-
-	q->unplug_timer.function = blk_unplug_timeout;
-	q->unplug_timer.data = (unsigned long)q;
-
 	blk_set_default_limits(&q->limits);
 	blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
 
 	/*
-	 * If the caller didn't supply a lock, fall back to our embedded
-	 * per-queue locks
-	 */
-	if (!q->queue_lock)
-		q->queue_lock = &q->__queue_lock;
-
-	/*
 	 * by default assume old behaviour and bounce for any highmem page
 	 */
 	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 41fb69150b4d..261c75c665ae 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -471,8 +471,6 @@ static void blk_release_queue(struct kobject *kobj)
 
 	blk_sync_queue(q);
 
-	blk_throtl_exit(q);
-
 	if (rl->rq_pool)
 		mempool_destroy(rl->rq_pool);
 
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index e36cc10a346c..5352bdafbcf0 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -102,7 +102,7 @@ struct throtl_data
 	/* Work for dispatching throttled bios */
 	struct delayed_work throtl_work;
 
-	atomic_t limits_changed;
+	bool limits_changed;
 };
 
 enum tg_state_flags {
@@ -201,6 +201,7 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
 	RB_CLEAR_NODE(&tg->rb_node);
 	bio_list_init(&tg->bio_lists[0]);
 	bio_list_init(&tg->bio_lists[1]);
+	td->limits_changed = false;
 
 	/*
 	 * Take the initial reference that will be released on destroy
@@ -737,34 +738,36 @@ static void throtl_process_limit_change(struct throtl_data *td)
 	struct throtl_grp *tg;
 	struct hlist_node *pos, *n;
 
-	if (!atomic_read(&td->limits_changed))
+	if (!td->limits_changed)
 		return;
 
-	throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed));
+	xchg(&td->limits_changed, false);
 
-	/*
-	 * Make sure updates from throtl_update_blkio_group_read_bps() group
-	 * of functions to tg->limits_changed are visible. We do not
-	 * want update td->limits_changed to be visible but update to
-	 * tg->limits_changed not being visible yet on this cpu. Hence
-	 * the read barrier.
-	 */
-	smp_rmb();
+	throtl_log(td, "limits changed");
 
 	hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
-		if (throtl_tg_on_rr(tg) && tg->limits_changed) {
-			throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
-				" riops=%u wiops=%u", tg->bps[READ],
-				tg->bps[WRITE], tg->iops[READ],
-				tg->iops[WRITE]);
+		if (!tg->limits_changed)
+			continue;
+
+		if (!xchg(&tg->limits_changed, false))
+			continue;
+
+		throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
+			" riops=%u wiops=%u", tg->bps[READ], tg->bps[WRITE],
+			tg->iops[READ], tg->iops[WRITE]);
+
+		/*
+		 * Restart the slices for both READ and WRITES. It
+		 * might happen that a group's limit are dropped
+		 * suddenly and we don't want to account recently
+		 * dispatched IO with new low rate
+		 */
+		throtl_start_new_slice(td, tg, 0);
+		throtl_start_new_slice(td, tg, 1);
+
+		if (throtl_tg_on_rr(tg))
 			tg_update_disptime(td, tg);
-			tg->limits_changed = false;
-		}
 	}
-
-	smp_mb__before_atomic_dec();
-	atomic_dec(&td->limits_changed);
-	smp_mb__after_atomic_dec();
 }
 
 /* Dispatch throttled bios. Should be called without queue lock held. */
@@ -774,6 +777,7 @@ static int throtl_dispatch(struct request_queue *q)
 	unsigned int nr_disp = 0;
 	struct bio_list bio_list_on_stack;
 	struct bio *bio;
+	struct blk_plug plug;
 
 	spin_lock_irq(q->queue_lock);
 
@@ -802,9 +806,10 @@ out:
 	 * immediate dispatch
 	 */
 	if (nr_disp) {
+		blk_start_plug(&plug);
 		while((bio = bio_list_pop(&bio_list_on_stack)))
 			generic_make_request(bio);
-		blk_unplug(q);
+		blk_finish_plug(&plug);
 	}
 	return nr_disp;
 }
@@ -825,7 +830,8 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
 
 	struct delayed_work *dwork = &td->throtl_work;
 
-	if (total_nr_queued(td) > 0) {
+	/* schedule work if limits changed even if no bio is queued */
+	if (total_nr_queued(td) > 0 || td->limits_changed) {
 		/*
 		 * We might have a work scheduled to be executed in future.
 		 * Cancel that and schedule a new one.
@@ -898,6 +904,15 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg)
 	spin_unlock_irqrestore(td->queue->queue_lock, flags);
 }
 
+static void throtl_update_blkio_group_common(struct throtl_data *td,
+				struct throtl_grp *tg)
+{
+	xchg(&tg->limits_changed, true);
+	xchg(&td->limits_changed, true);
+	/* Schedule a work now to process the limit change */
+	throtl_schedule_delayed_work(td, 0);
+}
+
 /*
  * For all update functions, key should be a valid pointer because these
  * update functions are called under blkcg_lock, that means, blkg is
@@ -911,64 +926,43 @@ static void throtl_update_blkio_group_read_bps(void *key,
 				struct blkio_group *blkg, u64 read_bps)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->bps[READ] = read_bps;
-	/* Make sure read_bps is updated before setting limits_changed */
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-
-	/* Make sure tg->limits_changed is updated before td->limits_changed */
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-
-	/* Schedule a work now to process the limit change */
-	throtl_schedule_delayed_work(td, 0);
+	tg->bps[READ] = read_bps;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_write_bps(void *key,
 				struct blkio_group *blkg, u64 write_bps)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->bps[WRITE] = write_bps;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->bps[WRITE] = write_bps;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_read_iops(void *key,
 			struct blkio_group *blkg, unsigned int read_iops)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->iops[READ] = read_iops;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->iops[READ] = read_iops;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_write_iops(void *key,
 			struct blkio_group *blkg, unsigned int write_iops)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->iops[WRITE] = write_iops;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->iops[WRITE] = write_iops;
+	throtl_update_blkio_group_common(td, tg);
 }
 
-void throtl_shutdown_timer_wq(struct request_queue *q)
+static void throtl_shutdown_wq(struct request_queue *q)
 {
 	struct throtl_data *td = q->td;
 
@@ -1009,20 +1003,28 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
 		/*
 		 * There is already another bio queued in same dir. No
 		 * need to update dispatch time.
-		 * Still update the disptime if rate limits on this group
-		 * were changed.
 		 */
-		if (!tg->limits_changed)
-			update_disptime = false;
-		else
-			tg->limits_changed = false;
-
+		update_disptime = false;
 		goto queue_bio;
+
 	}
 
 	/* Bio is with-in rate limit of group */
 	if (tg_may_dispatch(td, tg, bio, NULL)) {
 		throtl_charge_bio(tg, bio);
+
+		/*
+		 * We need to trim slice even when bios are not being queued
+		 * otherwise it might happen that a bio is not queued for
+		 * a long time and slice keeps on extending and trim is not
+		 * called for a long time. Now if limits are reduced suddenly
+		 * we take into account all the IO dispatched so far at new
+		 * low rate and * newly queued IO gets a really long dispatch
+		 * time.
+		 *
+		 * So keep on trimming slice even if bio is not queued.
+		 */
+		throtl_trim_slice(td, tg, rw);
 		goto out;
 	}
 
@@ -1058,7 +1060,7 @@ int blk_throtl_init(struct request_queue *q)
 
 	INIT_HLIST_HEAD(&td->tg_list);
 	td->tg_service_tree = THROTL_RB_ROOT;
-	atomic_set(&td->limits_changed, 0);
+	td->limits_changed = false;
 
 	/* Init root group */
 	tg = &td->root_tg;
@@ -1070,6 +1072,7 @@ int blk_throtl_init(struct request_queue *q)
 	/* Practically unlimited BW */
 	tg->bps[0] = tg->bps[1] = -1;
 	tg->iops[0] = tg->iops[1] = -1;
+	td->limits_changed = false;
 
 	/*
 	 * Set root group reference to 2. One reference will be dropped when
@@ -1102,7 +1105,7 @@ void blk_throtl_exit(struct request_queue *q)
 
 	BUG_ON(!td);
 
-	throtl_shutdown_timer_wq(q);
+	throtl_shutdown_wq(q);
 
 	spin_lock_irq(q->queue_lock);
 	throtl_release_tgs(td);
@@ -1132,7 +1135,7 @@ void blk_throtl_exit(struct request_queue *q)
 	 * update limits through cgroup and another work got queued, cancel
 	 * it.
 	 */
-	throtl_shutdown_timer_wq(q);
+	throtl_shutdown_wq(q);
 	throtl_td_free(td);
 }
 
diff --git a/block/blk.h b/block/blk.h
index 2db8f32838e7..c8db371a921d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -18,8 +18,6 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
 void blk_dequeue_request(struct request *rq);
 void __blk_queue_free_tags(struct request_queue *q);
 
-void blk_unplug_work(struct work_struct *work);
-void blk_unplug_timeout(unsigned long data);
 void blk_rq_timed_out_timer(unsigned long data);
 void blk_delete_timer(struct request *);
 void blk_add_timer(struct request *);
@@ -51,21 +49,17 @@ static inline void blk_clear_rq_complete(struct request *rq)
  */
 #define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
 
-struct request *blk_do_flush(struct request_queue *q, struct request *rq);
+void blk_insert_flush(struct request *rq);
+void blk_abort_flushes(struct request_queue *q);
 
 static inline struct request *__elv_next_request(struct request_queue *q)
 {
 	struct request *rq;
 
 	while (1) {
-		while (!list_empty(&q->queue_head)) {
+		if (!list_empty(&q->queue_head)) {
 			rq = list_entry_rq(q->queue_head.next);
-			if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) ||
-			    rq == &q->flush_rq)
-				return rq;
-			rq = blk_do_flush(q, rq);
-			if (rq)
-				return rq;
+			return rq;
 		}
 
 		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
@@ -109,6 +103,8 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
 		      struct bio *bio);
 int attempt_back_merge(struct request_queue *q, struct request *rq);
 int attempt_front_merge(struct request_queue *q, struct request *rq);
+int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
+				struct request *next);
 void blk_recalc_rq_segments(struct request *rq);
 void blk_rq_set_mixed_merge(struct request *rq);
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ea83a4f0c27d..7785169f3c8f 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -54,9 +54,9 @@ static const int cfq_hist_divisor = 4;
 #define CFQQ_SEEKY(cfqq)	(hweight32(cfqq->seek_history) > 32/8)
 
 #define RQ_CIC(rq)		\
-	((struct cfq_io_context *) (rq)->elevator_private)
-#define RQ_CFQQ(rq)		(struct cfq_queue *) ((rq)->elevator_private2)
-#define RQ_CFQG(rq)		(struct cfq_group *) ((rq)->elevator_private3)
+	((struct cfq_io_context *) (rq)->elevator_private[0])
+#define RQ_CFQQ(rq)		(struct cfq_queue *) ((rq)->elevator_private[1])
+#define RQ_CFQG(rq)		(struct cfq_group *) ((rq)->elevator_private[2])
 
 static struct kmem_cache *cfq_pool;
 static struct kmem_cache *cfq_ioc_pool;
@@ -146,7 +146,6 @@ struct cfq_queue {
 	struct cfq_rb_root *service_tree;
 	struct cfq_queue *new_cfqq;
 	struct cfq_group *cfqg;
-	struct cfq_group *orig_cfqg;
 	/* Number of sectors dispatched from queue in single dispatch round */
 	unsigned long nr_sectors;
 };
@@ -179,6 +178,8 @@ struct cfq_group {
 	/* group service_tree key */
 	u64 vdisktime;
 	unsigned int weight;
+	unsigned int new_weight;
+	bool needs_update;
 
 	/* number of cfqq currently on this group */
 	int nr_cfqq;
@@ -238,6 +239,7 @@ struct cfq_data {
 	struct rb_root prio_trees[CFQ_PRIO_LISTS];
 
 	unsigned int busy_queues;
+	unsigned int busy_sync_queues;
 
 	int rq_in_driver;
 	int rq_in_flight[2];
@@ -285,7 +287,6 @@ struct cfq_data {
 	unsigned int cfq_slice_idle;
 	unsigned int cfq_group_idle;
 	unsigned int cfq_latency;
-	unsigned int cfq_group_isolation;
 
 	unsigned int cic_index;
 	struct list_head cic_list;
@@ -501,13 +502,6 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
 	}
 }
 
-static int cfq_queue_empty(struct request_queue *q)
-{
-	struct cfq_data *cfqd = q->elevator->elevator_data;
-
-	return !cfqd->rq_queued;
-}
-
 /*
  * Scale schedule slice based on io priority. Use the sync time slice only
  * if a queue is marked sync and has sync io queued. A sync queue with async
@@ -558,15 +552,13 @@ static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
 
 static void update_min_vdisktime(struct cfq_rb_root *st)
 {
-	u64 vdisktime = st->min_vdisktime;
 	struct cfq_group *cfqg;
 
 	if (st->left) {
 		cfqg = rb_entry_cfqg(st->left);
-		vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
+		st->min_vdisktime = max_vdisktime(st->min_vdisktime,
+						  cfqg->vdisktime);
 	}
-
-	st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime);
 }
 
 /*
@@ -863,7 +855,27 @@ __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
 }
 
 static void
-cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
+cfq_update_group_weight(struct cfq_group *cfqg)
+{
+	BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
+	if (cfqg->needs_update) {
+		cfqg->weight = cfqg->new_weight;
+		cfqg->needs_update = false;
+	}
+}
+
+static void
+cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
+{
+	BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
+
+	cfq_update_group_weight(cfqg);
+	__cfq_group_service_tree_add(st, cfqg);
+	st->total_weight += cfqg->weight;
+}
+
+static void
+cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
 	struct cfq_group *__cfqg;
@@ -884,13 +896,19 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 		cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
 	} else
 		cfqg->vdisktime = st->min_vdisktime;
+	cfq_group_service_tree_add(st, cfqg);
+}
 
-	__cfq_group_service_tree_add(st, cfqg);
-	st->total_weight += cfqg->weight;
+static void
+cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg)
+{
+	st->total_weight -= cfqg->weight;
+	if (!RB_EMPTY_NODE(&cfqg->rb_node))
+		cfq_rb_erase(&cfqg->rb_node, st);
 }
 
 static void
-cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
+cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
 
@@ -902,14 +920,13 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 		return;
 
 	cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
-	st->total_weight -= cfqg->weight;
-	if (!RB_EMPTY_NODE(&cfqg->rb_node))
-		cfq_rb_erase(&cfqg->rb_node, st);
+	cfq_group_service_tree_del(st, cfqg);
 	cfqg->saved_workload_slice = 0;
 	cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
 }
 
-static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
+static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
+						unsigned int *unaccounted_time)
 {
 	unsigned int slice_used;
 
@@ -928,8 +945,13 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
 					1);
 	} else {
 		slice_used = jiffies - cfqq->slice_start;
-		if (slice_used > cfqq->allocated_slice)
+		if (slice_used > cfqq->allocated_slice) {
+			*unaccounted_time = slice_used - cfqq->allocated_slice;
 			slice_used = cfqq->allocated_slice;
+		}
+		if (time_after(cfqq->slice_start, cfqq->dispatch_start))
+			*unaccounted_time += cfqq->slice_start -
+					cfqq->dispatch_start;
 	}
 
 	return slice_used;
@@ -939,12 +961,12 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 				struct cfq_queue *cfqq)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
-	unsigned int used_sl, charge;
+	unsigned int used_sl, charge, unaccounted_sl = 0;
 	int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
 			- cfqg->service_tree_idle.count;
 
 	BUG_ON(nr_sync < 0);
-	used_sl = charge = cfq_cfqq_slice_usage(cfqq);
+	used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);
 
 	if (iops_mode(cfqd))
 		charge = cfqq->slice_dispatch;
@@ -952,9 +974,10 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 		charge = cfqq->allocated_slice;
 
 	/* Can't update vdisktime while group is on service tree */
-	cfq_rb_erase(&cfqg->rb_node, st);
+	cfq_group_service_tree_del(st, cfqg);
 	cfqg->vdisktime += cfq_scale_slice(charge, cfqg);
-	__cfq_group_service_tree_add(st, cfqg);
+	/* If a new weight was requested, update now, off tree */
+	cfq_group_service_tree_add(st, cfqg);
 
 	/* This group is being expired. Save the context */
 	if (time_after(cfqd->workload_expires, jiffies)) {
@@ -970,7 +993,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 	cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u"
 			" sect=%u", used_sl, cfqq->slice_dispatch, charge,
 			iops_mode(cfqd), cfqq->nr_sectors);
-	cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
+	cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl,
+					  unaccounted_sl);
 	cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
 }
 
@@ -985,7 +1009,9 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
 void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
 					unsigned int weight)
 {
-	cfqg_of_blkg(blkg)->weight = weight;
+	struct cfq_group *cfqg = cfqg_of_blkg(blkg);
+	cfqg->new_weight = weight;
+	cfqg->needs_update = true;
 }
 
 static struct cfq_group *
@@ -1187,32 +1213,6 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	int new_cfqq = 1;
 	int group_changed = 0;
 
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-	if (!cfqd->cfq_group_isolation
-	    && cfqq_type(cfqq) == SYNC_NOIDLE_WORKLOAD
-	    && cfqq->cfqg && cfqq->cfqg != &cfqd->root_group) {
-		/* Move this cfq to root group */
-		cfq_log_cfqq(cfqd, cfqq, "moving to root group");
-		if (!RB_EMPTY_NODE(&cfqq->rb_node))
-			cfq_group_service_tree_del(cfqd, cfqq->cfqg);
-		cfqq->orig_cfqg = cfqq->cfqg;
-		cfqq->cfqg = &cfqd->root_group;
-		cfqd->root_group.ref++;
-		group_changed = 1;
-	} else if (!cfqd->cfq_group_isolation
-		   && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
-		/* cfqq is sequential now needs to go to its original group */
-		BUG_ON(cfqq->cfqg != &cfqd->root_group);
-		if (!RB_EMPTY_NODE(&cfqq->rb_node))
-			cfq_group_service_tree_del(cfqd, cfqq->cfqg);
-		cfq_put_cfqg(cfqq->cfqg);
-		cfqq->cfqg = cfqq->orig_cfqg;
-		cfqq->orig_cfqg = NULL;
-		group_changed = 1;
-		cfq_log_cfqq(cfqd, cfqq, "moved to origin group");
-	}
-#endif
-
 	service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
 						cfqq_type(cfqq));
 	if (cfq_class_idle(cfqq)) {
@@ -1284,7 +1284,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	service_tree->count++;
 	if ((add_front || !new_cfqq) && !group_changed)
 		return;
-	cfq_group_service_tree_add(cfqd, cfqq->cfqg);
+	cfq_group_notify_queue_add(cfqd, cfqq->cfqg);
 }
 
 static struct cfq_queue *
@@ -1372,6 +1372,8 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	cfq_mark_cfqq_on_rr(cfqq);
 	cfqd->busy_queues++;
+	if (cfq_cfqq_sync(cfqq))
+		cfqd->busy_sync_queues++;
 
 	cfq_resort_rr_list(cfqd, cfqq);
 }
@@ -1395,9 +1397,11 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 		cfqq->p_root = NULL;
 	}
 
-	cfq_group_service_tree_del(cfqd, cfqq->cfqg);
+	cfq_group_notify_queue_del(cfqd, cfqq->cfqg);
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
+	if (cfq_cfqq_sync(cfqq))
+		cfqd->busy_sync_queues--;
 }
 
 /*
@@ -2405,6 +2409,7 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	 * Does this cfqq already have too much IO in flight?
 	 */
 	if (cfqq->dispatched >= max_dispatch) {
+		bool promote_sync = false;
 		/*
 		 * idle queue must always only have a single IO in flight
 		 */
@@ -2412,15 +2417,26 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 			return false;
 
 		/*
+		 * If there is only one sync queue
+		 * we can ignore async queue here and give the sync
+		 * queue no dispatch limit. The reason is a sync queue can
+		 * preempt async queue, limiting the sync queue doesn't make
+		 * sense. This is useful for aiostress test.
+		 */
+		if (cfq_cfqq_sync(cfqq) && cfqd->busy_sync_queues == 1)
+			promote_sync = true;
+
+		/*
 		 * We have other queues, don't allow more IO from this one
 		 */
-		if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq))
+		if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq) &&
+				!promote_sync)
 			return false;
 
 		/*
 		 * Sole queue user, no limit
 		 */
-		if (cfqd->busy_queues == 1)
+		if (cfqd->busy_queues == 1 || promote_sync)
 			max_dispatch = -1;
 		else
 			/*
@@ -2542,7 +2558,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
 static void cfq_put_queue(struct cfq_queue *cfqq)
 {
 	struct cfq_data *cfqd = cfqq->cfqd;
-	struct cfq_group *cfqg, *orig_cfqg;
+	struct cfq_group *cfqg;
 
 	BUG_ON(cfqq->ref <= 0);
 
@@ -2554,7 +2570,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	BUG_ON(rb_first(&cfqq->sort_list));
 	BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
 	cfqg = cfqq->cfqg;
-	orig_cfqg = cfqq->orig_cfqg;
 
 	if (unlikely(cfqd->active_queue == cfqq)) {
 		__cfq_slice_expired(cfqd, cfqq, 0);
@@ -2564,8 +2579,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	kmem_cache_free(cfq_pool, cfqq);
 	cfq_put_cfqg(cfqg);
-	if (orig_cfqg)
-		cfq_put_cfqg(orig_cfqg);
 }
 
 /*
@@ -3613,12 +3626,12 @@ static void cfq_put_request(struct request *rq)
 
 		put_io_context(RQ_CIC(rq)->ioc);
 
-		rq->elevator_private = NULL;
-		rq->elevator_private2 = NULL;
+		rq->elevator_private[0] = NULL;
+		rq->elevator_private[1] = NULL;
 
 		/* Put down rq reference on cfqg */
 		cfq_put_cfqg(RQ_CFQG(rq));
-		rq->elevator_private3 = NULL;
+		rq->elevator_private[2] = NULL;
 
 		cfq_put_queue(cfqq);
 	}
@@ -3705,13 +3718,12 @@ new_queue:
 	}
 
 	cfqq->allocated[rw]++;
-	cfqq->ref++;
-	rq->elevator_private = cic;
-	rq->elevator_private2 = cfqq;
-	rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg);
 
+	cfqq->ref++;
+	rq->elevator_private[0] = cic;
+	rq->elevator_private[1] = cfqq;
+	rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg);
 	spin_unlock_irqrestore(q->queue_lock, flags);
-
 	return 0;
 
 queue_fail:
@@ -3953,7 +3965,6 @@ static void *cfq_init_queue(struct request_queue *q)
 	cfqd->cfq_slice_idle = cfq_slice_idle;
 	cfqd->cfq_group_idle = cfq_group_idle;
 	cfqd->cfq_latency = 1;
-	cfqd->cfq_group_isolation = 0;
 	cfqd->hw_tag = -1;
 	/*
 	 * we optimistically start assuming sync ops weren't delayed in last
@@ -4029,7 +4040,6 @@ SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
 SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
-SHOW_FUNCTION(cfq_group_isolation_show, cfqd->cfq_group_isolation, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
@@ -4063,7 +4073,6 @@ STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
 		UINT_MAX, 0);
 STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
-STORE_FUNCTION(cfq_group_isolation_store, &cfqd->cfq_group_isolation, 0, 1, 0);
 #undef STORE_FUNCTION
 
 #define CFQ_ATTR(name) \
@@ -4081,7 +4090,6 @@ static struct elv_fs_entry cfq_attrs[] = {
 	CFQ_ATTR(slice_idle),
 	CFQ_ATTR(group_idle),
 	CFQ_ATTR(low_latency),
-	CFQ_ATTR(group_isolation),
 	__ATTR_NULL
 };
 
@@ -4096,7 +4104,6 @@ static struct elevator_type iosched_cfq = {
 		.elevator_add_req_fn =		cfq_insert_request,
 		.elevator_activate_req_fn =	cfq_activate_request,
 		.elevator_deactivate_req_fn =	cfq_deactivate_request,
-		.elevator_queue_empty_fn =	cfq_queue_empty,
 		.elevator_completed_req_fn =	cfq_completed_request,
 		.elevator_former_req_fn =	elv_rb_former_request,
 		.elevator_latter_req_fn =	elv_rb_latter_request,
diff --git a/block/cfq.h b/block/cfq.h
index 54a6d90f8e8c..2a155927e37c 100644
--- a/block/cfq.h
+++ b/block/cfq.h
@@ -16,9 +16,9 @@ static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg,
 }
 
 static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg,
-			unsigned long time)
+			unsigned long time, unsigned long unaccounted_time)
 {
-	blkiocg_update_timeslice_used(blkg, time);
+	blkiocg_update_timeslice_used(blkg, time, unaccounted_time);
 }
 
 static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg)
@@ -85,7 +85,7 @@ static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg,
 			unsigned long dequeue) {}
 
 static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg,
-			unsigned long time) {}
+			unsigned long time, unsigned long unaccounted_time) {}
 static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg) {}
 static inline void cfq_blkiocg_update_io_remove_stats(struct blkio_group *blkg,
 				bool direction, bool sync) {}
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index b547cbca7b23..5139c0ea1864 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -326,14 +326,6 @@ dispatch_request:
 	return 1;
 }
 
-static int deadline_queue_empty(struct request_queue *q)
-{
-	struct deadline_data *dd = q->elevator->elevator_data;
-
-	return list_empty(&dd->fifo_list[WRITE])
-		&& list_empty(&dd->fifo_list[READ]);
-}
-
 static void deadline_exit_queue(struct elevator_queue *e)
 {
 	struct deadline_data *dd = e->elevator_data;
@@ -445,7 +437,6 @@ static struct elevator_type iosched_deadline = {
 		.elevator_merge_req_fn =	deadline_merged_requests,
 		.elevator_dispatch_fn =		deadline_dispatch_requests,
 		.elevator_add_req_fn =		deadline_add_request,
-		.elevator_queue_empty_fn =	deadline_queue_empty,
 		.elevator_former_req_fn =	elv_rb_former_request,
 		.elevator_latter_req_fn =	elv_rb_latter_request,
 		.elevator_init_fn =		deadline_init_queue,
diff --git a/block/elevator.c b/block/elevator.c
index 236e93c1f46c..c387d3168734 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -113,7 +113,7 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
 }
 EXPORT_SYMBOL(elv_rq_merge_ok);
 
-static inline int elv_try_merge(struct request *__rq, struct bio *bio)
+int elv_try_merge(struct request *__rq, struct bio *bio)
 {
 	int ret = ELEVATOR_NO_MERGE;
 
@@ -421,6 +421,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
 	struct list_head *entry;
 	int stop_flags;
 
+	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
 	if (q->last_merge == rq)
 		q->last_merge = NULL;
 
@@ -519,6 +521,40 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 	return ELEVATOR_NO_MERGE;
 }
 
+/*
+ * Attempt to do an insertion back merge. Only check for the case where
+ * we can append 'rq' to an existing request, so we can throw 'rq' away
+ * afterwards.
+ *
+ * Returns true if we merged, false otherwise
+ */
+static bool elv_attempt_insert_merge(struct request_queue *q,
+				     struct request *rq)
+{
+	struct request *__rq;
+
+	if (blk_queue_nomerges(q))
+		return false;
+
+	/*
+	 * First try one-hit cache.
+	 */
+	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
+		return true;
+
+	if (blk_queue_noxmerges(q))
+		return false;
+
+	/*
+	 * See if our hash lookup can find a potential backmerge.
+	 */
+	__rq = elv_rqhash_find(q, blk_rq_pos(rq));
+	if (__rq && blk_attempt_req_merge(q, __rq, rq))
+		return true;
+
+	return false;
+}
+
 void elv_merged_request(struct request_queue *q, struct request *rq, int type)
 {
 	struct elevator_queue *e = q->elevator;
@@ -536,14 +572,18 @@ void elv_merge_requests(struct request_queue *q, struct request *rq,
 			     struct request *next)
 {
 	struct elevator_queue *e = q->elevator;
+	const int next_sorted = next->cmd_flags & REQ_SORTED;
 
-	if (e->ops->elevator_merge_req_fn)
+	if (next_sorted && e->ops->elevator_merge_req_fn)
 		e->ops->elevator_merge_req_fn(q, rq, next);
 
 	elv_rqhash_reposition(q, rq);
-	elv_rqhash_del(q, next);
 
-	q->nr_sorted--;
+	if (next_sorted) {
+		elv_rqhash_del(q, next);
+		q->nr_sorted--;
+	}
+
 	q->last_merge = rq;
 }
 
@@ -617,21 +657,12 @@ void elv_quiesce_end(struct request_queue *q)
 
 void elv_insert(struct request_queue *q, struct request *rq, int where)
 {
-	int unplug_it = 1;
-
 	trace_block_rq_insert(q, rq);
 
 	rq->q = q;
 
 	switch (where) {
 	case ELEVATOR_INSERT_REQUEUE:
-		/*
-		 * Most requeues happen because of a busy condition,
-		 * don't force unplug of the queue for that case.
-		 * Clear unplug_it and fall through.
-		 */
-		unplug_it = 0;
-
 	case ELEVATOR_INSERT_FRONT:
 		rq->cmd_flags |= REQ_SOFTBARRIER;
 		list_add(&rq->queuelist, &q->queue_head);
@@ -654,6 +685,14 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 		__blk_run_queue(q, false);
 		break;
 
+	case ELEVATOR_INSERT_SORT_MERGE:
+		/*
+		 * If we succeed in merging this request with one in the
+		 * queue already, we are done - rq has now been freed,
+		 * so no need to do anything further.
+		 */
+		if (elv_attempt_insert_merge(q, rq))
+			break;
 	case ELEVATOR_INSERT_SORT:
 		BUG_ON(rq->cmd_type != REQ_TYPE_FS &&
 		       !(rq->cmd_flags & REQ_DISCARD));
@@ -673,24 +712,21 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 		q->elevator->ops->elevator_add_req_fn(q, rq);
 		break;
 
+	case ELEVATOR_INSERT_FLUSH:
+		rq->cmd_flags |= REQ_SOFTBARRIER;
+		blk_insert_flush(rq);
+		break;
 	default:
 		printk(KERN_ERR "%s: bad insertion point %d\n",
 		       __func__, where);
 		BUG();
 	}
-
-	if (unplug_it && blk_queue_plugged(q)) {
-		int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
-				- queue_in_flight(q);
-
-		if (nrq >= q->unplug_thresh)
-			__generic_unplug_device(q);
-	}
 }
 
-void __elv_add_request(struct request_queue *q, struct request *rq, int where,
-		       int plug)
+void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 {
+	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
 	if (rq->cmd_flags & REQ_SOFTBARRIER) {
 		/* barriers are scheduling boundary, update end_sector */
 		if (rq->cmd_type == REQ_TYPE_FS ||
@@ -702,38 +738,20 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where,
 		    where == ELEVATOR_INSERT_SORT)
 		where = ELEVATOR_INSERT_BACK;
 
-	if (plug)
-		blk_plug_device(q);
-
 	elv_insert(q, rq, where);
 }
 EXPORT_SYMBOL(__elv_add_request);
 
-void elv_add_request(struct request_queue *q, struct request *rq, int where,
-		     int plug)
+void elv_add_request(struct request_queue *q, struct request *rq, int where)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	__elv_add_request(q, rq, where, plug);
+	__elv_add_request(q, rq, where);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(elv_add_request);
 
-int elv_queue_empty(struct request_queue *q)
-{
-	struct elevator_queue *e = q->elevator;
-
-	if (!list_empty(&q->queue_head))
-		return 0;
-
-	if (e->ops->elevator_queue_empty_fn)
-		return e->ops->elevator_queue_empty_fn(q);
-
-	return 1;
-}
-EXPORT_SYMBOL(elv_queue_empty);
-
 struct request *elv_latter_request(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
@@ -759,7 +777,7 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 	if (e->ops->elevator_set_req_fn)
 		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
 
-	rq->elevator_private = NULL;
+	rq->elevator_private[0] = NULL;
 	return 0;
 }
 
@@ -785,6 +803,8 @@ void elv_abort_queue(struct request_queue *q)
 {
 	struct request *rq;
 
+	blk_abort_flushes(q);
+
 	while (!list_empty(&q->queue_head)) {
 		rq = list_entry_rq(q->queue_head.next);
 		rq->cmd_flags |= REQ_QUIET;
diff --git a/block/genhd.c b/block/genhd.c
index cbf1112a885c..c91a2dac6b6b 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1158,14 +1158,14 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 			   "%u %lu %lu %llu %u %u %u %u\n",
 			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
 			   disk_name(gp, hd->partno, buf),
-			   part_stat_read(hd, ios[0]),
-			   part_stat_read(hd, merges[0]),
-			   (unsigned long long)part_stat_read(hd, sectors[0]),
-			   jiffies_to_msecs(part_stat_read(hd, ticks[0])),
-			   part_stat_read(hd, ios[1]),
-			   part_stat_read(hd, merges[1]),
-			   (unsigned long long)part_stat_read(hd, sectors[1]),
-			   jiffies_to_msecs(part_stat_read(hd, ticks[1])),
+			   part_stat_read(hd, ios[READ]),
+			   part_stat_read(hd, merges[READ]),
+			   (unsigned long long)part_stat_read(hd, sectors[READ]),
+			   jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
+			   part_stat_read(hd, ios[WRITE]),
+			   part_stat_read(hd, merges[WRITE]),
+			   (unsigned long long)part_stat_read(hd, sectors[WRITE]),
+			   jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
 			   part_in_flight(hd),
 			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
 			   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
@@ -1494,7 +1494,7 @@ void disk_block_events(struct gendisk *disk)
 void disk_unblock_events(struct gendisk *disk)
 {
 	if (disk->ev)
-		__disk_unblock_events(disk, true);
+		__disk_unblock_events(disk, false);
 }
 
 /**
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index 232c4b38cd37..06389e9ef96d 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -39,13 +39,6 @@ static void noop_add_request(struct request_queue *q, struct request *rq)
 	list_add_tail(&rq->queuelist, &nd->queue);
 }
 
-static int noop_queue_empty(struct request_queue *q)
-{
-	struct noop_data *nd = q->elevator->elevator_data;
-
-	return list_empty(&nd->queue);
-}
-
 static struct request *
 noop_former_request(struct request_queue *q, struct request *rq)
 {
@@ -90,7 +83,6 @@ static struct elevator_type elevator_noop = {
 		.elevator_merge_req_fn		= noop_merged_requests,
 		.elevator_dispatch_fn		= noop_dispatch,
 		.elevator_add_req_fn		= noop_add_request,
-		.elevator_queue_empty_fn	= noop_queue_empty,
 		.elevator_former_req_fn		= noop_former_request,
 		.elevator_latter_req_fn		= noop_latter_request,
 		.elevator_init_fn		= noop_init_queue,
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 1f286ab461d3..79882104e431 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -140,13 +140,14 @@ static int DAC960_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
-static int DAC960_media_changed(struct gendisk *disk)
+static unsigned int DAC960_check_events(struct gendisk *disk,
+					unsigned int clearing)
 {
 	DAC960_Controller_T *p = disk->queue->queuedata;
 	int drive_nr = (long)disk->private_data;
 
 	if (!p->LogicalDriveInitiallyAccessible[drive_nr])
-		return 1;
+		return DISK_EVENT_MEDIA_CHANGE;
 	return 0;
 }
 
@@ -163,7 +164,7 @@ static const struct block_device_operations DAC960_BlockDeviceOperations = {
 	.owner			= THIS_MODULE,
 	.open			= DAC960_open,
 	.getgeo			= DAC960_getgeo,
-	.media_changed		= DAC960_media_changed,
+	.check_events		= DAC960_check_events,
 	.revalidate_disk	= DAC960_revalidate_disk,
 };
 
@@ -2546,6 +2547,7 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
 	disk->major = MajorNumber;
 	disk->first_minor = n << DAC960_MaxPartitionsBits;
 	disk->fops = &DAC960_BlockDeviceOperations;
+	disk->events = DISK_EVENT_MEDIA_CHANGE;
    }
   /*
     Indicate the Block Device Registration completed successfully,
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 363855ca376e..456c0cc90dcf 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1658,12 +1658,12 @@ static int floppy_release(struct gendisk *disk, fmode_t mode)
 }
 
 /*
- * floppy-change is never called from an interrupt, so we can relax a bit
+ * check_events is never called from an interrupt, so we can relax a bit
  * here, sleep etc. Note that floppy-on tries to set current_DOR to point
  * to the desired drive, but it will probably not survive the sleep if
  * several floppies are used at the same time: thus the loop.
  */
-static int amiga_floppy_change(struct gendisk *disk)
+static unsigned amiga_check_events(struct gendisk *disk, unsigned int clearing)
 {
 	struct amiga_floppy_struct *p = disk->private_data;
 	int drive = p - unit;
@@ -1686,7 +1686,7 @@ static int amiga_floppy_change(struct gendisk *disk)
 		p->dirty = 0;
 		writepending = 0; /* if this was true before, too bad! */
 		writefromint = 0;
-		return 1;
+		return DISK_EVENT_MEDIA_CHANGE;
 	}
 	return 0;
 }
@@ -1697,7 +1697,7 @@ static const struct block_device_operations floppy_fops = {
 	.release	= floppy_release,
 	.ioctl		= fd_ioctl,
 	.getgeo		= fd_getgeo,
-	.media_changed	= amiga_floppy_change,
+	.check_events	= amiga_check_events,
 };
 
 static int __init fd_probe_drives(void)
@@ -1736,6 +1736,7 @@ static int __init fd_probe_drives(void)
 		disk->major = FLOPPY_MAJOR;
 		disk->first_minor = drive;
 		disk->fops = &floppy_fops;
+		disk->events = DISK_EVENT_MEDIA_CHANGE;
 		sprintf(disk->disk_name, "fd%d", drive);
 		disk->private_data = &unit[drive];
 		set_capacity(disk, 880*2);
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 605a67e40bbf..c871eae14120 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1324,23 +1324,24 @@ static void finish_fdc_done( int dummy )
  * due to unrecognised disk changes.
  */
 
-static int check_floppy_change(struct gendisk *disk)
+static unsigned int floppy_check_events(struct gendisk *disk,
+					unsigned int clearing)
 {
 	struct atari_floppy_struct *p = disk->private_data;
 	unsigned int drive = p - unit;
 	if (test_bit (drive, &fake_change)) {
 		/* simulated change (e.g. after formatting) */
-		return 1;
+		return DISK_EVENT_MEDIA_CHANGE;
 	}
 	if (test_bit (drive, &changed_floppies)) {
 		/* surely changed (the WP signal changed at least once) */
-		return 1;
+		return DISK_EVENT_MEDIA_CHANGE;
 	}
 	if (UD.wpstat) {
 		/* WP is on -> could be changed: to be sure, buffers should be
 		 * invalidated...
 		 */
-		return 1;
+		return DISK_EVENT_MEDIA_CHANGE;
 	}
 
 	return 0;
@@ -1570,7 +1571,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
 		 * or the next access will revalidate - and clear UDT :-(
 		 */
 
-		if (check_floppy_change(disk))
+		if (floppy_check_events(disk, 0))
 		        floppy_revalidate(disk);
 
 		if (UD.flags & FTD_MSG)
@@ -1904,7 +1905,7 @@ static const struct block_device_operations floppy_fops = {
 	.open		= floppy_unlocked_open,
 	.release	= floppy_release,
 	.ioctl		= fd_ioctl,
-	.media_changed	= check_floppy_change,
+	.check_events	= floppy_check_events,
 	.revalidate_disk= floppy_revalidate,
 };
 
@@ -1963,6 +1964,7 @@ static int __init atari_floppy_init (void)
 		unit[i].disk->first_minor = i;
 		sprintf(unit[i].disk->disk_name, "fd%d", i);
 		unit[i].disk->fops = &floppy_fops;
+		unit[i].disk->events = DISK_EVENT_MEDIA_CHANGE;
 		unit[i].disk->private_data = &unit[i];
 		unit[i].disk->queue = blk_init_queue(do_fd_request,
 					&ataflop_lock);
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 9279272b3732..35658f445fca 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -3170,12 +3170,6 @@ static void do_cciss_request(struct request_queue *q)
 	int sg_index = 0;
 	int chained = 0;
 
-	/* We call start_io here in case there is a command waiting on the
-	 * queue that has not been sent.
-	 */
-	if (blk_queue_plugged(q))
-		goto startio;
-
       queue:
 	creq = blk_peek_request(q);
 	if (!creq)
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 946dad4caef3..b2fceb53e809 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -911,9 +911,6 @@ static void do_ida_request(struct request_queue *q)
 	struct scatterlist tmp_sg[SG_MAX];
 	int i, dir, seg;
 
-	if (blk_queue_plugged(q))
-		goto startio;
-
 queue_next:
 	creq = blk_peek_request(q);
 	if (!creq)
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index ba95cba192be..aca302492ff2 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -80,7 +80,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 
 	if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
 		rw |= REQ_FUA;
-	rw |= REQ_UNPLUG | REQ_SYNC;
+	rw |= REQ_SYNC;
 
 	bio = bio_alloc(GFP_NOIO, 1);
 	bio->bi_bdev = bdev->md_bdev;
@@ -689,8 +689,6 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
 		}
 	}
 
-	drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
-
 	/* always (try to) flush bitmap to stable storage */
 	drbd_md_flush(mdev);
 
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index fd42832f785b..0645ca829a94 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -840,7 +840,6 @@ static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
 	for (i = 0; i < num_pages; i++)
 		bm_page_io_async(mdev, b, i, rw);
 
-	drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
 	wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0);
 
 	if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) {
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 3803a0348937..b0bd27dfc1e8 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -377,7 +377,7 @@ union p_header {
 #define DP_HARDBARRIER	      1 /* depricated */
 #define DP_RW_SYNC	      2 /* equals REQ_SYNC    */
 #define DP_MAY_SET_IN_SYNC    4
-#define DP_UNPLUG             8 /* equals REQ_UNPLUG  */
+#define DP_UNPLUG             8 /* not used anymore   */
 #define DP_FUA               16 /* equals REQ_FUA     */
 #define DP_FLUSH             32 /* equals REQ_FLUSH   */
 #define DP_DISCARD           64 /* equals REQ_DISCARD */
@@ -2382,20 +2382,6 @@ static inline int drbd_queue_order_type(struct drbd_conf *mdev)
 	return QUEUE_ORDERED_NONE;
 }
 
-static inline void drbd_blk_run_queue(struct request_queue *q)
-{
-	if (q && q->unplug_fn)
-		q->unplug_fn(q);
-}
-
-static inline void drbd_kick_lo(struct drbd_conf *mdev)
-{
-	if (get_ldev(mdev)) {
-		drbd_blk_run_queue(bdev_get_queue(mdev->ldev->backing_bdev));
-		put_ldev(mdev);
-	}
-}
-
 static inline void drbd_md_flush(struct drbd_conf *mdev)
 {
 	int r;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 29cd0dc9fe4f..8a43ce0edeed 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2477,12 +2477,11 @@ static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
 {
 	if (mdev->agreed_pro_version >= 95)
 		return  (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
-			(bi_rw & REQ_UNPLUG ? DP_UNPLUG : 0) |
 			(bi_rw & REQ_FUA ? DP_FUA : 0) |
 			(bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
 			(bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
 	else
-		return bi_rw & (REQ_SYNC | REQ_UNPLUG) ? DP_RW_SYNC : 0;
+		return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
 }
 
 /* Used to send write requests
@@ -2719,35 +2718,6 @@ static int drbd_release(struct gendisk *gd, fmode_t mode)
 	return 0;
 }
 
-static void drbd_unplug_fn(struct request_queue *q)
-{
-	struct drbd_conf *mdev = q->queuedata;
-
-	/* unplug FIRST */
-	spin_lock_irq(q->queue_lock);
-	blk_remove_plug(q);
-	spin_unlock_irq(q->queue_lock);
-
-	/* only if connected */
-	spin_lock_irq(&mdev->req_lock);
-	if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.conn >= C_CONNECTED) {
-		D_ASSERT(mdev->state.role == R_PRIMARY);
-		if (test_and_clear_bit(UNPLUG_REMOTE, &mdev->flags)) {
-			/* add to the data.work queue,
-			 * unless already queued.
-			 * XXX this might be a good addition to drbd_queue_work
-			 * anyways, to detect "double queuing" ... */
-			if (list_empty(&mdev->unplug_work.list))
-				drbd_queue_work(&mdev->data.work,
-						&mdev->unplug_work);
-		}
-	}
-	spin_unlock_irq(&mdev->req_lock);
-
-	if (mdev->state.disk >= D_INCONSISTENT)
-		drbd_kick_lo(mdev);
-}
-
 static void drbd_set_defaults(struct drbd_conf *mdev)
 {
 	/* This way we get a compile error when sync_conf grows,
@@ -3222,9 +3192,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
 	blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE);
 	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 	blk_queue_merge_bvec(q, drbd_merge_bvec);
-	q->queue_lock = &mdev->req_lock; /* needed since we use */
-		/* plugging on a queue, that actually has no requests! */
-	q->unplug_fn = drbd_unplug_fn;
+	q->queue_lock = &mdev->req_lock;
 
 	mdev->md_io_page = alloc_page(GFP_KERNEL);
 	if (!mdev->md_io_page)
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 24487d4fb202..8e68be939deb 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -187,15 +187,6 @@ static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int
 	return NULL;
 }
 
-/* kick lower level device, if we have more than (arbitrary number)
- * reference counts on it, which typically are locally submitted io
- * requests.  don't use unacked_cnt, so we speed up proto A and B, too. */
-static void maybe_kick_lo(struct drbd_conf *mdev)
-{
-	if (atomic_read(&mdev->local_cnt) >= mdev->net_conf->unplug_watermark)
-		drbd_kick_lo(mdev);
-}
-
 static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
 {
 	struct drbd_epoch_entry *e;
@@ -219,7 +210,6 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
 	LIST_HEAD(reclaimed);
 	struct drbd_epoch_entry *e, *t;
 
-	maybe_kick_lo(mdev);
 	spin_lock_irq(&mdev->req_lock);
 	reclaim_net_ee(mdev, &reclaimed);
 	spin_unlock_irq(&mdev->req_lock);
@@ -436,8 +426,7 @@ void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
 	while (!list_empty(head)) {
 		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
 		spin_unlock_irq(&mdev->req_lock);
-		drbd_kick_lo(mdev);
-		schedule();
+		io_schedule();
 		finish_wait(&mdev->ee_wait, &wait);
 		spin_lock_irq(&mdev->req_lock);
 	}
@@ -1111,8 +1100,6 @@ next_bio:
 	/* > e->sector, unless this is the first bio */
 	bio->bi_sector = sector;
 	bio->bi_bdev = mdev->ldev->backing_bdev;
-	/* we special case some flags in the multi-bio case, see below
-	 * (REQ_UNPLUG) */
 	bio->bi_rw = rw;
 	bio->bi_private = e;
 	bio->bi_end_io = drbd_endio_sec;
@@ -1141,13 +1128,8 @@ next_bio:
 		bios = bios->bi_next;
 		bio->bi_next = NULL;
 
-		/* strip off REQ_UNPLUG unless it is the last bio */
-		if (bios)
-			bio->bi_rw &= ~REQ_UNPLUG;
-
 		drbd_generic_make_request(mdev, fault_type, bio);
 	} while (bios);
-	maybe_kick_lo(mdev);
 	return 0;
 
 fail:
@@ -1167,9 +1149,6 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign
 
 	inc_unacked(mdev);
 
-	if (mdev->net_conf->wire_protocol != DRBD_PROT_C)
-		drbd_kick_lo(mdev);
-
 	mdev->current_epoch->barrier_nr = p->barrier;
 	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
 
@@ -1636,12 +1615,11 @@ static unsigned long write_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
 {
 	if (mdev->agreed_pro_version >= 95)
 		return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
-			(dpf & DP_UNPLUG ? REQ_UNPLUG : 0) |
 			(dpf & DP_FUA ? REQ_FUA : 0) |
 			(dpf & DP_FLUSH ? REQ_FUA : 0) |
 			(dpf & DP_DISCARD ? REQ_DISCARD : 0);
 	else
-		return dpf & DP_RW_SYNC ? (REQ_SYNC | REQ_UNPLUG) : 0;
+		return dpf & DP_RW_SYNC ? REQ_SYNC : 0;
 }
 
 /* mirrored write */
@@ -3556,9 +3534,6 @@ static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 
 static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
-	if (mdev->state.disk >= D_INCONSISTENT)
-		drbd_kick_lo(mdev);
-
 	/* Make sure we've acked all the TCP data associated
 	 * with the data requests being unplugged */
 	drbd_tcp_quickack(mdev->data.socket);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 11a75d32a2e2..ad3fc6228f27 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -960,10 +960,6 @@ allocate_barrier:
 			bio_endio(req->private_bio, -EIO);
 	}
 
-	/* we need to plug ALWAYS since we possibly need to kick lo_dev.
-	 * we plug after submit, so we won't miss an unplug event */
-	drbd_plug_device(mdev);
-
 	return 0;
 
 fail_conflicting:
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 34f224b018b3..e027446590d3 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -792,7 +792,6 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 		 * queue (or even the read operations for those packets
 		 * is not finished by now).   Retry in 100ms. */
 
-		drbd_kick_lo(mdev);
 		__set_current_state(TASK_INTERRUPTIBLE);
 		schedule_timeout(HZ / 10);
 		w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h
index defdb5013ea3..53586fa5ae1b 100644
--- a/drivers/block/drbd/drbd_wrappers.h
+++ b/drivers/block/drbd/drbd_wrappers.h
@@ -45,24 +45,6 @@ static inline void drbd_generic_make_request(struct drbd_conf *mdev,
 		generic_make_request(bio);
 }
 
-static inline void drbd_plug_device(struct drbd_conf *mdev)
-{
-	struct request_queue *q;
-	q = bdev_get_queue(mdev->this_bdev);
-
-	spin_lock_irq(q->queue_lock);
-
-/* XXX the check on !blk_queue_plugged is redundant,
- * implicitly checked in blk_plug_device */
-
-	if (!blk_queue_plugged(q)) {
-		blk_plug_device(q);
-		del_timer(&q->unplug_timer);
-		/* unplugging should not happen automatically... */
-	}
-	spin_unlock_irq(q->queue_lock);
-}
-
 static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm)
 {
         return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK)
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 77fc76f8aea9..301d7a9a41a6 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3770,13 +3770,14 @@ out2:
 /*
  * Check if the disk has been changed or if a change has been faked.
  */
-static int check_floppy_change(struct gendisk *disk)
+static unsigned int floppy_check_events(struct gendisk *disk,
+					unsigned int clearing)
 {
 	int drive = (long)disk->private_data;
 
 	if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
 	    test_bit(FD_VERIFY_BIT, &UDRS->flags))
-		return 1;
+		return DISK_EVENT_MEDIA_CHANGE;
 
 	if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) {
 		lock_fdc(drive, false);
@@ -3788,7 +3789,7 @@ static int check_floppy_change(struct gendisk *disk)
 	    test_bit(FD_VERIFY_BIT, &UDRS->flags) ||
 	    test_bit(drive, &fake_change) ||
 	    drive_no_geom(drive))
-		return 1;
+		return DISK_EVENT_MEDIA_CHANGE;
 	return 0;
 }
 
@@ -3837,7 +3838,6 @@ static int __floppy_read_block_0(struct block_device *bdev)
 	bio.bi_end_io = floppy_rb0_complete;
 
 	submit_bio(READ, &bio);
-	generic_unplug_device(bdev_get_queue(bdev));
 	process_fd_request();
 	wait_for_completion(&complete);
 
@@ -3898,7 +3898,7 @@ static const struct block_device_operations floppy_fops = {
 	.release		= floppy_release,
 	.ioctl			= fd_ioctl,
 	.getgeo			= fd_getgeo,
-	.media_changed		= check_floppy_change,
+	.check_events		= floppy_check_events,
 	.revalidate_disk	= floppy_revalidate,
 };
 
@@ -4205,6 +4205,7 @@ static int __init floppy_init(void)
 		disks[dr]->major = FLOPPY_MAJOR;
 		disks[dr]->first_minor = TOMINOR(dr);
 		disks[dr]->fops = &floppy_fops;
+		disks[dr]->events = DISK_EVENT_MEDIA_CHANGE;
 		sprintf(disks[dr]->disk_name, "fd%d", dr);
 
 		init_timer(&motor_off_timer[dr]);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index dbf31ec9114d..a076a14ca72d 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -540,17 +540,6 @@ out:
 	return 0;
 }
 
-/*
- * kick off io on the underlying address space
- */
-static void loop_unplug(struct request_queue *q)
-{
-	struct loop_device *lo = q->queuedata;
-
-	queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q);
-	blk_run_address_space(lo->lo_backing_file->f_mapping);
-}
-
 struct switch_request {
 	struct file *file;
 	struct completion wait;
@@ -917,7 +906,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	 */
 	blk_queue_make_request(lo->lo_queue, loop_make_request);
 	lo->lo_queue->queuedata = lo;
-	lo->lo_queue->unplug_fn = loop_unplug;
 
 	if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
 		blk_queue_flush(lo->lo_queue, REQ_FLUSH);
@@ -1019,7 +1007,6 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
 
 	kthread_stop(lo->lo_thread);
 
-	lo->lo_queue->unplug_fn = NULL;
 	lo->lo_backing_file = NULL;
 
 	loop_release_xfer(lo);
@@ -1636,9 +1623,6 @@ out:
 
 static void loop_free(struct loop_device *lo)
 {
-	if (!lo->lo_queue->queue_lock)
-		lo->lo_queue->queue_lock = &lo->lo_queue->__queue_lock;
-
 	blk_cleanup_queue(lo->lo_queue);
 	put_disk(lo->lo_disk);
 	list_del(&lo->lo_list);
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 62cec6afd7ad..2f2ccf686251 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -172,7 +172,8 @@ module_param_array(drive3, int, NULL, 0);
 static int pcd_open(struct cdrom_device_info *cdi, int purpose);
 static void pcd_release(struct cdrom_device_info *cdi);
 static int pcd_drive_status(struct cdrom_device_info *cdi, int slot_nr);
-static int pcd_media_changed(struct cdrom_device_info *cdi, int slot_nr);
+static unsigned int pcd_check_events(struct cdrom_device_info *cdi,
+				     unsigned int clearing, int slot_nr);
 static int pcd_tray_move(struct cdrom_device_info *cdi, int position);
 static int pcd_lock_door(struct cdrom_device_info *cdi, int lock);
 static int pcd_drive_reset(struct cdrom_device_info *cdi);
@@ -257,10 +258,11 @@ static int pcd_block_ioctl(struct block_device *bdev, fmode_t mode,
 	return ret;
 }
 
-static int pcd_block_media_changed(struct gendisk *disk)
+static unsigned int pcd_block_check_events(struct gendisk *disk,
+					   unsigned int clearing)
 {
 	struct pcd_unit *cd = disk->private_data;
-	return cdrom_media_changed(&cd->info);
+	return cdrom_check_events(&cd->info, clearing);
 }
 
 static const struct block_device_operations pcd_bdops = {
@@ -268,14 +270,14 @@ static const struct block_device_operations pcd_bdops = {
 	.open		= pcd_block_open,
 	.release	= pcd_block_release,
 	.ioctl		= pcd_block_ioctl,
-	.media_changed	= pcd_block_media_changed,
+	.check_events	= pcd_block_check_events,
 };
 
 static struct cdrom_device_ops pcd_dops = {
 	.open		= pcd_open,
 	.release	= pcd_release,
 	.drive_status	= pcd_drive_status,
-	.media_changed	= pcd_media_changed,
+	.check_events	= pcd_check_events,
 	.tray_move	= pcd_tray_move,
 	.lock_door	= pcd_lock_door,
 	.get_mcn	= pcd_get_mcn,
@@ -318,6 +320,7 @@ static void pcd_init_units(void)
 		disk->first_minor = unit;
 		strcpy(disk->disk_name, cd->name);	/* umm... */
 		disk->fops = &pcd_bdops;
+		disk->events = DISK_EVENT_MEDIA_CHANGE;
 	}
 }
 
@@ -502,13 +505,14 @@ static int pcd_packet(struct cdrom_device_info *cdi, struct packet_command *cgc)
 
 #define DBMSG(msg)	((verbose>1)?(msg):NULL)
 
-static int pcd_media_changed(struct cdrom_device_info *cdi, int slot_nr)
+static unsigned int pcd_check_events(struct cdrom_device_info *cdi,
+				     unsigned int clearing, int slot_nr)
 {
 	struct pcd_unit *cd = cdi->handle;
 	int res = cd->changed;
 	if (res)
 		cd->changed = 0;
-	return res;
+	return res ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static int pcd_lock_door(struct cdrom_device_info *cdi, int lock)
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index c0ee1558b9bb..21dfdb776869 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -794,7 +794,7 @@ static int pd_release(struct gendisk *p, fmode_t mode)
 	return 0;
 }
 
-static int pd_check_media(struct gendisk *p)
+static unsigned int pd_check_events(struct gendisk *p, unsigned int clearing)
 {
 	struct pd_unit *disk = p->private_data;
 	int r;
@@ -803,7 +803,7 @@ static int pd_check_media(struct gendisk *p)
 	pd_special_command(disk, pd_media_check);
 	r = disk->changed;
 	disk->changed = 0;
-	return r;
+	return r ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static int pd_revalidate(struct gendisk *p)
@@ -822,7 +822,7 @@ static const struct block_device_operations pd_fops = {
 	.release	= pd_release,
 	.ioctl		= pd_ioctl,
 	.getgeo		= pd_getgeo,
-	.media_changed	= pd_check_media,
+	.check_events	= pd_check_events,
 	.revalidate_disk= pd_revalidate
 };
 
@@ -837,6 +837,7 @@ static void pd_probe_drive(struct pd_unit *disk)
 	p->fops = &pd_fops;
 	p->major = major;
 	p->first_minor = (disk - pd) << PD_BITS;
+	p->events = DISK_EVENT_MEDIA_CHANGE;
 	disk->gd = p;
 	p->private_data = disk;
 	p->queue = pd_queue;
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 635f25dd9e10..7adeb1edbf43 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -243,7 +243,8 @@ static struct pf_unit units[PF_UNITS];
 static int pf_identify(struct pf_unit *pf);
 static void pf_lock(struct pf_unit *pf, int func);
 static void pf_eject(struct pf_unit *pf);
-static int pf_check_media(struct gendisk *disk);
+static unsigned int pf_check_events(struct gendisk *disk,
+				    unsigned int clearing);
 
 static char pf_scratch[512];	/* scratch block buffer */
 
@@ -270,7 +271,7 @@ static const struct block_device_operations pf_fops = {
 	.release	= pf_release,
 	.ioctl		= pf_ioctl,
 	.getgeo		= pf_getgeo,
-	.media_changed	= pf_check_media,
+	.check_events	= pf_check_events,
 };
 
 static void __init pf_init_units(void)
@@ -293,6 +294,7 @@ static void __init pf_init_units(void)
 		disk->first_minor = unit;
 		strcpy(disk->disk_name, pf->name);
 		disk->fops = &pf_fops;
+		disk->events = DISK_EVENT_MEDIA_CHANGE;
 		if (!(*drives[unit])[D_PRT])
 			pf_drive_count++;
 	}
@@ -377,9 +379,9 @@ static int pf_release(struct gendisk *disk, fmode_t mode)
 
 }
 
-static int pf_check_media(struct gendisk *disk)
+static unsigned int pf_check_events(struct gendisk *disk, unsigned int clearing)
 {
-	return 1;
+	return DISK_EVENT_MEDIA_CHANGE;
 }
 
 static inline int status_reg(struct pf_unit *pf)
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 77d70eebb6b2..07a382eaf0a8 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1606,8 +1606,6 @@ static int kcdrwd(void *foobar)
 					min_sleep_time = pkt->sleep_time;
 			}
 
-			generic_unplug_device(bdev_get_queue(pd->bdev));
-
 			VPRINTK("kcdrwd: sleeping\n");
 			residue = schedule_timeout(min_sleep_time);
 			VPRINTK("kcdrwd: wake up\n");
@@ -2796,7 +2794,8 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 	return ret;
 }
 
-static int pkt_media_changed(struct gendisk *disk)
+static unsigned int pkt_check_events(struct gendisk *disk,
+				     unsigned int clearing)
 {
 	struct pktcdvd_device *pd = disk->private_data;
 	struct gendisk *attached_disk;
@@ -2806,9 +2805,9 @@ static int pkt_media_changed(struct gendisk *disk)
 	if (!pd->bdev)
 		return 0;
 	attached_disk = pd->bdev->bd_disk;
-	if (!attached_disk)
+	if (!attached_disk || !attached_disk->fops->check_events)
 		return 0;
-	return attached_disk->fops->media_changed(attached_disk);
+	return attached_disk->fops->check_events(attached_disk, clearing);
 }
 
 static const struct block_device_operations pktcdvd_ops = {
@@ -2816,7 +2815,7 @@ static const struct block_device_operations pktcdvd_ops = {
 	.open =			pkt_open,
 	.release =		pkt_close,
 	.ioctl =		pkt_ioctl,
-	.media_changed =	pkt_media_changed,
+	.check_events =		pkt_check_events,
 };
 
 static char *pktcdvd_devnode(struct gendisk *gd, mode_t *mode)
@@ -2889,6 +2888,10 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
 	if (ret)
 		goto out_new_dev;
 
+	/* inherit events of the host device */
+	disk->events = pd->bdev->bd_disk->events;
+	disk->async_events = pd->bdev->bd_disk->async_events;
+
 	add_disk(disk);
 
 	pkt_sysfs_dev_new(pd);
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 75333d0a3327..24a482f2fbd6 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -741,11 +741,12 @@ static int floppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
-static int floppy_check_change(struct gendisk *disk)
+static unsigned int floppy_check_events(struct gendisk *disk,
+					unsigned int clearing)
 {
 	struct floppy_state *fs = disk->private_data;
 
-	return fs->ejected;
+	return fs->ejected ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static int floppy_revalidate(struct gendisk *disk)
@@ -772,7 +773,7 @@ static const struct block_device_operations floppy_fops = {
 	.release	 = floppy_release,
 	.ioctl		 = floppy_ioctl,
 	.getgeo		 = floppy_getgeo,
-	.media_changed	 = floppy_check_change,
+	.check_events	 = floppy_check_events,
 	.revalidate_disk = floppy_revalidate,
 };
 
@@ -857,6 +858,7 @@ static int __devinit swim_floppy_init(struct swim_priv *swd)
 		swd->unit[drive].disk->first_minor = drive;
 		sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive);
 		swd->unit[drive].disk->fops = &floppy_fops;
+		swd->unit[drive].disk->events = DISK_EVENT_MEDIA_CHANGE;
 		swd->unit[drive].disk->private_data = &swd->unit[drive];
 		swd->unit[drive].disk->queue = swd->queue;
 		set_capacity(swd->unit[drive].disk, 2880);
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index bf3a5b859299..4c10f56facbf 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -250,7 +250,8 @@ static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
 			unsigned int cmd, unsigned long param);
 static int floppy_open(struct block_device *bdev, fmode_t mode);
 static int floppy_release(struct gendisk *disk, fmode_t mode);
-static int floppy_check_change(struct gendisk *disk);
+static unsigned int floppy_check_events(struct gendisk *disk,
+					unsigned int clearing);
 static int floppy_revalidate(struct gendisk *disk);
 
 static bool swim3_end_request(int err, unsigned int nr_bytes)
@@ -975,10 +976,11 @@ static int floppy_release(struct gendisk *disk, fmode_t mode)
 	return 0;
 }
 
-static int floppy_check_change(struct gendisk *disk)
+static unsigned int floppy_check_events(struct gendisk *disk,
+					unsigned int clearing)
 {
 	struct floppy_state *fs = disk->private_data;
-	return fs->ejected;
+	return fs->ejected ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static int floppy_revalidate(struct gendisk *disk)
@@ -1025,7 +1027,7 @@ static const struct block_device_operations floppy_fops = {
 	.open		= floppy_unlocked_open,
 	.release	= floppy_release,
 	.ioctl		= floppy_ioctl,
-	.media_changed	= floppy_check_change,
+	.check_events	= floppy_check_events,
 	.revalidate_disk= floppy_revalidate,
 };
 
@@ -1161,6 +1163,7 @@ static int __devinit swim3_attach(struct macio_dev *mdev, const struct of_device
 	disk->major = FLOPPY_MAJOR;
 	disk->first_minor = i;
 	disk->fops = &floppy_fops;
+	disk->events = DISK_EVENT_MEDIA_CHANGE;
 	disk->private_data = &floppy_states[i];
 	disk->queue = swim3_queue;
 	disk->flags |= GENHD_FL_REMOVABLE;
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 9ae3bb713286..68b9430c7cfe 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -1788,7 +1788,8 @@ static int ub_bd_revalidate(struct gendisk *disk)
  *
  * The return code is bool!
  */
-static int ub_bd_media_changed(struct gendisk *disk)
+static unsigned int ub_bd_check_events(struct gendisk *disk,
+				       unsigned int clearing)
 {
 	struct ub_lun *lun = disk->private_data;
 
@@ -1806,10 +1807,10 @@ static int ub_bd_media_changed(struct gendisk *disk)
 	 */
 	if (ub_sync_tur(lun->udev, lun) != 0) {
 		lun->changed = 1;
-		return 1;
+		return DISK_EVENT_MEDIA_CHANGE;
 	}
 
-	return lun->changed;
+	return lun->changed ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static const struct block_device_operations ub_bd_fops = {
@@ -1817,7 +1818,7 @@ static const struct block_device_operations ub_bd_fops = {
 	.open		= ub_bd_unlocked_open,
 	.release	= ub_bd_release,
 	.ioctl		= ub_bd_ioctl,
-	.media_changed	= ub_bd_media_changed,
+	.check_events	= ub_bd_check_events,
 	.revalidate_disk = ub_bd_revalidate,
 };
 
@@ -2333,6 +2334,7 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum)
 	disk->major = UB_MAJOR;
 	disk->first_minor = lun->id * UB_PARTS_PER_LUN;
 	disk->fops = &ub_bd_fops;
+	disk->events = DISK_EVENT_MEDIA_CHANGE;
 	disk->private_data = lun;
 	disk->driverfs_dev = &sc->intf->dev;
 
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 8be57151f5d6..031ca720d926 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -241,8 +241,7 @@ static void dump_dmastat(struct cardinfo *card, unsigned int dmastat)
  *
  * Whenever IO on the active page completes, the Ready page is activated
  * and the ex-Active page is clean out and made Ready.
- * Otherwise the Ready page is only activated when it becomes full, or
- * when mm_unplug_device is called via the unplug_io_fn.
+ * Otherwise the Ready page is only activated when it becomes full.
  *
  * If a request arrives while both pages a full, it is queued, and b_rdev is
  * overloaded to record whether it was a read or a write.
@@ -333,17 +332,6 @@ static inline void reset_page(struct mm_page *page)
 	page->biotail = &page->bio;
 }
 
-static void mm_unplug_device(struct request_queue *q)
-{
-	struct cardinfo *card = q->queuedata;
-	unsigned long flags;
-
-	spin_lock_irqsave(&card->lock, flags);
-	if (blk_remove_plug(q))
-		activate(card);
-	spin_unlock_irqrestore(&card->lock, flags);
-}
-
 /*
  * If there is room on Ready page, take
  * one bh off list and add it.
@@ -535,7 +523,6 @@ static int mm_make_request(struct request_queue *q, struct bio *bio)
 	*card->biotail = bio;
 	bio->bi_next = NULL;
 	card->biotail = &bio->bi_next;
-	blk_plug_device(q);
 	spin_unlock_irq(&card->lock);
 
 	return 0;
@@ -779,20 +766,10 @@ static int mm_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
-/*
- * Future support for removable devices
- */
-static int mm_check_change(struct gendisk *disk)
-{
-/*  struct cardinfo *dev = disk->private_data; */
-	return 0;
-}
-
 static const struct block_device_operations mm_fops = {
 	.owner		= THIS_MODULE,
 	.getgeo		= mm_getgeo,
 	.revalidate_disk = mm_revalidate,
-	.media_changed	= mm_check_change,
 };
 
 static int __devinit mm_pci_probe(struct pci_dev *dev,
@@ -907,7 +884,6 @@ static int __devinit mm_pci_probe(struct pci_dev *dev,
 	blk_queue_make_request(card->queue, mm_make_request);
 	card->queue->queue_lock = &card->lock;
 	card->queue->queuedata = card;
-	card->queue->unplug_fn = mm_unplug_device;
 
 	tasklet_init(&card->tasklet, process_page, (unsigned long)card);
 
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 2c590a796aa1..73354b081ed3 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -867,12 +867,12 @@ static void ace_request(struct request_queue * q)
 	}
 }
 
-static int ace_media_changed(struct gendisk *gd)
+static unsigned int ace_check_events(struct gendisk *gd, unsigned int clearing)
 {
 	struct ace_device *ace = gd->private_data;
-	dev_dbg(ace->dev, "ace_media_changed(): %i\n", ace->media_change);
+	dev_dbg(ace->dev, "ace_check_events(): %i\n", ace->media_change);
 
-	return ace->media_change;
+	return ace->media_change ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static int ace_revalidate_disk(struct gendisk *gd)
@@ -953,7 +953,7 @@ static const struct block_device_operations ace_fops = {
 	.owner = THIS_MODULE,
 	.open = ace_open,
 	.release = ace_release,
-	.media_changed = ace_media_changed,
+	.check_events = ace_check_events,
 	.revalidate_disk = ace_revalidate_disk,
 	.getgeo = ace_getgeo,
 };
@@ -1005,6 +1005,7 @@ static int __devinit ace_setup(struct ace_device *ace)
 	ace->gd->major = ace_major;
 	ace->gd->first_minor = ace->id * ACE_NUM_MINORS;
 	ace->gd->fops = &ace_fops;
+	ace->gd->events = DISK_EVENT_MEDIA_CHANGE;
 	ace->gd->queue = ace->queue;
 	ace->gd->private_data = ace;
 	snprintf(ace->gd->disk_name, 32, "xs%c", ace->id + 'a');
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 64a21461c408..b2b034fea34e 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -395,10 +395,12 @@ static int gdrom_drivestatus(struct cdrom_device_info *cd_info, int ignore)
 	return CDS_NO_INFO;
 }
 
-static int gdrom_mediachanged(struct cdrom_device_info *cd_info, int ignore)
+static unsigned int gdrom_check_events(struct cdrom_device_info *cd_info,
+				       unsigned int clearing, int ignore)
 {
 	/* check the sense key */
-	return (__raw_readb(GDROM_ERROR_REG) & 0xF0) == 0x60;
+	return (__raw_readb(GDROM_ERROR_REG) & 0xF0) == 0x60 ?
+		DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 /* reset the G1 bus */
@@ -483,7 +485,7 @@ static struct cdrom_device_ops gdrom_ops = {
 	.open			= gdrom_open,
 	.release		= gdrom_release,
 	.drive_status		= gdrom_drivestatus,
-	.media_changed		= gdrom_mediachanged,
+	.check_events		= gdrom_check_events,
 	.get_last_session	= gdrom_get_last_session,
 	.reset			= gdrom_hardreset,
 	.audio_ioctl		= gdrom_audio_ioctl,
@@ -509,9 +511,10 @@ static int gdrom_bdops_release(struct gendisk *disk, fmode_t mode)
 	return 0;
 }
 
-static int gdrom_bdops_mediachanged(struct gendisk *disk)
+static unsigned int gdrom_bdops_check_events(struct gendisk *disk,
+					     unsigned int clearing)
 {
-	return cdrom_media_changed(gd.cd_info);
+	return cdrom_check_events(gd.cd_info, clearing);
 }
 
 static int gdrom_bdops_ioctl(struct block_device *bdev, fmode_t mode,
@@ -530,7 +533,7 @@ static const struct block_device_operations gdrom_bdops = {
 	.owner			= THIS_MODULE,
 	.open			= gdrom_bdops_open,
 	.release		= gdrom_bdops_release,
-	.media_changed		= gdrom_bdops_mediachanged,
+	.check_events		= gdrom_bdops_check_events,
 	.ioctl			= gdrom_bdops_ioctl,
 };
 
@@ -800,6 +803,7 @@ static int __devinit probe_gdrom(struct platform_device *devptr)
 		goto probe_fail_cdrom_register;
 	}
 	gd.disk->fops = &gdrom_bdops;
+	gd.disk->events = DISK_EVENT_MEDIA_CHANGE;
 	/* latch on to the interrupt */
 	err = gdrom_set_interrupt_handlers();
 	if (err)
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index be73a9b493a6..4e874c5fa605 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -186,10 +186,11 @@ static int viocd_blk_ioctl(struct block_device *bdev, fmode_t mode,
 	return ret;
 }
 
-static int viocd_blk_media_changed(struct gendisk *disk)
+static unsigned int viocd_blk_check_events(struct gendisk *disk,
+					   unsigned int clearing)
 {
 	struct disk_info *di = disk->private_data;
-	return cdrom_media_changed(&di->viocd_info);
+	return cdrom_check_events(&di->viocd_info, clearing);
 }
 
 static const struct block_device_operations viocd_fops = {
@@ -197,7 +198,7 @@ static const struct block_device_operations viocd_fops = {
 	.open =			viocd_blk_open,
 	.release =		viocd_blk_release,
 	.ioctl =		viocd_blk_ioctl,
-	.media_changed =	viocd_blk_media_changed,
+	.check_events =		viocd_blk_check_events,
 };
 
 static int viocd_open(struct cdrom_device_info *cdi, int purpose)
@@ -320,7 +321,8 @@ static void do_viocd_request(struct request_queue *q)
 	}
 }
 
-static int viocd_media_changed(struct cdrom_device_info *cdi, int disc_nr)
+static unsigned int viocd_check_events(struct cdrom_device_info *cdi,
+				       unsigned int clearing, int disc_nr)
 {
 	struct viocd_waitevent we;
 	HvLpEvent_Rc hvrc;
@@ -340,7 +342,7 @@ static int viocd_media_changed(struct cdrom_device_info *cdi, int disc_nr)
 	if (hvrc != 0) {
 		pr_warning("bad rc on HvCallEvent_signalLpEventFast %d\n",
 			   (int)hvrc);
-		return -EIO;
+		return 0;
 	}
 
 	wait_for_completion(&we.com);
@@ -354,7 +356,7 @@ static int viocd_media_changed(struct cdrom_device_info *cdi, int disc_nr)
 		return 0;
 	}
 
-	return we.changed;
+	return we.changed ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static int viocd_lock_door(struct cdrom_device_info *cdi, int locking)
@@ -550,7 +552,7 @@ static int viocd_audio_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
 static struct cdrom_device_ops viocd_dops = {
 	.open = viocd_open,
 	.release = viocd_release,
-	.media_changed = viocd_media_changed,
+	.check_events = viocd_check_events,
 	.lock_door = viocd_lock_door,
 	.generic_packet = viocd_packet,
 	.audio_ioctl = viocd_audio_ioctl,
@@ -624,6 +626,7 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	gendisk->queue = q;
 	gendisk->fops = &viocd_fops;
 	gendisk->flags = GENHD_FL_CD|GENHD_FL_REMOVABLE;
+	gendisk->events = DISK_EVENT_MEDIA_CHANGE;
 	set_capacity(gendisk, 0);
 	gendisk->private_data = d;
 	d->viocd_disk = gendisk;
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 4c95b5fd9df3..799e1490cf24 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -1073,6 +1073,9 @@ int drm_mode_getresources(struct drm_device *dev, void *data,
 	uint32_t __user *encoder_id;
 	struct drm_mode_group *mode_group;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 
 	/*
@@ -1244,6 +1247,9 @@ int drm_mode_getcrtc(struct drm_device *dev,
 	struct drm_mode_object *obj;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 
 	obj = drm_mode_object_find(dev, crtc_resp->crtc_id,
@@ -1312,6 +1318,9 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 	uint64_t __user *prop_values;
 	uint32_t __user *encoder_ptr;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	memset(&u_mode, 0, sizeof(struct drm_mode_modeinfo));
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:?]\n", out_resp->connector_id);
@@ -1431,6 +1440,9 @@ int drm_mode_getencoder(struct drm_device *dev, void *data,
 	struct drm_encoder *encoder;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, enc_resp->encoder_id,
 				   DRM_MODE_OBJECT_ENCODER);
@@ -1486,6 +1498,9 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
 	int ret = 0;
 	int i;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, crtc_req->crtc_id,
 				   DRM_MODE_OBJECT_CRTC);
@@ -1603,6 +1618,9 @@ int drm_mode_cursor_ioctl(struct drm_device *dev,
 	struct drm_crtc *crtc;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	if (!req->flags) {
 		DRM_ERROR("no operation set\n");
 		return -EINVAL;
@@ -1667,6 +1685,9 @@ int drm_mode_addfb(struct drm_device *dev,
 	struct drm_framebuffer *fb;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	if ((config->min_width > r->width) || (r->width > config->max_width)) {
 		DRM_ERROR("mode new framebuffer width not within limits\n");
 		return -EINVAL;
@@ -1724,6 +1745,9 @@ int drm_mode_rmfb(struct drm_device *dev,
 	int ret = 0;
 	int found = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, *id, DRM_MODE_OBJECT_FB);
 	/* TODO check that we realy get a framebuffer back. */
@@ -1780,6 +1804,9 @@ int drm_mode_getfb(struct drm_device *dev,
 	struct drm_framebuffer *fb;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, r->fb_id, DRM_MODE_OBJECT_FB);
 	if (!obj) {
@@ -1813,6 +1840,9 @@ int drm_mode_dirtyfb_ioctl(struct drm_device *dev,
 	int num_clips;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, r->fb_id, DRM_MODE_OBJECT_FB);
 	if (!obj) {
@@ -1996,6 +2026,9 @@ int drm_mode_attachmode_ioctl(struct drm_device *dev,
 	struct drm_mode_modeinfo *umode = &mode_cmd->mode;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 
 	obj = drm_mode_object_find(dev, mode_cmd->connector_id, DRM_MODE_OBJECT_CONNECTOR);
@@ -2042,6 +2075,9 @@ int drm_mode_detachmode_ioctl(struct drm_device *dev,
 	struct drm_mode_modeinfo *umode = &mode_cmd->mode;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 
 	obj = drm_mode_object_find(dev, mode_cmd->connector_id, DRM_MODE_OBJECT_CONNECTOR);
@@ -2211,6 +2247,9 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev,
 	uint64_t __user *values_ptr;
 	uint32_t __user *blob_length_ptr;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, out_resp->prop_id, DRM_MODE_OBJECT_PROPERTY);
 	if (!obj) {
@@ -2333,6 +2372,9 @@ int drm_mode_getblob_ioctl(struct drm_device *dev,
 	int ret = 0;
 	void *blob_ptr;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, out_resp->blob_id, DRM_MODE_OBJECT_BLOB);
 	if (!obj) {
@@ -2393,6 +2435,9 @@ int drm_mode_connector_property_set_ioctl(struct drm_device *dev,
 	int ret = -EINVAL;
 	int i;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 
 	obj = drm_mode_object_find(dev, out_resp->connector_id, DRM_MODE_OBJECT_CONNECTOR);
@@ -2509,6 +2554,9 @@ int drm_mode_gamma_set_ioctl(struct drm_device *dev,
 	int size;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC);
 	if (!obj) {
@@ -2560,6 +2608,9 @@ int drm_mode_gamma_get_ioctl(struct drm_device *dev,
 	int size;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC);
 	if (!obj) {
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 57ce27c9a747..74e4ff578017 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -499,11 +499,12 @@ EXPORT_SYMBOL(drm_gem_vm_open);
 void drm_gem_vm_close(struct vm_area_struct *vma)
 {
 	struct drm_gem_object *obj = vma->vm_private_data;
+	struct drm_device *dev = obj->dev;
 
-	mutex_lock(&obj->dev->struct_mutex);
+	mutex_lock(&dev->struct_mutex);
 	drm_vm_close_locked(vma);
 	drm_gem_object_unreference(obj);
-	mutex_unlock(&obj->dev->struct_mutex);
+	mutex_unlock(&dev->struct_mutex);
 }
 EXPORT_SYMBOL(drm_gem_vm_close);
 
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 7f6912a16761..904d7e9c8e47 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -280,6 +280,9 @@ int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_priv)
 		if (dev->driver->dumb_create)
 			req->value = 1;
 		break;
+	case DRM_CAP_VBLANK_HIGH_CRTC:
+		req->value = 1;
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index a34ef97d3c81..741457bd1c46 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -1125,7 +1125,7 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
 {
 	union drm_wait_vblank *vblwait = data;
 	int ret = 0;
-	unsigned int flags, seq, crtc;
+	unsigned int flags, seq, crtc, high_crtc;
 
 	if ((!drm_dev_to_irq(dev)) || (!dev->irq_enabled))
 		return -EINVAL;
@@ -1134,16 +1134,21 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
 		return -EINVAL;
 
 	if (vblwait->request.type &
-	    ~(_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK)) {
+	    ~(_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK |
+	      _DRM_VBLANK_HIGH_CRTC_MASK)) {
 		DRM_ERROR("Unsupported type value 0x%x, supported mask 0x%x\n",
 			  vblwait->request.type,
-			  (_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK));
+			  (_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK |
+			   _DRM_VBLANK_HIGH_CRTC_MASK));
 		return -EINVAL;
 	}
 
 	flags = vblwait->request.type & _DRM_VBLANK_FLAGS_MASK;
-	crtc = flags & _DRM_VBLANK_SECONDARY ? 1 : 0;
-
+	high_crtc = (vblwait->request.type & _DRM_VBLANK_HIGH_CRTC_MASK);
+	if (high_crtc)
+		crtc = high_crtc >> _DRM_VBLANK_HIGH_CRTC_SHIFT;
+	else
+		crtc = flags & _DRM_VBLANK_SECONDARY ? 1 : 0;
 	if (crtc >= dev->num_crtcs)
 		return -EINVAL;
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 09e0327fc6ce..87c8e29465e3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -892,7 +892,7 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 		seq_printf(m, "Render p-state limit: %d\n",
 			   rp_state_limits & 0xff);
 		seq_printf(m, "CAGF: %dMHz\n", ((rpstat & GEN6_CAGF_MASK) >>
-						GEN6_CAGF_SHIFT) * 100);
+						GEN6_CAGF_SHIFT) * 50);
 		seq_printf(m, "RP CUR UP EI: %dus\n", rpupei &
 			   GEN6_CURICONT_MASK);
 		seq_printf(m, "RP CUR UP: %dus\n", rpcurup &
@@ -908,15 +908,15 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 
 		max_freq = (rp_state_cap & 0xff0000) >> 16;
 		seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
-			   max_freq * 100);
+			   max_freq * 50);
 
 		max_freq = (rp_state_cap & 0xff00) >> 8;
 		seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
-			   max_freq * 100);
+			   max_freq * 50);
 
 		max_freq = rp_state_cap & 0xff;
 		seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
-			   max_freq * 100);
+			   max_freq * 50);
 
 		__gen6_gt_force_wake_put(dev_priv);
 	} else {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c4c2855d002d..7ce3f353af33 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -224,7 +224,7 @@ i915_gem_dumb_create(struct drm_file *file,
 		     struct drm_mode_create_dumb *args)
 {
 	/* have to work out size/pitch and return them */
-	args->pitch = ALIGN(args->width & ((args->bpp + 1) / 8), 64);
+	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
 	args->size = args->pitch * args->height;
 	return i915_gem_create(file, dev,
 			       args->size, &args->handle);
@@ -1356,9 +1356,10 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	if (!obj->fault_mappable)
 		return;
 
-	unmap_mapping_range(obj->base.dev->dev_mapping,
-			    (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
-			    obj->base.size, 1);
+	if (obj->base.dev->dev_mapping)
+		unmap_mapping_range(obj->base.dev->dev_mapping,
+				    (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
+				    obj->base.size, 1);
 
 	obj->fault_mappable = false;
 }
@@ -1796,8 +1797,10 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
 		return;
 
 	spin_lock(&file_priv->mm.lock);
-	list_del(&request->client_list);
-	request->file_priv = NULL;
+	if (request->file_priv) {
+		list_del(&request->client_list);
+		request->file_priv = NULL;
+	}
 	spin_unlock(&file_priv->mm.lock);
 }
 
@@ -2217,13 +2220,18 @@ i915_gem_flush_ring(struct intel_ring_buffer *ring,
 {
 	int ret;
 
+	if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0)
+		return 0;
+
 	trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains);
 
 	ret = ring->flush(ring, invalidate_domains, flush_domains);
 	if (ret)
 		return ret;
 
-	i915_gem_process_flushing_list(ring, flush_domains);
+	if (flush_domains & I915_GEM_GPU_DOMAINS)
+		i915_gem_process_flushing_list(ring, flush_domains);
+
 	return 0;
 }
 
@@ -2579,8 +2587,23 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 		reg = &dev_priv->fence_regs[obj->fence_reg];
 		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
 
-		if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
-			pipelined = NULL;
+		if (obj->tiling_changed) {
+			ret = i915_gem_object_flush_fence(obj, pipelined);
+			if (ret)
+				return ret;
+
+			if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
+				pipelined = NULL;
+
+			if (pipelined) {
+				reg->setup_seqno =
+					i915_gem_next_request_seqno(pipelined);
+				obj->last_fenced_seqno = reg->setup_seqno;
+				obj->last_fenced_ring = pipelined;
+			}
+
+			goto update;
+		}
 
 		if (!pipelined) {
 			if (reg->setup_seqno) {
@@ -2599,31 +2622,6 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 			ret = i915_gem_object_flush_fence(obj, pipelined);
 			if (ret)
 				return ret;
-		} else if (obj->tiling_changed) {
-			if (obj->fenced_gpu_access) {
-				if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
-					ret = i915_gem_flush_ring(obj->ring,
-								  0, obj->base.write_domain);
-					if (ret)
-						return ret;
-				}
-
-				obj->fenced_gpu_access = false;
-			}
-		}
-
-		if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
-			pipelined = NULL;
-		BUG_ON(!pipelined && reg->setup_seqno);
-
-		if (obj->tiling_changed) {
-			if (pipelined) {
-				reg->setup_seqno =
-					i915_gem_next_request_seqno(pipelined);
-				obj->last_fenced_seqno = reg->setup_seqno;
-				obj->last_fenced_ring = pipelined;
-			}
-			goto update;
 		}
 
 		return 0;
@@ -3606,6 +3604,8 @@ static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
 		return;
 	}
 
+	trace_i915_gem_object_destroy(obj);
+
 	if (obj->base.map_list.map)
 		i915_gem_free_mmap_offset(obj);
 
@@ -3615,8 +3615,6 @@ static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
 	kfree(obj->page_cpu_valid);
 	kfree(obj->bit_17);
 	kfree(obj);
-
-	trace_i915_gem_object_destroy(obj);
 }
 
 void i915_gem_free_object(struct drm_gem_object *gem_obj)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7ff7f933ddf1..20a4cc5b818f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -367,6 +367,10 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		uint32_t __iomem *reloc_entry;
 		void __iomem *reloc_page;
 
+		/* We can't wait for rendering with pagefaults disabled */
+		if (obj->active && in_atomic())
+			return -EFAULT;
+
 		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
 		if (ret)
 			return ret;
@@ -440,15 +444,24 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 			     struct list_head *objects)
 {
 	struct drm_i915_gem_object *obj;
-	int ret;
-
+	int ret = 0;
+
+	/* This is the fast path and we cannot handle a pagefault whilst
+	 * holding the struct mutex lest the user pass in the relocations
+	 * contained within a mmaped bo. For in such a case we, the page
+	 * fault handler would call i915_gem_fault() and we would try to
+	 * acquire the struct mutex again. Obviously this is bad and so
+	 * lockdep complains vehemently.
+	 */
+	pagefault_disable();
 	list_for_each_entry(obj, objects, exec_list) {
 		ret = i915_gem_execbuffer_relocate_object(obj, eb);
 		if (ret)
-			return ret;
+			break;
 	}
+	pagefault_enable();
 
-	return 0;
+	return ret;
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3106c0dc8389..432fc04c6bff 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1516,9 +1516,10 @@ static void intel_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe,
 
 	reg = PIPECONF(pipe);
 	val = I915_READ(reg);
-	val |= PIPECONF_ENABLE;
-	I915_WRITE(reg, val);
-	POSTING_READ(reg);
+	if (val & PIPECONF_ENABLE)
+		return;
+
+	I915_WRITE(reg, val | PIPECONF_ENABLE);
 	intel_wait_for_vblank(dev_priv->dev, pipe);
 }
 
@@ -1552,9 +1553,10 @@ static void intel_disable_pipe(struct drm_i915_private *dev_priv,
 
 	reg = PIPECONF(pipe);
 	val = I915_READ(reg);
-	val &= ~PIPECONF_ENABLE;
-	I915_WRITE(reg, val);
-	POSTING_READ(reg);
+	if ((val & PIPECONF_ENABLE) == 0)
+		return;
+
+	I915_WRITE(reg, val & ~PIPECONF_ENABLE);
 	intel_wait_for_pipe_off(dev_priv->dev, pipe);
 }
 
@@ -1577,9 +1579,10 @@ static void intel_enable_plane(struct drm_i915_private *dev_priv,
 
 	reg = DSPCNTR(plane);
 	val = I915_READ(reg);
-	val |= DISPLAY_PLANE_ENABLE;
-	I915_WRITE(reg, val);
-	POSTING_READ(reg);
+	if (val & DISPLAY_PLANE_ENABLE)
+		return;
+
+	I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE);
 	intel_wait_for_vblank(dev_priv->dev, pipe);
 }
 
@@ -1610,9 +1613,10 @@ static void intel_disable_plane(struct drm_i915_private *dev_priv,
 
 	reg = DSPCNTR(plane);
 	val = I915_READ(reg);
-	val &= ~DISPLAY_PLANE_ENABLE;
-	I915_WRITE(reg, val);
-	POSTING_READ(reg);
+	if ((val & DISPLAY_PLANE_ENABLE) == 0)
+		return;
+
+	I915_WRITE(reg, val & ~DISPLAY_PLANE_ENABLE);
 	intel_flush_display_plane(dev_priv, plane);
 	intel_wait_for_vblank(dev_priv->dev, pipe);
 }
@@ -1769,7 +1773,6 @@ static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
 			return;
 
 		I915_WRITE(DPFC_CONTROL, dpfc_ctl & ~DPFC_CTL_EN);
-		POSTING_READ(DPFC_CONTROL);
 		intel_wait_for_vblank(dev, intel_crtc->pipe);
 	}
 
@@ -1861,7 +1864,6 @@ static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
 			return;
 
 		I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl & ~DPFC_CTL_EN);
-		POSTING_READ(ILK_DPFC_CONTROL);
 		intel_wait_for_vblank(dev, intel_crtc->pipe);
 	}
 
@@ -3883,10 +3885,7 @@ static bool g4x_compute_srwm(struct drm_device *dev,
 			      display, cursor);
 }
 
-static inline bool single_plane_enabled(unsigned int mask)
-{
-	return mask && (mask & -mask) == 0;
-}
+#define single_plane_enabled(mask) is_power_of_2(mask)
 
 static void g4x_update_wm(struct drm_device *dev)
 {
@@ -5777,7 +5776,6 @@ static void intel_increase_pllclock(struct drm_crtc *crtc)
 
 		dpll &= ~DISPLAY_RATE_SELECT_FPA1;
 		I915_WRITE(dpll_reg, dpll);
-		POSTING_READ(dpll_reg);
 		intel_wait_for_vblank(dev, pipe);
 
 		dpll = I915_READ(dpll_reg);
@@ -5821,7 +5819,6 @@ static void intel_decrease_pllclock(struct drm_crtc *crtc)
 
 		dpll |= DISPLAY_RATE_SELECT_FPA1;
 		I915_WRITE(dpll_reg, dpll);
-		dpll = I915_READ(dpll_reg);
 		intel_wait_for_vblank(dev, pipe);
 		dpll = I915_READ(dpll_reg);
 		if (!(dpll & DISPLAY_RATE_SELECT_FPA1))
@@ -6933,7 +6930,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 		DRM_ERROR("timeout waiting for pcode mailbox to finish\n");
 	if (pcu_mbox & (1<<31)) { /* OC supported */
 		max_freq = pcu_mbox & 0xff;
-		DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 100);
+		DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50);
 	}
 
 	/* In units of 100MHz */
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index d29e33f815d7..0daefca5cbb8 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1957,9 +1957,9 @@ intel_dp_init(struct drm_device *dev, int output_reg)
 					DP_NO_AUX_HANDSHAKE_LINK_TRAINING;
 		} else {
 			/* if this fails, presume the device is a ghost */
-			DRM_ERROR("failed to retrieve link info\n");
-			intel_dp_destroy(&intel_connector->base);
+			DRM_INFO("failed to retrieve link info, disabling eDP\n");
 			intel_dp_encoder_destroy(&intel_dp->base.base);
+			intel_dp_destroy(&intel_connector->base);
 			return;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 789c47801ba8..e9e6f71418a4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -65,62 +65,60 @@ render_ring_flush(struct intel_ring_buffer *ring,
 	u32 cmd;
 	int ret;
 
-	if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
+	/*
+	 * read/write caches:
+	 *
+	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
+	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
+	 * also flushed at 2d versus 3d pipeline switches.
+	 *
+	 * read-only caches:
+	 *
+	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
+	 * MI_READ_FLUSH is set, and is always flushed on 965.
+	 *
+	 * I915_GEM_DOMAIN_COMMAND may not exist?
+	 *
+	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
+	 * invalidated when MI_EXE_FLUSH is set.
+	 *
+	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
+	 * invalidated with every MI_FLUSH.
+	 *
+	 * TLBs:
+	 *
+	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
+	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
+	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
+	 * are flushed at any MI_FLUSH.
+	 */
+
+	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
+	if ((invalidate_domains|flush_domains) &
+	    I915_GEM_DOMAIN_RENDER)
+		cmd &= ~MI_NO_WRITE_FLUSH;
+	if (INTEL_INFO(dev)->gen < 4) {
 		/*
-		 * read/write caches:
-		 *
-		 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
-		 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
-		 * also flushed at 2d versus 3d pipeline switches.
-		 *
-		 * read-only caches:
-		 *
-		 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
-		 * MI_READ_FLUSH is set, and is always flushed on 965.
-		 *
-		 * I915_GEM_DOMAIN_COMMAND may not exist?
-		 *
-		 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
-		 * invalidated when MI_EXE_FLUSH is set.
-		 *
-		 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
-		 * invalidated with every MI_FLUSH.
-		 *
-		 * TLBs:
-		 *
-		 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
-		 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
-		 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
-		 * are flushed at any MI_FLUSH.
+		 * On the 965, the sampler cache always gets flushed
+		 * and this bit is reserved.
 		 */
+		if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
+			cmd |= MI_READ_FLUSH;
+	}
+	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
+		cmd |= MI_EXE_FLUSH;
 
-		cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
-		if ((invalidate_domains|flush_domains) &
-		    I915_GEM_DOMAIN_RENDER)
-			cmd &= ~MI_NO_WRITE_FLUSH;
-		if (INTEL_INFO(dev)->gen < 4) {
-			/*
-			 * On the 965, the sampler cache always gets flushed
-			 * and this bit is reserved.
-			 */
-			if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
-				cmd |= MI_READ_FLUSH;
-		}
-		if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
-			cmd |= MI_EXE_FLUSH;
-
-		if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
-		    (IS_G4X(dev) || IS_GEN5(dev)))
-			cmd |= MI_INVALIDATE_ISP;
+	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
+	    (IS_G4X(dev) || IS_GEN5(dev)))
+		cmd |= MI_INVALIDATE_ISP;
 
-		ret = intel_ring_begin(ring, 2);
-		if (ret)
-			return ret;
+	ret = intel_ring_begin(ring, 2);
+	if (ret)
+		return ret;
 
-		intel_ring_emit(ring, cmd);
-		intel_ring_emit(ring, MI_NOOP);
-		intel_ring_advance(ring);
-	}
+	intel_ring_emit(ring, cmd);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
 
 	return 0;
 }
@@ -568,9 +566,6 @@ bsd_ring_flush(struct intel_ring_buffer *ring,
 {
 	int ret;
 
-	if ((flush_domains & I915_GEM_DOMAIN_RENDER) == 0)
-		return 0;
-
 	ret = intel_ring_begin(ring, 2);
 	if (ret)
 		return ret;
@@ -1056,9 +1051,6 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
 	uint32_t cmd;
 	int ret;
 
-	if (((invalidate | flush) & I915_GEM_GPU_DOMAINS) == 0)
-		return 0;
-
 	ret = intel_ring_begin(ring, 4);
 	if (ret)
 		return ret;
@@ -1230,9 +1222,6 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
 	uint32_t cmd;
 	int ret;
 
-	if (((invalidate | flush) & I915_GEM_DOMAIN_RENDER) == 0)
-		return 0;
-
 	ret = blt_ring_begin(ring, 4);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 3cd3234ba0af..10e41af6b026 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -957,7 +957,11 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode
 	/* adjust pixel clock as needed */
 	adjusted_clock = atombios_adjust_pll(crtc, mode, pll, ss_enabled, &ss);
 
-	if (ASIC_IS_AVIVO(rdev))
+	if (radeon_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+		/* TV seems to prefer the legacy algo on some boards */
+		radeon_compute_pll_legacy(pll, adjusted_clock, &pll_clock, &fb_div, &frac_fb_div,
+					  &ref_div, &post_div);
+	else if (ASIC_IS_AVIVO(rdev))
 		radeon_compute_pll_avivo(pll, adjusted_clock, &pll_clock, &fb_div, &frac_fb_div,
 					 &ref_div, &post_div);
 	else
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index cf7c8d5b4ec2..cf602e2d0718 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -448,7 +448,7 @@ static uint16_t combios_get_table_offset(struct drm_device *dev,
 
 bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev)
 {
-	int edid_info;
+	int edid_info, size;
 	struct edid *edid;
 	unsigned char *raw;
 	edid_info = combios_get_table_offset(rdev->ddev, COMBIOS_HARDCODED_EDID_TABLE);
@@ -456,11 +456,12 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev)
 		return false;
 
 	raw = rdev->bios + edid_info;
-	edid = kmalloc(EDID_LENGTH * (raw[0x7e] + 1), GFP_KERNEL);
+	size = EDID_LENGTH * (raw[0x7e] + 1);
+	edid = kmalloc(size, GFP_KERNEL);
 	if (edid == NULL)
 		return false;
 
-	memcpy((unsigned char *)edid, raw, EDID_LENGTH * (raw[0x7e] + 1));
+	memcpy((unsigned char *)edid, raw, size);
 
 	if (!drm_edid_is_valid(edid)) {
 		kfree(edid);
@@ -468,6 +469,7 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev)
 	}
 
 	rdev->mode_info.bios_hardcoded_edid = edid;
+	rdev->mode_info.bios_hardcoded_edid_size = size;
 	return true;
 }
 
@@ -475,8 +477,17 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev)
 struct edid *
 radeon_bios_get_hardcoded_edid(struct radeon_device *rdev)
 {
-	if (rdev->mode_info.bios_hardcoded_edid)
-		return rdev->mode_info.bios_hardcoded_edid;
+	struct edid *edid;
+
+	if (rdev->mode_info.bios_hardcoded_edid) {
+		edid = kmalloc(rdev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL);
+		if (edid) {
+			memcpy((unsigned char *)edid,
+			       (unsigned char *)rdev->mode_info.bios_hardcoded_edid,
+			       rdev->mode_info.bios_hardcoded_edid_size);
+			return edid;
+		}
+	}
 	return NULL;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 28c7961cd19b..2ef6d5135064 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -633,6 +633,8 @@ static int radeon_vga_mode_valid(struct drm_connector *connector,
 static enum drm_connector_status
 radeon_vga_detect(struct drm_connector *connector, bool force)
 {
+	struct drm_device *dev = connector->dev;
+	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	struct drm_encoder *encoder;
 	struct drm_encoder_helper_funcs *encoder_funcs;
@@ -683,6 +685,17 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
 
 	if (ret == connector_status_connected)
 		ret = radeon_connector_analog_encoder_conflict_solve(connector, encoder, ret, true);
+
+	/* RN50 and some RV100 asics in servers often have a hardcoded EDID in the
+	 * vbios to deal with KVMs. If we have one and are not able to detect a monitor
+	 * by other means, assume the CRT is connected and use that EDID.
+	 */
+	if ((!rdev->is_atom_bios) &&
+	    (ret == connector_status_disconnected) &&
+	    rdev->mode_info.bios_hardcoded_edid_size) {
+		ret = connector_status_connected;
+	}
+
 	radeon_connector_update_scratch_regs(connector, ret);
 	return ret;
 }
@@ -794,6 +807,8 @@ static int radeon_dvi_get_modes(struct drm_connector *connector)
 static enum drm_connector_status
 radeon_dvi_detect(struct drm_connector *connector, bool force)
 {
+	struct drm_device *dev = connector->dev;
+	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	struct drm_encoder *encoder = NULL;
 	struct drm_encoder_helper_funcs *encoder_funcs;
@@ -833,8 +848,6 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
 			 * you don't really know what's connected to which port as both are digital.
 			 */
 			if (radeon_connector->shared_ddc && (ret == connector_status_connected)) {
-				struct drm_device *dev = connector->dev;
-				struct radeon_device *rdev = dev->dev_private;
 				struct drm_connector *list_connector;
 				struct radeon_connector *list_radeon_connector;
 				list_for_each_entry(list_connector, &dev->mode_config.connector_list, head) {
@@ -899,6 +912,19 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
 		ret = radeon_connector_analog_encoder_conflict_solve(connector, encoder, ret, true);
 	}
 
+	/* RN50 and some RV100 asics in servers often have a hardcoded EDID in the
+	 * vbios to deal with KVMs. If we have one and are not able to detect a monitor
+	 * by other means, assume the DFP is connected and use that EDID.  In most
+	 * cases the DVI port is actually a virtual KVM port connected to the service
+	 * processor.
+	 */
+	if ((!rdev->is_atom_bios) &&
+	    (ret == connector_status_disconnected) &&
+	    rdev->mode_info.bios_hardcoded_edid_size) {
+		radeon_connector->use_digital = true;
+		ret = connector_status_connected;
+	}
+
 out:
 	/* updated in get modes as well since we need to know if it's analog or digital */
 	radeon_connector_update_scratch_regs(connector, ret);
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index e4582814bb78..9c57538231d5 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -239,6 +239,7 @@ struct radeon_mode_info {
 	struct drm_property *underscan_vborder_property;
 	/* hardcoded DFP edid from BIOS */
 	struct edid *bios_hardcoded_edid;
+	int bios_hardcoded_edid_size;
 
 	/* pointer to fbdev info structure */
 	struct radeon_fbdev *rfbdev;
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 2aed03bde4b2..08de669e025a 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -365,12 +365,14 @@ static ssize_t radeon_set_pm_profile(struct device *dev,
 		else if (strncmp("high", buf, strlen("high")) == 0)
 			rdev->pm.profile = PM_PROFILE_HIGH;
 		else {
-			DRM_ERROR("invalid power profile!\n");
+			count = -EINVAL;
 			goto fail;
 		}
 		radeon_pm_update_profile(rdev);
 		radeon_pm_set_clocks(rdev);
-	}
+	} else
+		count = -EINVAL;
+
 fail:
 	mutex_unlock(&rdev->pm.mutex);
 
@@ -413,7 +415,7 @@ static ssize_t radeon_set_pm_method(struct device *dev,
 		mutex_unlock(&rdev->pm.mutex);
 		cancel_delayed_work_sync(&rdev->pm.dynpm_idle_work);
 	} else {
-		DRM_ERROR("invalid power method!\n");
+		count = -EINVAL;
 		goto fail;
 	}
 	radeon_pm_compute_clocks(rdev);
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index e88a2cf17711..6f218e014e99 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -233,8 +233,7 @@ int ide_queue_sense_rq(ide_drive_t *drive, void *special)
 
 	drive->hwif->rq = NULL;
 
-	elv_add_request(drive->queue, &drive->sense_rq,
-			ELEVATOR_INSERT_FRONT, 0);
+	elv_add_request(drive->queue, &drive->sense_rq, ELEVATOR_INSERT_FRONT);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 0c73fe39a236..fd1e11799137 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -258,17 +258,10 @@ static int ide_cd_breathe(ide_drive_t *drive, struct request *rq)
 	if (time_after(jiffies, info->write_timeout))
 		return 0;
 	else {
-		struct request_queue *q = drive->queue;
-		unsigned long flags;
-
 		/*
-		 * take a breather relying on the unplug timer to kick us again
+		 * take a breather
 		 */
-
-		spin_lock_irqsave(q->queue_lock, flags);
-		blk_plug_device(q);
-		spin_unlock_irqrestore(q->queue_lock, flags);
-
+		blk_delay_queue(drive->queue, 1);
 		return 1;
 	}
 }
@@ -1177,7 +1170,7 @@ static struct cdrom_device_ops ide_cdrom_dops = {
 	.open			= ide_cdrom_open_real,
 	.release		= ide_cdrom_release_real,
 	.drive_status		= ide_cdrom_drive_status,
-	.media_changed		= ide_cdrom_check_media_change_real,
+	.check_events		= ide_cdrom_check_events_real,
 	.tray_move		= ide_cdrom_tray_move,
 	.lock_door		= ide_cdrom_lock_door,
 	.select_speed		= ide_cdrom_select_speed,
@@ -1514,8 +1507,6 @@ static int ide_cdrom_setup(ide_drive_t *drive)
 	blk_queue_dma_alignment(q, 31);
 	blk_queue_update_dma_pad(q, 15);
 
-	q->unplug_delay = max((1 * HZ) / 1000, 1);
-
 	drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
 	drive->atapi_flags = IDE_AFLAG_NO_EJECT | ide_cd_flags(id);
 
@@ -1702,10 +1693,11 @@ static int idecd_ioctl(struct block_device *bdev, fmode_t mode,
 }
 
 
-static int idecd_media_changed(struct gendisk *disk)
+static unsigned int idecd_check_events(struct gendisk *disk,
+				       unsigned int clearing)
 {
 	struct cdrom_info *info = ide_drv_g(disk, cdrom_info);
-	return cdrom_media_changed(&info->devinfo);
+	return cdrom_check_events(&info->devinfo, clearing);
 }
 
 static int idecd_revalidate_disk(struct gendisk *disk)
@@ -1723,7 +1715,7 @@ static const struct block_device_operations idecd_ops = {
 	.open			= idecd_open,
 	.release		= idecd_release,
 	.ioctl			= idecd_ioctl,
-	.media_changed		= idecd_media_changed,
+	.check_events		= idecd_check_events,
 	.revalidate_disk	= idecd_revalidate_disk
 };
 
@@ -1790,6 +1782,7 @@ static int ide_cd_probe(ide_drive_t *drive)
 	ide_cd_read_toc(drive, &sense);
 	g->fops = &idecd_ops;
 	g->flags |= GENHD_FL_REMOVABLE;
+	g->events = DISK_EVENT_MEDIA_CHANGE;
 	add_disk(g);
 	return 0;
 
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index 93a3cf1b0f3f..1efc936f5b66 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -111,7 +111,8 @@ int cdrom_check_status(ide_drive_t *, struct request_sense *);
 int ide_cdrom_open_real(struct cdrom_device_info *, int);
 void ide_cdrom_release_real(struct cdrom_device_info *);
 int ide_cdrom_drive_status(struct cdrom_device_info *, int);
-int ide_cdrom_check_media_change_real(struct cdrom_device_info *, int);
+unsigned int ide_cdrom_check_events_real(struct cdrom_device_info *,
+					 unsigned int clearing, int slot_nr);
 int ide_cdrom_tray_move(struct cdrom_device_info *, int);
 int ide_cdrom_lock_door(struct cdrom_device_info *, int);
 int ide_cdrom_select_speed(struct cdrom_device_info *, int);
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 766b3deeb23c..2a6bc50e8a41 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -79,8 +79,8 @@ int ide_cdrom_drive_status(struct cdrom_device_info *cdi, int slot_nr)
 	return CDS_DRIVE_NOT_READY;
 }
 
-int ide_cdrom_check_media_change_real(struct cdrom_device_info *cdi,
-				       int slot_nr)
+unsigned int ide_cdrom_check_events_real(struct cdrom_device_info *cdi,
+					 unsigned int clearing, int slot_nr)
 {
 	ide_drive_t *drive = cdi->handle;
 	int retval;
@@ -89,9 +89,9 @@ int ide_cdrom_check_media_change_real(struct cdrom_device_info *cdi,
 		(void) cdrom_check_status(drive, NULL);
 		retval = (drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED) ? 1 : 0;
 		drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
-		return retval;
+		return retval ? DISK_EVENT_MEDIA_CHANGE : 0;
 	} else {
-		return -EINVAL;
+		return 0;
 	}
 }
 
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index 35c4b43585e3..c4ffd4888939 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -285,11 +285,12 @@ static int ide_gd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
-static int ide_gd_media_changed(struct gendisk *disk)
+static unsigned int ide_gd_check_events(struct gendisk *disk,
+					unsigned int clearing)
 {
 	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
-	int ret;
+	bool ret;
 
 	/* do not scan partitions twice if this is a removable device */
 	if (drive->dev_flags & IDE_DFLAG_ATTACH) {
@@ -297,10 +298,10 @@ static int ide_gd_media_changed(struct gendisk *disk)
 		return 0;
 	}
 
-	ret = !!(drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED);
+	ret = drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED;
 	drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
 
-	return ret;
+	return ret ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 static void ide_gd_unlock_native_capacity(struct gendisk *disk)
@@ -318,7 +319,7 @@ static int ide_gd_revalidate_disk(struct gendisk *disk)
 	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
 	ide_drive_t *drive = idkp->drive;
 
-	if (ide_gd_media_changed(disk))
+	if (ide_gd_check_events(disk, 0))
 		drive->disk_ops->get_capacity(drive);
 
 	set_capacity(disk, ide_gd_capacity(drive));
@@ -340,7 +341,7 @@ static const struct block_device_operations ide_gd_ops = {
 	.release		= ide_gd_release,
 	.ioctl			= ide_gd_ioctl,
 	.getgeo			= ide_gd_getgeo,
-	.media_changed		= ide_gd_media_changed,
+	.check_events		= ide_gd_check_events,
 	.unlock_native_capacity	= ide_gd_unlock_native_capacity,
 	.revalidate_disk	= ide_gd_revalidate_disk
 };
@@ -412,6 +413,7 @@ static int ide_gd_probe(ide_drive_t *drive)
 	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
 		g->flags = GENHD_FL_REMOVABLE;
 	g->fops = &ide_gd_ops;
+	g->events = DISK_EVENT_MEDIA_CHANGE;
 	add_disk(g);
 	return 0;
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 999dac054bcc..f4077840d3ab 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -549,8 +549,6 @@ plug_device_2:
 
 	if (rq)
 		blk_requeue_request(q, rq);
-	if (!elv_queue_empty(q))
-		blk_plug_device(q);
 }
 
 void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
@@ -562,8 +560,6 @@ void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
 
 	if (rq)
 		blk_requeue_request(q, rq);
-	if (!elv_queue_empty(q))
-		blk_plug_device(q);
 
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 88a380c5a470..6ab9ab2a5081 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -52,7 +52,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	rq->cmd[0] = REQ_UNPARK_HEADS;
 	rq->cmd_len = 1;
 	rq->cmd_type = REQ_TYPE_SPECIAL;
-	elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1);
+	elv_add_request(q, rq, ELEVATOR_INSERT_FRONT);
 
 out:
 	return;
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index a2ce0b2da281..5c9362792f1d 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -347,7 +347,7 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait)
 			atomic_inc(&bitmap->pending_writes);
 			set_buffer_locked(bh);
 			set_buffer_mapped(bh);
-			submit_bh(WRITE | REQ_UNPLUG | REQ_SYNC, bh);
+			submit_bh(WRITE | REQ_SYNC, bh);
 			bh = bh->b_this_page;
 		}
 
@@ -1339,8 +1339,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
 			prepare_to_wait(&bitmap->overflow_wait, &__wait,
 					TASK_UNINTERRUPTIBLE);
 			spin_unlock_irq(&bitmap->lock);
-			md_unplug(bitmap->mddev);
-			schedule();
+			io_schedule();
 			finish_wait(&bitmap->overflow_wait, &__wait);
 			continue;
 		}
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 4e054bd91664..2c62c1169f78 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -991,11 +991,6 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone)
 	clone->bi_destructor = dm_crypt_bio_destructor;
 }
 
-static void kcryptd_unplug(struct crypt_config *cc)
-{
-	blk_unplug(bdev_get_queue(cc->dev->bdev));
-}
-
 static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
 {
 	struct crypt_config *cc = io->target->private;
@@ -1008,10 +1003,8 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
 	 * one in order to decrypt the whole bio data *afterwards*.
 	 */
 	clone = bio_alloc_bioset(gfp, bio_segments(base_bio), cc->bs);
-	if (!clone) {
-		kcryptd_unplug(cc);
+	if (!clone)
 		return 1;
-	}
 
 	crypt_inc_pending(io);
 
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 136d4f71a116..76a5af00a26b 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -352,7 +352,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
 	BUG_ON(num_regions > DM_IO_MAX_REGIONS);
 
 	if (sync)
-		rw |= REQ_SYNC | REQ_UNPLUG;
+		rw |= REQ_SYNC;
 
 	/*
 	 * For multiple regions we need to be careful to rewind
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 924f5f0084c2..1bb73a13ca40 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -37,13 +37,6 @@ struct dm_kcopyd_client {
 	unsigned int nr_pages;
 	unsigned int nr_free_pages;
 
-	/*
-	 * Block devices to unplug.
-	 * Non-NULL pointer means that a block device has some pending requests
-	 * and needs to be unplugged.
-	 */
-	struct block_device *unplug[2];
-
 	struct dm_io_client *io_client;
 
 	wait_queue_head_t destroyq;
@@ -315,31 +308,6 @@ static int run_complete_job(struct kcopyd_job *job)
 	return 0;
 }
 
-/*
- * Unplug the block device at the specified index.
- */
-static void unplug(struct dm_kcopyd_client *kc, int rw)
-{
-	if (kc->unplug[rw] != NULL) {
-		blk_unplug(bdev_get_queue(kc->unplug[rw]));
-		kc->unplug[rw] = NULL;
-	}
-}
-
-/*
- * Prepare block device unplug. If there's another device
- * to be unplugged at the same array index, we unplug that
- * device first.
- */
-static void prepare_unplug(struct dm_kcopyd_client *kc, int rw,
-			   struct block_device *bdev)
-{
-	if (likely(kc->unplug[rw] == bdev))
-		return;
-	unplug(kc, rw);
-	kc->unplug[rw] = bdev;
-}
-
 static void complete_io(unsigned long error, void *context)
 {
 	struct kcopyd_job *job = (struct kcopyd_job *) context;
@@ -386,16 +354,10 @@ static int run_io_job(struct kcopyd_job *job)
 		.client = job->kc->io_client,
 	};
 
-	if (job->rw == READ) {
+	if (job->rw == READ)
 		r = dm_io(&io_req, 1, &job->source, NULL);
-		prepare_unplug(job->kc, READ, job->source.bdev);
-	} else {
-		if (job->num_dests > 1)
-			io_req.bi_rw |= REQ_UNPLUG;
+	else
 		r = dm_io(&io_req, job->num_dests, job->dests, NULL);
-		if (!(io_req.bi_rw & REQ_UNPLUG))
-			prepare_unplug(job->kc, WRITE, job->dests[0].bdev);
-	}
 
 	return r;
 }
@@ -466,6 +428,7 @@ static void do_work(struct work_struct *work)
 {
 	struct dm_kcopyd_client *kc = container_of(work,
 					struct dm_kcopyd_client, kcopyd_work);
+	struct blk_plug plug;
 
 	/*
 	 * The order that these are called is *very* important.
@@ -473,18 +436,12 @@ static void do_work(struct work_struct *work)
 	 * Pages jobs when successful will jump onto the io jobs
 	 * list.  io jobs call wake when they complete and it all
 	 * starts again.
-	 *
-	 * Note that io_jobs add block devices to the unplug array,
-	 * this array is cleared with "unplug" calls. It is thus
-	 * forbidden to run complete_jobs after io_jobs and before
-	 * unplug because the block device could be destroyed in
-	 * job completion callback.
 	 */
+	blk_start_plug(&plug);
 	process_jobs(&kc->complete_jobs, kc, run_complete_job);
 	process_jobs(&kc->pages_jobs, kc, run_pages_job);
 	process_jobs(&kc->io_jobs, kc, run_io_job);
-	unplug(kc, READ);
-	unplug(kc, WRITE);
+	blk_finish_plug(&plug);
 }
 
 /*
@@ -665,8 +622,6 @@ int dm_kcopyd_client_create(unsigned int nr_pages,
 	INIT_LIST_HEAD(&kc->io_jobs);
 	INIT_LIST_HEAD(&kc->pages_jobs);
 
-	memset(kc->unplug, 0, sizeof(kc->unplug));
-
 	kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
 	if (!kc->job_pool)
 		goto bad_slab;
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index b9e1e15ef11c..5ef136cdba91 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -394,7 +394,7 @@ static void raid_unplug(struct dm_target_callbacks *cb)
 {
 	struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
 
-	md_raid5_unplug_device(rs->md.private);
+	md_raid5_kick_device(rs->md.private);
 }
 
 /*
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index dee326775c60..976ad4688afc 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -842,8 +842,6 @@ static void do_mirror(struct work_struct *work)
 	do_reads(ms, &reads);
 	do_writes(ms, &writes);
 	do_failures(ms, &failures);
-
-	dm_table_unplug_all(ms->ti->table);
 }
 
 /*-----------------------------------------------------------------
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 38e4eb1bb965..416d4e258df6 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -55,6 +55,7 @@ struct dm_table {
 	struct dm_target *targets;
 
 	unsigned discards_supported:1;
+	unsigned integrity_supported:1;
 
 	/*
 	 * Indicates the rw permissions for the new logical
@@ -859,7 +860,7 @@ int dm_table_alloc_md_mempools(struct dm_table *t)
 		return -EINVAL;
 	}
 
-	t->mempools = dm_alloc_md_mempools(type);
+	t->mempools = dm_alloc_md_mempools(type, t->integrity_supported);
 	if (!t->mempools)
 		return -ENOMEM;
 
@@ -935,8 +936,10 @@ static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device
 	struct dm_dev_internal *dd;
 
 	list_for_each_entry(dd, devices, list)
-		if (bdev_get_integrity(dd->dm_dev.bdev))
+		if (bdev_get_integrity(dd->dm_dev.bdev)) {
+			t->integrity_supported = 1;
 			return blk_integrity_register(dm_disk(md), NULL);
+		}
 
 	return 0;
 }
@@ -1275,29 +1278,6 @@ int dm_table_any_busy_target(struct dm_table *t)
 	return 0;
 }
 
-void dm_table_unplug_all(struct dm_table *t)
-{
-	struct dm_dev_internal *dd;
-	struct list_head *devices = dm_table_get_devices(t);
-	struct dm_target_callbacks *cb;
-
-	list_for_each_entry(dd, devices, list) {
-		struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev);
-		char b[BDEVNAME_SIZE];
-
-		if (likely(q))
-			blk_unplug(q);
-		else
-			DMWARN_LIMIT("%s: Cannot unplug nonexistent device %s",
-				     dm_device_name(t->md),
-				     bdevname(dd->dm_dev.bdev, b));
-	}
-
-	list_for_each_entry(cb, &t->target_callbacks, list)
-		if (cb->unplug_fn)
-			cb->unplug_fn(cb);
-}
-
 struct mapped_device *dm_table_get_md(struct dm_table *t)
 {
 	return t->md;
@@ -1345,4 +1325,3 @@ EXPORT_SYMBOL(dm_table_get_mode);
 EXPORT_SYMBOL(dm_table_get_md);
 EXPORT_SYMBOL(dm_table_put);
 EXPORT_SYMBOL(dm_table_get);
-EXPORT_SYMBOL(dm_table_unplug_all);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index eaa3af0e0632..0cf68b478878 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -477,7 +477,8 @@ static void start_io_acct(struct dm_io *io)
 	cpu = part_stat_lock();
 	part_round_stats(cpu, &dm_disk(md)->part0);
 	part_stat_unlock();
-	dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]);
+	atomic_set(&dm_disk(md)->part0.in_flight[rw],
+		atomic_inc_return(&md->pending[rw]));
 }
 
 static void end_io_acct(struct dm_io *io)
@@ -497,8 +498,8 @@ static void end_io_acct(struct dm_io *io)
 	 * After this is decremented the bio must not be touched if it is
 	 * a flush.
 	 */
-	dm_disk(md)->part0.in_flight[rw] = pending =
-		atomic_dec_return(&md->pending[rw]);
+	pending = atomic_dec_return(&md->pending[rw]);
+	atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
 	pending += atomic_read(&md->pending[rw^0x1]);
 
 	/* nudge anyone waiting on suspend queue */
@@ -807,8 +808,6 @@ void dm_requeue_unmapped_request(struct request *clone)
 	dm_unprep_request(rq);
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	if (elv_queue_empty(q))
-		blk_plug_device(q);
 	blk_requeue_request(q, rq);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
@@ -1613,10 +1612,10 @@ static void dm_request_fn(struct request_queue *q)
 	 * number of in-flight I/Os after the queue is stopped in
 	 * dm_suspend().
 	 */
-	while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) {
+	while (!blk_queue_stopped(q)) {
 		rq = blk_peek_request(q);
 		if (!rq)
-			goto plug_and_out;
+			goto delay_and_out;
 
 		/* always use block 0 to find the target for flushes for now */
 		pos = 0;
@@ -1627,7 +1626,7 @@ static void dm_request_fn(struct request_queue *q)
 		BUG_ON(!dm_target_is_valid(ti));
 
 		if (ti->type->busy && ti->type->busy(ti))
-			goto plug_and_out;
+			goto delay_and_out;
 
 		blk_start_request(rq);
 		clone = rq->special;
@@ -1647,11 +1646,8 @@ requeued:
 	BUG_ON(!irqs_disabled());
 	spin_lock(q->queue_lock);
 
-plug_and_out:
-	if (!elv_queue_empty(q))
-		/* Some requests still remain, retry later */
-		blk_plug_device(q);
-
+delay_and_out:
+	blk_delay_queue(q, HZ / 10);
 out:
 	dm_table_put(map);
 
@@ -1680,20 +1676,6 @@ static int dm_lld_busy(struct request_queue *q)
 	return r;
 }
 
-static void dm_unplug_all(struct request_queue *q)
-{
-	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_live_table(md);
-
-	if (map) {
-		if (dm_request_based(md))
-			generic_unplug_device(q);
-
-		dm_table_unplug_all(map);
-		dm_table_put(map);
-	}
-}
-
 static int dm_any_congested(void *congested_data, int bdi_bits)
 {
 	int r = bdi_bits;
@@ -1817,7 +1799,6 @@ static void dm_init_md_queue(struct mapped_device *md)
 	md->queue->backing_dev_info.congested_data = md;
 	blk_queue_make_request(md->queue, dm_request);
 	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
-	md->queue->unplug_fn = dm_unplug_all;
 	blk_queue_merge_bvec(md->queue, dm_merge_bvec);
 	blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
 }
@@ -2263,8 +2244,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 	int r = 0;
 	DECLARE_WAITQUEUE(wait, current);
 
-	dm_unplug_all(md->queue);
-
 	add_wait_queue(&md->wait, &wait);
 
 	while (1) {
@@ -2539,7 +2518,6 @@ int dm_resume(struct mapped_device *md)
 
 	clear_bit(DMF_SUSPENDED, &md->flags);
 
-	dm_table_unplug_all(map);
 	r = 0;
 out:
 	dm_table_put(map);
@@ -2643,9 +2621,10 @@ int dm_noflush_suspending(struct dm_target *ti)
 }
 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
 
-struct dm_md_mempools *dm_alloc_md_mempools(unsigned type)
+struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
 {
 	struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
+	unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS;
 
 	if (!pools)
 		return NULL;
@@ -2662,13 +2641,18 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type)
 	if (!pools->tio_pool)
 		goto free_io_pool_and_out;
 
-	pools->bs = (type == DM_TYPE_BIO_BASED) ?
-		    bioset_create(16, 0) : bioset_create(MIN_IOS, 0);
+	pools->bs = bioset_create(pool_size, 0);
 	if (!pools->bs)
 		goto free_tio_pool_and_out;
 
+	if (integrity && bioset_integrity_create(pools->bs, pool_size))
+		goto free_bioset_and_out;
+
 	return pools;
 
+free_bioset_and_out:
+	bioset_free(pools->bs);
+
 free_tio_pool_and_out:
 	mempool_destroy(pools->tio_pool);
 
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 0c2dd5f4af76..1aaf16746da8 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -149,7 +149,7 @@ void dm_kcopyd_exit(void);
 /*
  * Mempool operations
  */
-struct dm_md_mempools *dm_alloc_md_mempools(unsigned type);
+struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity);
 void dm_free_md_mempools(struct dm_md_mempools *pools);
 
 #endif
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 0ed7f6bc2a7f..abfb59a61ede 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -87,22 +87,6 @@ static int linear_mergeable_bvec(struct request_queue *q,
 	return maxsectors << 9;
 }
 
-static void linear_unplug(struct request_queue *q)
-{
-	mddev_t *mddev = q->queuedata;
-	linear_conf_t *conf;
-	int i;
-
-	rcu_read_lock();
-	conf = rcu_dereference(mddev->private);
-
-	for (i=0; i < mddev->raid_disks; i++) {
-		struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev);
-		blk_unplug(r_queue);
-	}
-	rcu_read_unlock();
-}
-
 static int linear_congested(void *data, int bits)
 {
 	mddev_t *mddev = data;
@@ -224,11 +208,9 @@ static int linear_run (mddev_t *mddev)
 	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
 
 	blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
-	mddev->queue->unplug_fn = linear_unplug;
 	mddev->queue->backing_dev_info.congested_fn = linear_congested;
 	mddev->queue->backing_dev_info.congested_data = mddev;
-	md_integrity_register(mddev);
-	return 0;
+	return md_integrity_register(mddev);
 }
 
 static void free_conf(struct rcu_head *head)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index d5ad7723b172..06ecea751a39 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -780,8 +780,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
 	bio->bi_end_io = super_written;
 
 	atomic_inc(&mddev->pending_writes);
-	submit_bio(REQ_WRITE | REQ_SYNC | REQ_UNPLUG | REQ_FLUSH | REQ_FUA,
-		   bio);
+	submit_bio(REQ_WRITE | REQ_SYNC | REQ_FLUSH | REQ_FUA, bio);
 }
 
 void md_super_wait(mddev_t *mddev)
@@ -809,7 +808,7 @@ int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
 	struct completion event;
 	int ret;
 
-	rw |= REQ_SYNC | REQ_UNPLUG;
+	rw |= REQ_SYNC;
 
 	bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
 		rdev->meta_bdev : rdev->bdev;
@@ -1804,8 +1803,12 @@ int md_integrity_register(mddev_t *mddev)
 			mdname(mddev));
 		return -EINVAL;
 	}
-	printk(KERN_NOTICE "md: data integrity on %s enabled\n",
-		mdname(mddev));
+	printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
+	if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
+		printk(KERN_ERR "md: failed to create integrity pool for %s\n",
+		       mdname(mddev));
+		return -EINVAL;
+	}
 	return 0;
 }
 EXPORT_SYMBOL(md_integrity_register);
@@ -4817,7 +4820,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 		__md_stop_writes(mddev);
 		md_stop(mddev);
 		mddev->queue->merge_bvec_fn = NULL;
-		mddev->queue->unplug_fn = NULL;
 		mddev->queue->backing_dev_info.congested_fn = NULL;
 
 		/* tell userspace to handle 'inactive' */
@@ -6692,8 +6694,6 @@ EXPORT_SYMBOL_GPL(md_allow_write);
 
 void md_unplug(mddev_t *mddev)
 {
-	if (mddev->queue)
-		blk_unplug(mddev->queue);
 	if (mddev->plug)
 		mddev->plug->unplug_fn(mddev->plug);
 }
@@ -6876,7 +6876,6 @@ void md_do_sync(mddev_t *mddev)
 		     >= mddev->resync_max - mddev->curr_resync_completed
 			    )) {
 			/* time to update curr_resync_completed */
-			md_unplug(mddev);
 			wait_event(mddev->recovery_wait,
 				   atomic_read(&mddev->recovery_active) == 0);
 			mddev->curr_resync_completed = j;
@@ -6952,7 +6951,6 @@ void md_do_sync(mddev_t *mddev)
 		 * about not overloading the IO subsystem. (things like an
 		 * e2fsck being done on the RAID array should execute fast)
 		 */
-		md_unplug(mddev);
 		cond_resched();
 
 		currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
@@ -6971,8 +6969,6 @@ void md_do_sync(mddev_t *mddev)
 	 * this also signals 'finished resyncing' to md_stop
 	 */
  out:
-	md_unplug(mddev);
-
 	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
 
 	/* tell personality that we are finished */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 3a62d440e27b..c35890990985 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -106,36 +106,6 @@ static void multipath_end_request(struct bio *bio, int error)
 	rdev_dec_pending(rdev, conf->mddev);
 }
 
-static void unplug_slaves(mddev_t *mddev)
-{
-	multipath_conf_t *conf = mddev->private;
-	int i;
-
-	rcu_read_lock();
-	for (i=0; i<mddev->raid_disks; i++) {
-		mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
-		if (rdev && !test_bit(Faulty, &rdev->flags)
-		    && atomic_read(&rdev->nr_pending)) {
-			struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
-
-			atomic_inc(&rdev->nr_pending);
-			rcu_read_unlock();
-
-			blk_unplug(r_queue);
-
-			rdev_dec_pending(rdev, mddev);
-			rcu_read_lock();
-		}
-	}
-	rcu_read_unlock();
-}
-
-static void multipath_unplug(struct request_queue *q)
-{
-	unplug_slaves(q->queuedata);
-}
-
-
 static int multipath_make_request(mddev_t *mddev, struct bio * bio)
 {
 	multipath_conf_t *conf = mddev->private;
@@ -345,7 +315,7 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
 			p->rdev = rdev;
 			goto abort;
 		}
-		md_integrity_register(mddev);
+		err = md_integrity_register(mddev);
 	}
 abort:
 
@@ -517,10 +487,12 @@ static int multipath_run (mddev_t *mddev)
 	 */
 	md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
 
-	mddev->queue->unplug_fn = multipath_unplug;
 	mddev->queue->backing_dev_info.congested_fn = multipath_congested;
 	mddev->queue->backing_dev_info.congested_data = mddev;
-	md_integrity_register(mddev);
+
+	if (md_integrity_register(mddev))
+		goto out_free_conf;
+
 	return 0;
 
 out_free_conf:
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c0ac457f1218..e86bf3682e1e 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -25,21 +25,6 @@
 #include "raid0.h"
 #include "raid5.h"
 
-static void raid0_unplug(struct request_queue *q)
-{
-	mddev_t *mddev = q->queuedata;
-	raid0_conf_t *conf = mddev->private;
-	mdk_rdev_t **devlist = conf->devlist;
-	int raid_disks = conf->strip_zone[0].nb_dev;
-	int i;
-
-	for (i=0; i < raid_disks; i++) {
-		struct request_queue *r_queue = bdev_get_queue(devlist[i]->bdev);
-
-		blk_unplug(r_queue);
-	}
-}
-
 static int raid0_congested(void *data, int bits)
 {
 	mddev_t *mddev = data;
@@ -272,7 +257,6 @@ static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf)
 		       mdname(mddev),
 		       (unsigned long long)smallest->sectors);
 	}
-	mddev->queue->unplug_fn = raid0_unplug;
 	mddev->queue->backing_dev_info.congested_fn = raid0_congested;
 	mddev->queue->backing_dev_info.congested_data = mddev;
 
@@ -395,8 +379,7 @@ static int raid0_run(mddev_t *mddev)
 
 	blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
 	dump_zones(mddev);
-	md_integrity_register(mddev);
-	return 0;
+	return md_integrity_register(mddev);
 }
 
 static int raid0_stop(mddev_t *mddev)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 06cd712807d0..c2a21ae56d97 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -52,23 +52,16 @@
 #define	NR_RAID1_BIOS 256
 
 
-static void unplug_slaves(mddev_t *mddev);
-
 static void allow_barrier(conf_t *conf);
 static void lower_barrier(conf_t *conf);
 
 static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
 	struct pool_info *pi = data;
-	r1bio_t *r1_bio;
 	int size = offsetof(r1bio_t, bios[pi->raid_disks]);
 
 	/* allocate a r1bio with room for raid_disks entries in the bios array */
-	r1_bio = kzalloc(size, gfp_flags);
-	if (!r1_bio && pi->mddev)
-		unplug_slaves(pi->mddev);
-
-	return r1_bio;
+	return kzalloc(size, gfp_flags);
 }
 
 static void r1bio_pool_free(void *r1_bio, void *data)
@@ -91,10 +84,8 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 	int i, j;
 
 	r1_bio = r1bio_pool_alloc(gfp_flags, pi);
-	if (!r1_bio) {
-		unplug_slaves(pi->mddev);
+	if (!r1_bio)
 		return NULL;
-	}
 
 	/*
 	 * Allocate bios : 1 for reading, n-1 for writing
@@ -520,37 +511,6 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 	return new_disk;
 }
 
-static void unplug_slaves(mddev_t *mddev)
-{
-	conf_t *conf = mddev->private;
-	int i;
-
-	rcu_read_lock();
-	for (i=0; i<mddev->raid_disks; i++) {
-		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
-		if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
-			struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
-
-			atomic_inc(&rdev->nr_pending);
-			rcu_read_unlock();
-
-			blk_unplug(r_queue);
-
-			rdev_dec_pending(rdev, mddev);
-			rcu_read_lock();
-		}
-	}
-	rcu_read_unlock();
-}
-
-static void raid1_unplug(struct request_queue *q)
-{
-	mddev_t *mddev = q->queuedata;
-
-	unplug_slaves(mddev);
-	md_wakeup_thread(mddev->thread);
-}
-
 static int raid1_congested(void *data, int bits)
 {
 	mddev_t *mddev = data;
@@ -580,23 +540,16 @@ static int raid1_congested(void *data, int bits)
 }
 
 
-static int flush_pending_writes(conf_t *conf)
+static void flush_pending_writes(conf_t *conf)
 {
 	/* Any writes that have been queued but are awaiting
 	 * bitmap updates get flushed here.
-	 * We return 1 if any requests were actually submitted.
 	 */
-	int rv = 0;
-
 	spin_lock_irq(&conf->device_lock);
 
 	if (conf->pending_bio_list.head) {
 		struct bio *bio;
 		bio = bio_list_get(&conf->pending_bio_list);
-		/* Only take the spinlock to quiet a warning */
-		spin_lock(conf->mddev->queue->queue_lock);
-		blk_remove_plug(conf->mddev->queue);
-		spin_unlock(conf->mddev->queue->queue_lock);
 		spin_unlock_irq(&conf->device_lock);
 		/* flush any pending bitmap writes to
 		 * disk before proceeding w/ I/O */
@@ -608,10 +561,14 @@ static int flush_pending_writes(conf_t *conf)
 			generic_make_request(bio);
 			bio = next;
 		}
-		rv = 1;
 	} else
 		spin_unlock_irq(&conf->device_lock);
-	return rv;
+}
+
+static void md_kick_device(mddev_t *mddev)
+{
+	blk_flush_plug(current);
+	md_wakeup_thread(mddev->thread);
 }
 
 /* Barriers....
@@ -643,8 +600,7 @@ static void raise_barrier(conf_t *conf)
 
 	/* Wait until no block IO is waiting */
 	wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
-			    conf->resync_lock,
-			    raid1_unplug(conf->mddev->queue));
+			    conf->resync_lock, md_kick_device(conf->mddev));
 
 	/* block any new IO from starting */
 	conf->barrier++;
@@ -652,8 +608,7 @@ static void raise_barrier(conf_t *conf)
 	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock,
-			    raid1_unplug(conf->mddev->queue));
+			    conf->resync_lock, md_kick_device(conf->mddev));
 
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -675,7 +630,7 @@ static void wait_barrier(conf_t *conf)
 		conf->nr_waiting++;
 		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
 				    conf->resync_lock,
-				    raid1_unplug(conf->mddev->queue));
+				    md_kick_device(conf->mddev));
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -712,7 +667,7 @@ static void freeze_array(conf_t *conf)
 			    conf->nr_pending == conf->nr_queued+1,
 			    conf->resync_lock,
 			    ({ flush_pending_writes(conf);
-			       raid1_unplug(conf->mddev->queue); }));
+			       md_kick_device(conf->mddev); }));
 	spin_unlock_irq(&conf->resync_lock);
 }
 static void unfreeze_array(conf_t *conf)
@@ -962,7 +917,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 		atomic_inc(&r1_bio->remaining);
 		spin_lock_irqsave(&conf->device_lock, flags);
 		bio_list_add(&conf->pending_bio_list, mbio);
-		blk_plug_device_unlocked(mddev->queue);
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 	r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
@@ -971,7 +925,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	/* In case raid1d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
 
-	if (do_sync)
+	if (do_sync || !bitmap)
 		md_wakeup_thread(mddev->thread);
 
 	return 0;
@@ -1178,7 +1132,7 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
 			p->rdev = rdev;
 			goto abort;
 		}
-		md_integrity_register(mddev);
+		err = md_integrity_register(mddev);
 	}
 abort:
 
@@ -1561,7 +1515,6 @@ static void raid1d(mddev_t *mddev)
 	unsigned long flags;
 	conf_t *conf = mddev->private;
 	struct list_head *head = &conf->retry_list;
-	int unplug=0;
 	mdk_rdev_t *rdev;
 
 	md_check_recovery(mddev);
@@ -1569,7 +1522,7 @@ static void raid1d(mddev_t *mddev)
 	for (;;) {
 		char b[BDEVNAME_SIZE];
 
-		unplug += flush_pending_writes(conf);
+		flush_pending_writes(conf);
 
 		spin_lock_irqsave(&conf->device_lock, flags);
 		if (list_empty(head)) {
@@ -1583,10 +1536,9 @@ static void raid1d(mddev_t *mddev)
 
 		mddev = r1_bio->mddev;
 		conf = mddev->private;
-		if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
+		if (test_bit(R1BIO_IsSync, &r1_bio->state))
 			sync_request_write(mddev, r1_bio);
-			unplug = 1;
-		} else {
+		else {
 			int disk;
 
 			/* we got a read error. Maybe the drive is bad.  Maybe just
@@ -1636,14 +1588,11 @@ static void raid1d(mddev_t *mddev)
 				bio->bi_end_io = raid1_end_read_request;
 				bio->bi_rw = READ | do_sync;
 				bio->bi_private = r1_bio;
-				unplug = 1;
 				generic_make_request(bio);
 			}
 		}
 		cond_resched();
 	}
-	if (unplug)
-		unplug_slaves(mddev);
 }
 
 
@@ -2066,11 +2015,9 @@ static int run(mddev_t *mddev)
 
 	md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
 
-	mddev->queue->unplug_fn = raid1_unplug;
 	mddev->queue->backing_dev_info.congested_fn = raid1_congested;
 	mddev->queue->backing_dev_info.congested_data = mddev;
-	md_integrity_register(mddev);
-	return 0;
+	return md_integrity_register(mddev);
 }
 
 static int stop(mddev_t *mddev)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 747d061d8e05..f7b62370b374 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -57,23 +57,16 @@
  */
 #define	NR_RAID10_BIOS 256
 
-static void unplug_slaves(mddev_t *mddev);
-
 static void allow_barrier(conf_t *conf);
 static void lower_barrier(conf_t *conf);
 
 static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
 	conf_t *conf = data;
-	r10bio_t *r10_bio;
 	int size = offsetof(struct r10bio_s, devs[conf->copies]);
 
 	/* allocate a r10bio with room for raid_disks entries in the bios array */
-	r10_bio = kzalloc(size, gfp_flags);
-	if (!r10_bio && conf->mddev)
-		unplug_slaves(conf->mddev);
-
-	return r10_bio;
+	return kzalloc(size, gfp_flags);
 }
 
 static void r10bio_pool_free(void *r10_bio, void *data)
@@ -106,10 +99,8 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
 	int nalloc;
 
 	r10_bio = r10bio_pool_alloc(gfp_flags, conf);
-	if (!r10_bio) {
-		unplug_slaves(conf->mddev);
+	if (!r10_bio)
 		return NULL;
-	}
 
 	if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
 		nalloc = conf->copies; /* resync */
@@ -597,37 +588,6 @@ rb_out:
 	return disk;
 }
 
-static void unplug_slaves(mddev_t *mddev)
-{
-	conf_t *conf = mddev->private;
-	int i;
-
-	rcu_read_lock();
-	for (i=0; i < conf->raid_disks; i++) {
-		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
-		if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
-			struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
-
-			atomic_inc(&rdev->nr_pending);
-			rcu_read_unlock();
-
-			blk_unplug(r_queue);
-
-			rdev_dec_pending(rdev, mddev);
-			rcu_read_lock();
-		}
-	}
-	rcu_read_unlock();
-}
-
-static void raid10_unplug(struct request_queue *q)
-{
-	mddev_t *mddev = q->queuedata;
-
-	unplug_slaves(q->queuedata);
-	md_wakeup_thread(mddev->thread);
-}
-
 static int raid10_congested(void *data, int bits)
 {
 	mddev_t *mddev = data;
@@ -649,23 +609,16 @@ static int raid10_congested(void *data, int bits)
 	return ret;
 }
 
-static int flush_pending_writes(conf_t *conf)
+static void flush_pending_writes(conf_t *conf)
 {
 	/* Any writes that have been queued but are awaiting
 	 * bitmap updates get flushed here.
-	 * We return 1 if any requests were actually submitted.
 	 */
-	int rv = 0;
-
 	spin_lock_irq(&conf->device_lock);
 
 	if (conf->pending_bio_list.head) {
 		struct bio *bio;
 		bio = bio_list_get(&conf->pending_bio_list);
-		/* Spinlock only taken to quiet a warning */
-		spin_lock(conf->mddev->queue->queue_lock);
-		blk_remove_plug(conf->mddev->queue);
-		spin_unlock(conf->mddev->queue->queue_lock);
 		spin_unlock_irq(&conf->device_lock);
 		/* flush any pending bitmap writes to disk
 		 * before proceeding w/ I/O */
@@ -677,11 +630,16 @@ static int flush_pending_writes(conf_t *conf)
 			generic_make_request(bio);
 			bio = next;
 		}
-		rv = 1;
 	} else
 		spin_unlock_irq(&conf->device_lock);
-	return rv;
 }
+
+static void md_kick_device(mddev_t *mddev)
+{
+	blk_flush_plug(current);
+	md_wakeup_thread(mddev->thread);
+}
+
 /* Barriers....
  * Sometimes we need to suspend IO while we do something else,
  * either some resync/recovery, or reconfigure the array.
@@ -711,8 +669,7 @@ static void raise_barrier(conf_t *conf, int force)
 
 	/* Wait until no block IO is waiting (unless 'force') */
 	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-			    conf->resync_lock,
-			    raid10_unplug(conf->mddev->queue));
+			    conf->resync_lock, md_kick_device(conf->mddev));
 
 	/* block any new IO from starting */
 	conf->barrier++;
@@ -720,8 +677,7 @@ static void raise_barrier(conf_t *conf, int force)
 	/* No wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock,
-			    raid10_unplug(conf->mddev->queue));
+			    conf->resync_lock, md_kick_device(conf->mddev));
 
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -742,7 +698,7 @@ static void wait_barrier(conf_t *conf)
 		conf->nr_waiting++;
 		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
 				    conf->resync_lock,
-				    raid10_unplug(conf->mddev->queue));
+				    md_kick_device(conf->mddev));
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -779,7 +735,7 @@ static void freeze_array(conf_t *conf)
 			    conf->nr_pending == conf->nr_queued+1,
 			    conf->resync_lock,
 			    ({ flush_pending_writes(conf);
-			       raid10_unplug(conf->mddev->queue); }));
+			       md_kick_device(conf->mddev); }));
 	spin_unlock_irq(&conf->resync_lock);
 }
 
@@ -974,7 +930,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 		atomic_inc(&r10_bio->remaining);
 		spin_lock_irqsave(&conf->device_lock, flags);
 		bio_list_add(&conf->pending_bio_list, mbio);
-		blk_plug_device_unlocked(mddev->queue);
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 
@@ -991,7 +946,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	/* In case raid10d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
 
-	if (do_sync)
+	if (do_sync || !mddev->bitmap)
 		md_wakeup_thread(mddev->thread);
 
 	return 0;
@@ -1233,7 +1188,7 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
 			p->rdev = rdev;
 			goto abort;
 		}
-		md_integrity_register(mddev);
+		err = md_integrity_register(mddev);
 	}
 abort:
 
@@ -1684,7 +1639,6 @@ static void raid10d(mddev_t *mddev)
 	unsigned long flags;
 	conf_t *conf = mddev->private;
 	struct list_head *head = &conf->retry_list;
-	int unplug=0;
 	mdk_rdev_t *rdev;
 
 	md_check_recovery(mddev);
@@ -1692,7 +1646,7 @@ static void raid10d(mddev_t *mddev)
 	for (;;) {
 		char b[BDEVNAME_SIZE];
 
-		unplug += flush_pending_writes(conf);
+		flush_pending_writes(conf);
 
 		spin_lock_irqsave(&conf->device_lock, flags);
 		if (list_empty(head)) {
@@ -1706,13 +1660,11 @@ static void raid10d(mddev_t *mddev)
 
 		mddev = r10_bio->mddev;
 		conf = mddev->private;
-		if (test_bit(R10BIO_IsSync, &r10_bio->state)) {
+		if (test_bit(R10BIO_IsSync, &r10_bio->state))
 			sync_request_write(mddev, r10_bio);
-			unplug = 1;
-		} else 	if (test_bit(R10BIO_IsRecover, &r10_bio->state)) {
+		else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
 			recovery_request_write(mddev, r10_bio);
-			unplug = 1;
-		} else {
+		else {
 			int mirror;
 			/* we got a read error. Maybe the drive is bad.  Maybe just
 			 * the block and we can fix it.
@@ -1759,14 +1711,11 @@ static void raid10d(mddev_t *mddev)
 				bio->bi_rw = READ | do_sync;
 				bio->bi_private = r10_bio;
 				bio->bi_end_io = raid10_end_read_request;
-				unplug = 1;
 				generic_make_request(bio);
 			}
 		}
 		cond_resched();
 	}
-	if (unplug)
-		unplug_slaves(mddev);
 }
 
 
@@ -2377,7 +2326,6 @@ static int run(mddev_t *mddev)
 	md_set_array_sectors(mddev, size);
 	mddev->resync_max_sectors = size;
 
-	mddev->queue->unplug_fn = raid10_unplug;
 	mddev->queue->backing_dev_info.congested_fn = raid10_congested;
 	mddev->queue->backing_dev_info.congested_data = mddev;
 
@@ -2395,7 +2343,10 @@ static int run(mddev_t *mddev)
 
 	if (conf->near_copies < conf->raid_disks)
 		blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
-	md_integrity_register(mddev);
+
+	if (md_integrity_register(mddev))
+		goto out_free_conf;
+
 	return 0;
 
 out_free_conf:
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 78536fdbd87f..e867ee42b152 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -433,8 +433,6 @@ static int has_failed(raid5_conf_t *conf)
 	return 0;
 }
 
-static void unplug_slaves(mddev_t *mddev);
-
 static struct stripe_head *
 get_active_stripe(raid5_conf_t *conf, sector_t sector,
 		  int previous, int noblock, int noquiesce)
@@ -463,8 +461,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
 						     < (conf->max_nr_stripes *3/4)
 						     || !conf->inactive_blocked),
 						    conf->device_lock,
-						    md_raid5_unplug_device(conf)
-					);
+						    md_raid5_kick_device(conf));
 				conf->inactive_blocked = 0;
 			} else
 				init_stripe(sh, sector, previous);
@@ -1473,8 +1470,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    !list_empty(&conf->inactive_list),
 				    conf->device_lock,
-				    unplug_slaves(conf->mddev)
-			);
+				    blk_flush_plug(current));
 		osh = get_free_stripe(conf);
 		spin_unlock_irq(&conf->device_lock);
 		atomic_set(&nsh->count, 1);
@@ -3645,58 +3641,19 @@ static void activate_bit_delay(raid5_conf_t *conf)
 	}
 }
 
-static void unplug_slaves(mddev_t *mddev)
+void md_raid5_kick_device(raid5_conf_t *conf)
 {
-	raid5_conf_t *conf = mddev->private;
-	int i;
-	int devs = max(conf->raid_disks, conf->previous_raid_disks);
-
-	rcu_read_lock();
-	for (i = 0; i < devs; i++) {
-		mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
-		if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
-			struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
-
-			atomic_inc(&rdev->nr_pending);
-			rcu_read_unlock();
-
-			blk_unplug(r_queue);
-
-			rdev_dec_pending(rdev, mddev);
-			rcu_read_lock();
-		}
-	}
-	rcu_read_unlock();
-}
-
-void md_raid5_unplug_device(raid5_conf_t *conf)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&conf->device_lock, flags);
-
-	if (plugger_remove_plug(&conf->plug)) {
-		conf->seq_flush++;
-		raid5_activate_delayed(conf);
-	}
+	blk_flush_plug(current);
+	raid5_activate_delayed(conf);
 	md_wakeup_thread(conf->mddev->thread);
-
-	spin_unlock_irqrestore(&conf->device_lock, flags);
-
-	unplug_slaves(conf->mddev);
 }
-EXPORT_SYMBOL_GPL(md_raid5_unplug_device);
+EXPORT_SYMBOL_GPL(md_raid5_kick_device);
 
 static void raid5_unplug(struct plug_handle *plug)
 {
 	raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug);
-	md_raid5_unplug_device(conf);
-}
 
-static void raid5_unplug_queue(struct request_queue *q)
-{
-	mddev_t *mddev = q->queuedata;
-	md_raid5_unplug_device(mddev->private);
+	md_raid5_kick_device(conf);
 }
 
 int md_raid5_congested(mddev_t *mddev, int bits)
@@ -4100,7 +4057,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
 				 * add failed due to overlap.  Flush everything
 				 * and wait a while
 				 */
-				md_raid5_unplug_device(conf);
+				md_raid5_kick_device(conf);
 				release_stripe(sh);
 				schedule();
 				goto retry;
@@ -4365,7 +4322,6 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 
 	if (sector_nr >= max_sector) {
 		/* just being told to finish up .. nothing much to do */
-		unplug_slaves(mddev);
 
 		if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
 			end_reshape(conf);
@@ -4569,7 +4525,6 @@ static void raid5d(mddev_t *mddev)
 	spin_unlock_irq(&conf->device_lock);
 
 	async_tx_issue_pending_all();
-	unplug_slaves(mddev);
 
 	pr_debug("--- raid5d inactive\n");
 }
@@ -5204,7 +5159,7 @@ static int run(mddev_t *mddev)
 
 		mddev->queue->backing_dev_info.congested_data = mddev;
 		mddev->queue->backing_dev_info.congested_fn = raid5_congested;
-		mddev->queue->unplug_fn = raid5_unplug_queue;
+		mddev->queue->queue_lock = &conf->device_lock;
 
 		chunk_size = mddev->chunk_sectors << 9;
 		blk_queue_io_min(mddev->queue, chunk_size);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 2ace0582b409..8d563a4f022a 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -503,6 +503,6 @@ static inline int algorithm_is_DDF(int layout)
 }
 
 extern int md_raid5_congested(mddev_t *mddev, int bits);
-extern void md_raid5_unplug_device(raid5_conf_t *conf);
+extern void md_raid5_kick_device(raid5_conf_t *conf);
 extern int raid5_set_cache_size(mddev_t *mddev, int size);
 #endif
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index ae7cad185898..47ec5bc0ed21 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -695,20 +695,22 @@ static int i2o_block_ioctl(struct block_device *bdev, fmode_t mode,
 };
 
 /**
- *	i2o_block_media_changed - Have we seen a media change?
+ *	i2o_block_check_events - Have we seen a media change?
  *	@disk: gendisk which should be verified
+ *	@clearing: events being cleared
  *
  *	Verifies if the media has changed.
  *
  *	Returns 1 if the media was changed or 0 otherwise.
  */
-static int i2o_block_media_changed(struct gendisk *disk)
+static unsigned int i2o_block_check_events(struct gendisk *disk,
+					   unsigned int clearing)
 {
 	struct i2o_block_device *p = disk->private_data;
 
 	if (p->media_change_flag) {
 		p->media_change_flag = 0;
-		return 1;
+		return DISK_EVENT_MEDIA_CHANGE;
 	}
 	return 0;
 }
@@ -895,11 +897,7 @@ static void i2o_block_request_fn(struct request_queue *q)
 {
 	struct request *req;
 
-	while (!blk_queue_plugged(q)) {
-		req = blk_peek_request(q);
-		if (!req)
-			break;
-
+	while ((req = blk_peek_request(q)) != NULL) {
 		if (req->cmd_type == REQ_TYPE_FS) {
 			struct i2o_block_delayed_request *dreq;
 			struct i2o_block_request *ireq = req->special;
@@ -950,7 +948,7 @@ static const struct block_device_operations i2o_block_fops = {
 	.ioctl = i2o_block_ioctl,
 	.compat_ioctl = i2o_block_ioctl,
 	.getgeo = i2o_block_getgeo,
-	.media_changed = i2o_block_media_changed
+	.check_events = i2o_block_check_events,
 };
 
 /**
@@ -1002,6 +1000,7 @@ static struct i2o_block_device *i2o_block_device_alloc(void)
 	gd->major = I2O_MAJOR;
 	gd->queue = queue;
 	gd->fops = &i2o_block_fops;
+	gd->events = DISK_EVENT_MEDIA_CHANGE;
 	gd->private_data = dev;
 
 	dev->gd = gd;
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 4e42d030e097..2ae727568df9 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -55,8 +55,7 @@ static int mmc_queue_thread(void *d)
 
 		spin_lock_irq(q->queue_lock);
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (!blk_queue_plugged(q))
-			req = blk_fetch_request(q);
+		req = blk_fetch_request(q);
 		mq->req = req;
 		spin_unlock_irq(q->queue_lock);
 
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 794bfd962266..4d2df2f76ea0 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1917,7 +1917,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 		return;
 	}
 	/* Now we try to fetch requests from the request queue */
-	while (!blk_queue_plugged(queue) && (req = blk_peek_request(queue))) {
+	while ((req = blk_peek_request(queue))) {
 		if (basedev->features & DASD_FEATURE_READONLY &&
 		    rq_data_dir(req) == WRITE) {
 			DBF_DEV_EVENT(DBF_ERR, basedev,
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 55d2d0f4eabc..83cea9a55e2f 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -48,14 +48,14 @@
 static DEFINE_MUTEX(tape_block_mutex);
 static int tapeblock_open(struct block_device *, fmode_t);
 static int tapeblock_release(struct gendisk *, fmode_t);
-static int tapeblock_medium_changed(struct gendisk *);
+static unsigned int tapeblock_check_events(struct gendisk *, unsigned int);
 static int tapeblock_revalidate_disk(struct gendisk *);
 
 static const struct block_device_operations tapeblock_fops = {
 	.owner		 = THIS_MODULE,
 	.open		 = tapeblock_open,
 	.release	 = tapeblock_release,
-	.media_changed   = tapeblock_medium_changed,
+	.check_events	 = tapeblock_check_events,
 	.revalidate_disk = tapeblock_revalidate_disk,
 };
 
@@ -161,7 +161,6 @@ tapeblock_requeue(struct work_struct *work) {
 
 	spin_lock_irq(&device->blk_data.request_queue_lock);
 	while (
-		!blk_queue_plugged(queue) &&
 		blk_peek_request(queue) &&
 		nr_queued < TAPEBLOCK_MIN_REQUEUE
 	) {
@@ -237,6 +236,7 @@ tapeblock_setup_device(struct tape_device * device)
 	disk->major = tapeblock_major;
 	disk->first_minor = device->first_minor;
 	disk->fops = &tapeblock_fops;
+	disk->events = DISK_EVENT_MEDIA_CHANGE;
 	disk->private_data = tape_get_device(device);
 	disk->queue = blkdat->request_queue;
 	set_capacity(disk, 0);
@@ -340,8 +340,8 @@ tapeblock_revalidate_disk(struct gendisk *disk)
 	return 0;
 }
 
-static int
-tapeblock_medium_changed(struct gendisk *disk)
+static unsigned int
+tapeblock_check_events(struct gendisk *disk, unsigned int clearing)
 {
 	struct tape_device *device;
 
@@ -349,7 +349,7 @@ tapeblock_medium_changed(struct gendisk *disk)
 	DBF_LH(6, "tapeblock_medium_changed(%p) = %d\n",
 		device, device->blk_data.medium_changed);
 
-	return device->blk_data.medium_changed;
+	return device->blk_data.medium_changed ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
 /*
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 2d63c8ad1442..6d5c7ff43f5b 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -67,6 +67,13 @@ static struct scsi_host_sg_pool scsi_sg_pools[] = {
 
 struct kmem_cache *scsi_sdb_cache;
 
+/*
+ * When to reinvoke queueing after a resource shortage. It's 3 msecs to
+ * not change behaviour from the previous unplug mechanism, experimentation
+ * may prove this needs changing.
+ */
+#define SCSI_QUEUE_DELAY	3
+
 static void scsi_run_queue(struct request_queue *q);
 
 /*
@@ -149,14 +156,7 @@ static int __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
 	/*
 	 * Requeue this command.  It will go before all other commands
 	 * that are already in the queue.
-	 *
-	 * NOTE: there is magic here about the way the queue is plugged if
-	 * we have no outstanding commands.
-	 * 
-	 * Although we *don't* plug the queue, we call the request
-	 * function.  The SCSI request function detects the blocked condition
-	 * and plugs the queue appropriately.
-         */
+	 */
 	spin_lock_irqsave(q->queue_lock, flags);
 	blk_requeue_request(q, cmd->request);
 	spin_unlock_irqrestore(q->queue_lock, flags);
@@ -1226,11 +1226,11 @@ int scsi_prep_return(struct request_queue *q, struct request *req, int ret)
 	case BLKPREP_DEFER:
 		/*
 		 * If we defer, the blk_peek_request() returns NULL, but the
-		 * queue must be restarted, so we plug here if no returning
-		 * command will automatically do that.
+		 * queue must be restarted, so we schedule a callback to happen
+		 * shortly.
 		 */
 		if (sdev->device_busy == 0)
-			blk_plug_device(q);
+			blk_delay_queue(q, SCSI_QUEUE_DELAY);
 		break;
 	default:
 		req->cmd_flags |= REQ_DONTPREP;
@@ -1269,7 +1269,7 @@ static inline int scsi_dev_queue_ready(struct request_queue *q,
 				   sdev_printk(KERN_INFO, sdev,
 				   "unblocking device at zero depth\n"));
 		} else {
-			blk_plug_device(q);
+			blk_delay_queue(q, SCSI_QUEUE_DELAY);
 			return 0;
 		}
 	}
@@ -1499,7 +1499,7 @@ static void scsi_request_fn(struct request_queue *q)
 	 * the host is no longer able to accept any more requests.
 	 */
 	shost = sdev->host;
-	while (!blk_queue_plugged(q)) {
+	for (;;) {
 		int rtn;
 		/*
 		 * get next queueable request.  We do this early to make sure
@@ -1578,15 +1578,8 @@ static void scsi_request_fn(struct request_queue *q)
 		 */
 		rtn = scsi_dispatch_cmd(cmd);
 		spin_lock_irq(q->queue_lock);
-		if(rtn) {
-			/* we're refusing the command; because of
-			 * the way locks get dropped, we need to 
-			 * check here if plugging is required */
-			if(sdev->device_busy == 0)
-				blk_plug_device(q);
-
-			break;
-		}
+		if (rtn)
+			goto out_delay;
 	}
 
 	goto out;
@@ -1605,9 +1598,10 @@ static void scsi_request_fn(struct request_queue *q)
 	spin_lock_irq(q->queue_lock);
 	blk_requeue_request(q, req);
 	sdev->device_busy--;
-	if(sdev->device_busy == 0)
-		blk_plug_device(q);
- out:
+out_delay:
+	if (sdev->device_busy == 0)
+		blk_delay_queue(q, SCSI_QUEUE_DELAY);
+out:
 	/* must be careful here...if we trigger the ->remove() function
 	 * we cannot be holding the q lock */
 	spin_unlock_irq(q->queue_lock);
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 5c3ccfc6b622..2941d2d92c94 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3913,7 +3913,7 @@ fc_bsg_request_handler(struct request_queue *q, struct Scsi_Host *shost,
 	if (!get_device(dev))
 		return;
 
-	while (!blk_queue_plugged(q)) {
+	while (1) {
 		if (rport && (rport->port_state == FC_PORTSTATE_BLOCKED) &&
 		    !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT))
 			break;
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 927e99cb7225..c6fcf76cade5 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -173,11 +173,7 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
 	int ret;
 	int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *);
 
-	while (!blk_queue_plugged(q)) {
-		req = blk_fetch_request(q);
-		if (!req)
-			break;
-
+	while ((req = blk_fetch_request(q)) != NULL) {
 		spin_unlock_irq(q->queue_lock);
 
 		handler = to_sas_internal(shost->transportt)->f->smp_handler;
diff --git a/drivers/staging/hv/blkvsc_drv.c b/drivers/staging/hv/blkvsc_drv.c
index 6e02f1b0c46f..af789937be4e 100644
--- a/drivers/staging/hv/blkvsc_drv.c
+++ b/drivers/staging/hv/blkvsc_drv.c
@@ -124,7 +124,8 @@ static void blkvsc_shutdown(struct device *device);
 
 static int blkvsc_open(struct block_device *bdev,  fmode_t mode);
 static int blkvsc_release(struct gendisk *disk, fmode_t mode);
-static int blkvsc_media_changed(struct gendisk *gd);
+static unsigned int blkvsc_check_events(struct gendisk *gd,
+					unsigned int clearing);
 static int blkvsc_revalidate_disk(struct gendisk *gd);
 static int blkvsc_getgeo(struct block_device *bd, struct hd_geometry *hg);
 static int blkvsc_ioctl(struct block_device *bd, fmode_t mode,
@@ -155,7 +156,7 @@ static const struct block_device_operations block_ops = {
 	.owner = THIS_MODULE,
 	.open = blkvsc_open,
 	.release = blkvsc_release,
-	.media_changed = blkvsc_media_changed,
+	.check_events = blkvsc_check_events,
 	.revalidate_disk = blkvsc_revalidate_disk,
 	.getgeo = blkvsc_getgeo,
 	.ioctl  = blkvsc_ioctl,
@@ -357,6 +358,7 @@ static int blkvsc_probe(struct device *device)
 	else
 		blkdev->gd->first_minor = 0;
 	blkdev->gd->fops = &block_ops;
+	blkdev->gd->events = DISK_EVENT_MEDIA_CHANGE;
 	blkdev->gd->private_data = blkdev;
 	blkdev->gd->driverfs_dev = &(blkdev->device_ctx->device);
 	sprintf(blkdev->gd->disk_name, "hd%c", 'a' + devnum);
@@ -1337,10 +1339,11 @@ static int blkvsc_release(struct gendisk *disk, fmode_t mode)
 	return 0;
 }
 
-static int blkvsc_media_changed(struct gendisk *gd)
+static unsigned int blkvsc_check_events(struct gendisk *gd,
+					unsigned int clearing)
 {
 	DPRINT_DBG(BLKVSC_DRV, "- enter\n");
-	return 1;
+	return DISK_EVENT_MEDIA_CHANGE;
 }
 
 static int blkvsc_revalidate_disk(struct gendisk *gd)
diff --git a/drivers/staging/westbridge/astoria/block/cyasblkdev_block.c b/drivers/staging/westbridge/astoria/block/cyasblkdev_block.c
index e1851f00be56..842cd9214a5e 100644
--- a/drivers/staging/westbridge/astoria/block/cyasblkdev_block.c
+++ b/drivers/staging/westbridge/astoria/block/cyasblkdev_block.c
@@ -381,10 +381,10 @@ static int cyasblkdev_blk_ioctl(
 	return -ENOTTY;
 }
 
-/* Media_changed block_device opp
+/* check_events block_device opp
  * this one is called by kernel to confirm if the media really changed
  * as we indicated by issuing check_disk_change() call */
-int cyasblkdev_media_changed(struct gendisk *gd)
+unsigned int cyasblkdev_check_events(struct gendisk *gd, unsigned int clearing)
 {
 	struct cyasblkdev_blk_data *bd;
 
@@ -402,7 +402,7 @@ int cyasblkdev_media_changed(struct gendisk *gd)
 		#endif
 	}
 
-	/* return media change state "1" yes, 0 no */
+	/* return media change state - DISK_EVENT_MEDIA_CHANGE yes, 0 no */
 	return 0;
 }
 
@@ -432,7 +432,7 @@ static struct block_device_operations cyasblkdev_bdops = {
 	.ioctl			= cyasblkdev_blk_ioctl,
 	/* .getgeo		= cyasblkdev_blk_getgeo, */
 	/* added to support media removal( real and simulated) media */
-	.media_changed  = cyasblkdev_media_changed,
+	.check_events		= cyasblkdev_check_events,
 	/* added to support media removal( real and simulated) media */
 	.revalidate_disk = cyasblkdev_revalidate_disk,
 	.owner			= THIS_MODULE,
@@ -1090,6 +1090,7 @@ static int cyasblkdev_add_disks(int bus_num,
 		bd->user_disk_0->first_minor = devidx << CYASBLKDEV_SHIFT;
 		bd->user_disk_0->minors = 8;
 		bd->user_disk_0->fops = &cyasblkdev_bdops;
+		bd->user_disk_0->events = DISK_EVENT_MEDIA_CHANGE;
 		bd->user_disk_0->private_data = bd;
 		bd->user_disk_0->queue = bd->queue.queue;
 		bd->dbgprn_flags = DBGPRN_RD_RQ;
@@ -1190,6 +1191,7 @@ static int cyasblkdev_add_disks(int bus_num,
 		bd->user_disk_1->first_minor = (devidx + 1) << CYASBLKDEV_SHIFT;
 		bd->user_disk_1->minors = 8;
 		bd->user_disk_1->fops = &cyasblkdev_bdops;
+		bd->user_disk_0->events = DISK_EVENT_MEDIA_CHANGE;
 		bd->user_disk_1->private_data = bd;
 		bd->user_disk_1->queue = bd->queue.queue;
 		bd->dbgprn_flags = DBGPRN_RD_RQ;
@@ -1278,6 +1280,7 @@ static int cyasblkdev_add_disks(int bus_num,
 				(devidx + 2) << CYASBLKDEV_SHIFT;
 			bd->system_disk->minors = 8;
 			bd->system_disk->fops = &cyasblkdev_bdops;
+			bd->system_disk->events = DISK_EVENT_MEDIA_CHANGE;
 			bd->system_disk->private_data = bd;
 			bd->system_disk->queue = bd->queue.queue;
 			/* don't search for vfat
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 3df570db0e4f..eb0afec046e1 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -391,9 +391,8 @@ static int iblock_do_task(struct se_task *task)
 {
 	struct se_device *dev = task->task_se_cmd->se_dev;
 	struct iblock_req *req = IBLOCK_REQ(task);
-	struct iblock_dev *ibd = (struct iblock_dev *)req->ib_dev;
-	struct request_queue *q = bdev_get_queue(ibd->ibd_bd);
 	struct bio *bio = req->ib_bio, *nbio = NULL;
+	struct blk_plug plug;
 	int rw;
 
 	if (task->task_data_direction == DMA_TO_DEVICE) {
@@ -411,6 +410,7 @@ static int iblock_do_task(struct se_task *task)
 		rw = READ;
 	}
 
+	blk_start_plug(&plug);
 	while (bio) {
 		nbio = bio->bi_next;
 		bio->bi_next = NULL;
@@ -420,9 +420,8 @@ static int iblock_do_task(struct se_task *task)
 		submit_bio(rw, bio);
 		bio = nbio;
 	}
+	blk_finish_plug(&plug);
 
-	if (q->unplug_fn)
-		q->unplug_fn(q);
 	return PYX_TRANSPORT_SENT_TO_TRANSPORT;
 }
 
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 81f13958e751..43db715f1502 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -306,7 +306,7 @@ static struct sysrq_key_op sysrq_ftrace_dump_op = {
 
 static void sysrq_handle_showmem(int key)
 {
-	show_mem();
+	show_mem(0);
 }
 static struct sysrq_key_op sysrq_showmem_op = {
 	.handler	= sysrq_handle_showmem,
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index 6dd3c68c13ad..d6b342b5b423 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -600,7 +600,7 @@ static void fn_scroll_back(struct vc_data *vc)
 
 static void fn_show_mem(struct vc_data *vc)
 {
-	show_mem();
+	show_mem(0);
 }
 
 static void fn_show_state(struct vc_data *vc)
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 92444e94f842..d5250c5aae21 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -72,7 +72,6 @@ static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
 static const struct address_space_operations adfs_aops = {
 	.readpage	= adfs_readpage,
 	.writepage	= adfs_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= adfs_write_begin,
 	.write_end	= generic_write_end,
 	.bmap		= _adfs_bmap
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 0a90dcd46de2..acf321b70fcd 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -429,7 +429,6 @@ static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations affs_aops = {
 	.readpage = affs_readpage,
 	.writepage = affs_writepage,
-	.sync_page = block_sync_page,
 	.write_begin = affs_write_begin,
 	.write_end = generic_write_end,
 	.bmap = _affs_bmap
@@ -786,7 +785,6 @@ out:
 const struct address_space_operations affs_aops_ofs = {
 	.readpage = affs_readpage_ofs,
 	//.writepage = affs_writepage_ofs,
-	//.sync_page = affs_sync_page_ofs,
 	.write_begin = affs_write_begin_ofs,
 	.write_end = affs_write_end_ofs
 };
diff --git a/fs/aio.c b/fs/aio.c
index ebb6a22e4e1b..e29ec485af25 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -34,8 +34,6 @@
 #include <linux/security.h>
 #include <linux/eventfd.h>
 #include <linux/blkdev.h>
-#include <linux/mempool.h>
-#include <linux/hash.h>
 #include <linux/compat.h>
 
 #include <asm/kmap_types.h>
@@ -65,14 +63,6 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
 static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
-#define AIO_BATCH_HASH_BITS	3 /* allocated on-stack, so don't go crazy */
-#define AIO_BATCH_HASH_SIZE	(1 << AIO_BATCH_HASH_BITS)
-struct aio_batch_entry {
-	struct hlist_node list;
-	struct address_space *mapping;
-};
-mempool_t *abe_pool;
-
 static void aio_kick_handler(struct work_struct *);
 static void aio_queue_work(struct kioctx *);
 
@@ -86,8 +76,7 @@ static int __init aio_setup(void)
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
 	aio_wq = alloc_workqueue("aio", 0, 1);	/* used to limit concurrency */
-	abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
-	BUG_ON(!aio_wq || !abe_pool);
+	BUG_ON(!aio_wq);
 
 	pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
 
@@ -1525,57 +1514,8 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
 	return 0;
 }
 
-static void aio_batch_add(struct address_space *mapping,
-			  struct hlist_head *batch_hash)
-{
-	struct aio_batch_entry *abe;
-	struct hlist_node *pos;
-	unsigned bucket;
-
-	bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
-	hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
-		if (abe->mapping == mapping)
-			return;
-	}
-
-	abe = mempool_alloc(abe_pool, GFP_KERNEL);
-
-	/*
-	 * we should be using igrab here, but
-	 * we don't want to hammer on the global
-	 * inode spinlock just to take an extra
-	 * reference on a file that we must already
-	 * have a reference to.
-	 *
-	 * When we're called, we always have a reference
-	 * on the file, so we must always have a reference
-	 * on the inode, so ihold() is safe here.
-	 */
-	ihold(mapping->host);
-	abe->mapping = mapping;
-	hlist_add_head(&abe->list, &batch_hash[bucket]);
-	return;
-}
-
-static void aio_batch_free(struct hlist_head *batch_hash)
-{
-	struct aio_batch_entry *abe;
-	struct hlist_node *pos, *n;
-	int i;
-
-	for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
-		hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
-			blk_run_address_space(abe->mapping);
-			iput(abe->mapping->host);
-			hlist_del(&abe->list);
-			mempool_free(abe, abe_pool);
-		}
-	}
-}
-
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-			 struct iocb *iocb, struct hlist_head *batch_hash,
-			 bool compat)
+			 struct iocb *iocb, bool compat)
 {
 	struct kiocb *req;
 	struct file *file;
@@ -1666,11 +1606,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			;
 	}
 	spin_unlock_irq(&ctx->ctx_lock);
-	if (req->ki_opcode == IOCB_CMD_PREAD ||
-	    req->ki_opcode == IOCB_CMD_PREADV ||
-	    req->ki_opcode == IOCB_CMD_PWRITE ||
-	    req->ki_opcode == IOCB_CMD_PWRITEV)
-		aio_batch_add(file->f_mapping, batch_hash);
 
 	aio_put_req(req);	/* drop extra ref to req */
 	return 0;
@@ -1687,7 +1622,7 @@ long do_io_submit(aio_context_t ctx_id, long nr,
 	struct kioctx *ctx;
 	long ret = 0;
 	int i;
-	struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, };
+	struct blk_plug plug;
 
 	if (unlikely(nr < 0))
 		return -EINVAL;
@@ -1704,6 +1639,8 @@ long do_io_submit(aio_context_t ctx_id, long nr,
 		return -EINVAL;
 	}
 
+	blk_start_plug(&plug);
+
 	/*
 	 * AKPM: should this return a partial result if some of the IOs were
 	 * successfully submitted?
@@ -1722,11 +1659,11 @@ long do_io_submit(aio_context_t ctx_id, long nr,
 			break;
 		}
 
-		ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash, compat);
+		ret = io_submit_one(ctx, user_iocb, &tmp, compat);
 		if (ret)
 			break;
 	}
-	aio_batch_free(batch_hash);
+	blk_finish_plug(&plug);
 
 	put_ioctx(ctx);
 	return i ? i : ret;
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index b1d0c794747b..06457ed8f3e7 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -75,7 +75,6 @@ static const struct inode_operations befs_dir_inode_operations = {
 
 static const struct address_space_operations befs_aops = {
 	.readpage	= befs_readpage,
-	.sync_page	= block_sync_page,
 	.bmap		= befs_bmap,
 };
 
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index eb67edd0f8ea..f20e8a71062f 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -186,7 +186,6 @@ static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations bfs_aops = {
 	.readpage	= bfs_readpage,
 	.writepage	= bfs_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= bfs_write_begin,
 	.write_end	= generic_write_end,
 	.bmap		= bfs_bmap,
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index e49cce234c65..9c5e6b2cd11a 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -761,6 +761,9 @@ int bioset_integrity_create(struct bio_set *bs, int pool_size)
 {
 	unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);
 
+	if (bs->bio_integrity_pool)
+		return 0;
+
 	bs->bio_integrity_pool =
 		mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab);
 
diff --git a/fs/bio.c b/fs/bio.c
index 4cf2a52fbc54..4d6d4b6c2bf1 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -43,7 +43,7 @@ static mempool_t *bio_split_pool __read_mostly;
  * unsigned short
  */
 #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
-struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
+static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
 	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
 };
 #undef BV
@@ -1636,9 +1636,6 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 	if (!bs->bio_pool)
 		goto bad;
 
-	if (bioset_integrity_create(bs, pool_size))
-		goto bad;
-
 	if (!biovec_create_pools(bs, pool_size))
 		return bs;
 
@@ -1656,12 +1653,10 @@ static void __init biovec_init_slabs(void)
 		int size;
 		struct biovec_slab *bvs = bvec_slabs + i;
 
-#ifndef CONFIG_BLK_DEV_INTEGRITY
 		if (bvs->nr_vecs <= BIO_INLINE_VECS) {
 			bvs->slab = NULL;
 			continue;
 		}
-#endif
 
 		size = bvs->nr_vecs * sizeof(struct bio_vec);
 		bvs->slab = kmem_cache_create(bvs->name, size, 0,
@@ -1684,6 +1679,9 @@ static int __init init_bio(void)
 	if (!fs_bio_set)
 		panic("bio: can't allocate bios\n");
 
+	if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
+		panic("bio: can't create integrity pool\n");
+
 	bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
 						     sizeof(struct bio_pair));
 	if (!bio_split_pool)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 889287019599..7d02afb2b7f4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1087,6 +1087,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	if (!disk)
 		goto out;
 
+	disk_block_events(disk);
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
@@ -1108,10 +1109,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 					 */
 					disk_put_part(bdev->bd_part);
 					bdev->bd_part = NULL;
-					module_put(disk->fops->owner);
-					put_disk(disk);
 					bdev->bd_disk = NULL;
 					mutex_unlock(&bdev->bd_mutex);
+					disk_unblock_events(disk);
+					module_put(disk->fops->owner);
+					put_disk(disk);
 					goto restart;
 				}
 				if (ret)
@@ -1148,9 +1150,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
 		}
 	} else {
-		module_put(disk->fops->owner);
-		put_disk(disk);
-		disk = NULL;
 		if (bdev->bd_contains == bdev) {
 			if (bdev->bd_disk->fops->open) {
 				ret = bdev->bd_disk->fops->open(bdev, mode);
@@ -1160,11 +1159,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			if (bdev->bd_invalidated)
 				rescan_partitions(bdev->bd_disk, bdev);
 		}
+		/* only one opener holds refs to the module and disk */
+		module_put(disk->fops->owner);
+		put_disk(disk);
 	}
 	bdev->bd_openers++;
 	if (for_part)
 		bdev->bd_part_count++;
 	mutex_unlock(&bdev->bd_mutex);
+	disk_unblock_events(disk);
 	return 0;
 
  out_clear:
@@ -1177,10 +1180,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	bdev->bd_contains = NULL;
  out_unlock_bdev:
 	mutex_unlock(&bdev->bd_mutex);
- out:
-	if (disk)
-		module_put(disk->fops->owner);
+	disk_unblock_events(disk);
+	module_put(disk->fops->owner);
 	put_disk(disk);
+ out:
 	bdput(bdev);
 
 	return ret;
@@ -1446,14 +1449,13 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
 		if (bdev_free) {
 			if (bdev->bd_write_holder) {
 				disk_unblock_events(bdev->bd_disk);
-				bdev->bd_write_holder = false;
-			} else
 				disk_check_events(bdev->bd_disk);
+				bdev->bd_write_holder = false;
+			}
 		}
 
 		mutex_unlock(&bdev->bd_mutex);
-	} else
-		disk_check_events(bdev->bd_disk);
+	}
 
 	return __blkdev_put(bdev, mode, 0);
 }
@@ -1527,7 +1529,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
 static const struct address_space_operations def_blk_aops = {
 	.readpage	= blkdev_readpage,
 	.writepage	= blkdev_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= blkdev_write_begin,
 	.write_end	= blkdev_write_end,
 	.writepages	= generic_writepages,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 100b07f021b4..830d261d0e6b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -847,7 +847,6 @@ static const struct address_space_operations btree_aops = {
 	.writepages	= btree_writepages,
 	.releasepage	= btree_releasepage,
 	.invalidatepage = btree_invalidatepage,
-	.sync_page	= block_sync_page,
 #ifdef CONFIG_MIGRATION
 	.migratepage	= btree_migratepage,
 #endif
@@ -1331,82 +1330,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
 }
 
 /*
- * this unplugs every device on the box, and it is only used when page
- * is null
- */
-static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-	struct btrfs_device *device;
-	struct btrfs_fs_info *info;
-
-	info = (struct btrfs_fs_info *)bdi->unplug_io_data;
-	list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
-		if (!device->bdev)
-			continue;
-
-		bdi = blk_get_backing_dev_info(device->bdev);
-		if (bdi->unplug_io_fn)
-			bdi->unplug_io_fn(bdi, page);
-	}
-}
-
-static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-	struct inode *inode;
-	struct extent_map_tree *em_tree;
-	struct extent_map *em;
-	struct address_space *mapping;
-	u64 offset;
-
-	/* the generic O_DIRECT read code does this */
-	if (1 || !page) {
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-
-	/*
-	 * page->mapping may change at any time.  Get a consistent copy
-	 * and use that for everything below
-	 */
-	smp_mb();
-	mapping = page->mapping;
-	if (!mapping)
-		return;
-
-	inode = mapping->host;
-
-	/*
-	 * don't do the expensive searching for a small number of
-	 * devices
-	 */
-	if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) {
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-
-	offset = page_offset(page);
-
-	em_tree = &BTRFS_I(inode)->extent_tree;
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
-	read_unlock(&em_tree->lock);
-	if (!em) {
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-
-	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-		free_extent_map(em);
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-	offset = offset - em->start;
-	btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree,
-			  em->block_start + offset, page);
-	free_extent_map(em);
-}
-
-/*
  * If this fails, caller must call bdi_destroy() to get rid of the
  * bdi again.
  */
@@ -1420,8 +1343,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 		return err;
 
 	bdi->ra_pages	= default_backing_dev_info.ra_pages;
-	bdi->unplug_io_fn	= btrfs_unplug_io_fn;
-	bdi->unplug_io_data	= info;
 	bdi->congested_fn	= btrfs_congested_fn;
 	bdi->congested_data	= info;
 	return 0;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 714adc4ac4c2..b5b92824a271 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2188,7 +2188,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	unsigned long nr_written = 0;
 
 	if (wbc->sync_mode == WB_SYNC_ALL)
-		write_flags = WRITE_SYNC_PLUG;
+		write_flags = WRITE_SYNC;
 	else
 		write_flags = WRITE;
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 512c3d1da083..119520bdb9a5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7340,7 +7340,6 @@ static const struct address_space_operations btrfs_aops = {
 	.writepage	= btrfs_writepage,
 	.writepages	= btrfs_writepages,
 	.readpages	= btrfs_readpages,
-	.sync_page	= block_sync_page,
 	.direct_IO	= btrfs_direct_IO,
 	.invalidatepage = btrfs_invalidatepage,
 	.releasepage	= btrfs_releasepage,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index dd13eb81ee40..9d554e8e6583 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -162,7 +162,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
 	struct bio *cur;
 	int again = 0;
 	unsigned long num_run;
-	unsigned long num_sync_run;
 	unsigned long batch_run = 0;
 	unsigned long limit;
 	unsigned long last_waited = 0;
@@ -173,11 +172,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
 	limit = btrfs_async_submit_limit(fs_info);
 	limit = limit * 2 / 3;
 
-	/* we want to make sure that every time we switch from the sync
-	 * list to the normal list, we unplug
-	 */
-	num_sync_run = 0;
-
 loop:
 	spin_lock(&device->io_lock);
 
@@ -223,15 +217,6 @@ loop_lock:
 
 	spin_unlock(&device->io_lock);
 
-	/*
-	 * if we're doing the regular priority list, make sure we unplug
-	 * for any high prio bios we've sent down
-	 */
-	if (pending_bios == &device->pending_bios && num_sync_run > 0) {
-		num_sync_run = 0;
-		blk_run_backing_dev(bdi, NULL);
-	}
-
 	while (pending) {
 
 		rmb();
@@ -259,19 +244,11 @@ loop_lock:
 
 		BUG_ON(atomic_read(&cur->bi_cnt) == 0);
 
-		if (cur->bi_rw & REQ_SYNC)
-			num_sync_run++;
-
 		submit_bio(cur->bi_rw, cur);
 		num_run++;
 		batch_run++;
-		if (need_resched()) {
-			if (num_sync_run) {
-				blk_run_backing_dev(bdi, NULL);
-				num_sync_run = 0;
-			}
+		if (need_resched())
 			cond_resched();
-		}
 
 		/*
 		 * we made progress, there is more work to do and the bdi
@@ -304,13 +281,8 @@ loop_lock:
 				 * against it before looping
 				 */
 				last_waited = ioc->last_waited;
-				if (need_resched()) {
-					if (num_sync_run) {
-						blk_run_backing_dev(bdi, NULL);
-						num_sync_run = 0;
-					}
+				if (need_resched())
 					cond_resched();
-				}
 				continue;
 			}
 			spin_lock(&device->io_lock);
@@ -323,22 +295,6 @@ loop_lock:
 		}
 	}
 
-	if (num_sync_run) {
-		num_sync_run = 0;
-		blk_run_backing_dev(bdi, NULL);
-	}
-	/*
-	 * IO has already been through a long path to get here.  Checksumming,
-	 * async helper threads, perhaps compression.  We've done a pretty
-	 * good job of collecting a batch of IO and should just unplug
-	 * the device right away.
-	 *
-	 * This will help anyone who is waiting on the IO, they might have
-	 * already unplugged, but managed to do so before the bio they
-	 * cared about found its way down here.
-	 */
-	blk_run_backing_dev(bdi, NULL);
-
 	cond_resched();
 	if (again)
 		goto loop;
@@ -2955,7 +2911,7 @@ static int find_live_mirror(struct map_lookup *map, int first, int num,
 static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 			     u64 logical, u64 *length,
 			     struct btrfs_multi_bio **multi_ret,
-			     int mirror_num, struct page *unplug_page)
+			     int mirror_num)
 {
 	struct extent_map *em;
 	struct map_lookup *map;
@@ -2987,11 +2943,6 @@ again:
 	em = lookup_extent_mapping(em_tree, logical, *length);
 	read_unlock(&em_tree->lock);
 
-	if (!em && unplug_page) {
-		kfree(multi);
-		return 0;
-	}
-
 	if (!em) {
 		printk(KERN_CRIT "unable to find logical %llu len %llu\n",
 		       (unsigned long long)logical,
@@ -3047,13 +2998,13 @@ again:
 		*length = em->len - offset;
 	}
 
-	if (!multi_ret && !unplug_page)
+	if (!multi_ret)
 		goto out;
 
 	num_stripes = 1;
 	stripe_index = 0;
 	if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-		if (unplug_page || (rw & REQ_WRITE))
+		if (rw & REQ_WRITE)
 			num_stripes = map->num_stripes;
 		else if (mirror_num)
 			stripe_index = mirror_num - 1;
@@ -3075,7 +3026,7 @@ again:
 		stripe_index = do_div(stripe_nr, factor);
 		stripe_index *= map->sub_stripes;
 
-		if (unplug_page || (rw & REQ_WRITE))
+		if (rw & REQ_WRITE)
 			num_stripes = map->sub_stripes;
 		else if (mirror_num)
 			stripe_index += mirror_num - 1;
@@ -3095,22 +3046,10 @@ again:
 	BUG_ON(stripe_index >= map->num_stripes);
 
 	for (i = 0; i < num_stripes; i++) {
-		if (unplug_page) {
-			struct btrfs_device *device;
-			struct backing_dev_info *bdi;
-
-			device = map->stripes[stripe_index].dev;
-			if (device->bdev) {
-				bdi = blk_get_backing_dev_info(device->bdev);
-				if (bdi->unplug_io_fn)
-					bdi->unplug_io_fn(bdi, unplug_page);
-			}
-		} else {
-			multi->stripes[i].physical =
-				map->stripes[stripe_index].physical +
-				stripe_offset + stripe_nr * map->stripe_len;
-			multi->stripes[i].dev = map->stripes[stripe_index].dev;
-		}
+		multi->stripes[i].physical =
+			map->stripes[stripe_index].physical +
+			stripe_offset + stripe_nr * map->stripe_len;
+		multi->stripes[i].dev = map->stripes[stripe_index].dev;
 		stripe_index++;
 	}
 	if (multi_ret) {
@@ -3128,7 +3067,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 		      struct btrfs_multi_bio **multi_ret, int mirror_num)
 {
 	return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
-				 mirror_num, NULL);
+				 mirror_num);
 }
 
 int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@@ -3196,14 +3135,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
 	return 0;
 }
 
-int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
-		      u64 logical, struct page *page)
-{
-	u64 length = PAGE_CACHE_SIZE;
-	return __btrfs_map_block(map_tree, READ, logical, &length,
-				 NULL, 0, page);
-}
-
 static void end_bio_multi_stripe(struct bio *bio, int err)
 {
 	struct btrfs_multi_bio *multi = bio->bi_private;
diff --git a/fs/buffer.c b/fs/buffer.c
index 2219a76e2caf..2e6b1a387b7e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -54,23 +54,15 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
 }
 EXPORT_SYMBOL(init_buffer);
 
-static int sync_buffer(void *word)
+static int sleep_on_buffer(void *word)
 {
-	struct block_device *bd;
-	struct buffer_head *bh
-		= container_of(word, struct buffer_head, b_state);
-
-	smp_mb();
-	bd = bh->b_bdev;
-	if (bd)
-		blk_run_address_space(bd->bd_inode->i_mapping);
 	io_schedule();
 	return 0;
 }
 
 void __lock_buffer(struct buffer_head *bh)
 {
-	wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer,
+	wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
 							TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_buffer);
@@ -90,7 +82,7 @@ EXPORT_SYMBOL(unlock_buffer);
  */
 void __wait_on_buffer(struct buffer_head * bh)
 {
-	wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
+	wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__wait_on_buffer);
 
@@ -749,10 +741,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 {
 	struct buffer_head *bh;
 	struct list_head tmp;
-	struct address_space *mapping, *prev_mapping = NULL;
+	struct address_space *mapping;
 	int err = 0, err2;
+	struct blk_plug plug;
 
 	INIT_LIST_HEAD(&tmp);
+	blk_start_plug(&plug);
 
 	spin_lock(lock);
 	while (!list_empty(list)) {
@@ -775,7 +769,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				 * still in flight on potentially older
 				 * contents.
 				 */
-				write_dirty_buffer(bh, WRITE_SYNC_PLUG);
+				write_dirty_buffer(bh, WRITE_SYNC);
 
 				/*
 				 * Kick off IO for the previous mapping. Note
@@ -783,16 +777,16 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				 * wait_on_buffer() will do that for us
 				 * through sync_buffer().
 				 */
-				if (prev_mapping && prev_mapping != mapping)
-					blk_run_address_space(prev_mapping);
-				prev_mapping = mapping;
-
 				brelse(bh);
 				spin_lock(lock);
 			}
 		}
 	}
 
+	spin_unlock(lock);
+	blk_finish_plug(&plug);
+	spin_lock(lock);
+
 	while (!list_empty(&tmp)) {
 		bh = BH_ENTRY(tmp.prev);
 		get_bh(bh);
@@ -1614,14 +1608,8 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
  * prevents this contention from occurring.
  *
  * If block_write_full_page() is called with wbc->sync_mode ==
- * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this
- * causes the writes to be flagged as synchronous writes, but the
- * block device queue will NOT be unplugged, since usually many pages
- * will be pushed to the out before the higher-level caller actually
- * waits for the writes to be completed.  The various wait functions,
- * such as wait_on_writeback_range() will ultimately call sync_page()
- * which will ultimately call blk_run_backing_dev(), which will end up
- * unplugging the device queue.
+ * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
+ * causes the writes to be flagged as synchronous writes.
  */
 static int __block_write_full_page(struct inode *inode, struct page *page,
 			get_block_t *get_block, struct writeback_control *wbc,
@@ -1634,7 +1622,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	const unsigned blocksize = 1 << inode->i_blkbits;
 	int nr_underway = 0;
 	int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
-			WRITE_SYNC_PLUG : WRITE);
+			WRITE_SYNC : WRITE);
 
 	BUG_ON(!PageLocked(page));
 
@@ -3138,17 +3126,6 @@ out:
 }
 EXPORT_SYMBOL(try_to_free_buffers);
 
-void block_sync_page(struct page *page)
-{
-	struct address_space *mapping;
-
-	smp_mb();
-	mapping = page_mapping(page);
-	if (mapping)
-		blk_run_backing_dev(mapping->backing_dev_info, page);
-}
-EXPORT_SYMBOL(block_sync_page);
-
 /*
  * There are no bdflush tunables left.  But distributions are
  * still running obsolete flush daemons, so we terminate them here.
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e964b1cd5dd0..c27d236738fc 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1569,34 +1569,6 @@ int cifs_fsync(struct file *file, int datasync)
 	return rc;
 }
 
-/* static void cifs_sync_page(struct page *page)
-{
-	struct address_space *mapping;
-	struct inode *inode;
-	unsigned long index = page->index;
-	unsigned int rpages = 0;
-	int rc = 0;
-
-	cFYI(1, "sync page %p", page);
-	mapping = page->mapping;
-	if (!mapping)
-		return 0;
-	inode = mapping->host;
-	if (!inode)
-		return; */
-
-/*	fill in rpages then
-	result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
-
-/*	cFYI(1, "rpages is %d for sync page of Index %ld", rpages, index);
-
-#if 0
-	if (rc < 0)
-		return rc;
-	return 0;
-#endif
-} */
-
 /*
  * As file closes, flush all cached write data for this inode checking
  * for write behind errors.
@@ -2510,7 +2482,6 @@ const struct address_space_operations cifs_addr_ops = {
 	.set_page_dirty = __set_page_dirty_nobuffers,
 	.releasepage = cifs_release_page,
 	.invalidatepage = cifs_invalidate_page,
-	/* .sync_page = cifs_sync_page, */
 	/* .direct_IO = */
 };
 
@@ -2528,6 +2499,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
 	.set_page_dirty = __set_page_dirty_nobuffers,
 	.releasepage = cifs_release_page,
 	.invalidatepage = cifs_invalidate_page,
-	/* .sync_page = cifs_sync_page, */
 	/* .direct_IO = */
 };
diff --git a/fs/direct-io.c b/fs/direct-io.c
index dcb5577cde1d..ac5f164170e3 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1110,11 +1110,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	    ((rw & READ) || (dio->result == dio->size)))
 		ret = -EIOCBQUEUED;
 
-	if (ret != -EIOCBQUEUED) {
-		/* All IO is now issued, send it on its way */
-		blk_run_address_space(inode->i_mapping);
+	if (ret != -EIOCBQUEUED)
 		dio_await_completion(dio);
-	}
 
 	/*
 	 * Sync will always be dropping the final ref and completing the
@@ -1176,7 +1173,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct dio *dio;
 
 	if (rw & WRITE)
-		rw = WRITE_ODIRECT_PLUG;
+		rw = WRITE_ODIRECT;
 
 	if (bdev)
 		bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index a8e7797b9477..9c13412e6c99 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -23,7 +23,6 @@ static sector_t _efs_bmap(struct address_space *mapping, sector_t block)
 }
 static const struct address_space_operations efs_aops = {
 	.readpage = efs_readpage,
-	.sync_page = block_sync_page,
 	.bmap = _efs_bmap
 };
 
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 0c713cfbebf0..8472c098445d 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -823,7 +823,6 @@ const struct address_space_operations exofs_aops = {
 	.direct_IO	= NULL, /* TODO: Should be trivial to do */
 
 	/* With these NULL has special meaning or default is not exported */
-	.sync_page	= NULL,
 	.get_xip_mem	= NULL,
 	.migratepage	= NULL,
 	.launder_page	= NULL,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 40ad210a5049..c47f706878b5 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -860,7 +860,6 @@ const struct address_space_operations ext2_aops = {
 	.readpage		= ext2_readpage,
 	.readpages		= ext2_readpages,
 	.writepage		= ext2_writepage,
-	.sync_page		= block_sync_page,
 	.write_begin		= ext2_write_begin,
 	.write_end		= ext2_write_end,
 	.bmap			= ext2_bmap,
@@ -880,7 +879,6 @@ const struct address_space_operations ext2_nobh_aops = {
 	.readpage		= ext2_readpage,
 	.readpages		= ext2_readpages,
 	.writepage		= ext2_nobh_writepage,
-	.sync_page		= block_sync_page,
 	.write_begin		= ext2_nobh_write_begin,
 	.write_end		= nobh_write_end,
 	.bmap			= ext2_bmap,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ae94f6d949f5..fe2541d250e4 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1894,7 +1894,6 @@ static const struct address_space_operations ext3_ordered_aops = {
 	.readpage		= ext3_readpage,
 	.readpages		= ext3_readpages,
 	.writepage		= ext3_ordered_writepage,
-	.sync_page		= block_sync_page,
 	.write_begin		= ext3_write_begin,
 	.write_end		= ext3_ordered_write_end,
 	.bmap			= ext3_bmap,
@@ -1910,7 +1909,6 @@ static const struct address_space_operations ext3_writeback_aops = {
 	.readpage		= ext3_readpage,
 	.readpages		= ext3_readpages,
 	.writepage		= ext3_writeback_writepage,
-	.sync_page		= block_sync_page,
 	.write_begin		= ext3_write_begin,
 	.write_end		= ext3_writeback_write_end,
 	.bmap			= ext3_bmap,
@@ -1926,7 +1924,6 @@ static const struct address_space_operations ext3_journalled_aops = {
 	.readpage		= ext3_readpage,
 	.readpages		= ext3_readpages,
 	.writepage		= ext3_journalled_writepage,
-	.sync_page		= block_sync_page,
 	.write_begin		= ext3_write_begin,
 	.write_end		= ext3_journalled_write_end,
 	.set_page_dirty		= ext3_journalled_set_page_dirty,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9f7f9e49914f..9297ad46c465 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3903,7 +3903,6 @@ static const struct address_space_operations ext4_ordered_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
 	.writepage		= ext4_writepage,
-	.sync_page		= block_sync_page,
 	.write_begin		= ext4_write_begin,
 	.write_end		= ext4_ordered_write_end,
 	.bmap			= ext4_bmap,
@@ -3919,7 +3918,6 @@ static const struct address_space_operations ext4_writeback_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
 	.writepage		= ext4_writepage,
-	.sync_page		= block_sync_page,
 	.write_begin		= ext4_write_begin,
 	.write_end		= ext4_writeback_write_end,
 	.bmap			= ext4_bmap,
@@ -3935,7 +3933,6 @@ static const struct address_space_operations ext4_journalled_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
 	.writepage		= ext4_writepage,
-	.sync_page		= block_sync_page,
 	.write_begin		= ext4_write_begin,
 	.write_end		= ext4_journalled_write_end,
 	.set_page_dirty		= ext4_journalled_set_page_dirty,
@@ -3951,7 +3948,6 @@ static const struct address_space_operations ext4_da_aops = {
 	.readpages		= ext4_readpages,
 	.writepage		= ext4_writepage,
 	.writepages		= ext4_da_writepages,
-	.sync_page		= block_sync_page,
 	.write_begin		= ext4_da_write_begin,
 	.write_end		= ext4_da_write_end,
 	.bmap			= ext4_bmap,
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 955cc309142f..e2cd90e4bb7c 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -310,8 +310,7 @@ static int io_submit_init(struct ext4_io_submit *io,
 	io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
 
 	io->io_bio = bio;
-	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?
-			WRITE_SYNC_PLUG : WRITE);
+	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
 	io->io_next_block = bh->b_blocknr;
 	return 0;
 }
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 0e277ec4b612..8d68690bdcf1 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -236,7 +236,6 @@ static const struct address_space_operations fat_aops = {
 	.readpages	= fat_readpages,
 	.writepage	= fat_writepage,
 	.writepages	= fat_writepages,
-	.sync_page	= block_sync_page,
 	.write_begin	= fat_write_begin,
 	.write_end	= fat_write_end,
 	.direct_IO	= fat_direct_IO,
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index 1429f3ae1e86..5d318c44f855 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c
@@ -44,7 +44,6 @@ static sector_t		vxfs_bmap(struct address_space *, sector_t);
 const struct address_space_operations vxfs_aops = {
 	.readpage =		vxfs_readpage,
 	.bmap =			vxfs_bmap,
-	.sync_page =		block_sync_page,
 };
 
 inline void
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 051b1a084528..cc6ec4b2f0ff 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -870,7 +870,6 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 
 	fc->bdi.name = "fuse";
 	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
-	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
 
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index aad77e4f61b5..c71995b111bf 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1117,7 +1117,6 @@ static const struct address_space_operations gfs2_writeback_aops = {
 	.writepages = gfs2_writeback_writepages,
 	.readpage = gfs2_readpage,
 	.readpages = gfs2_readpages,
-	.sync_page = block_sync_page,
 	.write_begin = gfs2_write_begin,
 	.write_end = gfs2_write_end,
 	.bmap = gfs2_bmap,
@@ -1133,7 +1132,6 @@ static const struct address_space_operations gfs2_ordered_aops = {
 	.writepage = gfs2_ordered_writepage,
 	.readpage = gfs2_readpage,
 	.readpages = gfs2_readpages,
-	.sync_page = block_sync_page,
 	.write_begin = gfs2_write_begin,
 	.write_end = gfs2_write_end,
 	.set_page_dirty = gfs2_set_page_dirty,
@@ -1151,7 +1149,6 @@ static const struct address_space_operations gfs2_jdata_aops = {
 	.writepages = gfs2_jdata_writepages,
 	.readpage = gfs2_readpage,
 	.readpages = gfs2_readpages,
-	.sync_page = block_sync_page,
 	.write_begin = gfs2_write_begin,
 	.write_end = gfs2_write_end,
 	.set_page_dirty = gfs2_set_page_dirty,
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index e7ed31f858dd..5b102c1887fd 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -121,7 +121,7 @@ __acquires(&sdp->sd_ail_lock)
 			lock_buffer(bh);
 			if (test_clear_buffer_dirty(bh)) {
 				bh->b_end_io = end_buffer_write_sync;
-				submit_bh(WRITE_SYNC_PLUG, bh);
+				submit_bh(WRITE_SYNC, bh);
 			} else {
 				unlock_buffer(bh);
 				brelse(bh);
@@ -647,7 +647,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
 		lock_buffer(bh);
 		if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
 			bh->b_end_io = end_buffer_write_sync;
-			submit_bh(WRITE_SYNC_PLUG, bh);
+			submit_bh(WRITE_SYNC, bh);
 		} else {
 			unlock_buffer(bh);
 			brelse(bh);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index e919abf25ecd..51d27f00ebb4 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -204,7 +204,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
 		}
 
 		gfs2_log_unlock(sdp);
-		submit_bh(WRITE_SYNC_PLUG, bh);
+		submit_bh(WRITE_SYNC, bh);
 		gfs2_log_lock(sdp);
 
 		n = 0;
@@ -214,7 +214,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
 			gfs2_log_unlock(sdp);
 			lock_buffer(bd2->bd_bh);
 			bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
-			submit_bh(WRITE_SYNC_PLUG, bh);
+			submit_bh(WRITE_SYNC, bh);
 			gfs2_log_lock(sdp);
 			if (++n >= num)
 				break;
@@ -356,7 +356,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
 		sdp->sd_log_num_revoke--;
 
 		if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
-			submit_bh(WRITE_SYNC_PLUG, bh);
+			submit_bh(WRITE_SYNC, bh);
 
 			bh = gfs2_log_get_buf(sdp);
 			mh = (struct gfs2_meta_header *)bh->b_data;
@@ -373,7 +373,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
 	}
 	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
 
-	submit_bh(WRITE_SYNC_PLUG, bh);
+	submit_bh(WRITE_SYNC, bh);
 }
 
 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
@@ -575,7 +575,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
 	ptr = bh_log_ptr(bh);
 	
 	get_bh(bh);
-	submit_bh(WRITE_SYNC_PLUG, bh);
+	submit_bh(WRITE_SYNC, bh);
 	gfs2_log_lock(sdp);
 	while(!list_empty(list)) {
 		bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
@@ -601,7 +601,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
 		} else {
 			bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
 		}
-		submit_bh(WRITE_SYNC_PLUG, bh1);
+		submit_bh(WRITE_SYNC, bh1);
 		gfs2_log_lock(sdp);
 		ptr += 2;
 	}
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 01d97f486553..675349b5a133 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -37,7 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
 	struct buffer_head *bh, *head;
 	int nr_underway = 0;
 	int write_op = REQ_META |
-		(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC_PLUG : WRITE);
+		(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
 
 	BUG_ON(!PageLocked(page));
 	BUG_ON(!page_has_buffers(page));
@@ -94,7 +94,6 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
 const struct address_space_operations gfs2_meta_aops = {
 	.writepage = gfs2_aspace_writepage,
 	.releasepage = gfs2_releasepage,
-	.sync_page = block_sync_page,
 };
 
 /**
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index dffb4e996643..fff16c968e67 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -150,7 +150,6 @@ static int hfs_writepages(struct address_space *mapping,
 const struct address_space_operations hfs_btree_aops = {
 	.readpage	= hfs_readpage,
 	.writepage	= hfs_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= hfs_write_begin,
 	.write_end	= generic_write_end,
 	.bmap		= hfs_bmap,
@@ -160,7 +159,6 @@ const struct address_space_operations hfs_btree_aops = {
 const struct address_space_operations hfs_aops = {
 	.readpage	= hfs_readpage,
 	.writepage	= hfs_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= hfs_write_begin,
 	.write_end	= generic_write_end,
 	.bmap		= hfs_bmap,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index a8df651747f0..b248a6cfcad9 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -146,7 +146,6 @@ static int hfsplus_writepages(struct address_space *mapping,
 const struct address_space_operations hfsplus_btree_aops = {
 	.readpage	= hfsplus_readpage,
 	.writepage	= hfsplus_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= hfsplus_write_begin,
 	.write_end	= generic_write_end,
 	.bmap		= hfsplus_bmap,
@@ -156,7 +155,6 @@ const struct address_space_operations hfsplus_btree_aops = {
 const struct address_space_operations hfsplus_aops = {
 	.readpage	= hfsplus_readpage,
 	.writepage	= hfsplus_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= hfsplus_write_begin,
 	.write_end	= generic_write_end,
 	.bmap		= hfsplus_bmap,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 2dbae20450f8..9b9eb6933e43 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -119,7 +119,6 @@ static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations hpfs_aops = {
 	.readpage = hpfs_readpage,
 	.writepage = hpfs_writepage,
-	.sync_page = block_sync_page,
 	.write_begin = hpfs_write_begin,
 	.write_end = generic_write_end,
 	.bmap = _hpfs_bmap
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index a0f3833c0dbf..3db5ba4568fc 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1158,7 +1158,6 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
 
 static const struct address_space_operations isofs_aops = {
 	.readpage = isofs_readpage,
-	.sync_page = block_sync_page,
 	.bmap = _isofs_bmap
 };
 
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 34a4861c14b8..da871ee084d3 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -20,6 +20,7 @@
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/bio.h>
+#include <linux/blkdev.h>
 
 /*
  * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -294,7 +295,7 @@ void journal_commit_transaction(journal_t *journal)
 	int first_tag = 0;
 	int tag_flag;
 	int i;
-	int write_op = WRITE_SYNC;
+	struct blk_plug plug;
 
 	/*
 	 * First job: lock down the current transaction and wait for
@@ -327,13 +328,6 @@ void journal_commit_transaction(journal_t *journal)
 	spin_lock(&journal->j_state_lock);
 	commit_transaction->t_state = T_LOCKED;
 
-	/*
-	 * Use plugged writes here, since we want to submit several before
-	 * we unplug the device. We don't do explicit unplugging in here,
-	 * instead we rely on sync_buffer() doing the unplug for us.
-	 */
-	if (commit_transaction->t_synchronous_commit)
-		write_op = WRITE_SYNC_PLUG;
 	spin_lock(&commit_transaction->t_handle_lock);
 	while (commit_transaction->t_updates) {
 		DEFINE_WAIT(wait);
@@ -418,8 +412,10 @@ void journal_commit_transaction(journal_t *journal)
 	 * Now start flushing things to disk, in the order they appear
 	 * on the transaction lists.  Data blocks go first.
 	 */
+	blk_start_plug(&plug);
 	err = journal_submit_data_buffers(journal, commit_transaction,
-					  write_op);
+					  WRITE_SYNC);
+	blk_finish_plug(&plug);
 
 	/*
 	 * Wait for all previously submitted IO to complete.
@@ -480,7 +476,9 @@ void journal_commit_transaction(journal_t *journal)
 		err = 0;
 	}
 
-	journal_write_revoke_records(journal, commit_transaction, write_op);
+	blk_start_plug(&plug);
+
+	journal_write_revoke_records(journal, commit_transaction, WRITE_SYNC);
 
 	/*
 	 * If we found any dirty or locked buffers, then we should have
@@ -650,7 +648,7 @@ start_journal_io:
 				clear_buffer_dirty(bh);
 				set_buffer_uptodate(bh);
 				bh->b_end_io = journal_end_buffer_io_sync;
-				submit_bh(write_op, bh);
+				submit_bh(WRITE_SYNC, bh);
 			}
 			cond_resched();
 
@@ -661,6 +659,8 @@ start_journal_io:
 		}
 	}
 
+	blk_finish_plug(&plug);
+
 	/* Lo and behold: we have just managed to send a transaction to
            the log.  Before we can commit it, wait for the IO so far to
            complete.  Control buffers being written are on the
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f3ad1598b201..fa36d7662b21 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -137,9 +137,9 @@ static int journal_submit_commit_record(journal_t *journal,
 	if (journal->j_flags & JBD2_BARRIER &&
 	    !JBD2_HAS_INCOMPAT_FEATURE(journal,
 				       JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
-		ret = submit_bh(WRITE_SYNC_PLUG | WRITE_FLUSH_FUA, bh);
+		ret = submit_bh(WRITE_SYNC | WRITE_FLUSH_FUA, bh);
 	else
-		ret = submit_bh(WRITE_SYNC_PLUG, bh);
+		ret = submit_bh(WRITE_SYNC, bh);
 
 	*cbh = bh;
 	return ret;
@@ -329,7 +329,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	int tag_bytes = journal_tag_bytes(journal);
 	struct buffer_head *cbh = NULL; /* For transactional checksums */
 	__u32 crc32_sum = ~0;
-	int write_op = WRITE_SYNC;
+	struct blk_plug plug;
 
 	/*
 	 * First job: lock down the current transaction and wait for
@@ -363,13 +363,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	write_lock(&journal->j_state_lock);
 	commit_transaction->t_state = T_LOCKED;
 
-	/*
-	 * Use plugged writes here, since we want to submit several before
-	 * we unplug the device. We don't do explicit unplugging in here,
-	 * instead we rely on sync_buffer() doing the unplug for us.
-	 */
-	if (commit_transaction->t_synchronous_commit)
-		write_op = WRITE_SYNC_PLUG;
 	trace_jbd2_commit_locking(journal, commit_transaction);
 	stats.run.rs_wait = commit_transaction->t_max_wait;
 	stats.run.rs_locked = jiffies;
@@ -469,8 +462,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	if (err)
 		jbd2_journal_abort(journal, err);
 
+	blk_start_plug(&plug);
 	jbd2_journal_write_revoke_records(journal, commit_transaction,
-					  write_op);
+					  WRITE_SYNC);
+	blk_finish_plug(&plug);
 
 	jbd_debug(3, "JBD: commit phase 2\n");
 
@@ -497,6 +492,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	err = 0;
 	descriptor = NULL;
 	bufs = 0;
+	blk_start_plug(&plug);
 	while (commit_transaction->t_buffers) {
 
 		/* Find the next buffer to be journaled... */
@@ -658,7 +654,7 @@ start_journal_io:
 				clear_buffer_dirty(bh);
 				set_buffer_uptodate(bh);
 				bh->b_end_io = journal_end_buffer_io_sync;
-				submit_bh(write_op, bh);
+				submit_bh(WRITE_SYNC, bh);
 			}
 			cond_resched();
 			stats.run.rs_blocks_logged += bufs;
@@ -699,6 +695,8 @@ start_journal_io:
 			__jbd2_journal_abort_hard(journal);
 	}
 
+	blk_finish_plug(&plug);
+
 	/* Lo and behold: we have just managed to send a transaction to
            the log.  Before we can commit it, wait for the IO so far to
            complete.  Control buffers being written are on the
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 9978803ceedc..eddbb373209e 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -352,7 +352,6 @@ const struct address_space_operations jfs_aops = {
 	.readpages	= jfs_readpages,
 	.writepage	= jfs_writepage,
 	.writepages	= jfs_writepages,
-	.sync_page	= block_sync_page,
 	.write_begin	= jfs_write_begin,
 	.write_end	= nobh_write_end,
 	.bmap		= jfs_bmap,
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 48b44bd8267b..6740d34cd82b 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -583,7 +583,6 @@ static void metapage_invalidatepage(struct page *page, unsigned long offset)
 const struct address_space_operations jfs_metapage_aops = {
 	.readpage	= metapage_readpage,
 	.writepage	= metapage_writepage,
-	.sync_page	= block_sync_page,
 	.releasepage	= metapage_releasepage,
 	.invalidatepage	= metapage_invalidatepage,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 723bc5bca09a..1adc8d455f0e 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -39,7 +39,6 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
 	bio.bi_end_io = request_complete;
 
 	submit_bio(rw, &bio);
-	generic_unplug_device(bdev_get_queue(bdev));
 	wait_for_completion(&complete);
 	return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO;
 }
@@ -168,7 +167,6 @@ static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len)
 	}
 	len = PAGE_ALIGN(len);
 	__bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
-	generic_unplug_device(bdev_get_queue(logfs_super(sb)->s_bdev));
 }
 
 
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index ae0b83f476a6..adcdc0a4e182 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -399,7 +399,6 @@ static sector_t minix_bmap(struct address_space *mapping, sector_t block)
 static const struct address_space_operations minix_aops = {
 	.readpage = minix_readpage,
 	.writepage = minix_writepage,
-	.sync_page = block_sync_page,
 	.write_begin = minix_write_begin,
 	.write_end = generic_write_end,
 	.bmap = minix_bmap
diff --git a/fs/mpage.c b/fs/mpage.c
index d78455a81ec9..0afc809e46e0 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -364,6 +364,9 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 	sector_t last_block_in_bio = 0;
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
+	struct blk_plug plug;
+
+	blk_start_plug(&plug);
 
 	map_bh.b_state = 0;
 	map_bh.b_size = 0;
@@ -385,6 +388,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 	BUG_ON(!list_empty(pages));
 	if (bio)
 		mpage_bio_submit(READ, bio);
+	blk_finish_plug(&plug);
 	return 0;
 }
 EXPORT_SYMBOL(mpage_readpages);
@@ -666,8 +670,11 @@ int
 mpage_writepages(struct address_space *mapping,
 		struct writeback_control *wbc, get_block_t get_block)
 {
+	struct blk_plug plug;
 	int ret;
 
+	blk_start_plug(&plug);
+
 	if (!get_block)
 		ret = generic_writepages(mapping, wbc);
 	else {
@@ -682,6 +689,7 @@ mpage_writepages(struct address_space *mapping,
 		if (mpd.bio)
 			mpage_bio_submit(WRITE, mpd.bio);
 	}
+	blk_finish_plug(&plug);
 	return ret;
 }
 EXPORT_SYMBOL(mpage_writepages);
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 85f7baa15f5d..609cd223eea8 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -34,15 +34,10 @@
 #include "page.h"
 #include "btnode.h"
 
-
-static const struct address_space_operations def_btnode_aops = {
-	.sync_page		= block_sync_page,
-};
-
 void nilfs_btnode_cache_init(struct address_space *btnc,
 			     struct backing_dev_info *bdi)
 {
-	nilfs_mapping_init(btnc, bdi, &def_btnode_aops);
+	nilfs_mapping_init(btnc, bdi);
 }
 
 void nilfs_btnode_cache_clear(struct address_space *btnc)
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index caf9a6a3fb54..1c2a3e23f8b2 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -49,7 +49,6 @@
 #include "ifile.h"
 
 static const struct address_space_operations def_gcinode_aops = {
-	.sync_page		= block_sync_page,
 };
 
 /*
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index d5625be236a8..c0aa27490c02 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -280,7 +280,6 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 const struct address_space_operations nilfs_aops = {
 	.writepage		= nilfs_writepage,
 	.readpage		= nilfs_readpage,
-	.sync_page		= block_sync_page,
 	.writepages		= nilfs_writepages,
 	.set_page_dirty		= nilfs_set_page_dirty,
 	.readpages		= nilfs_readpages,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index a0babd2bff6a..a649b05f7069 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -399,7 +399,6 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
 
 static const struct address_space_operations def_mdt_aops = {
 	.writepage		= nilfs_mdt_write_page,
-	.sync_page		= block_sync_page,
 };
 
 static const struct inode_operations def_mdt_iops;
@@ -438,10 +437,6 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size,
 	mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
 }
 
-static const struct address_space_operations shadow_map_aops = {
-	.sync_page		= block_sync_page,
-};
-
 /**
  * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file
  * @inode: inode of the metadata file
@@ -455,9 +450,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
 
 	INIT_LIST_HEAD(&shadow->frozen_buffers);
 	address_space_init_once(&shadow->frozen_data);
-	nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
+	nilfs_mapping_init(&shadow->frozen_data, bdi);
 	address_space_init_once(&shadow->frozen_btnodes);
-	nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
+	nilfs_mapping_init(&shadow->frozen_btnodes, bdi);
 	mi->mi_shadow = shadow;
 	return 0;
 }
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index a585b35fd6bc..4d2a1ee0eb47 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -493,15 +493,14 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
 }
 
 void nilfs_mapping_init(struct address_space *mapping,
-			struct backing_dev_info *bdi,
-			const struct address_space_operations *aops)
+			struct backing_dev_info *bdi)
 {
 	mapping->host = NULL;
 	mapping->flags = 0;
 	mapping_set_gfp_mask(mapping, GFP_NOFS);
 	mapping->assoc_mapping = NULL;
 	mapping->backing_dev_info = bdi;
-	mapping->a_ops = aops;
+	mapping->a_ops = NULL;
 }
 
 /*
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 2a00953ebd5f..f06b79ad7493 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -62,8 +62,7 @@ int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
 void nilfs_copy_back_pages(struct address_space *, struct address_space *);
 void nilfs_clear_dirty_pages(struct address_space *);
 void nilfs_mapping_init(struct address_space *mapping,
-			struct backing_dev_info *bdi,
-			const struct address_space_operations *aops);
+			struct backing_dev_info *bdi);
 unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
 unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
 					    sector_t start_blk,
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 0f83e93935b2..2853ff20f85a 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -509,7 +509,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
 		 * Last BIO is always sent through the following
 		 * submission.
 		 */
-		rw |= REQ_SYNC | REQ_UNPLUG;
+		rw |= REQ_SYNC;
 		res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
 	}
 
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index c3c2c7ac9020..0b1e885b8cf8 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1543,8 +1543,6 @@ err_out:
  */
 const struct address_space_operations ntfs_aops = {
 	.readpage	= ntfs_readpage,	/* Fill page with data. */
-	.sync_page	= block_sync_page,	/* Currently, just unplugs the
-						   disk request queue. */
 #ifdef NTFS_RW
 	.writepage	= ntfs_writepage,	/* Write dirty page to disk. */
 #endif /* NTFS_RW */
@@ -1560,8 +1558,6 @@ const struct address_space_operations ntfs_aops = {
  */
 const struct address_space_operations ntfs_mst_aops = {
 	.readpage	= ntfs_readpage,	/* Fill page with data. */
-	.sync_page	= block_sync_page,	/* Currently, just unplugs the
-						   disk request queue. */
 #ifdef NTFS_RW
 	.writepage	= ntfs_writepage,	/* Write dirty page to disk. */
 	.set_page_dirty	= __set_page_dirty_nobuffers,	/* Set the page dirty
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 6551c7cbad92..ef9ed854255c 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -698,8 +698,7 @@ lock_retry_remap:
 					"uptodate! Unplugging the disk queue "
 					"and rescheduling.");
 			get_bh(tbh);
-			blk_run_address_space(mapping);
-			schedule();
+			io_schedule();
 			put_bh(tbh);
 			if (unlikely(!buffer_uptodate(tbh)))
 				goto read_err;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1fbb0e20131b..daea0359e974 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2043,7 +2043,6 @@ const struct address_space_operations ocfs2_aops = {
 	.write_begin		= ocfs2_write_begin,
 	.write_end		= ocfs2_write_end,
 	.bmap			= ocfs2_bmap,
-	.sync_page		= block_sync_page,
 	.direct_IO		= ocfs2_direct_IO,
 	.invalidatepage		= ocfs2_invalidatepage,
 	.releasepage		= ocfs2_releasepage,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index b108e863d8f6..1adab287bd24 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -367,11 +367,7 @@ static inline void o2hb_bio_wait_dec(struct o2hb_bio_wait_ctxt *wc,
 static void o2hb_wait_on_io(struct o2hb_region *reg,
 			    struct o2hb_bio_wait_ctxt *wc)
 {
-	struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping;
-
-	blk_run_address_space(mapping);
 	o2hb_bio_wait_dec(wc, 1);
-
 	wait_for_completion(&wc->wc_io_complete);
 }
 
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 8a6d34fa668a..d738a7e493dd 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -372,7 +372,6 @@ const struct address_space_operations omfs_aops = {
 	.readpages = omfs_readpages,
 	.writepage = omfs_writepage,
 	.writepages = omfs_writepages,
-	.sync_page = block_sync_page,
 	.write_begin = omfs_write_begin,
 	.write_end = generic_write_end,
 	.bmap = omfs_bmap,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 9c21119512b9..ac546975031f 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -290,7 +290,8 @@ ssize_t part_inflight_show(struct device *dev,
 {
 	struct hd_struct *p = dev_to_part(dev);
 
-	return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]);
+	return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
+		atomic_read(&p->in_flight[1]));
 }
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index e63b4171d583..2b0646613f5a 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -335,7 +335,6 @@ static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
 static const struct address_space_operations qnx4_aops = {
 	.readpage	= qnx4_readpage,
 	.writepage	= qnx4_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= qnx4_write_begin,
 	.write_end	= generic_write_end,
 	.bmap		= qnx4_bmap
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 1bba24bad820..4fd5bb33dbb5 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3217,7 +3217,6 @@ const struct address_space_operations reiserfs_address_space_operations = {
 	.readpages = reiserfs_readpages,
 	.releasepage = reiserfs_releasepage,
 	.invalidatepage = reiserfs_invalidatepage,
-	.sync_page = block_sync_page,
 	.write_begin = reiserfs_write_begin,
 	.write_end = reiserfs_write_end,
 	.bmap = reiserfs_aop_bmap,
diff --git a/fs/super.c b/fs/super.c
index e84864908264..8a06881b1920 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -71,6 +71,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 #else
 		INIT_LIST_HEAD(&s->s_files);
 #endif
+		s->s_bdi = &default_backing_dev_info;
 		INIT_LIST_HEAD(&s->s_instances);
 		INIT_HLIST_BL_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
@@ -936,6 +937,7 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
 	sb = root->d_sb;
 	BUG_ON(!sb);
 	WARN_ON(!sb->s_bdi);
+	WARN_ON(sb->s_bdi == &default_backing_dev_info);
 	sb->s_flags |= MS_BORN;
 
 	error = security_sb_kern_mount(sb, flags, secdata);
diff --git a/fs/sync.c b/fs/sync.c
index 92ca208777d5..c38ec163da6c 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -34,7 +34,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 	 * This should be safe, as we require bdi backing to actually
 	 * write out data in the first place
 	 */
-	if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info)
+	if (sb->s_bdi == &noop_backing_dev_info)
 		return 0;
 
 	if (sb->s_qcop && sb->s_qcop->quota_sync)
@@ -80,7 +80,7 @@ EXPORT_SYMBOL_GPL(sync_filesystem);
 
 static void sync_one_sb(struct super_block *sb, void *arg)
 {
-	if (!(sb->s_flags & MS_RDONLY) && sb->s_bdi)
+	if (!(sb->s_flags & MS_RDONLY))
 		__sync_filesystem(sb, *(int *)arg);
 }
 /*
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 9ca66276315e..fa8d43c92bb8 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -488,7 +488,6 @@ static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations sysv_aops = {
 	.readpage = sysv_readpage,
 	.writepage = sysv_writepage,
-	.sync_page = block_sync_page,
 	.write_begin = sysv_write_begin,
 	.write_end = generic_write_end,
 	.bmap = sysv_bmap
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index e5dc1e120e8d..6ddd9973e681 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2011,7 +2011,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
 	 */
 	c->bdi.name = "ubifs",
 	c->bdi.capabilities = BDI_CAP_MAP_COPY;
-	c->bdi.unplug_io_fn = default_unplug_io_fn;
 	err  = bdi_init(&c->bdi);
 	if (err)
 		goto out_close;
diff --git a/fs/udf/file.c b/fs/udf/file.c
index f391a2adc699..2a346bb1d9f5 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -98,7 +98,6 @@ static int udf_adinicb_write_end(struct file *file,
 const struct address_space_operations udf_adinicb_aops = {
 	.readpage	= udf_adinicb_readpage,
 	.writepage	= udf_adinicb_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin = simple_write_begin,
 	.write_end = udf_adinicb_write_end,
 };
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index ccc814321414..1d1358ed80c1 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -140,7 +140,6 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations udf_aops = {
 	.readpage	= udf_readpage,
 	.writepage	= udf_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin		= udf_write_begin,
 	.write_end		= generic_write_end,
 	.bmap		= udf_bmap,
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 03c255f12df5..27a4babe7df0 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -552,7 +552,6 @@ static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations ufs_aops = {
 	.readpage = ufs_readpage,
 	.writepage = ufs_writepage,
-	.sync_page = block_sync_page,
 	.write_begin = ufs_write_begin,
 	.write_end = generic_write_end,
 	.bmap = ufs_bmap
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index e56a4f567212..11014302c9ca 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -479,7 +479,7 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
 			break;
 		if (IS_SYNC(inode) && (inode->i_state & I_DIRTY))
 			ufs_sync_inode (inode);
-		blk_run_address_space(inode->i_mapping);
+		blk_flush_plug(current);
 		yield();
 	}
 
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 8c5c87277456..52dbd14260ba 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -413,8 +413,7 @@ xfs_submit_ioend_bio(
 	if (xfs_ioend_new_eof(ioend))
 		xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
 
-	submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
-		   WRITE_SYNC_PLUG : WRITE, bio);
+	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
 }
 
 STATIC struct bio *
@@ -1495,7 +1494,6 @@ const struct address_space_operations xfs_address_space_operations = {
 	.readpages		= xfs_vm_readpages,
 	.writepage		= xfs_vm_writepage,
 	.writepages		= xfs_vm_writepages,
-	.sync_page		= block_sync_page,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
 	.write_begin		= xfs_vm_write_begin,
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 5cb230f2cb4f..c05324d3282c 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -990,7 +990,7 @@ xfs_buf_lock(
 	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
 		xfs_log_force(bp->b_target->bt_mount, 0);
 	if (atomic_read(&bp->b_io_remaining))
-		blk_run_address_space(bp->b_target->bt_mapping);
+		blk_flush_plug(current);
 	down(&bp->b_sema);
 	XB_SET_OWNER(bp);
 
@@ -1034,9 +1034,7 @@ xfs_buf_wait_unpin(
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (atomic_read(&bp->b_pin_count) == 0)
 			break;
-		if (atomic_read(&bp->b_io_remaining))
-			blk_run_address_space(bp->b_target->bt_mapping);
-		schedule();
+		io_schedule();
 	}
 	remove_wait_queue(&bp->b_waiters, &wait);
 	set_current_state(TASK_RUNNING);
@@ -1442,7 +1440,7 @@ xfs_buf_iowait(
 	trace_xfs_buf_iowait(bp, _RET_IP_);
 
 	if (atomic_read(&bp->b_io_remaining))
-		blk_run_address_space(bp->b_target->bt_mapping);
+		blk_flush_plug(current);
 	wait_for_completion(&bp->b_iowait);
 
 	trace_xfs_buf_iowait_done(bp, _RET_IP_);
@@ -1666,7 +1664,6 @@ xfs_mapping_buftarg(
 	struct inode		*inode;
 	struct address_space	*mapping;
 	static const struct address_space_operations mapping_aops = {
-		.sync_page = block_sync_page,
 		.migratepage = fail_migrate_page,
 	};
 
@@ -1947,7 +1944,7 @@ xfsbufd(
 			count++;
 		}
 		if (count)
-			blk_run_address_space(target->bt_mapping);
+			blk_flush_plug(current);
 
 	} while (!kthread_should_stop());
 
@@ -1995,7 +1992,7 @@ xfs_flush_buftarg(
 
 	if (wait) {
 		/* Expedite and wait for IO to complete. */
-		blk_run_address_space(target->bt_mapping);
+		blk_flush_plug(current);
 		while (!list_empty(&wait_list)) {
 			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
 
diff --git a/include/drm/drm.h b/include/drm/drm.h
index 9ac431396176..4be33b4ca2f8 100644
--- a/include/drm/drm.h
+++ b/include/drm/drm.h
@@ -463,12 +463,15 @@ struct drm_irq_busid {
 enum drm_vblank_seq_type {
 	_DRM_VBLANK_ABSOLUTE = 0x0,	/**< Wait for specific vblank sequence number */
 	_DRM_VBLANK_RELATIVE = 0x1,	/**< Wait for given number of vblanks */
+	/* bits 1-6 are reserved for high crtcs */
+	_DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e,
 	_DRM_VBLANK_EVENT = 0x4000000,   /**< Send event instead of blocking */
 	_DRM_VBLANK_FLIP = 0x8000000,   /**< Scheduled buffer swap should flip */
 	_DRM_VBLANK_NEXTONMISS = 0x10000000,	/**< If missed, wait for next vblank */
 	_DRM_VBLANK_SECONDARY = 0x20000000,	/**< Secondary display controller */
 	_DRM_VBLANK_SIGNAL = 0x40000000	/**< Send signal instead of blocking, unsupported */
 };
+#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1
 
 #define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE)
 #define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \
@@ -753,6 +756,7 @@ struct drm_event_vblank {
 };
 
 #define DRM_CAP_DUMB_BUFFER 0x1
+#define DRM_CAP_VBLANK_HIGH_CRTC 0x2
 
 /* typedef area */
 #ifndef __KERNEL__
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 4ce34fa937d4..96f4094b706d 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -66,8 +66,6 @@ struct backing_dev_info {
 	unsigned int capabilities; /* Device capabilities */
 	congested_fn *congested_fn; /* Function pointer if device is md/dm */
 	void *congested_data;	/* Pointer to aux data for congested func */
-	void (*unplug_io_fn)(struct backing_dev_info *, struct page *);
-	void *unplug_io_data;
 
 	char *name;
 
@@ -251,7 +249,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
 
 extern struct backing_dev_info default_backing_dev_info;
 extern struct backing_dev_info noop_backing_dev_info;
-void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page);
 
 int writeback_in_progress(struct backing_dev_info *bdi);
 
@@ -336,17 +333,4 @@ static inline int bdi_sched_wait(void *word)
 	return 0;
 }
 
-static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
-				       struct page *page)
-{
-	if (bdi && bdi->unplug_io_fn)
-		bdi->unplug_io_fn(bdi, page);
-}
-
-static inline void blk_run_address_space(struct address_space *mapping)
-{
-	if (mapping)
-		blk_run_backing_dev(mapping->backing_dev_info, NULL);
-}
-
 #endif		/* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 35dcdb3589bc..ce33e6868a2f 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -304,7 +304,6 @@ struct biovec_slab {
 };
 
 extern struct bio_set *fs_bio_set;
-extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly;
 
 /*
  * a small number of entries is fine, not going to be performance critical.
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 46ad5197537a..be50d9e70a7d 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -128,7 +128,6 @@ enum rq_flag_bits {
 	__REQ_NOIDLE,		/* don't anticipate more IO after this one */
 
 	/* bio only flags */
-	__REQ_UNPLUG,		/* unplug the immediately after submission */
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
 	__REQ_THROTTLED,	/* This bio has already been subjected to
 				 * throttling rules. Don't do it again. */
@@ -148,9 +147,11 @@ enum rq_flag_bits {
 	__REQ_ALLOCED,		/* request came from our alloc pool */
 	__REQ_COPY_USER,	/* contains copies of user pages */
 	__REQ_FLUSH,		/* request for cache flush */
+	__REQ_FLUSH_SEQ,	/* request for flush sequence */
 	__REQ_IO_STAT,		/* account I/O stat */
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
 	__REQ_SECURE,		/* secure discard (used with __REQ_DISCARD) */
+	__REQ_ON_PLUG,		/* on plug list */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -170,7 +171,6 @@ enum rq_flag_bits {
 	 REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
 #define REQ_CLONE_MASK		REQ_COMMON_MASK
 
-#define REQ_UNPLUG		(1 << __REQ_UNPLUG)
 #define REQ_RAHEAD		(1 << __REQ_RAHEAD)
 #define REQ_THROTTLED		(1 << __REQ_THROTTLED)
 
@@ -188,8 +188,10 @@ enum rq_flag_bits {
 #define REQ_ALLOCED		(1 << __REQ_ALLOCED)
 #define REQ_COPY_USER		(1 << __REQ_COPY_USER)
 #define REQ_FLUSH		(1 << __REQ_FLUSH)
+#define REQ_FLUSH_SEQ		(1 << __REQ_FLUSH_SEQ)
 #define REQ_IO_STAT		(1 << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)
 #define REQ_SECURE		(1 << __REQ_SECURE)
+#define REQ_ON_PLUG		(1 << __REQ_ON_PLUG)
 
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d5063e1b5555..16a902f099ac 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -108,11 +108,17 @@ struct request {
 
 	/*
 	 * Three pointers are available for the IO schedulers, if they need
-	 * more they have to dynamically allocate it.
+	 * more they have to dynamically allocate it.  Flush requests are
+	 * never put on the IO scheduler. So let the flush fields share
+	 * space with the three elevator_private pointers.
 	 */
-	void *elevator_private;
-	void *elevator_private2;
-	void *elevator_private3;
+	union {
+		void *elevator_private[3];
+		struct {
+			unsigned int		seq;
+			struct list_head	list;
+		} flush;
+	};
 
 	struct gendisk *rq_disk;
 	struct hd_struct *part;
@@ -190,7 +196,6 @@ typedef void (request_fn_proc) (struct request_queue *q);
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
-typedef void (unplug_fn) (struct request_queue *);
 
 struct bio_vec;
 struct bvec_merge_data {
@@ -273,7 +278,6 @@ struct request_queue
 	make_request_fn		*make_request_fn;
 	prep_rq_fn		*prep_rq_fn;
 	unprep_rq_fn		*unprep_rq_fn;
-	unplug_fn		*unplug_fn;
 	merge_bvec_fn		*merge_bvec_fn;
 	softirq_done_fn		*softirq_done_fn;
 	rq_timed_out_fn		*rq_timed_out_fn;
@@ -287,12 +291,9 @@ struct request_queue
 	struct request		*boundary_rq;
 
 	/*
-	 * Auto-unplugging state
+	 * Delayed queue handling
 	 */
-	struct timer_list	unplug_timer;
-	int			unplug_thresh;	/* After this many requests */
-	unsigned long		unplug_delay;	/* After this many jiffies */
-	struct work_struct	unplug_work;
+	struct delayed_work	delay_work;
 
 	struct backing_dev_info	backing_dev_info;
 
@@ -363,11 +364,12 @@ struct request_queue
 	 * for flush operations
 	 */
 	unsigned int		flush_flags;
-	unsigned int		flush_seq;
-	int			flush_err;
+	unsigned int		flush_pending_idx:1;
+	unsigned int		flush_running_idx:1;
+	unsigned long		flush_pending_since;
+	struct list_head	flush_queue[2];
+	struct list_head	flush_data_in_flight;
 	struct request		flush_rq;
-	struct request		*orig_flush_rq;
-	struct list_head	pending_flushes;
 
 	struct mutex		sysfs_lock;
 
@@ -387,14 +389,13 @@ struct request_queue
 #define QUEUE_FLAG_ASYNCFULL	4	/* write queue has been filled */
 #define QUEUE_FLAG_DEAD		5	/* queue being torn down */
 #define QUEUE_FLAG_REENTER	6	/* Re-entrancy avoidance */
-#define QUEUE_FLAG_PLUGGED	7	/* queue is plugged */
-#define QUEUE_FLAG_ELVSWITCH	8	/* don't use elevator, just do FIFO */
-#define QUEUE_FLAG_BIDI		9	/* queue supports bidi requests */
-#define QUEUE_FLAG_NOMERGES    10	/* disable merge attempts */
-#define QUEUE_FLAG_SAME_COMP   11	/* force complete on same CPU */
-#define QUEUE_FLAG_FAIL_IO     12	/* fake timeout */
-#define QUEUE_FLAG_STACKABLE   13	/* supports request stacking */
-#define QUEUE_FLAG_NONROT      14	/* non-rotational device (SSD) */
+#define QUEUE_FLAG_ELVSWITCH	7	/* don't use elevator, just do FIFO */
+#define QUEUE_FLAG_BIDI		8	/* queue supports bidi requests */
+#define QUEUE_FLAG_NOMERGES     9	/* disable merge attempts */
+#define QUEUE_FLAG_SAME_COMP   10	/* force complete on same CPU */
+#define QUEUE_FLAG_FAIL_IO     11	/* fake timeout */
+#define QUEUE_FLAG_STACKABLE   12	/* supports request stacking */
+#define QUEUE_FLAG_NONROT      13	/* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT     15	/* do IO stats */
 #define QUEUE_FLAG_DISCARD     16	/* supports DISCARD */
@@ -472,7 +473,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 	__clear_bit(flag, &q->queue_flags);
 }
 
-#define blk_queue_plugged(q)	test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
@@ -667,9 +667,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 extern void blk_rq_unprep_clone(struct request *rq);
 extern int blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
-extern void blk_plug_device(struct request_queue *);
-extern void blk_plug_device_unlocked(struct request_queue *);
-extern int blk_remove_plug(struct request_queue *);
+extern void blk_delay_queue(struct request_queue *, unsigned long);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
 extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 			  unsigned int, void __user *);
@@ -713,7 +711,6 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *,
 			  struct request *, int);
 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
 				  struct request *, int, rq_end_io_fn *);
-extern void blk_unplug(struct request_queue *q);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 {
@@ -850,7 +847,6 @@ extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bd
 
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
-extern void generic_unplug_device(struct request_queue *);
 extern long nr_blockdev_pages(void);
 
 int blk_get_queue(struct request_queue *);
@@ -858,6 +854,31 @@ struct request_queue *blk_alloc_queue(gfp_t);
 struct request_queue *blk_alloc_queue_node(gfp_t, int);
 extern void blk_put_queue(struct request_queue *);
 
+struct blk_plug {
+	unsigned long magic;
+	struct list_head list;
+	unsigned int should_sort;
+};
+
+extern void blk_start_plug(struct blk_plug *);
+extern void blk_finish_plug(struct blk_plug *);
+extern void __blk_flush_plug(struct task_struct *, struct blk_plug *);
+
+static inline void blk_flush_plug(struct task_struct *tsk)
+{
+	struct blk_plug *plug = tsk->plug;
+
+	if (unlikely(plug))
+		__blk_flush_plug(tsk, plug);
+}
+
+static inline bool blk_needs_flush_plug(struct task_struct *tsk)
+{
+	struct blk_plug *plug = tsk->plug;
+
+	return plug && !list_empty(&plug->list);
+}
+
 /*
  * tag stuff
  */
@@ -1135,7 +1156,6 @@ static inline uint64_t rq_io_start_time_ns(struct request *req)
 extern int blk_throtl_init(struct request_queue *q);
 extern void blk_throtl_exit(struct request_queue *q);
 extern int blk_throtl_bio(struct request_queue *q, struct bio **bio);
-extern void throtl_shutdown_timer_wq(struct request_queue *q);
 #else /* CONFIG_BLK_DEV_THROTTLING */
 static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
 {
@@ -1144,7 +1164,6 @@ static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
 
 static inline int blk_throtl_init(struct request_queue *q) { return 0; }
 static inline int blk_throtl_exit(struct request_queue *q) { return 0; }
-static inline void throtl_shutdown_timer_wq(struct request_queue *q) {}
 #endif /* CONFIG_BLK_DEV_THROTTLING */
 
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
@@ -1278,6 +1297,26 @@ static inline long nr_blockdev_pages(void)
 	return 0;
 }
 
+struct blk_plug {
+};
+
+static inline void blk_start_plug(struct blk_plug *plug)
+{
+}
+
+static inline void blk_finish_plug(struct blk_plug *plug)
+{
+}
+
+static inline void blk_flush_plug(struct task_struct *task)
+{
+}
+
+static inline bool blk_needs_flush_plug(struct task_struct *tsk)
+{
+	return false;
+}
+
 #endif /* CONFIG_BLOCK */
 
 #endif
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 68d1fe7b877c..f5df23561b96 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -219,7 +219,6 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size);
 int block_commit_write(struct page *page, unsigned from, unsigned to);
 int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 				get_block_t get_block);
-void block_sync_page(struct page *);
 sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
 int block_truncate_page(struct address_space *, loff_t, get_block_t *);
 int nobh_write_begin(struct address_space *, loff_t, unsigned, unsigned,
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 272496d1fae4..e2768834f397 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -286,11 +286,6 @@ void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callback
 int dm_table_complete(struct dm_table *t);
 
 /*
- * Unplug all devices in a table.
- */
-void dm_table_unplug_all(struct dm_table *t);
-
-/*
  * Table reference counting.
  */
 struct dm_table *dm_get_live_table(struct mapped_device *md);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 4d857973d2c9..d93efcc44570 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -20,7 +20,6 @@ typedef void (elevator_bio_merged_fn) (struct request_queue *,
 typedef int (elevator_dispatch_fn) (struct request_queue *, int);
 
 typedef void (elevator_add_req_fn) (struct request_queue *, struct request *);
-typedef int (elevator_queue_empty_fn) (struct request_queue *);
 typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *);
 typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
 typedef int (elevator_may_queue_fn) (struct request_queue *, int);
@@ -46,7 +45,6 @@ struct elevator_ops
 	elevator_activate_req_fn *elevator_activate_req_fn;
 	elevator_deactivate_req_fn *elevator_deactivate_req_fn;
 
-	elevator_queue_empty_fn *elevator_queue_empty_fn;
 	elevator_completed_req_fn *elevator_completed_req_fn;
 
 	elevator_request_list_fn *elevator_former_req_fn;
@@ -101,17 +99,17 @@ struct elevator_queue
  */
 extern void elv_dispatch_sort(struct request_queue *, struct request *);
 extern void elv_dispatch_add_tail(struct request_queue *, struct request *);
-extern void elv_add_request(struct request_queue *, struct request *, int, int);
-extern void __elv_add_request(struct request_queue *, struct request *, int, int);
+extern void elv_add_request(struct request_queue *, struct request *, int);
+extern void __elv_add_request(struct request_queue *, struct request *, int);
 extern void elv_insert(struct request_queue *, struct request *, int);
 extern int elv_merge(struct request_queue *, struct request **, struct bio *);
+extern int elv_try_merge(struct request *, struct bio *);
 extern void elv_merge_requests(struct request_queue *, struct request *,
 			       struct request *);
 extern void elv_merged_request(struct request_queue *, struct request *, int);
 extern void elv_bio_merged(struct request_queue *q, struct request *,
 				struct bio *);
 extern void elv_requeue_request(struct request_queue *, struct request *);
-extern int elv_queue_empty(struct request_queue *);
 extern struct request *elv_former_request(struct request_queue *, struct request *);
 extern struct request *elv_latter_request(struct request_queue *, struct request *);
 extern int elv_register_queue(struct request_queue *q);
@@ -167,6 +165,8 @@ extern struct request *elv_rb_find(struct rb_root *, sector_t);
 #define ELEVATOR_INSERT_BACK	2
 #define ELEVATOR_INSERT_SORT	3
 #define ELEVATOR_INSERT_REQUEUE	4
+#define ELEVATOR_INSERT_FLUSH	5
+#define ELEVATOR_INSERT_SORT_MERGE	6
 
 /*
  * return values from elevator_may_queue_fn
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4dda076c24a1..ce7e18555197 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -138,16 +138,10 @@ struct inodes_stat_t {
  *			block layer could (in theory) choose to ignore this
  *			request if it runs into resource problems.
  * WRITE		A normal async write. Device will be plugged.
- * WRITE_SYNC_PLUG	Synchronous write. Identical to WRITE, but passes down
+ * WRITE_SYNC		Synchronous write. Identical to WRITE, but passes down
  *			the hint that someone will be waiting on this IO
- *			shortly. The device must still be unplugged explicitly,
- *			WRITE_SYNC_PLUG does not do this as we could be
- *			submitting more writes before we actually wait on any
- *			of them.
- * WRITE_SYNC		Like WRITE_SYNC_PLUG, but also unplugs the device
- *			immediately after submission. The write equivalent
- *			of READ_SYNC.
- * WRITE_ODIRECT_PLUG	Special case write for O_DIRECT only.
+ *			shortly. The write equivalent of READ_SYNC.
+ * WRITE_ODIRECT	Special case write for O_DIRECT only.
  * WRITE_FLUSH		Like WRITE_SYNC but with preceding cache flush.
  * WRITE_FUA		Like WRITE_SYNC but data is guaranteed to be on
  *			non-volatile media on completion.
@@ -163,18 +157,14 @@ struct inodes_stat_t {
 #define WRITE			RW_MASK
 #define READA			RWA_MASK
 
-#define READ_SYNC		(READ | REQ_SYNC | REQ_UNPLUG)
+#define READ_SYNC		(READ | REQ_SYNC)
 #define READ_META		(READ | REQ_META)
-#define WRITE_SYNC_PLUG		(WRITE | REQ_SYNC | REQ_NOIDLE)
-#define WRITE_SYNC		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG)
-#define WRITE_ODIRECT_PLUG	(WRITE | REQ_SYNC)
+#define WRITE_SYNC		(WRITE | REQ_SYNC | REQ_NOIDLE)
+#define WRITE_ODIRECT		(WRITE | REQ_SYNC)
 #define WRITE_META		(WRITE | REQ_META)
-#define WRITE_FLUSH		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
-				 REQ_FLUSH)
-#define WRITE_FUA		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
-				 REQ_FUA)
-#define WRITE_FLUSH_FUA		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
-				 REQ_FLUSH | REQ_FUA)
+#define WRITE_FLUSH		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH)
+#define WRITE_FUA		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
+#define WRITE_FLUSH_FUA		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
 
 #define SEL_IN		1
 #define SEL_OUT		2
@@ -586,7 +576,6 @@ typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
 struct address_space_operations {
 	int (*writepage)(struct page *page, struct writeback_control *wbc);
 	int (*readpage)(struct file *, struct page *);
-	void (*sync_page)(struct page *);
 
 	/* Write back some dirty pages from this mapping. */
 	int (*writepages)(struct address_space *, struct writeback_control *);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index c0d5f6945c1e..d764a426e9fd 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -109,7 +109,7 @@ struct hd_struct {
 	int make_it_fail;
 #endif
 	unsigned long stamp;
-	int in_flight[2];
+	atomic_t in_flight[2];
 #ifdef	CONFIG_SMP
 	struct disk_stats __percpu *dkstats;
 #else
@@ -370,21 +370,21 @@ static inline void free_part_stats(struct hd_struct *part)
 
 static inline void part_inc_in_flight(struct hd_struct *part, int rw)
 {
-	part->in_flight[rw]++;
+	atomic_inc(&part->in_flight[rw]);
 	if (part->partno)
-		part_to_disk(part)->part0.in_flight[rw]++;
+		atomic_inc(&part_to_disk(part)->part0.in_flight[rw]);
 }
 
 static inline void part_dec_in_flight(struct hd_struct *part, int rw)
 {
-	part->in_flight[rw]--;
+	atomic_dec(&part->in_flight[rw]);
 	if (part->partno)
-		part_to_disk(part)->part0.in_flight[rw]--;
+		atomic_dec(&part_to_disk(part)->part0.in_flight[rw]);
 }
 
 static inline int part_in_flight(struct hd_struct *part)
 {
-	return part->in_flight[0] + part->in_flight[1];
+	return atomic_read(&part->in_flight[0]) + atomic_read(&part->in_flight[1]);
 }
 
 static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk)
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 092e4250a458..10ca03d0a250 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -297,6 +297,7 @@ extern int
 kgdb_handle_exception(int ex_vector, int signo, int err_code,
 		      struct pt_regs *regs);
 extern int kgdb_nmicallback(int cpu, void *regs);
+extern void gdbstub_exit(int status);
 
 extern int			kgdb_single_step;
 extern atomic_t			kgdb_active;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f9535b2c9558..7606d7db96c9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -861,7 +861,7 @@ extern void pagefault_out_of_memory(void);
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 
 /*
- * Flags passed to __show_mem() and __show_free_areas() to suppress output in
+ * Flags passed to show_mem() and __show_free_areas() to suppress output in
  * various contexts.
  */
 #define SHOW_MEM_FILTER_NODES	(0x0001u)	/* filter disallowed nodes */
@@ -1360,8 +1360,7 @@ extern void setup_per_zone_wmarks(void);
 extern void calculate_zone_inactive_ratio(struct zone *zone);
 extern void mem_init(void);
 extern void __init mmap_init(void);
-extern void show_mem(void);
-extern void __show_mem(unsigned int flags);
+extern void show_mem(unsigned int flags);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
 extern int after_bootmem;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 29ebba54c238..c11950652646 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -298,7 +298,6 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 
 extern void __lock_page(struct page *page);
 extern int __lock_page_killable(struct page *page);
-extern void __lock_page_nosync(struct page *page);
 extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 				unsigned int flags);
 extern void unlock_page(struct page *page);
@@ -342,17 +341,6 @@ static inline int lock_page_killable(struct page *page)
 }
 
 /*
- * lock_page_nosync should only be used if we can't pin the page's inode.
- * Doesn't play quite so well with block device plugging.
- */
-static inline void lock_page_nosync(struct page *page)
-{
-	might_sleep();
-	if (!trylock_page(page))
-		__lock_page_nosync(page);
-}
-	
-/*
  * lock_page_or_retry - Lock the page, unless this would block and the
  * caller indicated that it can handle a retry.
  */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 98fc7ed4b191..b8369d522bf8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -99,6 +99,7 @@ struct robust_list_head;
 struct bio_list;
 struct fs_struct;
 struct perf_event_context;
+struct blk_plug;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -1428,6 +1429,11 @@ struct task_struct {
 /* stacked block device info */
 	struct bio_list *bio_list;
 
+#ifdef CONFIG_BLOCK
+/* stack plugging */
+	struct blk_plug *plug;
+#endif
+
 /* VM state */
 	struct reclaim_state *reclaim_state;
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index ed6ebe690f4a..a5c6da5d8df8 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -309,8 +309,6 @@ extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
 					struct page **pagep, swp_entry_t *ent);
 #endif
 
-extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
-
 #ifdef CONFIG_SWAP
 /* linux/mm/page_io.c */
 extern int swap_readpage(struct page *);
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index 481a7bd2dfe7..a11db956dd62 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -1093,3 +1093,33 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd)
 	put_packet(remcom_out_buffer);
 	return 0;
 }
+
+/**
+ * gdbstub_exit - Send an exit message to GDB
+ * @status: The exit code to report.
+ */
+void gdbstub_exit(int status)
+{
+	unsigned char checksum, ch, buffer[3];
+	int loop;
+
+	buffer[0] = 'W';
+	buffer[1] = hex_asc_hi(status);
+	buffer[2] = hex_asc_lo(status);
+
+	dbg_io_ops->write_char('$');
+	checksum = 0;
+
+	for (loop = 0; loop < 3; loop++) {
+		ch = buffer[loop];
+		checksum += ch;
+		dbg_io_ops->write_char(ch);
+	}
+
+	dbg_io_ops->write_char('#');
+	dbg_io_ops->write_char(hex_asc_hi(checksum));
+	dbg_io_ops->write_char(hex_asc_lo(checksum));
+
+	/* make sure the output is flushed, lest the bootloader clobber it */
+	dbg_io_ops->flush();
+}
diff --git a/kernel/exit.c b/kernel/exit.c
index f9a45ebcc7b1..6a488ad2dce5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code)
 	profile_task_exit(tsk);
 
 	WARN_ON(atomic_read(&tsk->fs_excl));
+	WARN_ON(blk_needs_flush_plug(tsk));
 
 	if (unlikely(in_interrupt()))
 		panic("Aiee, killing interrupt handler!");
diff --git a/kernel/fork.c b/kernel/fork.c
index 457fff2e17e0..e7548dee636b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1205,6 +1205,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 * Clear TID on mm_release()?
 	 */
 	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+#ifdef CONFIG_BLOCK
+	p->plug = NULL;
+#endif
 #ifdef CONFIG_FUTEX
 	p->robust_list = NULL;
 #ifdef CONFIG_COMPAT
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
index 83bbc7c02df9..d09dd10c5a5e 100644
--- a/kernel/power/block_io.c
+++ b/kernel/power/block_io.c
@@ -28,7 +28,7 @@
 static int submit(int rw, struct block_device *bdev, sector_t sector,
 		struct page *page, struct bio **bio_chain)
 {
-	const int bio_rw = rw | REQ_SYNC | REQ_UNPLUG;
+	const int bio_rw = rw | REQ_SYNC;
 	struct bio *bio;
 
 	bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
diff --git a/kernel/sched.c b/kernel/sched.c
index 480adeb63f8f..ae659b99ce73 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4115,6 +4115,16 @@ need_resched:
 		switch_count = &prev->nvcsw;
 	}
 
+	/*
+	 * If we are going to sleep and we have plugged IO queued, make
+	 * sure to submit it to avoid deadlocks.
+	 */
+	if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
+		raw_spin_unlock(&rq->lock);
+		blk_flush_plug(prev);
+		raw_spin_lock(&rq->lock);
+	}
+
 	pre_schedule(rq, prev);
 
 	if (unlikely(!rq->nr_running))
@@ -5528,6 +5538,7 @@ void __sched io_schedule(void)
 
 	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
+	blk_flush_plug(current);
 	current->in_iowait = 1;
 	schedule();
 	current->in_iowait = 0;
@@ -5543,6 +5554,7 @@ long __sched io_schedule_timeout(long timeout)
 
 	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
+	blk_flush_plug(current);
 	current->in_iowait = 1;
 	ret = schedule_timeout(timeout);
 	current->in_iowait = 0;
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index cbafed7d4f38..7aa40f8e182d 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -703,28 +703,21 @@ void blk_trace_shutdown(struct request_queue *q)
  *
  **/
 static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
-				    u32 what)
+			     u32 what)
 {
 	struct blk_trace *bt = q->blk_trace;
-	int rw = rq->cmd_flags & 0x03;
 
 	if (likely(!bt))
 		return;
 
-	if (rq->cmd_flags & REQ_DISCARD)
-		rw |= REQ_DISCARD;
-
-	if (rq->cmd_flags & REQ_SECURE)
-		rw |= REQ_SECURE;
-
 	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
 		what |= BLK_TC_ACT(BLK_TC_PC);
-		__blk_add_trace(bt, 0, blk_rq_bytes(rq), rw,
+		__blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags,
 				what, rq->errors, rq->cmd_len, rq->cmd);
 	} else  {
 		what |= BLK_TC_ACT(BLK_TC_FS);
-		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw,
-				what, rq->errors, 0, NULL);
+		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
+				rq->cmd_flags, what, rq->errors, 0, NULL);
 	}
 }
 
diff --git a/lib/show_mem.c b/lib/show_mem.c
index d8d602b58c31..90cbe4bb5960 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -9,7 +9,7 @@
 #include <linux/nmi.h>
 #include <linux/quicklist.h>
 
-void __show_mem(unsigned int filter)
+void show_mem(unsigned int filter)
 {
 	pg_data_t *pgdat;
 	unsigned long total = 0, reserved = 0, shared = 0,
@@ -61,8 +61,3 @@ void __show_mem(unsigned int filter)
 		quicklist_total_size());
 #endif
 }
-
-void show_mem(void)
-{
-	__show_mem(0);
-}
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 027100d30227..8fe9d3407921 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -14,17 +14,11 @@
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
 
-void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-}
-EXPORT_SYMBOL(default_unplug_io_fn);
-
 struct backing_dev_info default_backing_dev_info = {
 	.name		= "default",
 	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
 	.state		= 0,
 	.capabilities	= BDI_CAP_MAP_COPY,
-	.unplug_io_fn	= default_unplug_io_fn,
 };
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
@@ -604,7 +598,7 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (sb->s_bdi == bdi)
-			sb->s_bdi = NULL;
+			sb->s_bdi = &default_backing_dev_info;
 	}
 	spin_unlock(&sb_lock);
 }
diff --git a/mm/filemap.c b/mm/filemap.c
index f807afda86f2..04d1992fd86b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -164,45 +164,15 @@ void delete_from_page_cache(struct page *page)
 }
 EXPORT_SYMBOL(delete_from_page_cache);
 
-static int sync_page(void *word)
+static int sleep_on_page(void *word)
 {
-	struct address_space *mapping;
-	struct page *page;
-
-	page = container_of((unsigned long *)word, struct page, flags);
-
-	/*
-	 * page_mapping() is being called without PG_locked held.
-	 * Some knowledge of the state and use of the page is used to
-	 * reduce the requirements down to a memory barrier.
-	 * The danger here is of a stale page_mapping() return value
-	 * indicating a struct address_space different from the one it's
-	 * associated with when it is associated with one.
-	 * After smp_mb(), it's either the correct page_mapping() for
-	 * the page, or an old page_mapping() and the page's own
-	 * page_mapping() has gone NULL.
-	 * The ->sync_page() address_space operation must tolerate
-	 * page_mapping() going NULL. By an amazing coincidence,
-	 * this comes about because none of the users of the page
-	 * in the ->sync_page() methods make essential use of the
-	 * page_mapping(), merely passing the page down to the backing
-	 * device's unplug functions when it's non-NULL, which in turn
-	 * ignore it for all cases but swap, where only page_private(page) is
-	 * of interest. When page_mapping() does go NULL, the entire
-	 * call stack gracefully ignores the page and returns.
-	 * -- wli
-	 */
-	smp_mb();
-	mapping = page_mapping(page);
-	if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
-		mapping->a_ops->sync_page(page);
 	io_schedule();
 	return 0;
 }
 
-static int sync_page_killable(void *word)
+static int sleep_on_page_killable(void *word)
 {
-	sync_page(word);
+	sleep_on_page(word);
 	return fatal_signal_pending(current) ? -EINTR : 0;
 }
 
@@ -558,12 +528,6 @@ struct page *__page_cache_alloc(gfp_t gfp)
 EXPORT_SYMBOL(__page_cache_alloc);
 #endif
 
-static int __sleep_on_page_lock(void *word)
-{
-	io_schedule();
-	return 0;
-}
-
 /*
  * In order to wait for pages to become available there must be
  * waitqueues associated with pages. By using a hash table of
@@ -591,7 +555,7 @@ void wait_on_page_bit(struct page *page, int bit_nr)
 	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
 
 	if (test_bit(bit_nr, &page->flags))
-		__wait_on_bit(page_waitqueue(page), &wait, sync_page,
+		__wait_on_bit(page_waitqueue(page), &wait, sleep_on_page,
 							TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(wait_on_page_bit);
@@ -655,17 +619,12 @@ EXPORT_SYMBOL(end_page_writeback);
 /**
  * __lock_page - get a lock on the page, assuming we need to sleep to get it
  * @page: the page to lock
- *
- * Ugly. Running sync_page() in state TASK_UNINTERRUPTIBLE is scary.  If some
- * random driver's requestfn sets TASK_RUNNING, we could busywait.  However
- * chances are that on the second loop, the block layer's plug list is empty,
- * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
  */
 void __lock_page(struct page *page)
 {
 	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
-	__wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
+	__wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
 							TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_page);
@@ -675,24 +634,10 @@ int __lock_page_killable(struct page *page)
 	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
 	return __wait_on_bit_lock(page_waitqueue(page), &wait,
-					sync_page_killable, TASK_KILLABLE);
+					sleep_on_page_killable, TASK_KILLABLE);
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
-/**
- * __lock_page_nosync - get a lock on the page, without calling sync_page()
- * @page: the page to lock
- *
- * Variant of lock_page that does not require the caller to hold a reference
- * on the page's mapping.
- */
-void __lock_page_nosync(struct page *page)
-{
-	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
-	__wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
-							TASK_UNINTERRUPTIBLE);
-}
-
 int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 			 unsigned int flags)
 {
@@ -1407,12 +1352,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 	unsigned long seg = 0;
 	size_t count;
 	loff_t *ppos = &iocb->ki_pos;
+	struct blk_plug plug;
 
 	count = 0;
 	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
 	if (retval)
 		return retval;
 
+	blk_start_plug(&plug);
+
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
 	if (filp->f_flags & O_DIRECT) {
 		loff_t size;
@@ -1485,6 +1433,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 			break;
 	}
 out:
+	blk_finish_plug(&plug);
 	return retval;
 }
 EXPORT_SYMBOL(generic_file_aio_read);
@@ -2596,11 +2545,13 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
+	struct blk_plug plug;
 	ssize_t ret;
 
 	BUG_ON(iocb->ki_pos != pos);
 
 	mutex_lock(&inode->i_mutex);
+	blk_start_plug(&plug);
 	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
 	mutex_unlock(&inode->i_mutex);
 
@@ -2611,6 +2562,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		if (err < 0 && ret > 0)
 			ret = err;
 	}
+	blk_finish_plug(&plug);
 	return ret;
 }
 EXPORT_SYMBOL(generic_file_aio_write);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index e0af336530c6..37feb9fec228 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -945,7 +945,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 		collect_procs(ppage, &tokill);
 
 	if (hpage != ppage)
-		lock_page_nosync(ppage);
+		lock_page(ppage);
 
 	ret = try_to_unmap(ppage, ttu);
 	if (ret != SWAP_SUCCESS)
@@ -1038,7 +1038,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 			 * Check "just unpoisoned", "filter hit", and
 			 * "race with other subpage."
 			 */
-			lock_page_nosync(hpage);
+			lock_page(hpage);
 			if (!PageHWPoison(hpage)
 			    || (hwpoison_filter(p) && TestClearPageHWPoison(p))
 			    || (p != hpage && TestSetPageHWPoison(hpage))) {
@@ -1088,7 +1088,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * It's very difficult to mess with pages currently under IO
 	 * and in many cases impossible, so we just avoid it here.
 	 */
-	lock_page_nosync(hpage);
+	lock_page(hpage);
 
 	/*
 	 * unpoison always clear PG_hwpoison inside page lock
@@ -1231,7 +1231,7 @@ int unpoison_memory(unsigned long pfn)
 		return 0;
 	}
 
-	lock_page_nosync(page);
+	lock_page(page);
 	/*
 	 * This test is racy because PG_hwpoison is set outside of page lock.
 	 * That's acceptable because that won't trigger kernel panic. Instead,
diff --git a/mm/nommu.c b/mm/nommu.c
index e629143f9440..cb86e7d5e7f5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1842,10 +1842,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 }
 EXPORT_SYMBOL(remap_vmalloc_range);
 
-void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-}
-
 unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long pgoff, unsigned long flags)
 {
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 62a5cec08a17..6a819d1b2c7d 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -406,7 +406,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 	task_unlock(current);
 	dump_stack();
 	mem_cgroup_print_oom_info(mem, p);
-	__show_mem(SHOW_MEM_FILTER_NODES);
+	show_mem(SHOW_MEM_FILTER_NODES);
 	if (sysctl_oom_dump_tasks)
 		dump_tasks(mem, nodemask);
 }
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 632b46479c94..31f698862420 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1040,11 +1040,17 @@ static int __writepage(struct page *page, struct writeback_control *wbc,
 int generic_writepages(struct address_space *mapping,
 		       struct writeback_control *wbc)
 {
+	struct blk_plug plug;
+	int ret;
+
 	/* deal with chardevs and other special file */
 	if (!mapping->a_ops->writepage)
 		return 0;
 
-	return write_cache_pages(mapping, wbc, __writepage, mapping);
+	blk_start_plug(&plug);
+	ret = write_cache_pages(mapping, wbc, __writepage, mapping);
+	blk_finish_plug(&plug);
+	return ret;
 }
 
 EXPORT_SYMBOL(generic_writepages);
@@ -1251,7 +1257,7 @@ int set_page_dirty_lock(struct page *page)
 {
 	int ret;
 
-	lock_page_nosync(page);
+	lock_page(page);
 	ret = set_page_dirty(page);
 	unlock_page(page);
 	return ret;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8e5726ab0d85..d6e7ba7373be 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2195,7 +2195,7 @@ nopage:
 			current->comm, order, gfp_mask);
 		dump_stack();
 		if (!should_suppress_show_mem())
-			__show_mem(filter);
+			show_mem(filter);
 	}
 	return page;
 got_pg:
diff --git a/mm/page_io.c b/mm/page_io.c
index 2dee975bf469..dc76b4d0611e 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -106,7 +106,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
 		goto out;
 	}
 	if (wbc->sync_mode == WB_SYNC_ALL)
-		rw |= REQ_SYNC | REQ_UNPLUG;
+		rw |= REQ_SYNC;
 	count_vm_event(PSWPOUT);
 	set_page_writeback(page);
 	unlock_page(page);
diff --git a/mm/readahead.c b/mm/readahead.c
index 77506a291a2d..2c0cc489e288 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -109,9 +109,12 @@ EXPORT_SYMBOL(read_cache_pages);
 static int read_pages(struct address_space *mapping, struct file *filp,
 		struct list_head *pages, unsigned nr_pages)
 {
+	struct blk_plug plug;
 	unsigned page_idx;
 	int ret;
 
+	blk_start_plug(&plug);
+
 	if (mapping->a_ops->readpages) {
 		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
 		/* Clean up the remaining pages */
@@ -129,7 +132,10 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 		page_cache_release(page);
 	}
 	ret = 0;
+
 out:
+	blk_finish_plug(&plug);
+
 	return ret;
 }
 
@@ -554,17 +560,5 @@ page_cache_async_readahead(struct address_space *mapping,
 
 	/* do read-ahead */
 	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
-
-#ifdef CONFIG_BLOCK
-	/*
-	 * Normally the current page is !uptodate and lock_page() will be
-	 * immediately called to implicitly unplug the device. However this
-	 * is not always true for RAID conifgurations, where data arrives
-	 * not strictly in their submission order. In this case we need to
-	 * explicitly kick off the IO.
-	 */
-	if (PageUptodate(page))
-		blk_run_backing_dev(mapping->backing_dev_info, NULL);
-#endif
 }
 EXPORT_SYMBOL_GPL(page_cache_async_readahead);
diff --git a/mm/shmem.c b/mm/shmem.c
index 91ce9a1024d7..58da7c150ba6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -224,7 +224,6 @@ static const struct vm_operations_struct shmem_vm_ops;
 static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
 	.ra_pages	= 0,	/* No readahead */
 	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
-	.unplug_io_fn	= default_unplug_io_fn,
 };
 
 static LIST_HEAD(shmem_swaplist);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 5c8cfabbc9bc..46680461785b 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -24,12 +24,10 @@
 
 /*
  * swapper_space is a fiction, retained to simplify the path through
- * vmscan's shrink_page_list, to make sync_page look nicer, and to allow
- * future use of radix_tree tags in the swap cache.
+ * vmscan's shrink_page_list.
  */
 static const struct address_space_operations swap_aops = {
 	.writepage	= swap_writepage,
-	.sync_page	= block_sync_page,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
 	.migratepage	= migrate_page,
 };
@@ -37,7 +35,6 @@ static const struct address_space_operations swap_aops = {
 static struct backing_dev_info swap_backing_dev_info = {
 	.name		= "swap",
 	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
-	.unplug_io_fn	= swap_unplug_io_fn,
 };
 
 struct address_space swapper_space = {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 039e61677635..8c6b3ce38f09 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -95,39 +95,6 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
 }
 
 /*
- * We need this because the bdev->unplug_fn can sleep and we cannot
- * hold swap_lock while calling the unplug_fn. And swap_lock
- * cannot be turned into a mutex.
- */
-static DECLARE_RWSEM(swap_unplug_sem);
-
-void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
-{
-	swp_entry_t entry;
-
-	down_read(&swap_unplug_sem);
-	entry.val = page_private(page);
-	if (PageSwapCache(page)) {
-		struct block_device *bdev = swap_info[swp_type(entry)]->bdev;
-		struct backing_dev_info *bdi;
-
-		/*
-		 * If the page is removed from swapcache from under us (with a
-		 * racy try_to_unuse/swapoff) we need an additional reference
-		 * count to avoid reading garbage from page_private(page) above.
-		 * If the WARN_ON triggers during a swapoff it maybe the race
-		 * condition and it's harmless. However if it triggers without
-		 * swapoff it signals a problem.
-		 */
-		WARN_ON(page_count(page) <= 1);
-
-		bdi = bdev->bd_inode->i_mapping->backing_dev_info;
-		blk_run_backing_dev(bdi, page);
-	}
-	up_read(&swap_unplug_sem);
-}
-
-/*
  * swapon tell device that all the old swap contents can be discarded,
  * to allow the swap device to optimize its wear-levelling.
  */
@@ -1662,10 +1629,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		goto out_dput;
 	}
 
-	/* wait for any unplug function to finish */
-	down_write(&swap_unplug_sem);
-	up_write(&swap_unplug_sem);
-
 	destroy_swap_extents(p);
 	if (p->flags & SWP_CONTINUED)
 		free_swap_count_continuations(p);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 060e4c191403..f73b8657c2d0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -358,7 +358,7 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
 static void handle_write_error(struct address_space *mapping,
 				struct page *page, int error)
 {
-	lock_page_nosync(page);
+	lock_page(page);
 	if (page_mapping(page) == mapping)
 		mapping_set_error(mapping, error);
 	unlock_page(page);