Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig | 5
-rw-r--r--  lib/Kconfig.debug | 150
-rw-r--r--  lib/Kconfig.kgdb | 21
-rw-r--r--  lib/Makefile | 13
-rw-r--r--  lib/asn1_decoder.c | 10
-rw-r--r--  lib/atomic64.c | 17
-rw-r--r--  lib/bitmap.c | 2
-rw-r--r--  lib/bug.c | 4
-rw-r--r--  lib/bust_spinlocks.c | 3
-rw-r--r--  lib/checksum.c | 2
-rw-r--r--  lib/cpu_rmap.c | 54
-rw-r--r--  lib/cpumask.c | 2
-rw-r--r--  lib/debugobjects.c | 21
-rw-r--r--  lib/decompress_unlzo.c | 2
-rw-r--r--  lib/devres.c | 60
-rw-r--r--  lib/digsig.c | 43
-rw-r--r--  lib/dma-debug.c | 95
-rw-r--r--  lib/dynamic_debug.c | 174
-rw-r--r--  lib/earlycpio.c | 145
-rw-r--r--  lib/hexdump.c | 4
-rw-r--r--  lib/idr.c | 482
-rw-r--r--  lib/interval_tree_test_main.c | 7
-rw-r--r--  lib/kfifo.c | 607
-rw-r--r--  lib/kstrtox.c | 64
-rw-r--r--  lib/locking-selftest.c | 34
-rw-r--r--  lib/lru_cache.c | 358
-rw-r--r--  lib/lzo/Makefile | 2
-rw-r--r--  lib/lzo/lzo1x_compress.c | 335
-rw-r--r--  lib/lzo/lzo1x_decompress.c | 255
-rw-r--r--  lib/lzo/lzo1x_decompress_safe.c | 237
-rw-r--r--  lib/lzo/lzodefs.h | 38
-rw-r--r--  lib/mpi/longlong.h | 19
-rw-r--r--  lib/mpi/mpi-internal.h | 4
-rw-r--r--  lib/mpi/mpicoder.c | 8
-rw-r--r--  lib/of-reconfig-notifier-error-inject.c (renamed from lib/pSeries-reconfig-notifier-error-inject.c) | 22
-rw-r--r--  lib/parser.c | 6
-rw-r--r--  lib/percpu-rwsem.c | 165
-rw-r--r--  lib/raid6/Makefile | 9
-rw-r--r--  lib/raid6/algos.c | 12
-rw-r--r--  lib/raid6/altivec.uc | 3
-rw-r--r--  lib/raid6/avx2.c | 251
-rw-r--r--  lib/raid6/mmx.c | 2
-rw-r--r--  lib/raid6/recov_avx2.c | 323
-rw-r--r--  lib/raid6/recov_ssse3.c | 4
-rw-r--r--  lib/raid6/sse1.c | 2
-rw-r--r--  lib/raid6/sse2.c | 8
-rw-r--r--  lib/raid6/test/Makefile | 29
-rw-r--r--  lib/raid6/x86.h | 14
-rw-r--r--  lib/random32.c | 97
-rw-r--r--  lib/rbtree.c | 20
-rw-r--r--  lib/rbtree_test.c | 8
-rw-r--r--  lib/rwsem-spinlock.c | 69
-rw-r--r--  lib/rwsem.c | 75
-rw-r--r--  lib/scatterlist.c | 89
-rw-r--r--  lib/swiotlb.c | 306
-rw-r--r--  lib/vsprintf.c | 116
-rw-r--r--  lib/xz/Kconfig | 34
57 files changed, 3561 insertions, 1380 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 4b31a46fb307..3958dc4389f9 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -42,6 +42,9 @@ config GENERIC_IO
config STMP_DEVICE
bool
+config PERCPU_RWSEM
+ boolean
+
config CRC_CCITT
tristate "CRC-CCITT functions"
help
@@ -319,7 +322,7 @@ config CPUMASK_OFFSTACK
config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS
- depends on EXPERIMENTAL && BROKEN
+ depends on BROKEN
config CPU_RMAP
bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 28e9d6c98941..28be08c09bab 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -134,7 +134,7 @@ config DEBUG_SECTION_MISMATCH
any use of code/data previously in these sections would
most likely result in an oops.
In the code, functions and variables are annotated with
- __init, __devinit, etc. (see the full list in include/linux/init.h),
+ __init, __cpuinit, etc. (see the full list in include/linux/init.h),
which results in the code/data being placed in specific sections.
The section mismatch analysis is always performed after a full
kernel build, and enabling this option causes the following
@@ -243,8 +243,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
config PANIC_ON_OOPS
- bool "Panic on Oops" if EXPERT
- default n
+ bool "Panic on Oops"
help
Say Y here to enable the kernel to panic when it oopses. This
has the same effect as setting oops=panic on the kernel command
@@ -455,7 +454,7 @@ config HAVE_DEBUG_KMEMLEAK
config DEBUG_KMEMLEAK
bool "Kernel memory leak detector"
- depends on DEBUG_KERNEL && EXPERIMENTAL && HAVE_DEBUG_KMEMLEAK
+ depends on DEBUG_KERNEL && HAVE_DEBUG_KMEMLEAK
select DEBUG_FS
select STACKTRACE if STACKTRACE_SUPPORT
select KALLSYMS
@@ -605,61 +604,6 @@ config PROVE_LOCKING
For more details, see Documentation/lockdep-design.txt.
-config PROVE_RCU
- bool "RCU debugging: prove RCU correctness"
- depends on PROVE_LOCKING
- default n
- help
- This feature enables lockdep extensions that check for correct
- use of RCU APIs. This is currently under development. Say Y
- if you want to debug RCU usage or help work on the PROVE_RCU
- feature.
-
- Say N if you are unsure.
-
-config PROVE_RCU_REPEATEDLY
- bool "RCU debugging: don't disable PROVE_RCU on first splat"
- depends on PROVE_RCU
- default n
- help
- By itself, PROVE_RCU will disable checking upon issuing the
- first warning (or "splat"). This feature prevents such
- disabling, allowing multiple RCU-lockdep warnings to be printed
- on a single reboot.
-
- Say Y to allow multiple RCU-lockdep warnings per boot.
-
- Say N if you are unsure.
-
-config PROVE_RCU_DELAY
- bool "RCU debugging: preemptible RCU race provocation"
- depends on DEBUG_KERNEL && PREEMPT_RCU
- default n
- help
- There is a class of races that involve an unlikely preemption
- of __rcu_read_unlock() just after ->rcu_read_lock_nesting has
- been set to INT_MIN. This feature inserts a delay at that
- point to increase the probability of these races.
-
- Say Y to increase probability of preemption of __rcu_read_unlock().
-
- Say N if you are unsure.
-
-config SPARSE_RCU_POINTER
- bool "RCU debugging: sparse-based checks for pointer usage"
- default n
- help
- This feature enables the __rcu sparse annotation for
- RCU-protected pointers. This annotation will cause sparse
- to flag any non-RCU used of annotated pointers. This can be
- helpful when debugging RCU usage. Please note that this feature
- is not intended to enforce code cleanliness; it is instead merely
- a debugging aid.
-
- Say Y to make sparse flag questionable use of RCU-protected pointers
-
- Say N if you are unsure.
-
config LOCKDEP
bool
depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
@@ -730,7 +674,7 @@ config STACKTRACE
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
- depends on DEBUG_KERNEL && !IA64 && !PARISC
+ depends on DEBUG_KERNEL && !IA64 && !PARISC && !METAG
help
Enables the display of the minimum amount of free stack which each
task has ever had available in the sysrq-T and sysrq-P debug output.
@@ -911,7 +855,7 @@ config FRAME_POINTER
bool "Compile the kernel with frame pointers"
depends on DEBUG_KERNEL && \
(CRIS || M68K || FRV || UML || \
- AVR32 || SUPERH || BLACKFIN || MN10300) || \
+ AVR32 || SUPERH || BLACKFIN || MN10300 || METAG) || \
ARCH_WANT_FRAME_POINTERS
default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS
help
@@ -937,6 +881,63 @@ config BOOT_PRINTK_DELAY
BOOT_PRINTK_DELAY also may cause LOCKUP_DETECTOR to detect
what it believes to be lockup conditions.
+menu "RCU Debugging"
+
+config PROVE_RCU
+ bool "RCU debugging: prove RCU correctness"
+ depends on PROVE_LOCKING
+ default n
+ help
+ This feature enables lockdep extensions that check for correct
+ use of RCU APIs. This is currently under development. Say Y
+ if you want to debug RCU usage or help work on the PROVE_RCU
+ feature.
+
+ Say N if you are unsure.
+
+config PROVE_RCU_REPEATEDLY
+ bool "RCU debugging: don't disable PROVE_RCU on first splat"
+ depends on PROVE_RCU
+ default n
+ help
+ By itself, PROVE_RCU will disable checking upon issuing the
+ first warning (or "splat"). This feature prevents such
+ disabling, allowing multiple RCU-lockdep warnings to be printed
+ on a single reboot.
+
+ Say Y to allow multiple RCU-lockdep warnings per boot.
+
+ Say N if you are unsure.
+
+config PROVE_RCU_DELAY
+ bool "RCU debugging: preemptible RCU race provocation"
+ depends on DEBUG_KERNEL && PREEMPT_RCU
+ default n
+ help
+ There is a class of races that involve an unlikely preemption
+ of __rcu_read_unlock() just after ->rcu_read_lock_nesting has
+ been set to INT_MIN. This feature inserts a delay at that
+ point to increase the probability of these races.
+
+ Say Y to increase probability of preemption of __rcu_read_unlock().
+
+ Say N if you are unsure.
+
+config SPARSE_RCU_POINTER
+ bool "RCU debugging: sparse-based checks for pointer usage"
+ default n
+ help
+ This feature enables the __rcu sparse annotation for
+ RCU-protected pointers. This annotation will cause sparse
+ to flag any non-RCU used of annotated pointers. This can be
+ helpful when debugging RCU usage. Please note that this feature
+ is not intended to enforce code cleanliness; it is instead merely
+ a debugging aid.
+
+ Say Y to make sparse flag questionable use of RCU-protected pointers
+
+ Say N if you are unsure.
+
config RCU_TORTURE_TEST
tristate "torture tests for RCU"
depends on DEBUG_KERNEL
@@ -970,9 +971,9 @@ config RCU_TORTURE_TEST_RUNNABLE
config RCU_CPU_STALL_TIMEOUT
int "RCU CPU stall timeout in seconds"
- depends on TREE_RCU || TREE_PREEMPT_RCU
+ depends on RCU_STALL_COMMON
range 3 300
- default 60
+ default 21
help
If a given RCU grace period extends more than the specified
number of seconds, a CPU stall warning is printed. If the
@@ -1008,6 +1009,7 @@ config RCU_CPU_STALL_INFO
config RCU_TRACE
bool "Enable tracing for RCU"
depends on DEBUG_KERNEL
+ select TRACE_CLOCK
help
This option provides tracing in RCU which presents stats
in debugfs for debugging RCU implementation.
@@ -1015,6 +1017,8 @@ config RCU_TRACE
Say Y here if you want to enable RCU tracing
Say N if you are unsure.
+endmenu # "RCU Debugging"
+
config KPROBES_SANITY_TEST
bool "Kprobes sanity tests"
depends on DEBUG_KERNEL
@@ -1115,7 +1119,7 @@ config NOTIFIER_ERROR_INJECTION
depends on DEBUG_KERNEL
select DEBUG_FS
help
- This option provides the ability to inject artifical errors to
+ This option provides the ability to inject artificial errors to
specified notifier chain callbacks. It is useful to test the error
handling of notifier call chain failures.
@@ -1126,7 +1130,7 @@ config CPU_NOTIFIER_ERROR_INJECT
depends on HOTPLUG_CPU && NOTIFIER_ERROR_INJECTION
help
This option provides a kernel module that can be used to test
- the error handling of the cpu notifiers by injecting artifical
+ the error handling of the cpu notifiers by injecting artificial
errors to CPU notifier chain callbacks. It is controlled through
debugfs interface under /sys/kernel/debug/notifier-error-inject/cpu
@@ -1150,7 +1154,7 @@ config PM_NOTIFIER_ERROR_INJECT
depends on PM && NOTIFIER_ERROR_INJECTION
default m if PM_DEBUG
help
- This option provides the ability to inject artifical errors to
+ This option provides the ability to inject artificial errors to
PM notifier chain callbacks. It is controlled through debugfs
interface /sys/kernel/debug/notifier-error-inject/pm
@@ -1173,7 +1177,7 @@ config MEMORY_NOTIFIER_ERROR_INJECT
tristate "Memory hotplug notifier error injection module"
depends on MEMORY_HOTPLUG_SPARSE && NOTIFIER_ERROR_INJECTION
help
- This option provides the ability to inject artifical errors to
+ This option provides the ability to inject artificial errors to
memory hotplug notifier chain callbacks. It is controlled through
debugfs interface under /sys/kernel/debug/notifier-error-inject/memory
@@ -1192,14 +1196,14 @@ config MEMORY_NOTIFIER_ERROR_INJECT
If unsure, say N.
-config PSERIES_RECONFIG_NOTIFIER_ERROR_INJECT
- tristate "pSeries reconfig notifier error injection module"
- depends on PPC_PSERIES && NOTIFIER_ERROR_INJECTION
+config OF_RECONFIG_NOTIFIER_ERROR_INJECT
+ tristate "OF reconfig notifier error injection module"
+ depends on OF_DYNAMIC && NOTIFIER_ERROR_INJECTION
help
- This option provides the ability to inject artifical errors to
- pSeries reconfig notifier chain callbacks. It is controlled
+ This option provides the ability to inject artificial errors to
+ OF reconfig notifier chain callbacks. It is controlled
through debugfs interface under
- /sys/kernel/debug/notifier-error-inject/pSeries-reconfig/
+ /sys/kernel/debug/notifier-error-inject/OF-reconfig/
If the notifier call chain should be failed with some events
notified, write the error code to "actions/<notifier event>/error".
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 43cb93fa2651..140e87824173 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -5,7 +5,7 @@ config HAVE_ARCH_KGDB
menuconfig KGDB
bool "KGDB: kernel debugger"
depends on HAVE_ARCH_KGDB
- depends on DEBUG_KERNEL && EXPERIMENTAL
+ depends on DEBUG_KERNEL
help
If you say Y here, it will be possible to remotely debug the
kernel using gdb. It is recommended but not required, that
@@ -22,6 +22,7 @@ config KGDB_SERIAL_CONSOLE
tristate "KGDB: use kgdb over the serial console"
select CONSOLE_POLL
select MAGIC_SYSRQ
+ depends on TTY
default y
help
Share a serial console with kgdb. Sysrq-g must be used
@@ -79,4 +80,22 @@ config KDB_KEYBOARD
help
KDB can use a PS/2 type keyboard for an input device
+config KDB_CONTINUE_CATASTROPHIC
+ int "KDB: continue after catastrophic errors"
+ depends on KGDB_KDB
+ default "0"
+ help
+ This integer controls the behaviour of kdb when the kernel gets a
+ catastrophic error, i.e. for a panic or oops.
+ When KDB is active and a catastrophic error occurs, nothing extra
+ will happen until you type 'go'.
+ CONFIG_KDB_CONTINUE_CATASTROPHIC == 0 (default). The first time
+ you type 'go', you will be warned by kdb. The second time you type
+ 'go', KDB tries to continue. No guarantees that the
+ kernel is still usable in this situation.
+ CONFIG_KDB_CONTINUE_CATASTROPHIC == 1. KDB tries to continue.
+ No guarantees that the kernel is still usable in this situation.
+ CONFIG_KDB_CONTINUE_CATASTROPHIC == 2. KDB forces a reboot.
+ If you are not sure, say 0.
+
endif # KGDB
diff --git a/lib/Makefile b/lib/Makefile
index 821a16229111..d7946ff75b2e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -12,7 +12,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
idr.o int_sqrt.o extable.o \
sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
- is_single_threaded.o plist.o decompress.o
+ is_single_threaded.o plist.o decompress.o kobject_uevent.o \
+ earlycpio.o
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
@@ -22,7 +23,7 @@ lib-y += kobject.o klist.o
obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \
- bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o
+ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o
obj-y += kstrtox.o
obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
@@ -31,7 +32,6 @@ CFLAGS_kobject.o += -DDEBUG
CFLAGS_kobject_uevent.o += -DDEBUG
endif
-lib-$(CONFIG_HOTPLUG) += kobject_uevent.o
obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o
obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
@@ -40,6 +40,7 @@ obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
+lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
@@ -94,8 +95,8 @@ obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o
-obj-$(CONFIG_PSERIES_RECONFIG_NOTIFIER_ERROR_INJECT) += \
- pSeries-reconfig-notifier-error-inject.o
+obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \
+ of-reconfig-notifier-error-inject.o
lib-$(CONFIG_GENERIC_BUG) += bug.o
@@ -163,7 +164,7 @@ $(obj)/crc32table.h: $(obj)/gen_crc32table
#
obj-$(CONFIG_OID_REGISTRY) += oid_registry.o
-$(obj)/oid_registry.c: $(obj)/oid_registry_data.c
+$(obj)/oid_registry.o: $(obj)/oid_registry_data.c
$(obj)/oid_registry_data.c: $(srctree)/include/linux/oid_registry.h \
$(src)/build_OID_registry
diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c
index de2c8b5a715b..11b9b01fda6b 100644
--- a/lib/asn1_decoder.c
+++ b/lib/asn1_decoder.c
@@ -81,7 +81,7 @@ next_tag:
goto next_tag;
}
- if (unlikely((tag & 0x1f) == 0x1f)) {
+ if (unlikely((tag & 0x1f) == ASN1_LONG_TAG)) {
do {
if (unlikely(datalen - dp < 2))
goto data_overrun_error;
@@ -91,12 +91,12 @@ next_tag:
/* Extract the length */
len = data[dp++];
- if (len < 0x7f) {
+ if (len <= 0x7f) {
dp += len;
goto next_tag;
}
- if (unlikely(len == 0x80)) {
+ if (unlikely(len == ASN1_INDEFINITE_LENGTH)) {
/* Indefinite length */
if (unlikely((tag & ASN1_CONS_BIT) == ASN1_PRIM << 5))
goto indefinite_len_primitive;
@@ -222,7 +222,7 @@ next_op:
if (unlikely(dp >= datalen - 1))
goto data_overrun_error;
tag = data[dp++];
- if (unlikely((tag & 0x1f) == 0x1f))
+ if (unlikely((tag & 0x1f) == ASN1_LONG_TAG))
goto long_tag_not_supported;
if (op & ASN1_OP_MATCH__ANY) {
@@ -254,7 +254,7 @@ next_op:
len = data[dp++];
if (len > 0x7f) {
- if (unlikely(len == 0x80)) {
+ if (unlikely(len == ASN1_INDEFINITE_LENGTH)) {
/* Indefinite length */
if (unlikely(!(tag & ASN1_CONS_BIT)))
goto indefinite_len_primitive;
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 978537809d84..08a4f068e61e 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -31,7 +31,11 @@
static union {
raw_spinlock_t lock;
char pad[L1_CACHE_BYTES];
-} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp;
+} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = {
+ [0 ... (NR_LOCKS - 1)] = {
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock),
+ },
+};
static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
{
@@ -173,14 +177,3 @@ int atomic64_add_unless(atomic64_t *v, long long a, long long u)
return ret;
}
EXPORT_SYMBOL(atomic64_add_unless);
-
-static int init_atomic64_lock(void)
-{
- int i;
-
- for (i = 0; i < NR_LOCKS; ++i)
- raw_spin_lock_init(&atomic64_lock[i].lock);
- return 0;
-}
-
-pure_initcall(init_atomic64_lock);
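
The hunk above replaces the boot-time pure_initcall() with static initialization, so the hashed locks are already valid if an atomic64 operation runs before initcalls. A minimal sketch of the same pattern under hypothetical names (demo_lock, DEMO_NR_LOCKS); the "[first ... last]" range designator is the GCC/Clang extension used here:

#include <linux/cache.h>
#include <linux/spinlock.h>

#define DEMO_NR_LOCKS	16	/* hypothetical; atomic64.c uses NR_LOCKS */

static union {
	raw_spinlock_t lock;
	char pad[L1_CACHE_BYTES];	/* keep each lock on its own cache line */
} demo_lock[DEMO_NR_LOCKS] __cacheline_aligned_in_smp = {
	/* every element gets the same compile-time initializer */
	[0 ... (DEMO_NR_LOCKS - 1)] = {
		.lock = __RAW_SPIN_LOCK_UNLOCKED(demo_lock.lock),
	},
};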
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 06fdfa1aeba7..06f7e4fe8d2d 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -353,7 +353,7 @@ again:
EXPORT_SYMBOL(bitmap_find_next_zero_area);
/*
- * Bitmap printing & parsing functions: first version by Bill Irwin,
+ * Bitmap printing & parsing functions: first version by Nadia Yvette Chambers,
* second version by Paul Jackson, third by Joe Korty.
*/
diff --git a/lib/bug.c b/lib/bug.c
index a28c1415357c..168603477f02 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -55,6 +55,7 @@ static inline unsigned long bug_addr(const struct bug_entry *bug)
}
#ifdef CONFIG_MODULES
+/* Updates are protected by module mutex */
static LIST_HEAD(module_bug_list);
static const struct bug_entry *module_find_bug(unsigned long bugaddr)
@@ -165,7 +166,8 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
print_modules();
show_regs(regs);
print_oops_end_marker();
- add_taint(BUG_GET_TAINT(bug));
+ /* Just a warning, don't kill lockdep. */
+ add_taint(BUG_GET_TAINT(bug), LOCKDEP_STILL_OK);
return BUG_TRAP_TYPE_WARN;
}
diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c
index 9681d54b95d1..f8e0e5367398 100644
--- a/lib/bust_spinlocks.c
+++ b/lib/bust_spinlocks.c
@@ -8,6 +8,7 @@
*/
#include <linux/kernel.h>
+#include <linux/printk.h>
#include <linux/spinlock.h>
#include <linux/tty.h>
#include <linux/wait.h>
@@ -28,5 +29,3 @@ void __attribute__((weak)) bust_spinlocks(int yes)
wake_up_klogd();
}
}
-
-
diff --git a/lib/checksum.c b/lib/checksum.c
index 12dceb27ff20..129775eb6de6 100644
--- a/lib/checksum.c
+++ b/lib/checksum.c
@@ -102,6 +102,7 @@ out:
}
#endif
+#ifndef ip_fast_csum
/*
* This is a version of ip_compute_csum() optimized for IP headers,
* which always checksum on 4 octet boundaries.
@@ -111,6 +112,7 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
return (__force __sum16)~do_csum(iph, ihl*4);
}
EXPORT_SYMBOL(ip_fast_csum);
+#endif
/*
* computes the checksum of a memory block at buff, length len,
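
The new #ifndef ip_fast_csum guard lets an architecture that otherwise relies on the generic checksum code supply its own IP-header routine. A hedged sketch of the usual override convention, assumed to live in the arch's asm/checksum.h; the folding loop is a generic illustration, not any particular architecture's shipped code:

#include <linux/types.h>

static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
	const __be16 *p = iph;
	u32 sum = 0;
	unsigned int i;

	/* ihl counts 32-bit words, i.e. 2 * ihl 16-bit words */
	for (i = 0; i < ihl * 2; i++)
		sum += (__force u32)p[i];

	/* fold the carries back into the low 16 bits; twice is enough here */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (__force __sum16)~sum;
}
#define ip_fast_csum ip_fast_csum	/* tells lib/checksum.c not to build its generic copy */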
diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c
index 145dec5267c9..5fbed5caba6e 100644
--- a/lib/cpu_rmap.c
+++ b/lib/cpu_rmap.c
@@ -45,6 +45,7 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
if (!rmap)
return NULL;
+ kref_init(&rmap->refcount);
rmap->obj = (void **)((char *)rmap + obj_offset);
/* Initially assign CPUs to objects on a rota, since we have
@@ -63,6 +64,35 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
}
EXPORT_SYMBOL(alloc_cpu_rmap);
+/**
+ * cpu_rmap_release - internal reclaiming helper called from kref_put
+ * @ref: kref to struct cpu_rmap
+ */
+static void cpu_rmap_release(struct kref *ref)
+{
+ struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);
+ kfree(rmap);
+}
+
+/**
+ * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
+ * @rmap: reverse-map allocated with alloc_cpu_rmap()
+ */
+static inline void cpu_rmap_get(struct cpu_rmap *rmap)
+{
+ kref_get(&rmap->refcount);
+}
+
+/**
+ * cpu_rmap_put - release ref on a cpu_rmap
+ * @rmap: reverse-map allocated with alloc_cpu_rmap()
+ */
+int cpu_rmap_put(struct cpu_rmap *rmap)
+{
+ return kref_put(&rmap->refcount, cpu_rmap_release);
+}
+EXPORT_SYMBOL(cpu_rmap_put);
+
/* Reevaluate nearest object for given CPU, comparing with the given
* neighbours at the given distance.
*/
@@ -197,8 +227,7 @@ struct irq_glue {
* free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
* @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL
*
- * Must be called in process context, before freeing the IRQs, and
- * without holding any locks required by global workqueue items.
+ * Must be called in process context, before freeing the IRQs.
*/
void free_irq_cpu_rmap(struct cpu_rmap *rmap)
{
@@ -212,12 +241,18 @@ void free_irq_cpu_rmap(struct cpu_rmap *rmap)
glue = rmap->obj[index];
irq_set_affinity_notifier(glue->notify.irq, NULL);
}
- irq_run_affinity_notifiers();
- kfree(rmap);
+ cpu_rmap_put(rmap);
}
EXPORT_SYMBOL(free_irq_cpu_rmap);
+/**
+ * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
+ * @notify: struct irq_affinity_notify passed by irq/manage.c
+ * @mask: cpu mask for new SMP affinity
+ *
+ * This is executed in workqueue context.
+ */
static void
irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
{
@@ -230,10 +265,16 @@ irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc);
}
+/**
+ * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
+ * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
+ */
static void irq_cpu_rmap_release(struct kref *ref)
{
struct irq_glue *glue =
container_of(ref, struct irq_glue, notify.kref);
+
+ cpu_rmap_put(glue->rmap);
kfree(glue);
}
@@ -258,10 +299,13 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
glue->notify.notify = irq_cpu_rmap_notify;
glue->notify.release = irq_cpu_rmap_release;
glue->rmap = rmap;
+ cpu_rmap_get(rmap);
glue->index = cpu_rmap_add(rmap, glue);
rc = irq_set_affinity_notifier(irq, &glue->notify);
- if (rc)
+ if (rc) {
+ cpu_rmap_put(glue->rmap);
kfree(glue);
+ }
return rc;
}
EXPORT_SYMBOL(irq_cpu_rmap_add);
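
With the kref added above, the rmap can outlive free_irq_cpu_rmap(): each irq_glue pins it until the IRQ core releases the affinity notifier. A hedged sketch of the caller-side pattern, with hypothetical driver names (demo_*):

#include <linux/cpu_rmap.h>
#include <linux/errno.h>

static struct cpu_rmap *demo_rmap;	/* hypothetical driver state */

static int demo_setup_rmap(const int *irqs, unsigned int nvec)
{
	unsigned int i;
	int rc;

	demo_rmap = alloc_irq_cpu_rmap(nvec);
	if (!demo_rmap)
		return -ENOMEM;

	for (i = 0; i < nvec; i++) {
		/* each successful add takes its own reference on demo_rmap */
		rc = irq_cpu_rmap_add(demo_rmap, irqs[i]);
		if (rc) {
			free_irq_cpu_rmap(demo_rmap);
			demo_rmap = NULL;
			return rc;
		}
	}
	return 0;
}

static void demo_teardown_rmap(void)
{
	/*
	 * Unhooks the notifiers and drops the driver's reference; the
	 * structure is freed only when the last notifier reference goes
	 * away, so no workqueue flushing is needed here any more.
	 */
	free_irq_cpu_rmap(demo_rmap);
	demo_rmap = NULL;
}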
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 402a54ac35cb..d327b87c99b7 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -161,6 +161,6 @@ EXPORT_SYMBOL(free_cpumask_var);
*/
void __init free_bootmem_cpumask_var(cpumask_var_t mask)
{
- free_bootmem((unsigned long)mask, cpumask_size());
+ free_bootmem(__pa(mask), cpumask_size());
}
#endif
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index d11808ca4bc4..37061ede8b81 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -109,11 +109,10 @@ static void fill_pool(void)
*/
static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b)
{
- struct hlist_node *node;
struct debug_obj *obj;
int cnt = 0;
- hlist_for_each_entry(obj, node, &b->list, node) {
+ hlist_for_each_entry(obj, &b->list, node) {
cnt++;
if (obj->object == addr)
return obj;
@@ -213,7 +212,7 @@ static void free_object(struct debug_obj *obj)
static void debug_objects_oom(void)
{
struct debug_bucket *db = obj_hash;
- struct hlist_node *node, *tmp;
+ struct hlist_node *tmp;
HLIST_HEAD(freelist);
struct debug_obj *obj;
unsigned long flags;
@@ -227,7 +226,7 @@ static void debug_objects_oom(void)
raw_spin_unlock_irqrestore(&db->lock, flags);
/* Now free them */
- hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) {
+ hlist_for_each_entry_safe(obj, tmp, &freelist, node) {
hlist_del(&obj->node);
free_object(obj);
}
@@ -658,7 +657,7 @@ debug_object_active_state(void *addr, struct debug_obj_descr *descr,
static void __debug_check_no_obj_freed(const void *address, unsigned long size)
{
unsigned long flags, oaddr, saddr, eaddr, paddr, chunks;
- struct hlist_node *node, *tmp;
+ struct hlist_node *tmp;
HLIST_HEAD(freelist);
struct debug_obj_descr *descr;
enum debug_obj_state state;
@@ -678,7 +677,7 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)
repeat:
cnt = 0;
raw_spin_lock_irqsave(&db->lock, flags);
- hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) {
+ hlist_for_each_entry_safe(obj, tmp, &db->list, node) {
cnt++;
oaddr = (unsigned long) obj->object;
if (oaddr < saddr || oaddr >= eaddr)
@@ -702,7 +701,7 @@ repeat:
raw_spin_unlock_irqrestore(&db->lock, flags);
/* Now free them */
- hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) {
+ hlist_for_each_entry_safe(obj, tmp, &freelist, node) {
hlist_del(&obj->node);
free_object(obj);
}
@@ -1013,7 +1012,7 @@ void __init debug_objects_early_init(void)
static int __init debug_objects_replace_static_objects(void)
{
struct debug_bucket *db = obj_hash;
- struct hlist_node *node, *tmp;
+ struct hlist_node *tmp;
struct debug_obj *obj, *new;
HLIST_HEAD(objects);
int i, cnt = 0;
@@ -1033,7 +1032,7 @@ static int __init debug_objects_replace_static_objects(void)
local_irq_disable();
/* Remove the statically allocated objects from the pool */
- hlist_for_each_entry_safe(obj, node, tmp, &obj_pool, node)
+ hlist_for_each_entry_safe(obj, tmp, &obj_pool, node)
hlist_del(&obj->node);
/* Move the allocated objects to the pool */
hlist_move_list(&objects, &obj_pool);
@@ -1042,7 +1041,7 @@ static int __init debug_objects_replace_static_objects(void)
for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) {
hlist_move_list(&db->list, &objects);
- hlist_for_each_entry(obj, node, &objects, node) {
+ hlist_for_each_entry(obj, &objects, node) {
new = hlist_entry(obj_pool.first, typeof(*obj), node);
hlist_del(&new->node);
/* copy object data */
@@ -1057,7 +1056,7 @@ static int __init debug_objects_replace_static_objects(void)
obj_pool_used);
return 0;
free:
- hlist_for_each_entry_safe(obj, node, tmp, &objects, node) {
+ hlist_for_each_entry_safe(obj, tmp, &objects, node) {
hlist_del(&obj->node);
kmem_cache_free(obj_cache, obj);
}
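
These hunks adapt to the tree-wide hlist iterator cleanup: hlist_for_each_entry() and hlist_for_each_entry_safe() no longer take a separate struct hlist_node * cursor. A minimal before/after sketch with made-up names (struct demo_obj, demo_head):

#include <linux/list.h>

struct demo_obj {
	int val;
	struct hlist_node node;
};

static HLIST_HEAD(demo_head);

static int demo_sum(void)
{
	struct demo_obj *obj;
	int sum = 0;

	/* old form was: hlist_for_each_entry(obj, pos, &demo_head, node) */
	hlist_for_each_entry(obj, &demo_head, node)
		sum += obj->val;
	return sum;
}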
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
index 4531294fa62f..960183d4258f 100644
--- a/lib/decompress_unlzo.c
+++ b/lib/decompress_unlzo.c
@@ -31,7 +31,7 @@
*/
#ifdef STATIC
-#include "lzo/lzo1x_decompress.c"
+#include "lzo/lzo1x_decompress_safe.c"
#else
#include <linux/decompress/unlzo.h>
#endif
diff --git a/lib/devres.c b/lib/devres.c
index 80b9c76d436a..823533138fa0 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -1,3 +1,4 @@
+#include <linux/err.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/gfp.h>
@@ -86,22 +87,24 @@ void devm_iounmap(struct device *dev, void __iomem *addr)
EXPORT_SYMBOL(devm_iounmap);
/**
- * devm_request_and_ioremap() - Check, request region, and ioremap resource
- * @dev: Generic device to handle the resource for
+ * devm_ioremap_resource() - check, request region, and ioremap resource
+ * @dev: generic device to handle the resource for
* @res: resource to be handled
*
- * Takes all necessary steps to ioremap a mem resource. Uses managed device, so
- * everything is undone on driver detach. Checks arguments, so you can feed
- * it the result from e.g. platform_get_resource() directly. Returns the
- * remapped pointer or NULL on error. Usage example:
+ * Checks that a resource is a valid memory region, requests the memory region
+ * and ioremaps it either as cacheable or as non-cacheable memory depending on
+ * the resource's flags. All operations are managed and will be undone on
+ * driver detach.
+ *
+ * Returns a pointer to the remapped memory or an ERR_PTR() encoded error code
+ * on failure. Usage example:
*
* res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- * base = devm_request_and_ioremap(&pdev->dev, res);
- * if (!base)
- * return -EADDRNOTAVAIL;
+ * base = devm_ioremap_resource(&pdev->dev, res);
+ * if (IS_ERR(base))
+ * return PTR_ERR(base);
*/
-void __iomem *devm_request_and_ioremap(struct device *dev,
- struct resource *res)
+void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res)
{
resource_size_t size;
const char *name;
@@ -111,7 +114,7 @@ void __iomem *devm_request_and_ioremap(struct device *dev,
if (!res || resource_type(res) != IORESOURCE_MEM) {
dev_err(dev, "invalid resource\n");
- return NULL;
+ return ERR_PTR(-EINVAL);
}
size = resource_size(res);
@@ -119,7 +122,7 @@ void __iomem *devm_request_and_ioremap(struct device *dev,
if (!devm_request_mem_region(dev, res->start, size, name)) {
dev_err(dev, "can't request region for resource %pR\n", res);
- return NULL;
+ return ERR_PTR(-EBUSY);
}
if (res->flags & IORESOURCE_CACHEABLE)
@@ -130,10 +133,39 @@ void __iomem *devm_request_and_ioremap(struct device *dev,
if (!dest_ptr) {
dev_err(dev, "ioremap failed for resource %pR\n", res);
devm_release_mem_region(dev, res->start, size);
+ dest_ptr = ERR_PTR(-ENOMEM);
}
return dest_ptr;
}
+EXPORT_SYMBOL(devm_ioremap_resource);
+
+/**
+ * devm_request_and_ioremap() - Check, request region, and ioremap resource
+ * @dev: Generic device to handle the resource for
+ * @res: resource to be handled
+ *
+ * Takes all necessary steps to ioremap a mem resource. Uses managed device, so
+ * everything is undone on driver detach. Checks arguments, so you can feed
+ * it the result from e.g. platform_get_resource() directly. Returns the
+ * remapped pointer or NULL on error. Usage example:
+ *
+ * res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ * base = devm_request_and_ioremap(&pdev->dev, res);
+ * if (!base)
+ * return -EADDRNOTAVAIL;
+ */
+void __iomem *devm_request_and_ioremap(struct device *device,
+ struct resource *res)
+{
+ void __iomem *dest_ptr;
+
+ dest_ptr = devm_ioremap_resource(device, res);
+ if (IS_ERR(dest_ptr))
+ return NULL;
+
+ return dest_ptr;
+}
EXPORT_SYMBOL(devm_request_and_ioremap);
#ifdef CONFIG_HAS_IOPORT
@@ -195,6 +227,7 @@ void devm_ioport_unmap(struct device *dev, void __iomem *addr)
devm_ioport_map_match, (void *)addr));
}
EXPORT_SYMBOL(devm_ioport_unmap);
+#endif /* CONFIG_HAS_IOPORT */
#ifdef CONFIG_PCI
/*
@@ -400,4 +433,3 @@ void pcim_iounmap_regions(struct pci_dev *pdev, int mask)
}
EXPORT_SYMBOL(pcim_iounmap_regions);
#endif /* CONFIG_PCI */
-#endif /* CONFIG_HAS_IOPORT */
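
A hedged sketch of how a platform driver's probe (demo_probe is a made-up name) moves from devm_request_and_ioremap() to the new devm_ioremap_resource(); the visible difference for callers is the ERR_PTR()-based error reporting:

#include <linux/err.h>
#include <linux/io.h>
#include <linux/platform_device.h>

static int demo_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *base;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(base))
		return PTR_ERR(base);	/* -EINVAL, -EBUSY or -ENOMEM, already logged */

	/* ... program the device through "base" ... */
	return 0;
}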
diff --git a/lib/digsig.c b/lib/digsig.c
index 8c0e62975c88..2f31e6a45f0a 100644
--- a/lib/digsig.c
+++ b/lib/digsig.c
@@ -30,11 +30,10 @@
static struct crypto_shash *shash;
-static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg,
- unsigned long msglen,
- unsigned long modulus_bitlen,
- unsigned char *out,
- unsigned long *outlen)
+static const char *pkcs_1_v1_5_decode_emsa(const unsigned char *msg,
+ unsigned long msglen,
+ unsigned long modulus_bitlen,
+ unsigned long *outlen)
{
unsigned long modulus_len, ps_len, i;
@@ -42,11 +41,11 @@ static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg,
/* test message size */
if ((msglen > modulus_len) || (modulus_len < 11))
- return -EINVAL;
+ return NULL;
/* separate encoded message */
- if ((msg[0] != 0x00) || (msg[1] != (unsigned char)1))
- return -EINVAL;
+ if (msg[0] != 0x00 || msg[1] != 0x01)
+ return NULL;
for (i = 2; i < modulus_len - 1; i++)
if (msg[i] != 0xFF)
@@ -56,19 +55,13 @@ static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg,
if (msg[i] != 0)
/* There was no octet with hexadecimal value 0x00
to separate ps from m. */
- return -EINVAL;
+ return NULL;
ps_len = i - 2;
- if (*outlen < (msglen - (2 + ps_len + 1))) {
- *outlen = msglen - (2 + ps_len + 1);
- return -EOVERFLOW;
- }
-
*outlen = (msglen - (2 + ps_len + 1));
- memcpy(out, &msg[2 + ps_len + 1], *outlen);
- return 0;
+ return msg + 2 + ps_len + 1;
}
/*
@@ -83,7 +76,8 @@ static int digsig_verify_rsa(struct key *key,
unsigned long mlen, mblen;
unsigned nret, l;
int head, i;
- unsigned char *out1 = NULL, *out2 = NULL;
+ unsigned char *out1 = NULL;
+ const char *m;
MPI in = NULL, res = NULL, pkey[2];
uint8_t *p, *datap, *endp;
struct user_key_payload *ukp;
@@ -120,7 +114,7 @@ static int digsig_verify_rsa(struct key *key,
}
mblen = mpi_get_nbits(pkey[0]);
- mlen = (mblen + 7)/8;
+ mlen = DIV_ROUND_UP(mblen, 8);
if (mlen == 0)
goto err;
@@ -129,10 +123,6 @@ static int digsig_verify_rsa(struct key *key,
if (!out1)
goto err;
- out2 = kzalloc(mlen, GFP_KERNEL);
- if (!out2)
- goto err;
-
nret = siglen;
in = mpi_read_from_buffer(sig, &nret);
if (!in)
@@ -162,18 +152,17 @@ static int digsig_verify_rsa(struct key *key,
memset(out1, 0, head);
memcpy(out1 + head, p, l);
- err = pkcs_1_v1_5_decode_emsa(out1, len, mblen, out2, &len);
- if (err)
- goto err;
+ kfree(p);
+
+ m = pkcs_1_v1_5_decode_emsa(out1, len, mblen, &len);
- if (len != hlen || memcmp(out2, h, hlen))
+ if (!m || len != hlen || memcmp(m, h, hlen))
err = -EINVAL;
err:
mpi_free(in);
mpi_free(res);
kfree(out1);
- kfree(out2);
while (--i >= 0)
mpi_free(pkey[i]);
err1:
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d84beb994f36..d87a17a819d0 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -45,6 +45,12 @@ enum {
dma_debug_coherent,
};
+enum map_err_types {
+ MAP_ERR_CHECK_NOT_APPLICABLE,
+ MAP_ERR_NOT_CHECKED,
+ MAP_ERR_CHECKED,
+};
+
#define DMA_DEBUG_STACKTRACE_ENTRIES 5
struct dma_debug_entry {
@@ -57,6 +63,7 @@ struct dma_debug_entry {
int direction;
int sg_call_ents;
int sg_mapped_ents;
+ enum map_err_types map_err_type;
#ifdef CONFIG_STACKTRACE
struct stack_trace stacktrace;
unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
@@ -114,6 +121,12 @@ static struct device_driver *current_driver __read_mostly;
static DEFINE_RWLOCK(driver_name_lock);
+static const char *const maperr2str[] = {
+ [MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable",
+ [MAP_ERR_NOT_CHECKED] = "dma map error not checked",
+ [MAP_ERR_CHECKED] = "dma map error checked",
+};
+
static const char *type2name[4] = { "single", "page",
"scather-gather", "coherent" };
@@ -376,11 +389,12 @@ void debug_dma_dump_mappings(struct device *dev)
list_for_each_entry(entry, &bucket->list, list) {
if (!dev || dev == entry->dev) {
dev_info(entry->dev,
- "%s idx %d P=%Lx D=%Lx L=%Lx %s\n",
+ "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n",
type2name[entry->type], idx,
(unsigned long long)entry->paddr,
entry->dev_addr, entry->size,
- dir2name[entry->direction]);
+ dir2name[entry->direction],
+ maperr2str[entry->map_err_type]);
}
}
@@ -844,21 +858,25 @@ static void check_unmap(struct dma_debug_entry *ref)
struct hash_bucket *bucket;
unsigned long flags;
- if (dma_mapping_error(ref->dev, ref->dev_addr)) {
- err_printk(ref->dev, NULL, "DMA-API: device driver tries "
- "to free an invalid DMA memory address\n");
- return;
- }
-
bucket = get_hash_bucket(ref, &flags);
entry = bucket_find_exact(bucket, ref);
if (!entry) {
- err_printk(ref->dev, NULL, "DMA-API: device driver tries "
- "to free DMA memory it has not allocated "
- "[device address=0x%016llx] [size=%llu bytes]\n",
- ref->dev_addr, ref->size);
- goto out;
+ /* must drop lock before calling dma_mapping_error */
+ put_hash_bucket(bucket, &flags);
+
+ if (dma_mapping_error(ref->dev, ref->dev_addr)) {
+ err_printk(ref->dev, NULL,
+ "DMA-API: device driver tries to free an "
+ "invalid DMA memory address\n");
+ } else {
+ err_printk(ref->dev, NULL,
+ "DMA-API: device driver tries to free DMA "
+ "memory it has not allocated [device "
+ "address=0x%016llx] [size=%llu bytes]\n",
+ ref->dev_addr, ref->size);
+ }
+ return;
}
if (ref->size != entry->size) {
@@ -910,10 +928,18 @@ static void check_unmap(struct dma_debug_entry *ref)
dir2name[ref->direction]);
}
+ if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
+ err_printk(ref->dev, entry,
+ "DMA-API: device driver failed to check map error"
+ "[device address=0x%016llx] [size=%llu bytes] "
+ "[mapped as %s]",
+ ref->dev_addr, ref->size,
+ type2name[entry->type]);
+ }
+
hash_bucket_del(entry);
dma_entry_free(entry);
-out:
put_hash_bucket(bucket, &flags);
}
@@ -1017,7 +1043,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
if (unlikely(global_disable))
return;
- if (unlikely(dma_mapping_error(dev, dma_addr)))
+ if (dma_mapping_error(dev, dma_addr))
return;
entry = dma_entry_alloc();
@@ -1030,6 +1056,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
entry->dev_addr = dma_addr;
entry->size = size;
entry->direction = direction;
+ entry->map_err_type = MAP_ERR_NOT_CHECKED;
if (map_single)
entry->type = dma_debug_single;
@@ -1045,6 +1072,44 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
}
EXPORT_SYMBOL(debug_dma_map_page);
+void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ struct dma_debug_entry ref;
+ struct dma_debug_entry *entry;
+ struct hash_bucket *bucket;
+ unsigned long flags;
+
+ if (unlikely(global_disable))
+ return;
+
+ ref.dev = dev;
+ ref.dev_addr = dma_addr;
+ bucket = get_hash_bucket(&ref, &flags);
+
+ list_for_each_entry(entry, &bucket->list, list) {
+ if (!exact_match(&ref, entry))
+ continue;
+
+ /*
+ * The same physical address can be mapped multiple
+ * times. Without a hardware IOMMU this results in the
+ * same device addresses being put into the dma-debug
+ * hash multiple times too. This can result in false
+ * positives being reported. Therefore we implement a
+ * best-fit algorithm here which updates the first entry
+ * from the hash which fits the reference value and is
+ * not currently listed as being checked.
+ */
+ if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
+ entry->map_err_type = MAP_ERR_CHECKED;
+ break;
+ }
+ }
+
+ put_hash_bucket(bucket, &flags);
+}
+EXPORT_SYMBOL(debug_dma_mapping_error);
+
void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
size_t size, int direction, bool map_single)
{
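
The new map_err_type state means CONFIG_DMA_API_DEBUG now complains at unmap time if a driver never passed the mapping through dma_mapping_error(). A hedged sketch of the expected driver pattern (demo_map is a hypothetical helper):

#include <linux/dma-mapping.h>
#include <linux/errno.h>

static int demo_map(struct device *dev, void *buf, size_t len, dma_addr_t *dma)
{
	dma_addr_t addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

	/* this check is what flips the debug entry to MAP_ERR_CHECKED */
	if (dma_mapping_error(dev, addr))
		return -ENOMEM;

	*dma = addr;
	return 0;
}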
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index e7f7d993357a..5276b99ca650 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -59,16 +59,9 @@ struct ddebug_iter {
static DEFINE_MUTEX(ddebug_lock);
static LIST_HEAD(ddebug_tables);
-static int verbose = 0;
+static int verbose;
module_param(verbose, int, 0644);
-/* Return the last part of a pathname */
-static inline const char *basename(const char *path)
-{
- const char *tail = strrchr(path, '/');
- return tail ? tail+1 : path;
-}
-
/* Return the path relative to source root */
static inline const char *trim_prefix(const char *path)
{
@@ -107,24 +100,32 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf,
return buf;
}
-#define vpr_info(fmt, ...) \
- if (verbose) do { pr_info(fmt, ##__VA_ARGS__); } while (0)
-
-#define vpr_info_dq(q, msg) \
+#define vpr_info(fmt, ...) \
do { \
- /* trim last char off format print */ \
- vpr_info("%s: func=\"%s\" file=\"%s\" " \
- "module=\"%s\" format=\"%.*s\" " \
- "lineno=%u-%u", \
- msg, \
- q->function ? q->function : "", \
- q->filename ? q->filename : "", \
- q->module ? q->module : "", \
- (int)(q->format ? strlen(q->format) - 1 : 0), \
- q->format ? q->format : "", \
- q->first_lineno, q->last_lineno); \
+ if (verbose) \
+ pr_info(fmt, ##__VA_ARGS__); \
} while (0)
+static void vpr_info_dq(const struct ddebug_query *query, const char *msg)
+{
+ /* trim any trailing newlines */
+ int fmtlen = 0;
+
+ if (query->format) {
+ fmtlen = strlen(query->format);
+ while (fmtlen && query->format[fmtlen - 1] == '\n')
+ fmtlen--;
+ }
+
+ vpr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" lineno=%u-%u\n",
+ msg,
+ query->function ? query->function : "",
+ query->filename ? query->filename : "",
+ query->module ? query->module : "",
+ fmtlen, query->format ? query->format : "",
+ query->first_lineno, query->last_lineno);
+}
+
/*
* Search the tables for _ddebug's which match the given `query' and
* apply the `flags' and `mask' to them. Returns number of matching
@@ -148,13 +149,13 @@ static int ddebug_change(const struct ddebug_query *query,
if (query->module && strcmp(query->module, dt->mod_name))
continue;
- for (i = 0 ; i < dt->num_ddebugs ; i++) {
+ for (i = 0; i < dt->num_ddebugs; i++) {
struct _ddebug *dp = &dt->ddebugs[i];
/* match against the source filename */
if (query->filename &&
strcmp(query->filename, dp->filename) &&
- strcmp(query->filename, basename(dp->filename)) &&
+ strcmp(query->filename, kbasename(dp->filename)) &&
strcmp(query->filename, trim_prefix(dp->filename)))
continue;
@@ -183,10 +184,10 @@ static int ddebug_change(const struct ddebug_query *query,
continue;
dp->flags = newflags;
vpr_info("changed %s:%d [%s]%s =%s\n",
- trim_prefix(dp->filename), dp->lineno,
- dt->mod_name, dp->function,
- ddebug_describe_flags(dp, flagbuf,
- sizeof(flagbuf)));
+ trim_prefix(dp->filename), dp->lineno,
+ dt->mod_name, dp->function,
+ ddebug_describe_flags(dp, flagbuf,
+ sizeof(flagbuf)));
}
}
mutex_unlock(&ddebug_lock);
@@ -220,19 +221,23 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords)
/* find `end' of word, whitespace separated or quoted */
if (*buf == '"' || *buf == '\'') {
int quote = *buf++;
- for (end = buf ; *end && *end != quote ; end++)
+ for (end = buf; *end && *end != quote; end++)
;
- if (!*end)
+ if (!*end) {
+ pr_err("unclosed quote: %s\n", buf);
return -EINVAL; /* unclosed quote */
+ }
} else {
- for (end = buf ; *end && !isspace(*end) ; end++)
+ for (end = buf; *end && !isspace(*end); end++)
;
BUG_ON(end == buf);
}
/* `buf' is start of word, `end' is one past its end */
- if (nwords == maxwords)
+ if (nwords == maxwords) {
+ pr_err("too many words, legal max <=%d\n", maxwords);
return -EINVAL; /* ran out of words[] before bytes */
+ }
if (*end)
*end++ = '\0'; /* terminate the word */
words[nwords++] = buf;
@@ -242,7 +247,7 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords)
if (verbose) {
int i;
pr_info("split into words:");
- for (i = 0 ; i < nwords ; i++)
+ for (i = 0; i < nwords; i++)
pr_cont(" \"%s\"", words[i]);
pr_cont("\n");
}
@@ -264,7 +269,11 @@ static inline int parse_lineno(const char *str, unsigned int *val)
return 0;
}
*val = simple_strtoul(str, &end, 10);
- return end == NULL || end == str || *end != '\0' ? -EINVAL : 0;
+ if (end == NULL || end == str || *end != '\0') {
+ pr_err("bad line-number: %s\n", str);
+ return -EINVAL;
+ }
+ return 0;
}
/*
@@ -293,11 +302,11 @@ static char *unescape(char *str)
in += 2;
continue;
} else if (isodigit(in[1]) &&
- isodigit(in[2]) &&
- isodigit(in[3])) {
- *out++ = ((in[1] - '0')<<6) |
- ((in[2] - '0')<<3) |
- (in[3] - '0');
+ isodigit(in[2]) &&
+ isodigit(in[3])) {
+ *out++ = (((in[1] - '0') << 6) |
+ ((in[2] - '0') << 3) |
+ (in[3] - '0'));
in += 4;
continue;
}
@@ -315,8 +324,8 @@ static int check_set(const char **dest, char *src, char *name)
if (*dest) {
rc = -EINVAL;
- pr_err("match-spec:%s val:%s overridden by %s",
- name, *dest, src);
+ pr_err("match-spec:%s val:%s overridden by %s\n",
+ name, *dest, src);
}
*dest = src;
return rc;
@@ -344,40 +353,46 @@ static int ddebug_parse_query(char *words[], int nwords,
int rc;
/* check we have an even number of words */
- if (nwords % 2 != 0)
+ if (nwords % 2 != 0) {
+ pr_err("expecting pairs of match-spec <value>\n");
return -EINVAL;
+ }
memset(query, 0, sizeof(*query));
if (modname)
/* support $modname.dyndbg=<multiple queries> */
query->module = modname;
- for (i = 0 ; i < nwords ; i += 2) {
- if (!strcmp(words[i], "func"))
+ for (i = 0; i < nwords; i += 2) {
+ if (!strcmp(words[i], "func")) {
rc = check_set(&query->function, words[i+1], "func");
- else if (!strcmp(words[i], "file"))
+ } else if (!strcmp(words[i], "file")) {
rc = check_set(&query->filename, words[i+1], "file");
- else if (!strcmp(words[i], "module"))
+ } else if (!strcmp(words[i], "module")) {
rc = check_set(&query->module, words[i+1], "module");
- else if (!strcmp(words[i], "format"))
+ } else if (!strcmp(words[i], "format")) {
rc = check_set(&query->format, unescape(words[i+1]),
- "format");
- else if (!strcmp(words[i], "line")) {
+ "format");
+ } else if (!strcmp(words[i], "line")) {
char *first = words[i+1];
char *last = strchr(first, '-');
if (query->first_lineno || query->last_lineno) {
- pr_err("match-spec:line given 2 times\n");
+ pr_err("match-spec: line used 2x\n");
return -EINVAL;
}
if (last)
*last++ = '\0';
- if (parse_lineno(first, &query->first_lineno) < 0)
+ if (parse_lineno(first, &query->first_lineno) < 0) {
+ pr_err("line-number is <0\n");
return -EINVAL;
+ }
if (last) {
/* range <first>-<last> */
if (parse_lineno(last, &query->last_lineno)
< query->first_lineno) {
- pr_err("last-line < 1st-line\n");
+ pr_err("last-line:%d < 1st-line:%d\n",
+ query->last_lineno,
+ query->first_lineno);
return -EINVAL;
}
} else {
@@ -413,19 +428,22 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
op = *str++;
break;
default:
+ pr_err("bad flag-op %c, at start of %s\n", *str, str);
return -EINVAL;
}
vpr_info("op='%c'\n", op);
- for ( ; *str ; ++str) {
+ for (; *str ; ++str) {
for (i = ARRAY_SIZE(opt_array) - 1; i >= 0; i--) {
if (*str == opt_array[i].opt_char) {
flags |= opt_array[i].flag;
break;
}
}
- if (i < 0)
+ if (i < 0) {
+ pr_err("unknown flag '%c' in \"%s\"\n", *str, str);
return -EINVAL;
+ }
}
vpr_info("flags=0x%x\n", flags);
@@ -457,16 +475,22 @@ static int ddebug_exec_query(char *query_string, const char *modname)
char *words[MAXWORDS];
nwords = ddebug_tokenize(query_string, words, MAXWORDS);
- if (nwords <= 0)
+ if (nwords <= 0) {
+ pr_err("tokenize failed\n");
return -EINVAL;
- if (ddebug_parse_query(words, nwords-1, &query, modname))
+ }
+ /* check flags 1st (last arg) so query is pairs of spec,val */
+ if (ddebug_parse_flags(words[nwords-1], &flags, &mask)) {
+ pr_err("flags parse failed\n");
return -EINVAL;
- if (ddebug_parse_flags(words[nwords-1], &flags, &mask))
+ }
+ if (ddebug_parse_query(words, nwords-1, &query, modname)) {
+ pr_err("query parse failed\n");
return -EINVAL;
-
+ }
/* actually go and implement the change */
nfound = ddebug_change(&query, flags, mask);
- vpr_info_dq((&query), (nfound) ? "applied" : "no-match");
+ vpr_info_dq(&query, nfound ? "applied" : "no-match");
return nfound;
}
@@ -495,8 +519,9 @@ static int ddebug_exec_queries(char *query, const char *modname)
if (rc < 0) {
errs++;
exitcode = rc;
- } else
+ } else {
nfound += rc;
+ }
i++;
}
vpr_info("processed %d queries, with %d matches, %d errs\n",
@@ -772,7 +797,7 @@ static void *ddebug_proc_next(struct seq_file *m, void *p, loff_t *pos)
struct _ddebug *dp;
vpr_info("called m=%p p=%p *pos=%lld\n",
- m, p, (unsigned long long)*pos);
+ m, p, (unsigned long long)*pos);
if (p == SEQ_START_TOKEN)
dp = ddebug_iter_first(iter);
@@ -798,14 +823,14 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
if (p == SEQ_START_TOKEN) {
seq_puts(m,
- "# filename:lineno [module]function flags format\n");
+ "# filename:lineno [module]function flags format\n");
return 0;
}
seq_printf(m, "%s:%u [%s]%s =%s \"",
- trim_prefix(dp->filename), dp->lineno,
- iter->table->mod_name, dp->function,
- ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf)));
+ trim_prefix(dp->filename), dp->lineno,
+ iter->table->mod_name, dp->function,
+ ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf)));
seq_escape(m, dp->format, "\t\r\n\"");
seq_puts(m, "\"\n");
@@ -852,7 +877,7 @@ static int ddebug_proc_open(struct inode *inode, struct file *file)
kfree(iter);
return err;
}
- ((struct seq_file *) file->private_data)->private = iter;
+ ((struct seq_file *)file->private_data)->private = iter;
return 0;
}
@@ -1009,8 +1034,7 @@ static int __init dynamic_debug_init(void)
int verbose_bytes = 0;
if (__start___verbose == __stop___verbose) {
- pr_warn("_ddebug table is empty in a "
- "CONFIG_DYNAMIC_DEBUG build");
+ pr_warn("_ddebug table is empty in a CONFIG_DYNAMIC_DEBUG build\n");
return 1;
}
iter = __start___verbose;
@@ -1037,18 +1061,16 @@ static int __init dynamic_debug_init(void)
goto out_err;
ddebug_init_success = 1;
- vpr_info("%d modules, %d entries and %d bytes in ddebug tables,"
- " %d bytes in (readonly) verbose section\n",
- modct, entries, (int)( modct * sizeof(struct ddebug_table)),
- verbose_bytes + (int)(__stop___verbose - __start___verbose));
+ vpr_info("%d modules, %d entries and %d bytes in ddebug tables, %d bytes in (readonly) verbose section\n",
+ modct, entries, (int)(modct * sizeof(struct ddebug_table)),
+ verbose_bytes + (int)(__stop___verbose - __start___verbose));
/* apply ddebug_query boot param, dont unload tables on err */
if (ddebug_setup_string[0] != '\0') {
- pr_warn("ddebug_query param name is deprecated,"
- " change it to dyndbg\n");
+ pr_warn("ddebug_query param name is deprecated, change it to dyndbg\n");
ret = ddebug_exec_queries(ddebug_setup_string, NULL);
if (ret < 0)
- pr_warn("Invalid ddebug boot param %s",
+ pr_warn("Invalid ddebug boot param %s\n",
ddebug_setup_string);
else
pr_info("%d changes by ddebug_query\n", ret);
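
The vpr_info() rewrite above is not just style: the old definition expanded to a bare if (verbose) in front of the do/while, which silently steals a following else from the caller. A small standalone C illustration of the hazard, using a stand-in macro and printf() instead of pr_info():

#include <stdio.h>

static int verbose;

/* stand-in for the old, broken definition */
#define old_vpr_info(fmt, ...) \
	if (verbose) do { printf(fmt, ##__VA_ARGS__); } while (0)

static void report(int ret)
{
	if (ret < 0)
		old_vpr_info("query failed: %d\n", ret);
	else	/* binds to the macro's hidden "if (verbose)", not to "if (ret < 0)" */
		printf("query matched %d sites\n", ret);
}

int main(void)
{
	report(3);	/* prints nothing: the success branch was swallowed */
	return 0;
}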
diff --git a/lib/earlycpio.c b/lib/earlycpio.c
new file mode 100644
index 000000000000..8078ef49cb79
--- /dev/null
+++ b/lib/earlycpio.c
@@ -0,0 +1,145 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 2012 Intel Corporation; author H. Peter Anvin
+ *
+ * This file is part of the Linux kernel, and is made available
+ * under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * earlycpio.c
+ *
+ * Find a specific cpio member; must precede any compressed content.
+ * This is used to locate data items in the initramfs used by the
+ * kernel itself during early boot (before the main initramfs is
+ * decompressed.) It is the responsibility of the initramfs creator
+ * to ensure that these items are uncompressed at the head of the
+ * blob. Depending on the boot loader or package tool that may be a
+ * separate file or part of the same file.
+ */
+
+#include <linux/earlycpio.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+enum cpio_fields {
+ C_MAGIC,
+ C_INO,
+ C_MODE,
+ C_UID,
+ C_GID,
+ C_NLINK,
+ C_MTIME,
+ C_FILESIZE,
+ C_MAJ,
+ C_MIN,
+ C_RMAJ,
+ C_RMIN,
+ C_NAMESIZE,
+ C_CHKSUM,
+ C_NFIELDS
+};
+
+/**
+ * cpio_data find_cpio_data - Search for files in an uncompressed cpio
+ * @path: The directory to search for, including a slash at the end
+ * @data: Pointer to the cpio archive or a header inside
+ * @len: Remaining length of the cpio based on data pointer
+ * @offset: When a matching file is found, this is the offset to the
+ * beginning of the cpio. It can be used to iterate through
+ * the cpio to find all files inside of a directory path
+ *
+ * @return: struct cpio_data containing the address, length and
+ * filename (with the directory path cut off) of the found file.
+ * If you search for a filename and not for files in a directory,
+ * pass the absolute path of the filename in the cpio and make sure
+ * the match returned an empty filename string.
+ */
+
+struct cpio_data __cpuinit find_cpio_data(const char *path, void *data,
+ size_t len, long *offset)
+{
+ const size_t cpio_header_len = 8*C_NFIELDS - 2;
+ struct cpio_data cd = { NULL, 0, "" };
+ const char *p, *dptr, *nptr;
+ unsigned int ch[C_NFIELDS], *chp, v;
+ unsigned char c, x;
+ size_t mypathsize = strlen(path);
+ int i, j;
+
+ p = data;
+
+ while (len > cpio_header_len) {
+ if (!*p) {
+ /* All cpio headers need to be 4-byte aligned */
+ p += 4;
+ len -= 4;
+ continue;
+ }
+
+ j = 6; /* The magic field is only 6 characters */
+ chp = ch;
+ for (i = C_NFIELDS; i; i--) {
+ v = 0;
+ while (j--) {
+ v <<= 4;
+ c = *p++;
+
+ x = c - '0';
+ if (x < 10) {
+ v += x;
+ continue;
+ }
+
+ x = (c | 0x20) - 'a';
+ if (x < 6) {
+ v += x + 10;
+ continue;
+ }
+
+ goto quit; /* Invalid hexadecimal */
+ }
+ *chp++ = v;
+ j = 8; /* All other fields are 8 characters */
+ }
+
+ if ((ch[C_MAGIC] - 0x070701) > 1)
+ goto quit; /* Invalid magic */
+
+ len -= cpio_header_len;
+
+ dptr = PTR_ALIGN(p + ch[C_NAMESIZE], 4);
+ nptr = PTR_ALIGN(dptr + ch[C_FILESIZE], 4);
+
+ if (nptr > p + len || dptr < p || nptr < dptr)
+ goto quit; /* Buffer overrun */
+
+ if ((ch[C_MODE] & 0170000) == 0100000 &&
+ ch[C_NAMESIZE] >= mypathsize &&
+ !memcmp(p, path, mypathsize)) {
+ *offset = (long)nptr - (long)data;
+ if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) {
+ pr_warn(
+ "File %s exceeding MAX_CPIO_FILE_NAME [%d]\n",
+ p, MAX_CPIO_FILE_NAME);
+ }
+ strlcpy(cd.name, p + mypathsize, MAX_CPIO_FILE_NAME);
+
+ cd.data = (void *)dptr;
+ cd.size = ch[C_FILESIZE];
+ return cd; /* Found it! */
+ }
+ len -= (nptr - p);
+ p = nptr;
+ }
+
+quit:
+ return cd;
+}
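
A hedged sketch of how an early-boot caller might use the new helper; the path and the demo_* names are illustrative only (in-tree users pass paths such as the microcode directory inside the built-in initramfs):

#include <linux/earlycpio.h>
#include <linux/init.h>
#include <linux/kernel.h>

static void __init demo_scan_initrd(void *initrd, size_t initrd_size)
{
	struct cpio_data cd;
	long offset = 0;

	cd = find_cpio_data("kernel/demo/", initrd, initrd_size, &offset);
	if (cd.data)
		pr_info("found \"%s\", %lu bytes at offset %ld\n",
			cd.name, (unsigned long)cd.size, offset);
}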
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 6540d657dca4..3f0494c9d57a 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -227,6 +227,7 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
}
EXPORT_SYMBOL(print_hex_dump);
+#if !defined(CONFIG_DYNAMIC_DEBUG)
/**
* print_hex_dump_bytes - shorthand form of print_hex_dump() with default params
* @prefix_str: string to prefix each line with;
@@ -246,4 +247,5 @@ void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
buf, len, true);
}
EXPORT_SYMBOL(print_hex_dump_bytes);
-#endif
+#endif /* !defined(CONFIG_DYNAMIC_DEBUG) */
+#endif /* defined(CONFIG_PRINTK) */
diff --git a/lib/idr.c b/lib/idr.c
index 648239079dd2..322e2816f2fb 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -35,10 +35,41 @@
#include <linux/string.h>
#include <linux/idr.h>
#include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+
+#define MAX_IDR_SHIFT (sizeof(int) * 8 - 1)
+#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT)
+
+/* Leave the possibility of an incomplete final layer */
+#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS)
+
+/* Number of id_layer structs to leave in free list */
+#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2)
static struct kmem_cache *idr_layer_cache;
+static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head);
+static DEFINE_PER_CPU(int, idr_preload_cnt);
static DEFINE_SPINLOCK(simple_ida_lock);
+/* the maximum ID which can be allocated given idr->layers */
+static int idr_max(int layers)
+{
+ int bits = min_t(int, layers * IDR_BITS, MAX_IDR_SHIFT);
+
+ return (1 << bits) - 1;
+}
+
+/*
+ * Prefix mask for an idr_layer at @layer. For layer 0, the prefix mask is
+ * all bits except for the lower IDR_BITS. For layer 1, 2 * IDR_BITS, and
+ * so on.
+ */
+static int idr_layer_prefix_mask(int layer)
+{
+ return ~idr_max(layer + 1);
+}
+
static struct idr_layer *get_from_free_list(struct idr *idp)
{
struct idr_layer *p;
@@ -54,6 +85,62 @@ static struct idr_layer *get_from_free_list(struct idr *idp)
return(p);
}
+/**
+ * idr_layer_alloc - allocate a new idr_layer
+ * @gfp_mask: allocation mask
+ * @layer_idr: optional idr to allocate from
+ *
+ * If @layer_idr is %NULL, directly allocate one using @gfp_mask or fetch
+ * one from the per-cpu preload buffer. If @layer_idr is not %NULL, fetch
+ * an idr_layer from @idr->id_free.
+ *
+ * @layer_idr is to maintain backward compatibility with the old alloc
+ * interface - idr_pre_get() and idr_get_new*() - and will be removed
+ * together with per-pool preload buffer.
+ */
+static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr)
+{
+ struct idr_layer *new;
+
+ /* this is the old path, bypass to get_from_free_list() */
+ if (layer_idr)
+ return get_from_free_list(layer_idr);
+
+ /*
+ * Try to allocate directly from kmem_cache. We want to try this
+ * before preload buffer; otherwise, non-preloading idr_alloc()
+ * users will end up taking advantage of preloading ones. As the
+ * following is allowed to fail for preloaded cases, suppress
+ * warning this time.
+ */
+ new = kmem_cache_zalloc(idr_layer_cache, gfp_mask | __GFP_NOWARN);
+ if (new)
+ return new;
+
+ /*
+ * Try to fetch one from the per-cpu preload buffer if in process
+ * context. See idr_preload() for details.
+ */
+ if (!in_interrupt()) {
+ preempt_disable();
+ new = __this_cpu_read(idr_preload_head);
+ if (new) {
+ __this_cpu_write(idr_preload_head, new->ary[0]);
+ __this_cpu_dec(idr_preload_cnt);
+ new->ary[0] = NULL;
+ }
+ preempt_enable();
+ if (new)
+ return new;
+ }
+
+ /*
+ * Both failed. Try kmem_cache again w/o adding __GFP_NOWARN so
+ * that memory allocation failure warning is printed as intended.
+ */
+ return kmem_cache_zalloc(idr_layer_cache, gfp_mask);
+}
+
static void idr_layer_rcu_free(struct rcu_head *head)
{
struct idr_layer *layer;
@@ -62,8 +149,10 @@ static void idr_layer_rcu_free(struct rcu_head *head)
kmem_cache_free(idr_layer_cache, layer);
}
-static inline void free_layer(struct idr_layer *p)
+static inline void free_layer(struct idr *idr, struct idr_layer *p)
{
+ if (idr->hint && idr->hint == p)
+ RCU_INIT_POINTER(idr->hint, NULL);
call_rcu(&p->rcu_head, idr_layer_rcu_free);
}
@@ -92,35 +181,22 @@ static void idr_mark_full(struct idr_layer **pa, int id)
struct idr_layer *p = pa[0];
int l = 0;
- __set_bit(id & IDR_MASK, &p->bitmap);
+ __set_bit(id & IDR_MASK, p->bitmap);
/*
* If this layer is full mark the bit in the layer above to
* show that this part of the radix tree is full. This may
* complete the layer above and require walking up the radix
* tree.
*/
- while (p->bitmap == IDR_FULL) {
+ while (bitmap_full(p->bitmap, IDR_SIZE)) {
if (!(p = pa[++l]))
break;
id = id >> IDR_BITS;
- __set_bit((id & IDR_MASK), &p->bitmap);
+ __set_bit((id & IDR_MASK), p->bitmap);
}
}
-/**
- * idr_pre_get - reserve resources for idr allocation
- * @idp: idr handle
- * @gfp_mask: memory allocation flags
- *
- * This function should be called prior to calling the idr_get_new* functions.
- * It preallocates enough memory to satisfy the worst possible allocation. The
- * caller should pass in GFP_KERNEL if possible. This of course requires that
- * no spinning locks be held.
- *
- * If the system is REALLY out of memory this function returns %0,
- * otherwise %1.
- */
-int idr_pre_get(struct idr *idp, gfp_t gfp_mask)
+int __idr_pre_get(struct idr *idp, gfp_t gfp_mask)
{
while (idp->id_free_cnt < MAX_IDR_FREE) {
struct idr_layer *new;
@@ -131,14 +207,30 @@ int idr_pre_get(struct idr *idp, gfp_t gfp_mask)
}
return 1;
}
-EXPORT_SYMBOL(idr_pre_get);
+EXPORT_SYMBOL(__idr_pre_get);
-static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
+/**
+ * sub_alloc - try to allocate an id without growing the tree depth
+ * @idp: idr handle
+ * @starting_id: id to start search at
+ * @pa: idr_layer[MAX_IDR_LEVEL] used as backtrack buffer
+ * @gfp_mask: allocation mask for idr_layer_alloc()
+ * @layer_idr: optional idr passed to idr_layer_alloc()
+ *
+ * Allocate an id in range [@starting_id, INT_MAX] from @idp without
+ * growing its depth. Returns
+ *
+ * the allocated id >= 0 if successful,
+ * -EAGAIN if the tree needs to grow for allocation to succeed,
+ * -ENOSPC if the id space is exhausted,
+ * -ENOMEM if more idr_layers need to be allocated.
+ */
+static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa,
+ gfp_t gfp_mask, struct idr *layer_idr)
{
int n, m, sh;
struct idr_layer *p, *new;
int l, id, oid;
- unsigned long bm;
id = *starting_id;
restart:
@@ -150,8 +242,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
* We run around this while until we reach the leaf node...
*/
n = (id >> (IDR_BITS*l)) & IDR_MASK;
- bm = ~p->bitmap;
- m = find_next_bit(&bm, IDR_SIZE, n);
+ m = find_next_zero_bit(p->bitmap, IDR_SIZE, n);
if (m == IDR_SIZE) {
/* no space available go back to previous layer. */
l++;
@@ -161,7 +252,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
/* if already at the top layer, we need to grow */
if (id >= 1 << (idp->layers * IDR_BITS)) {
*starting_id = id;
- return IDR_NEED_TO_GROW;
+ return -EAGAIN;
}
p = pa[l];
BUG_ON(!p);
@@ -180,17 +271,18 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
id = ((id >> sh) ^ n ^ m) << sh;
}
if ((id >= MAX_IDR_BIT) || (id < 0))
- return IDR_NOMORE_SPACE;
+ return -ENOSPC;
if (l == 0)
break;
/*
* Create the layer below if it is missing.
*/
if (!p->ary[m]) {
- new = get_from_free_list(idp);
+ new = idr_layer_alloc(gfp_mask, layer_idr);
if (!new)
- return -1;
+ return -ENOMEM;
new->layer = l-1;
+ new->prefix = id & idr_layer_prefix_mask(new->layer);
rcu_assign_pointer(p->ary[m], new);
p->count++;
}
@@ -203,7 +295,8 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
}
static int idr_get_empty_slot(struct idr *idp, int starting_id,
- struct idr_layer **pa)
+ struct idr_layer **pa, gfp_t gfp_mask,
+ struct idr *layer_idr)
{
struct idr_layer *p, *new;
int layers, v, id;
@@ -214,8 +307,8 @@ build_up:
p = idp->top;
layers = idp->layers;
if (unlikely(!p)) {
- if (!(p = get_from_free_list(idp)))
- return -1;
+ if (!(p = idr_layer_alloc(gfp_mask, layer_idr)))
+ return -ENOMEM;
p->layer = 0;
layers = 1;
}
@@ -223,7 +316,7 @@ build_up:
* Add a new layer to the top of the tree if the requested
* id is larger than the currently allocated space.
*/
- while ((layers < (MAX_IDR_LEVEL - 1)) && (id >= (1 << (layers*IDR_BITS)))) {
+ while (id > idr_max(layers)) {
layers++;
if (!p->count) {
/* special case: if the tree is currently empty,
@@ -231,9 +324,10 @@ build_up:
* upwards.
*/
p->layer++;
+ WARN_ON_ONCE(p->prefix);
continue;
}
- if (!(new = get_from_free_list(idp))) {
+ if (!(new = idr_layer_alloc(gfp_mask, layer_idr))) {
/*
* The allocation failed. If we built part of
* the structure tear it down.
@@ -242,110 +336,164 @@ build_up:
for (new = p; p && p != idp->top; new = p) {
p = p->ary[0];
new->ary[0] = NULL;
- new->bitmap = new->count = 0;
+ new->count = 0;
+ bitmap_clear(new->bitmap, 0, IDR_SIZE);
__move_to_free_list(idp, new);
}
spin_unlock_irqrestore(&idp->lock, flags);
- return -1;
+ return -ENOMEM;
}
new->ary[0] = p;
new->count = 1;
new->layer = layers-1;
- if (p->bitmap == IDR_FULL)
- __set_bit(0, &new->bitmap);
+ new->prefix = id & idr_layer_prefix_mask(new->layer);
+ if (bitmap_full(p->bitmap, IDR_SIZE))
+ __set_bit(0, new->bitmap);
p = new;
}
rcu_assign_pointer(idp->top, p);
idp->layers = layers;
- v = sub_alloc(idp, &id, pa);
- if (v == IDR_NEED_TO_GROW)
+ v = sub_alloc(idp, &id, pa, gfp_mask, layer_idr);
+ if (v == -EAGAIN)
goto build_up;
return(v);
}
-static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id)
+/*
+ * @id and @pa are from a successful allocation from idr_get_empty_slot().
+ * Install the user pointer @ptr and mark the slot full.
+ */
+static void idr_fill_slot(struct idr *idr, void *ptr, int id,
+ struct idr_layer **pa)
{
- struct idr_layer *pa[MAX_IDR_LEVEL];
- int id;
+ /* update hint used for lookup, cleared from free_layer() */
+ rcu_assign_pointer(idr->hint, pa[0]);
- id = idr_get_empty_slot(idp, starting_id, pa);
- if (id >= 0) {
- /*
- * Successfully found an empty slot. Install the user
- * pointer and mark the slot full.
- */
- rcu_assign_pointer(pa[0]->ary[id & IDR_MASK],
- (struct idr_layer *)ptr);
- pa[0]->count++;
- idr_mark_full(pa, id);
- }
+ rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], (struct idr_layer *)ptr);
+ pa[0]->count++;
+ idr_mark_full(pa, id);
+}
- return id;
+int __idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
+{
+ struct idr_layer *pa[MAX_IDR_LEVEL + 1];
+ int rv;
+
+ rv = idr_get_empty_slot(idp, starting_id, pa, 0, idp);
+ if (rv < 0)
+ return rv == -ENOMEM ? -EAGAIN : rv;
+
+ idr_fill_slot(idp, ptr, rv, pa);
+ *id = rv;
+ return 0;
}
+EXPORT_SYMBOL(__idr_get_new_above);
/**
- * idr_get_new_above - allocate new idr entry above or equal to a start id
- * @idp: idr handle
- * @ptr: pointer you want associated with the id
- * @starting_id: id to start search at
- * @id: pointer to the allocated handle
+ * idr_preload - preload for idr_alloc()
+ * @gfp_mask: allocation mask to use for preloading
+ *
+ * Preload per-cpu layer buffer for idr_alloc(). Can only be used from
+ * process context and each idr_preload() invocation should be matched with
+ * idr_preload_end(). Note that preemption is disabled while preloaded.
*
- * This is the allocate id function. It should be called with any
- * required locks.
+ * The first idr_alloc() in the preloaded section can be treated as if it
+ * were invoked with @gfp_mask used for preloading. This allows using more
+ * permissive allocation masks for idrs protected by spinlocks.
*
- * If allocation from IDR's private freelist fails, idr_get_new_above() will
- * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill
- * IDR's preallocation and then retry the idr_get_new_above() call.
+ * For example, if idr_alloc() below fails, the failure can be treated as
+ * if idr_alloc() were called with GFP_KERNEL rather than GFP_NOWAIT.
*
- * If the idr is full idr_get_new_above() will return %-ENOSPC.
+ * idr_preload(GFP_KERNEL);
+ * spin_lock(lock);
*
- * @id returns a value in the range @starting_id ... %0x7fffffff
+ * id = idr_alloc(idr, ptr, start, end, GFP_NOWAIT);
+ *
+ * spin_unlock(lock);
+ * idr_preload_end();
+ * if (id < 0)
+ * error;
*/
-int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
+void idr_preload(gfp_t gfp_mask)
{
- int rv;
+ /*
+ * Consuming preload buffer from non-process context breaks preload
+ * allocation guarantee. Disallow usage from those contexts.
+ */
+ WARN_ON_ONCE(in_interrupt());
+ might_sleep_if(gfp_mask & __GFP_WAIT);
+
+ preempt_disable();
- rv = idr_get_new_above_int(idp, ptr, starting_id);
/*
- * This is a cheap hack until the IDR code can be fixed to
- * return proper error values.
+ * idr_alloc() is likely to succeed w/o full idr_layer buffer and
+ * return value from idr_alloc() needs to be checked for failure
+ * anyway. Silently give up if allocation fails. The caller can
+ * treat failures from idr_alloc() as if idr_alloc() were called
+ * with @gfp_mask which should be enough.
*/
- if (rv < 0)
- return _idr_rc_to_errno(rv);
- *id = rv;
- return 0;
+ while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) {
+ struct idr_layer *new;
+
+ preempt_enable();
+ new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
+ preempt_disable();
+ if (!new)
+ break;
+
+ /* link the new one to per-cpu preload list */
+ new->ary[0] = __this_cpu_read(idr_preload_head);
+ __this_cpu_write(idr_preload_head, new);
+ __this_cpu_inc(idr_preload_cnt);
+ }
}
-EXPORT_SYMBOL(idr_get_new_above);
+EXPORT_SYMBOL(idr_preload);
/**
- * idr_get_new - allocate new idr entry
- * @idp: idr handle
- * @ptr: pointer you want associated with the id
- * @id: pointer to the allocated handle
+ * idr_alloc - allocate new idr entry
+ * @idr: the (initialized) idr
+ * @ptr: pointer to be associated with the new id
+ * @start: the minimum id (inclusive)
+ * @end: the maximum id (exclusive, <= 0 for max)
+ * @gfp_mask: memory allocation flags
*
- * If allocation from IDR's private freelist fails, idr_get_new_above() will
- * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill
- * IDR's preallocation and then retry the idr_get_new_above() call.
+ * Allocate an id in [start, end) and associate it with @ptr. If no ID is
+ * available in the specified range, returns -ENOSPC. On memory allocation
+ * failure, returns -ENOMEM.
*
- * If the idr is full idr_get_new_above() will return %-ENOSPC.
+ * Note that @end is treated as max when <= 0. This is to always allow
+ * using @start + N as @end as long as N is inside integer range.
*
- * @id returns a value in the range %0 ... %0x7fffffff
+ * The user is responsible for exclusively synchronizing all operations
+ * which may modify @idr. However, read-only accesses such as idr_find()
+ * or iteration can be performed under RCU read lock provided the user
+ * destroys @ptr in RCU-safe way after removal from idr.
*/
-int idr_get_new(struct idr *idp, void *ptr, int *id)
+int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask)
{
- int rv;
+ int max = end > 0 ? end - 1 : INT_MAX; /* inclusive upper limit */
+ struct idr_layer *pa[MAX_IDR_LEVEL + 1];
+ int id;
- rv = idr_get_new_above_int(idp, ptr, 0);
- /*
- * This is a cheap hack until the IDR code can be fixed to
- * return proper error values.
- */
- if (rv < 0)
- return _idr_rc_to_errno(rv);
- *id = rv;
- return 0;
+ might_sleep_if(gfp_mask & __GFP_WAIT);
+
+ /* sanity checks */
+ if (WARN_ON_ONCE(start < 0))
+ return -EINVAL;
+ if (unlikely(max < start))
+ return -ENOSPC;
+
+ /* allocate id */
+ id = idr_get_empty_slot(idr, start, pa, gfp_mask, NULL);
+ if (unlikely(id < 0))
+ return id;
+ if (unlikely(id > max))
+ return -ENOSPC;
+
+ idr_fill_slot(idr, ptr, id, pa);
+ return id;
}
-EXPORT_SYMBOL(idr_get_new);
+EXPORT_SYMBOL_GPL(idr_alloc);
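Putting idr_preload() and idr_alloc() together, the pattern that replaces the old idr_pre_get()/idr_get_new() retry loop looks roughly like the following sketch; my_idr, my_lock and ptr are illustrative, and idr_preload_end() is the matching helper mentioned in the idr_preload() documentation above:

	int id;

	idr_preload(GFP_KERNEL);		/* may sleep; fills this CPU's buffer */
	spin_lock(&my_lock);

	/* GFP_NOWAIT is fine here: the preload above backs this allocation */
	id = idr_alloc(&my_idr, ptr, 1, 0, GFP_NOWAIT);

	spin_unlock(&my_lock);
	idr_preload_end();			/* re-enables preemption */

	if (id < 0)
		return id;			/* -ENOMEM or -ENOSPC */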
static void idr_remove_warning(int id)
{
@@ -357,7 +505,7 @@ static void idr_remove_warning(int id)
static void sub_remove(struct idr *idp, int shift, int id)
{
struct idr_layer *p = idp->top;
- struct idr_layer **pa[MAX_IDR_LEVEL];
+ struct idr_layer **pa[MAX_IDR_LEVEL + 1];
struct idr_layer ***paa = &pa[0];
struct idr_layer *to_free;
int n;
@@ -367,26 +515,26 @@ static void sub_remove(struct idr *idp, int shift, int id)
while ((shift > 0) && p) {
n = (id >> shift) & IDR_MASK;
- __clear_bit(n, &p->bitmap);
+ __clear_bit(n, p->bitmap);
*++paa = &p->ary[n];
p = p->ary[n];
shift -= IDR_BITS;
}
n = id & IDR_MASK;
- if (likely(p != NULL && test_bit(n, &p->bitmap))){
- __clear_bit(n, &p->bitmap);
+ if (likely(p != NULL && test_bit(n, p->bitmap))) {
+ __clear_bit(n, p->bitmap);
rcu_assign_pointer(p->ary[n], NULL);
to_free = NULL;
while(*paa && ! --((**paa)->count)){
if (to_free)
- free_layer(to_free);
+ free_layer(idp, to_free);
to_free = **paa;
**paa-- = NULL;
}
if (!*paa)
idp->layers = 0;
if (to_free)
- free_layer(to_free);
+ free_layer(idp, to_free);
} else
idr_remove_warning(id);
}
@@ -401,8 +549,8 @@ void idr_remove(struct idr *idp, int id)
struct idr_layer *p;
struct idr_layer *to_free;
- /* Mask off upper bits we don't use for the search. */
- id &= MAX_IDR_MASK;
+ if (id < 0)
+ return;
sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
if (idp->top && idp->top->count == 1 && (idp->layers > 1) &&
@@ -417,8 +565,9 @@ void idr_remove(struct idr *idp, int id)
p = idp->top->ary[0];
rcu_assign_pointer(idp->top, p);
--idp->layers;
- to_free->bitmap = to_free->count = 0;
- free_layer(to_free);
+ to_free->count = 0;
+ bitmap_clear(to_free->bitmap, 0, IDR_SIZE);
+ free_layer(idp, to_free);
}
while (idp->id_free_cnt >= MAX_IDR_FREE) {
p = get_from_free_list(idp);
@@ -433,34 +582,21 @@ void idr_remove(struct idr *idp, int id)
}
EXPORT_SYMBOL(idr_remove);
-/**
- * idr_remove_all - remove all ids from the given idr tree
- * @idp: idr handle
- *
- * idr_destroy() only frees up unused, cached idp_layers, but this
- * function will remove all id mappings and leave all idp_layers
- * unused.
- *
- * A typical clean-up sequence for objects stored in an idr tree will
- * use idr_for_each() to free all objects, if necessay, then
- * idr_remove_all() to remove all ids, and idr_destroy() to free
- * up the cached idr_layers.
- */
-void idr_remove_all(struct idr *idp)
+void __idr_remove_all(struct idr *idp)
{
int n, id, max;
int bt_mask;
struct idr_layer *p;
- struct idr_layer *pa[MAX_IDR_LEVEL];
+ struct idr_layer *pa[MAX_IDR_LEVEL + 1];
struct idr_layer **paa = &pa[0];
n = idp->layers * IDR_BITS;
p = idp->top;
rcu_assign_pointer(idp->top, NULL);
- max = 1 << n;
+ max = idr_max(idp->layers);
id = 0;
- while (id < max) {
+ while (id >= 0 && id <= max) {
while (n > IDR_BITS && p) {
n -= IDR_BITS;
*paa++ = p;
@@ -472,21 +608,32 @@ void idr_remove_all(struct idr *idp)
/* Get the highest bit that the above add changed from 0->1. */
while (n < fls(id ^ bt_mask)) {
if (p)
- free_layer(p);
+ free_layer(idp, p);
n += IDR_BITS;
p = *--paa;
}
}
idp->layers = 0;
}
-EXPORT_SYMBOL(idr_remove_all);
+EXPORT_SYMBOL(__idr_remove_all);
/**
* idr_destroy - release all cached layers within an idr tree
* @idp: idr handle
+ *
+ * Free all id mappings and all idp_layers. After this function, @idp is
+ * completely unused and can be freed / recycled. The caller is
+ * responsible for ensuring that no one else accesses @idp during or after
+ * idr_destroy().
+ *
+ * A typical clean-up sequence for objects stored in an idr tree will use
+ * idr_for_each() to free all objects, if necessary, then idr_destroy() to
+ * free up the id mappings and cached idr_layers.
*/
void idr_destroy(struct idr *idp)
{
+ __idr_remove_all(idp);
+
while (idp->id_free_cnt) {
struct idr_layer *p = get_from_free_list(idp);
kmem_cache_free(idr_layer_cache, p);
@@ -494,32 +641,20 @@ void idr_destroy(struct idr *idp)
}
EXPORT_SYMBOL(idr_destroy);
-/**
- * idr_find - return pointer for given id
- * @idp: idr handle
- * @id: lookup key
- *
- * Return the pointer given the id it has been registered with. A %NULL
- * return indicates that @id is not valid or you passed %NULL in
- * idr_get_new().
- *
- * This function can be called under rcu_read_lock(), given that the leaf
- * pointers lifetimes are correctly managed.
- */
-void *idr_find(struct idr *idp, int id)
+void *idr_find_slowpath(struct idr *idp, int id)
{
int n;
struct idr_layer *p;
+ if (id < 0)
+ return NULL;
+
p = rcu_dereference_raw(idp->top);
if (!p)
return NULL;
n = (p->layer+1) * IDR_BITS;
- /* Mask off upper bits we don't use for the search. */
- id &= MAX_IDR_MASK;
-
- if (id >= (1 << n))
+ if (id > idr_max(p->layer + 1))
return NULL;
BUG_ON(n == 0);
@@ -530,7 +665,7 @@ void *idr_find(struct idr *idp, int id)
}
return((void *)p);
}
-EXPORT_SYMBOL(idr_find);
+EXPORT_SYMBOL(idr_find_slowpath);
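The rename to idr_find_slowpath() pairs with a fast-path idr_find() wrapper in include/linux/idr.h that first tries the @hint layer cached by idr_fill_slot(); conceptually it does something like the sketch below (not the verbatim header code):

	static inline void *idr_find_sketch(struct idr *idr, int id)
	{
		struct idr_layer *hint = rcu_dereference_raw(idr->hint);

		/* hint->prefix identifies the leaf layer covering this id */
		if (hint && (id & ~IDR_MASK) == hint->prefix)
			return rcu_dereference_raw(hint->ary[id & IDR_MASK]);

		return idr_find_slowpath(idr, id);
	}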
/**
* idr_for_each - iterate through all stored pointers
@@ -555,15 +690,15 @@ int idr_for_each(struct idr *idp,
{
int n, id, max, error = 0;
struct idr_layer *p;
- struct idr_layer *pa[MAX_IDR_LEVEL];
+ struct idr_layer *pa[MAX_IDR_LEVEL + 1];
struct idr_layer **paa = &pa[0];
n = idp->layers * IDR_BITS;
p = rcu_dereference_raw(idp->top);
- max = 1 << n;
+ max = idr_max(idp->layers);
id = 0;
- while (id < max) {
+ while (id >= 0 && id <= max) {
while (n > 0 && p) {
n -= IDR_BITS;
*paa++ = p;
@@ -601,7 +736,7 @@ EXPORT_SYMBOL(idr_for_each);
*/
void *idr_get_next(struct idr *idp, int *nextidp)
{
- struct idr_layer *p, *pa[MAX_IDR_LEVEL];
+ struct idr_layer *p, *pa[MAX_IDR_LEVEL + 1];
struct idr_layer **paa = &pa[0];
int id = *nextidp;
int n, max;
@@ -611,9 +746,9 @@ void *idr_get_next(struct idr *idp, int *nextidp)
if (!p)
return NULL;
n = (p->layer + 1) * IDR_BITS;
- max = 1 << n;
+ max = idr_max(p->layer + 1);
- while (id < max) {
+ while (id >= 0 && id <= max) {
while (n > 0 && p) {
n -= IDR_BITS;
*paa++ = p;
@@ -625,7 +760,14 @@ void *idr_get_next(struct idr *idp, int *nextidp)
return p;
}
- id += 1 << n;
+ /*
+ * Proceed to the next layer at the current level. Unlike
+ * idr_for_each(), @id isn't guaranteed to be aligned to
+ * layer boundary at this point and adding 1 << n may
+ * incorrectly skip IDs. Make sure we jump to the
+ * beginning of the next layer using round_up().
+ */
+ id = round_up(id + 1, 1 << n);
while (n < fls(id)) {
n += IDR_BITS;
p = *--paa;
@@ -653,14 +795,15 @@ void *idr_replace(struct idr *idp, void *ptr, int id)
int n;
struct idr_layer *p, *old_p;
+ if (id < 0)
+ return ERR_PTR(-EINVAL);
+
p = idp->top;
if (!p)
return ERR_PTR(-EINVAL);
n = (p->layer+1) * IDR_BITS;
- id &= MAX_IDR_MASK;
-
if (id >= (1 << n))
return ERR_PTR(-EINVAL);
@@ -671,7 +814,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id)
}
n = id & IDR_MASK;
- if (unlikely(p == NULL || !test_bit(n, &p->bitmap)))
+ if (unlikely(p == NULL || !test_bit(n, p->bitmap)))
return ERR_PTR(-ENOENT);
old_p = p->ary[n];
@@ -745,7 +888,7 @@ static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap)
int ida_pre_get(struct ida *ida, gfp_t gfp_mask)
{
/* allocate idr_layers */
- if (!idr_pre_get(&ida->idr, gfp_mask))
+ if (!__idr_pre_get(&ida->idr, gfp_mask))
return 0;
/* allocate free_bitmap */
@@ -780,7 +923,7 @@ EXPORT_SYMBOL(ida_pre_get);
*/
int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
{
- struct idr_layer *pa[MAX_IDR_LEVEL];
+ struct idr_layer *pa[MAX_IDR_LEVEL + 1];
struct ida_bitmap *bitmap;
unsigned long flags;
int idr_id = starting_id / IDA_BITMAP_BITS;
@@ -789,9 +932,9 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
restart:
/* get vacant slot */
- t = idr_get_empty_slot(&ida->idr, idr_id, pa);
+ t = idr_get_empty_slot(&ida->idr, idr_id, pa, 0, &ida->idr);
if (t < 0)
- return _idr_rc_to_errno(t);
+ return t == -ENOMEM ? -EAGAIN : t;
if (t * IDA_BITMAP_BITS >= MAX_IDR_BIT)
return -ENOSPC;
@@ -852,25 +995,6 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
EXPORT_SYMBOL(ida_get_new_above);
/**
- * ida_get_new - allocate new ID
- * @ida: idr handle
- * @p_id: pointer to the allocated handle
- *
- * Allocate new ID. It should be called with any required locks.
- *
- * If memory is required, it will return %-EAGAIN, you should unlock
- * and go back to the idr_pre_get() call. If the idr is full, it will
- * return %-ENOSPC.
- *
- * @p_id returns a value in the range %0 ... %0x7fffffff.
- */
-int ida_get_new(struct ida *ida, int *p_id)
-{
- return ida_get_new_above(ida, 0, p_id);
-}
-EXPORT_SYMBOL(ida_get_new);
-
-/**
* ida_remove - remove the given ID
* @ida: ida handle
* @id: ID to free
@@ -887,7 +1011,7 @@ void ida_remove(struct ida *ida, int id)
/* clear full bits while looking up the leaf idr_layer */
while ((shift > 0) && p) {
n = (idr_id >> shift) & IDR_MASK;
- __clear_bit(n, &p->bitmap);
+ __clear_bit(n, p->bitmap);
p = p->ary[n];
shift -= IDR_BITS;
}
@@ -896,7 +1020,7 @@ void ida_remove(struct ida *ida, int id)
goto err;
n = idr_id & IDR_MASK;
- __clear_bit(n, &p->bitmap);
+ __clear_bit(n, p->bitmap);
bitmap = (void *)p->ary[n];
if (!test_bit(offset, bitmap->bitmap))
@@ -905,7 +1029,7 @@ void ida_remove(struct ida *ida, int id)
/* update bitmap and remove it if empty */
__clear_bit(offset, bitmap->bitmap);
if (--bitmap->nr_busy == 0) {
- __set_bit(n, &p->bitmap); /* to please idr_remove() */
+ __set_bit(n, p->bitmap); /* to please idr_remove() */
idr_remove(&ida->idr, idr_id);
free_bitmap(ida, bitmap);
}
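For comparison, the ida side keeps the two-step interface, so callers still follow the classic preallocate-and-retry pattern; my_ida and my_lock are illustrative:

	int id, err;

retry:
	if (!ida_pre_get(&my_ida, GFP_KERNEL))
		return -ENOMEM;
	spin_lock(&my_lock);
	err = ida_get_new_above(&my_ida, 0, &id);
	spin_unlock(&my_lock);
	if (err == -EAGAIN)
		goto retry;	/* preallocation was consumed, refill and retry */
	if (err)
		return err;	/* -ENOSPC */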
diff --git a/lib/interval_tree_test_main.c b/lib/interval_tree_test_main.c
index b25903987f7a..245900b98c8e 100644
--- a/lib/interval_tree_test_main.c
+++ b/lib/interval_tree_test_main.c
@@ -30,7 +30,8 @@ static void init(void)
{
int i;
for (i = 0; i < NODES; i++) {
- u32 a = prandom32(&rnd), b = prandom32(&rnd);
+ u32 a = prandom_u32_state(&rnd);
+ u32 b = prandom_u32_state(&rnd);
if (a <= b) {
nodes[i].start = a;
nodes[i].last = b;
@@ -40,7 +41,7 @@ static void init(void)
}
}
for (i = 0; i < SEARCHES; i++)
- queries[i] = prandom32(&rnd);
+ queries[i] = prandom_u32_state(&rnd);
}
static int interval_tree_test_init(void)
@@ -51,7 +52,7 @@ static int interval_tree_test_init(void)
printk(KERN_ALERT "interval tree insert/remove");
- prandom32_seed(&rnd, 3141592653589793238ULL);
+ prandom_seed_state(&rnd, 3141592653589793238ULL);
init();
time1 = get_cycles();
diff --git a/lib/kfifo.c b/lib/kfifo.c
new file mode 100644
index 000000000000..7b7f83027b7b
--- /dev/null
+++ b/lib/kfifo.c
@@ -0,0 +1,607 @@
+/*
+ * A generic kernel FIFO implementation
+ *
+ * Copyright (C) 2009/2010 Stefani Seibold <stefani@seibold.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/log2.h>
+#include <linux/uaccess.h>
+#include <linux/kfifo.h>
+
+/*
+ * internal helper to calculate the unused elements in a fifo
+ */
+static inline unsigned int kfifo_unused(struct __kfifo *fifo)
+{
+ return (fifo->mask + 1) - (fifo->in - fifo->out);
+}
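The "let the indices wrap" technique works because fifo->in and fifo->out are free-running unsigned counters and the size is a power of two; a short worked example under those assumptions:

	unsigned int in = 10, out = 5, mask = 7;	/* size = mask + 1 = 8 */
	unsigned int used   = in - out;			/* 5 elements occupied */
	unsigned int unused = (mask + 1) - used;	/* 3 elements free     */
	/*
	 * The subtraction stays correct even after 'in' wraps past
	 * UINT_MAX, because unsigned arithmetic is modular.
	 */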
+
+int __kfifo_alloc(struct __kfifo *fifo, unsigned int size,
+ size_t esize, gfp_t gfp_mask)
+{
+ /*
+ * round up to the next power of 2, since our 'let the indices
+ * wrap' technique works only in this case.
+ */
+ size = roundup_pow_of_two(size);
+
+ fifo->in = 0;
+ fifo->out = 0;
+ fifo->esize = esize;
+
+ if (size < 2) {
+ fifo->data = NULL;
+ fifo->mask = 0;
+ return -EINVAL;
+ }
+
+ fifo->data = kmalloc(size * esize, gfp_mask);
+
+ if (!fifo->data) {
+ fifo->mask = 0;
+ return -ENOMEM;
+ }
+ fifo->mask = size - 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(__kfifo_alloc);
+
+void __kfifo_free(struct __kfifo *fifo)
+{
+ kfree(fifo->data);
+ fifo->in = 0;
+ fifo->out = 0;
+ fifo->esize = 0;
+ fifo->data = NULL;
+ fifo->mask = 0;
+}
+EXPORT_SYMBOL(__kfifo_free);
+
+int __kfifo_init(struct __kfifo *fifo, void *buffer,
+ unsigned int size, size_t esize)
+{
+ size /= esize;
+
+ size = roundup_pow_of_two(size);
+
+ fifo->in = 0;
+ fifo->out = 0;
+ fifo->esize = esize;
+ fifo->data = buffer;
+
+ if (size < 2) {
+ fifo->mask = 0;
+ return -EINVAL;
+ }
+ fifo->mask = size - 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(__kfifo_init);
+
+static void kfifo_copy_in(struct __kfifo *fifo, const void *src,
+ unsigned int len, unsigned int off)
+{
+ unsigned int size = fifo->mask + 1;
+ unsigned int esize = fifo->esize;
+ unsigned int l;
+
+ off &= fifo->mask;
+ if (esize != 1) {
+ off *= esize;
+ size *= esize;
+ len *= esize;
+ }
+ l = min(len, size - off);
+
+ memcpy(fifo->data + off, src, l);
+ memcpy(fifo->data, src + l, len - l);
+ /*
+ * make sure that the data in the fifo is up to date before
+ * incrementing the fifo->in index counter
+ */
+ smp_wmb();
+}
+
+unsigned int __kfifo_in(struct __kfifo *fifo,
+ const void *buf, unsigned int len)
+{
+ unsigned int l;
+
+ l = kfifo_unused(fifo);
+ if (len > l)
+ len = l;
+
+ kfifo_copy_in(fifo, buf, len, fifo->in);
+ fifo->in += len;
+ return len;
+}
+EXPORT_SYMBOL(__kfifo_in);
+
+static void kfifo_copy_out(struct __kfifo *fifo, void *dst,
+ unsigned int len, unsigned int off)
+{
+ unsigned int size = fifo->mask + 1;
+ unsigned int esize = fifo->esize;
+ unsigned int l;
+
+ off &= fifo->mask;
+ if (esize != 1) {
+ off *= esize;
+ size *= esize;
+ len *= esize;
+ }
+ l = min(len, size - off);
+
+ memcpy(dst, fifo->data + off, l);
+ memcpy(dst + l, fifo->data, len - l);
+ /*
+ * make sure that the data is copied before
+ * incrementing the fifo->out index counter
+ */
+ smp_wmb();
+}
+
+unsigned int __kfifo_out_peek(struct __kfifo *fifo,
+ void *buf, unsigned int len)
+{
+ unsigned int l;
+
+ l = fifo->in - fifo->out;
+ if (len > l)
+ len = l;
+
+ kfifo_copy_out(fifo, buf, len, fifo->out);
+ return len;
+}
+EXPORT_SYMBOL(__kfifo_out_peek);
+
+unsigned int __kfifo_out(struct __kfifo *fifo,
+ void *buf, unsigned int len)
+{
+ len = __kfifo_out_peek(fifo, buf, len);
+ fifo->out += len;
+ return len;
+}
+EXPORT_SYMBOL(__kfifo_out);
+
+static unsigned long kfifo_copy_from_user(struct __kfifo *fifo,
+ const void __user *from, unsigned int len, unsigned int off,
+ unsigned int *copied)
+{
+ unsigned int size = fifo->mask + 1;
+ unsigned int esize = fifo->esize;
+ unsigned int l;
+ unsigned long ret;
+
+ off &= fifo->mask;
+ if (esize != 1) {
+ off *= esize;
+ size *= esize;
+ len *= esize;
+ }
+ l = min(len, size - off);
+
+ ret = copy_from_user(fifo->data + off, from, l);
+ if (unlikely(ret))
+ ret = DIV_ROUND_UP(ret + len - l, esize);
+ else {
+ ret = copy_from_user(fifo->data, from + l, len - l);
+ if (unlikely(ret))
+ ret = DIV_ROUND_UP(ret, esize);
+ }
+ /*
+ * make sure that the data in the fifo is up to date before
+ * incrementing the fifo->in index counter
+ */
+ smp_wmb();
+ *copied = len - ret;
+ /* return the number of elements which are not copied */
+ return ret;
+}
+
+int __kfifo_from_user(struct __kfifo *fifo, const void __user *from,
+ unsigned long len, unsigned int *copied)
+{
+ unsigned int l;
+ unsigned long ret;
+ unsigned int esize = fifo->esize;
+ int err;
+
+ if (esize != 1)
+ len /= esize;
+
+ l = kfifo_unused(fifo);
+ if (len > l)
+ len = l;
+
+ ret = kfifo_copy_from_user(fifo, from, len, fifo->in, copied);
+ if (unlikely(ret)) {
+ len -= ret;
+ err = -EFAULT;
+ } else
+ err = 0;
+ fifo->in += len;
+ return err;
+}
+EXPORT_SYMBOL(__kfifo_from_user);
+
+static unsigned long kfifo_copy_to_user(struct __kfifo *fifo, void __user *to,
+ unsigned int len, unsigned int off, unsigned int *copied)
+{
+ unsigned int l;
+ unsigned long ret;
+ unsigned int size = fifo->mask + 1;
+ unsigned int esize = fifo->esize;
+
+ off &= fifo->mask;
+ if (esize != 1) {
+ off *= esize;
+ size *= esize;
+ len *= esize;
+ }
+ l = min(len, size - off);
+
+ ret = copy_to_user(to, fifo->data + off, l);
+ if (unlikely(ret))
+ ret = DIV_ROUND_UP(ret + len - l, esize);
+ else {
+ ret = copy_to_user(to + l, fifo->data, len - l);
+ if (unlikely(ret))
+ ret = DIV_ROUND_UP(ret, esize);
+ }
+ /*
+ * make sure that the data is copied before
+ * incrementing the fifo->out index counter
+ */
+ smp_wmb();
+ *copied = len - ret;
+ /* return the number of elements which are not copied */
+ return ret;
+}
+
+int __kfifo_to_user(struct __kfifo *fifo, void __user *to,
+ unsigned long len, unsigned int *copied)
+{
+ unsigned int l;
+ unsigned long ret;
+ unsigned int esize = fifo->esize;
+ int err;
+
+ if (esize != 1)
+ len /= esize;
+
+ l = fifo->in - fifo->out;
+ if (len > l)
+ len = l;
+ ret = kfifo_copy_to_user(fifo, to, len, fifo->out, copied);
+ if (unlikely(ret)) {
+ len -= ret;
+ err = -EFAULT;
+ } else
+ err = 0;
+ fifo->out += len;
+ return err;
+}
+EXPORT_SYMBOL(__kfifo_to_user);
+
+static int setup_sgl_buf(struct scatterlist *sgl, void *buf,
+ int nents, unsigned int len)
+{
+ int n;
+ unsigned int l;
+ unsigned int off;
+ struct page *page;
+
+ if (!nents)
+ return 0;
+
+ if (!len)
+ return 0;
+
+ n = 0;
+ page = virt_to_page(buf);
+ off = offset_in_page(buf);
+ l = 0;
+
+ while (len >= l + PAGE_SIZE - off) {
+ struct page *npage;
+
+ l += PAGE_SIZE;
+ buf += PAGE_SIZE;
+ npage = virt_to_page(buf);
+ if (page_to_phys(page) != page_to_phys(npage) - l) {
+ sg_set_page(sgl, page, l - off, off);
+ sgl = sg_next(sgl);
+ if (++n == nents || sgl == NULL)
+ return n;
+ page = npage;
+ len -= l - off;
+ l = off = 0;
+ }
+ }
+ sg_set_page(sgl, page, len, off);
+ return n + 1;
+}
+
+static unsigned int setup_sgl(struct __kfifo *fifo, struct scatterlist *sgl,
+ int nents, unsigned int len, unsigned int off)
+{
+ unsigned int size = fifo->mask + 1;
+ unsigned int esize = fifo->esize;
+ unsigned int l;
+ unsigned int n;
+
+ off &= fifo->mask;
+ if (esize != 1) {
+ off *= esize;
+ size *= esize;
+ len *= esize;
+ }
+ l = min(len, size - off);
+
+ n = setup_sgl_buf(sgl, fifo->data + off, nents, l);
+ n += setup_sgl_buf(sgl + n, fifo->data, nents - n, len - l);
+
+ return n;
+}
+
+unsigned int __kfifo_dma_in_prepare(struct __kfifo *fifo,
+ struct scatterlist *sgl, int nents, unsigned int len)
+{
+ unsigned int l;
+
+ l = kfifo_unused(fifo);
+ if (len > l)
+ len = l;
+
+ return setup_sgl(fifo, sgl, nents, len, fifo->in);
+}
+EXPORT_SYMBOL(__kfifo_dma_in_prepare);
+
+unsigned int __kfifo_dma_out_prepare(struct __kfifo *fifo,
+ struct scatterlist *sgl, int nents, unsigned int len)
+{
+ unsigned int l;
+
+ l = fifo->in - fifo->out;
+ if (len > l)
+ len = l;
+
+ return setup_sgl(fifo, sgl, nents, len, fifo->out);
+}
+EXPORT_SYMBOL(__kfifo_dma_out_prepare);
+
+unsigned int __kfifo_max_r(unsigned int len, size_t recsize)
+{
+ unsigned int max = (1 << (recsize << 3)) - 1;
+
+ if (len > max)
+ return max;
+ return len;
+}
+EXPORT_SYMBOL(__kfifo_max_r);
+
+#define __KFIFO_PEEK(data, out, mask) \
+ ((data)[(out) & (mask)])
+/*
+ * __kfifo_peek_n internal helper function for determining the length of
+ * the next record in the fifo
+ */
+static unsigned int __kfifo_peek_n(struct __kfifo *fifo, size_t recsize)
+{
+ unsigned int l;
+ unsigned int mask = fifo->mask;
+ unsigned char *data = fifo->data;
+
+ l = __KFIFO_PEEK(data, fifo->out, mask);
+
+ if (--recsize)
+ l |= __KFIFO_PEEK(data, fifo->out + 1, mask) << 8;
+
+ return l;
+}
+
+#define __KFIFO_POKE(data, in, mask, val) \
+ ( \
+ (data)[(in) & (mask)] = (unsigned char)(val) \
+ )
+
+/*
+ * __kfifo_poke_n internal helper function for storing the length of
+ * the record into the fifo
+ */
+static void __kfifo_poke_n(struct __kfifo *fifo, unsigned int n, size_t recsize)
+{
+ unsigned int mask = fifo->mask;
+ unsigned char *data = fifo->data;
+
+ __KFIFO_POKE(data, fifo->in, mask, n);
+
+ if (recsize > 1)
+ __KFIFO_POKE(data, fifo->in + 1, mask, n >> 8);
+}
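Taken together, __kfifo_poke_n() and __kfifo_peek_n() store each record's length in its first @recsize bytes (1 or 2), least-significant byte first. For example, with recsize == 2 and a 300-byte record:

	/*
	 * __kfifo_poke_n(fifo, 300, 2) writes at fifo->in and fifo->in + 1:
	 *   data[(in)     & mask] = 300 & 0xff = 0x2c
	 *   data[(in + 1) & mask] = 300 >> 8   = 0x01
	 * __kfifo_peek_n(fifo, 2) later reads the same two bytes back:
	 *   0x2c | (0x01 << 8) = 300
	 */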
+
+unsigned int __kfifo_len_r(struct __kfifo *fifo, size_t recsize)
+{
+ return __kfifo_peek_n(fifo, recsize);
+}
+EXPORT_SYMBOL(__kfifo_len_r);
+
+unsigned int __kfifo_in_r(struct __kfifo *fifo, const void *buf,
+ unsigned int len, size_t recsize)
+{
+ if (len + recsize > kfifo_unused(fifo))
+ return 0;
+
+ __kfifo_poke_n(fifo, len, recsize);
+
+ kfifo_copy_in(fifo, buf, len, fifo->in + recsize);
+ fifo->in += len + recsize;
+ return len;
+}
+EXPORT_SYMBOL(__kfifo_in_r);
+
+static unsigned int kfifo_out_copy_r(struct __kfifo *fifo,
+ void *buf, unsigned int len, size_t recsize, unsigned int *n)
+{
+ *n = __kfifo_peek_n(fifo, recsize);
+
+ if (len > *n)
+ len = *n;
+
+ kfifo_copy_out(fifo, buf, len, fifo->out + recsize);
+ return len;
+}
+
+unsigned int __kfifo_out_peek_r(struct __kfifo *fifo, void *buf,
+ unsigned int len, size_t recsize)
+{
+ unsigned int n;
+
+ if (fifo->in == fifo->out)
+ return 0;
+
+ return kfifo_out_copy_r(fifo, buf, len, recsize, &n);
+}
+EXPORT_SYMBOL(__kfifo_out_peek_r);
+
+unsigned int __kfifo_out_r(struct __kfifo *fifo, void *buf,
+ unsigned int len, size_t recsize)
+{
+ unsigned int n;
+
+ if (fifo->in == fifo->out)
+ return 0;
+
+ len = kfifo_out_copy_r(fifo, buf, len, recsize, &n);
+ fifo->out += n + recsize;
+ return len;
+}
+EXPORT_SYMBOL(__kfifo_out_r);
+
+void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize)
+{
+ unsigned int n;
+
+ n = __kfifo_peek_n(fifo, recsize);
+ fifo->out += n + recsize;
+}
+EXPORT_SYMBOL(__kfifo_skip_r);
+
+int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from,
+ unsigned long len, unsigned int *copied, size_t recsize)
+{
+ unsigned long ret;
+
+ len = __kfifo_max_r(len, recsize);
+
+ if (len + recsize > kfifo_unused(fifo)) {
+ *copied = 0;
+ return 0;
+ }
+
+ __kfifo_poke_n(fifo, len, recsize);
+
+ ret = kfifo_copy_from_user(fifo, from, len, fifo->in + recsize, copied);
+ if (unlikely(ret)) {
+ *copied = 0;
+ return -EFAULT;
+ }
+ fifo->in += len + recsize;
+ return 0;
+}
+EXPORT_SYMBOL(__kfifo_from_user_r);
+
+int __kfifo_to_user_r(struct __kfifo *fifo, void __user *to,
+ unsigned long len, unsigned int *copied, size_t recsize)
+{
+ unsigned long ret;
+ unsigned int n;
+
+ if (fifo->in == fifo->out) {
+ *copied = 0;
+ return 0;
+ }
+
+ n = __kfifo_peek_n(fifo, recsize);
+ if (len > n)
+ len = n;
+
+ ret = kfifo_copy_to_user(fifo, to, len, fifo->out + recsize, copied);
+ if (unlikely(ret)) {
+ *copied = 0;
+ return -EFAULT;
+ }
+ fifo->out += n + recsize;
+ return 0;
+}
+EXPORT_SYMBOL(__kfifo_to_user_r);
+
+unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo,
+ struct scatterlist *sgl, int nents, unsigned int len, size_t recsize)
+{
+ if (!nents)
+ BUG();
+
+ len = __kfifo_max_r(len, recsize);
+
+ if (len + recsize > kfifo_unused(fifo))
+ return 0;
+
+ return setup_sgl(fifo, sgl, nents, len, fifo->in + recsize);
+}
+EXPORT_SYMBOL(__kfifo_dma_in_prepare_r);
+
+void __kfifo_dma_in_finish_r(struct __kfifo *fifo,
+ unsigned int len, size_t recsize)
+{
+ len = __kfifo_max_r(len, recsize);
+ __kfifo_poke_n(fifo, len, recsize);
+ fifo->in += len + recsize;
+}
+EXPORT_SYMBOL(__kfifo_dma_in_finish_r);
+
+unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo,
+ struct scatterlist *sgl, int nents, unsigned int len, size_t recsize)
+{
+ if (!nents)
+ BUG();
+
+ len = __kfifo_max_r(len, recsize);
+
+ if (len + recsize > fifo->in - fifo->out)
+ return 0;
+
+ return setup_sgl(fifo, sgl, nents, len, fifo->out + recsize);
+}
+EXPORT_SYMBOL(__kfifo_dma_out_prepare_r);
+
+void __kfifo_dma_out_finish_r(struct __kfifo *fifo, size_t recsize)
+{
+ unsigned int len;
+
+ len = __kfifo_peek_n(fifo, recsize);
+ fifo->out += len + recsize;
+}
+EXPORT_SYMBOL(__kfifo_dma_out_finish_r);
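Most code never calls these __kfifo_* primitives directly but goes through the wrapper macros from include/linux/kfifo.h; a minimal sketch of the common byte-fifo case, assuming those wrappers (DEFINE_KFIFO, kfifo_in, kfifo_out):

	#include <linux/kfifo.h>

	/* a statically allocated fifo of 128 unsigned chars */
	static DEFINE_KFIFO(my_fifo, unsigned char, 128);

	static void kfifo_demo(void)
	{
		unsigned char in_buf[4] = { 1, 2, 3, 4 };
		unsigned char out_buf[4];
		unsigned int n;

		n = kfifo_in(&my_fifo, in_buf, sizeof(in_buf));	/* n == 4 */
		n = kfifo_out(&my_fifo, out_buf, n);		/* n == 4 */
		pr_info("copied %u bytes back out\n", n);
	}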
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index c3615eab0cc3..f78ae0c0c4e2 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -104,6 +104,22 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
return 0;
}
+/**
+ * kstrtoull - convert a string to an unsigned long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
{
if (s[0] == '+')
@@ -112,6 +128,22 @@ int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
}
EXPORT_SYMBOL(kstrtoull);
+/**
+ * kstrtoll - convert a string to a long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoll. Return code must
+ * be checked.
+ */
int kstrtoll(const char *s, unsigned int base, long long *res)
{
unsigned long long tmp;
@@ -168,6 +200,22 @@ int _kstrtol(const char *s, unsigned int base, long *res)
}
EXPORT_SYMBOL(_kstrtol);
+/**
+ * kstrtouint - convert a string to an unsigned int
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoul. Return code must
+ * be checked.
+ */
int kstrtouint(const char *s, unsigned int base, unsigned int *res)
{
unsigned long long tmp;
@@ -183,6 +231,22 @@ int kstrtouint(const char *s, unsigned int base, unsigned int *res)
}
EXPORT_SYMBOL(kstrtouint);
+/**
+ * kstrtoint - convert a string to an int
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtol. Return code must
+ * be checked.
+ */
int kstrtoint(const char *s, unsigned int base, int *res)
{
long long tmp;
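The kernel-doc added above repeatedly notes that the return code must be checked; the intended calling pattern for the kstrto*() family looks like this hypothetical sysfs store handler (all names illustrative):

	static ssize_t threshold_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t count)
	{
		unsigned int val;
		int ret;

		ret = kstrtouint(buf, 0, &val);	/* base 0: accepts "123", "0x7b", "0173" */
		if (ret)
			return ret;	/* -EINVAL (parse error) or -ERANGE (overflow) */

		/* ... apply val ... */
		return count;
	}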
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 7aae0f2a5e0a..c3eb261a7df3 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -47,10 +47,10 @@ __setup("debug_locks_verbose=", setup_debug_locks_verbose);
* Normal standalone locks, for the circular and irq-context
* dependency tests:
*/
-static DEFINE_SPINLOCK(lock_A);
-static DEFINE_SPINLOCK(lock_B);
-static DEFINE_SPINLOCK(lock_C);
-static DEFINE_SPINLOCK(lock_D);
+static DEFINE_RAW_SPINLOCK(lock_A);
+static DEFINE_RAW_SPINLOCK(lock_B);
+static DEFINE_RAW_SPINLOCK(lock_C);
+static DEFINE_RAW_SPINLOCK(lock_D);
static DEFINE_RWLOCK(rwlock_A);
static DEFINE_RWLOCK(rwlock_B);
@@ -73,12 +73,12 @@ static DECLARE_RWSEM(rwsem_D);
* but X* and Y* are different classes. We do this so that
* we do not trigger a real lockup:
*/
-static DEFINE_SPINLOCK(lock_X1);
-static DEFINE_SPINLOCK(lock_X2);
-static DEFINE_SPINLOCK(lock_Y1);
-static DEFINE_SPINLOCK(lock_Y2);
-static DEFINE_SPINLOCK(lock_Z1);
-static DEFINE_SPINLOCK(lock_Z2);
+static DEFINE_RAW_SPINLOCK(lock_X1);
+static DEFINE_RAW_SPINLOCK(lock_X2);
+static DEFINE_RAW_SPINLOCK(lock_Y1);
+static DEFINE_RAW_SPINLOCK(lock_Y2);
+static DEFINE_RAW_SPINLOCK(lock_Z1);
+static DEFINE_RAW_SPINLOCK(lock_Z2);
static DEFINE_RWLOCK(rwlock_X1);
static DEFINE_RWLOCK(rwlock_X2);
@@ -107,10 +107,10 @@ static DECLARE_RWSEM(rwsem_Z2);
*/
#define INIT_CLASS_FUNC(class) \
static noinline void \
-init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \
- struct rw_semaphore *rwsem) \
+init_class_##class(raw_spinlock_t *lock, rwlock_t *rwlock, \
+ struct mutex *mutex, struct rw_semaphore *rwsem)\
{ \
- spin_lock_init(lock); \
+ raw_spin_lock_init(lock); \
rwlock_init(rwlock); \
mutex_init(mutex); \
init_rwsem(rwsem); \
@@ -168,10 +168,10 @@ static void init_shared_classes(void)
* Shortcuts for lock/unlock API variants, to keep
* the testcases compact:
*/
-#define L(x) spin_lock(&lock_##x)
-#define U(x) spin_unlock(&lock_##x)
+#define L(x) raw_spin_lock(&lock_##x)
+#define U(x) raw_spin_unlock(&lock_##x)
#define LU(x) L(x); U(x)
-#define SI(x) spin_lock_init(&lock_##x)
+#define SI(x) raw_spin_lock_init(&lock_##x)
#define WL(x) write_lock(&rwlock_##x)
#define WU(x) write_unlock(&rwlock_##x)
@@ -911,7 +911,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
#define I2(x) \
do { \
- spin_lock_init(&lock_##x); \
+ raw_spin_lock_init(&lock_##x); \
rwlock_init(&rwlock_##x); \
mutex_init(&mutex_##x); \
init_rwsem(&rwsem_##x); \
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index a07e7268d7ed..8335d39d2ccd 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -44,8 +44,8 @@ MODULE_LICENSE("GPL");
} while (0)
#define RETURN(x...) do { \
- clear_bit(__LC_PARANOIA, &lc->flags); \
- smp_mb__after_clear_bit(); return x ; } while (0)
+ clear_bit_unlock(__LC_PARANOIA, &lc->flags); \
+ return x ; } while (0)
/* BUG() if e is not one of the elements tracked by lc */
#define PARANOIA_LC_ELEMENT(lc, e) do { \
@@ -55,9 +55,40 @@ MODULE_LICENSE("GPL");
BUG_ON(i >= lc_->nr_elements); \
BUG_ON(lc_->lc_element[i] != e_); } while (0)
+
+/* We need to atomically
+ * - try to grab the lock (set LC_LOCKED)
+ * - only if there is no pending transaction
+ * (neither LC_DIRTY nor LC_STARVING is set)
+ * Because of PARANOIA_ENTRY() above abusing lc->flags as well,
+ * it is not sufficient to just say
+ * return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED);
+ */
+int lc_try_lock(struct lru_cache *lc)
+{
+ unsigned long val;
+ do {
+ val = cmpxchg(&lc->flags, 0, LC_LOCKED);
+ } while (unlikely (val == LC_PARANOIA));
+ /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */
+ return 0 == val;
+#if 0
+ /* Alternative approach, spin in case someone enters or leaves a
+ * PARANOIA_ENTRY()/RETURN() section. */
+ unsigned long old, new, val;
+ do {
+ old = lc->flags & LC_PARANOIA;
+ new = old | LC_LOCKED;
+ val = cmpxchg(&lc->flags, old, new);
+ } while (unlikely (val == (old ^ LC_PARANOIA)));
+ return old == val;
+#endif
+}
+
/**
* lc_create - prepares to track objects in an active set
* @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
+ * @max_pending_changes: maximum changes to accumulate until a transaction is required
* @e_count: number of elements allowed to be active simultaneously
* @e_size: size of the tracked objects
* @e_off: offset to the &struct lc_element member in a tracked object
@@ -66,6 +97,7 @@ MODULE_LICENSE("GPL");
* or NULL on (allocation) failure.
*/
struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+ unsigned max_pending_changes,
unsigned e_count, size_t e_size, size_t e_off)
{
struct hlist_head *slot = NULL;
@@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
INIT_LIST_HEAD(&lc->in_use);
INIT_LIST_HEAD(&lc->lru);
INIT_LIST_HEAD(&lc->free);
+ INIT_LIST_HEAD(&lc->to_be_changed);
lc->name = name;
lc->element_size = e_size;
lc->element_off = e_off;
lc->nr_elements = e_count;
- lc->new_number = LC_FREE;
+ lc->max_pending_changes = max_pending_changes;
lc->lc_cache = cache;
lc->lc_element = element;
lc->lc_slot = slot;
@@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
e = p + e_off;
e->lc_index = i;
e->lc_number = LC_FREE;
+ e->lc_new_number = LC_FREE;
list_add(&e->list, &lc->free);
element[i] = e;
}
@@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc)
INIT_LIST_HEAD(&lc->in_use);
INIT_LIST_HEAD(&lc->lru);
INIT_LIST_HEAD(&lc->free);
+ INIT_LIST_HEAD(&lc->to_be_changed);
lc->used = 0;
lc->hits = 0;
lc->misses = 0;
lc->starving = 0;
- lc->dirty = 0;
+ lc->locked = 0;
lc->changed = 0;
+ lc->pending_changes = 0;
lc->flags = 0;
- lc->changing_element = NULL;
- lc->new_number = LC_FREE;
memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
for (i = 0; i < lc->nr_elements; i++) {
@@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc)
/* re-init it */
e->lc_index = i;
e->lc_number = LC_FREE;
+ e->lc_new_number = LC_FREE;
list_add(&e->list, &lc->free);
}
}
@@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
/* NOTE:
* total calls to lc_get are
* (starving + hits + misses)
- * misses include "dirty" count (update from an other thread in
+ * misses include "locked" count (update from an other thread in
* progress) and "changed", when this in fact lead to an successful
* update of the cache.
*/
return seq_printf(seq, "\t%s: used:%u/%u "
- "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
+ "hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
lc->name, lc->used, lc->nr_elements,
- lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
+ lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
}
static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
@@ -224,6 +259,26 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
}
+static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr,
+ bool include_changing)
+{
+ struct lc_element *e;
+
+ BUG_ON(!lc);
+ BUG_ON(!lc->nr_elements);
+ hlist_for_each_entry(e, lc_hash_slot(lc, enr), colision) {
+ /* "about to be changed" elements, pending transaction commit,
+ * are hashed by their "new number". "Normal" elements have
+ * lc_number == lc_new_number. */
+ if (e->lc_new_number != enr)
+ continue;
+ if (e->lc_new_number == e->lc_number || include_changing)
+ return e;
+ break;
+ }
+ return NULL;
+}
+
/**
* lc_find - find element by label, if present in the hash table
* @lc: The lru_cache object
@@ -232,38 +287,28 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
* Returns the pointer to an element, if the element with the requested
* "label" or element number is present in the hash table,
* or NULL if not found. Does not change the refcnt.
+ * Ignores elements that are "about to be used", i.e. not yet in the active
+ * set, but still pending transaction commit.
*/
struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
{
- struct hlist_node *n;
- struct lc_element *e;
-
- BUG_ON(!lc);
- BUG_ON(!lc->nr_elements);
- hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
- if (e->lc_number == enr)
- return e;
- }
- return NULL;
+ return __lc_find(lc, enr, 0);
}
-/* returned element will be "recycled" immediately */
-static struct lc_element *lc_evict(struct lru_cache *lc)
+/**
+ * lc_is_used - find element by label
+ * @lc: The lru_cache object
+ * @enr: element number
+ *
+ * Returns true, if the element with the requested "label" or element number is
+ * present in the hash table, and is used (refcnt > 0).
+ * Also finds elements that are not _currently_ used but only "about to be
+ * used", i.e. on the "to_be_changed" list, pending transaction commit.
+ */
+bool lc_is_used(struct lru_cache *lc, unsigned int enr)
{
- struct list_head *n;
- struct lc_element *e;
-
- if (list_empty(&lc->lru))
- return NULL;
-
- n = lc->lru.prev;
- e = list_entry(n, struct lc_element, list);
-
- PARANOIA_LC_ELEMENT(lc, e);
-
- list_del(&e->list);
- hlist_del(&e->colision);
- return e;
+ struct lc_element *e = __lc_find(lc, enr, 1);
+ return e && e->refcnt;
}
/**
@@ -280,22 +325,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e)
PARANOIA_LC_ELEMENT(lc, e);
BUG_ON(e->refcnt);
- e->lc_number = LC_FREE;
+ e->lc_number = e->lc_new_number = LC_FREE;
hlist_del_init(&e->colision);
list_move(&e->list, &lc->free);
RETURN();
}
-static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
+static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number)
{
struct list_head *n;
+ struct lc_element *e;
+
+ if (!list_empty(&lc->free))
+ n = lc->free.next;
+ else if (!list_empty(&lc->lru))
+ n = lc->lru.prev;
+ else
+ return NULL;
- if (list_empty(&lc->free))
- return lc_evict(lc);
+ e = list_entry(n, struct lc_element, list);
+ PARANOIA_LC_ELEMENT(lc, e);
- n = lc->free.next;
- list_del(n);
- return list_entry(n, struct lc_element, list);
+ e->lc_new_number = new_number;
+ if (!hlist_unhashed(&e->colision))
+ __hlist_del(&e->colision);
+ hlist_add_head(&e->colision, lc_hash_slot(lc, new_number));
+ list_move(&e->list, &lc->to_be_changed);
+
+ return e;
}
static int lc_unused_element_available(struct lru_cache *lc)
@@ -308,45 +365,7 @@ static int lc_unused_element_available(struct lru_cache *lc)
return 0;
}
-
-/**
- * lc_get - get element by label, maybe change the active set
- * @lc: the lru cache to operate on
- * @enr: the label to look up
- *
- * Finds an element in the cache, increases its usage count,
- * "touches" and returns it.
- *
- * In case the requested number is not present, it needs to be added to the
- * cache. Therefore it is possible that an other element becomes evicted from
- * the cache. In either case, the user is notified so he is able to e.g. keep
- * a persistent log of the cache changes, and therefore the objects in use.
- *
- * Return values:
- * NULL
- * The cache was marked %LC_STARVING,
- * or the requested label was not in the active set
- * and a changing transaction is still pending (@lc was marked %LC_DIRTY).
- * Or no unused or free element could be recycled (@lc will be marked as
- * %LC_STARVING, blocking further lc_get() operations).
- *
- * pointer to the element with the REQUESTED element number.
- * In this case, it can be used right away
- *
- * pointer to an UNUSED element with some different element number,
- * where that different number may also be %LC_FREE.
- *
- * In this case, the cache is marked %LC_DIRTY (blocking further changes),
- * and the returned element pointer is removed from the lru list and
- * hash collision chains. The user now should do whatever housekeeping
- * is necessary.
- * Then he must call lc_changed(lc,element_pointer), to finish
- * the change.
- *
- * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
- * any cache set change.
- */
-struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
+static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool may_change)
{
struct lc_element *e;
@@ -356,8 +375,12 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
RETURN(NULL);
}
- e = lc_find(lc, enr);
- if (e) {
+ e = __lc_find(lc, enr, 1);
+ /* if lc_new_number != lc_number,
+ * this enr is currently being pulled in already,
+ * and will be available once the pending transaction
+ * has been committed. */
+ if (e && e->lc_new_number == e->lc_number) {
++lc->hits;
if (e->refcnt++ == 0)
lc->used++;
@@ -366,6 +389,26 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
}
++lc->misses;
+ if (!may_change)
+ RETURN(NULL);
+
+ /* It has been found above, but on the "to_be_changed" list, not yet
+ * committed. Don't pull it in twice, wait for the transaction, then
+ * try again */
+ if (e)
+ RETURN(NULL);
+
+ /* To avoid races with lc_try_lock(), first, mark us dirty
+ * (using test_and_set_bit, as it implies memory barriers), ... */
+ test_and_set_bit(__LC_DIRTY, &lc->flags);
+
+ /* ... only then check if it is locked anyways. If lc_unlock clears
+ * the dirty bit again, that's not a problem, we will come here again.
+ */
+ if (test_bit(__LC_LOCKED, &lc->flags)) {
+ ++lc->locked;
+ RETURN(NULL);
+ }
/* In case there is nothing available and we can not kick out
* the LRU element, we have to wait ...
@@ -375,71 +418,109 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
RETURN(NULL);
}
- /* it was not present in the active set.
- * we are going to recycle an unused (or even "free") element.
- * user may need to commit a transaction to record that change.
- * we serialize on flags & TF_DIRTY */
- if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
- ++lc->dirty;
+ /* It was not present in the active set. We are going to recycle an
+ * unused (or even "free") element, but we won't accumulate more than
+ * max_pending_changes changes. */
+ if (lc->pending_changes >= lc->max_pending_changes)
RETURN(NULL);
- }
- e = lc_get_unused_element(lc);
+ e = lc_prepare_for_change(lc, enr);
BUG_ON(!e);
clear_bit(__LC_STARVING, &lc->flags);
BUG_ON(++e->refcnt != 1);
lc->used++;
-
- lc->changing_element = e;
- lc->new_number = enr;
+ lc->pending_changes++;
RETURN(e);
}
-/* similar to lc_get,
- * but only gets a new reference on an existing element.
- * you either get the requested element, or NULL.
- * will be consolidated into one function.
+/**
+ * lc_get - get element by label, maybe change the active set
+ * @lc: the lru cache to operate on
+ * @enr: the label to look up
+ *
+ * Finds an element in the cache, increases its usage count,
+ * "touches" and returns it.
+ *
+ * In case the requested number is not present, it needs to be added to the
+ * cache. Therefore it is possible that an other element becomes evicted from
+ * the cache. In either case, the user is notified so he is able to e.g. keep
+ * a persistent log of the cache changes, and therefore the objects in use.
+ *
+ * Return values:
+ * NULL
+ * The cache was marked %LC_STARVING,
+ * or the requested label was not in the active set
+ * and a changing transaction is still pending (@lc was marked %LC_DIRTY).
+ * Or no unused or free element could be recycled (@lc will be marked as
+ * %LC_STARVING, blocking further lc_get() operations).
+ *
+ * pointer to the element with the REQUESTED element number.
+ * In this case, it can be used right away
+ *
+ * pointer to an UNUSED element with some different element number,
+ * where that different number may also be %LC_FREE.
+ *
+ * In this case, the cache is marked %LC_DIRTY,
+ * so lc_try_lock() will no longer succeed.
+ * The returned element pointer is moved to the "to_be_changed" list,
+ * and registered with the new element number on the hash collision chains,
+ * so it is possible to pick it up from lc_is_used().
+ * Up to "max_pending_changes" (see lc_create()) can be accumulated.
+ * The user now should do whatever housekeeping is necessary,
+ * typically serialize on lc_try_lock_for_transaction(), then call
+ * lc_committed(lc) and lc_unlock(), to finish the change.
+ *
+ * NOTE: The user needs to check the lc_number on EACH use, so that any
+ * change of the active set is recognized.
*/
-struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
+struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
{
- struct lc_element *e;
-
- PARANOIA_ENTRY();
- if (lc->flags & LC_STARVING) {
- ++lc->starving;
- RETURN(NULL);
- }
+ return __lc_get(lc, enr, 1);
+}
- e = lc_find(lc, enr);
- if (e) {
- ++lc->hits;
- if (e->refcnt++ == 0)
- lc->used++;
- list_move(&e->list, &lc->in_use); /* Not evictable... */
- }
- RETURN(e);
+/**
+ * lc_try_get - get element by label, if present; do not change the active set
+ * @lc: the lru cache to operate on
+ * @enr: the label to look up
+ *
+ * Finds an element in the cache, increases its usage count,
+ * "touches" and returns it.
+ *
+ * Return values:
+ * NULL
+ * The cache was marked %LC_STARVING,
+ * or the requested label was not in the active set
+ *
+ * pointer to the element with the REQUESTED element number.
+ * In this case, it can be used right away
+ */
+struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
+{
+ return __lc_get(lc, enr, 0);
}
/**
- * lc_changed - tell @lc that the change has been recorded
+ * lc_committed - tell @lc that pending changes have been recorded
* @lc: the lru cache to operate on
- * @e: the element pending label change
+ *
+ * The user is expected to serialize on an explicit lc_try_lock_for_transaction()
+ * before the transaction is started, and later needs to call lc_unlock()
+ * explicitly as well.
*/
-void lc_changed(struct lru_cache *lc, struct lc_element *e)
+void lc_committed(struct lru_cache *lc)
{
+ struct lc_element *e, *tmp;
+
PARANOIA_ENTRY();
- BUG_ON(e != lc->changing_element);
- PARANOIA_LC_ELEMENT(lc, e);
- ++lc->changed;
- e->lc_number = lc->new_number;
- list_add(&e->list, &lc->in_use);
- hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
- lc->changing_element = NULL;
- lc->new_number = LC_FREE;
- clear_bit(__LC_DIRTY, &lc->flags);
- smp_mb__after_clear_bit();
+ list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) {
+ /* count number of changes, not number of transactions */
+ ++lc->changed;
+ e->lc_number = e->lc_new_number;
+ list_move(&e->list, &lc->in_use);
+ }
+ lc->pending_changes = 0;
RETURN();
}
@@ -458,13 +539,12 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
PARANOIA_ENTRY();
PARANOIA_LC_ELEMENT(lc, e);
BUG_ON(e->refcnt == 0);
- BUG_ON(e == lc->changing_element);
+ BUG_ON(e->lc_number != e->lc_new_number);
if (--e->refcnt == 0) {
/* move it to the front of LRU. */
list_move(&e->list, &lc->lru);
lc->used--;
- clear_bit(__LC_STARVING, &lc->flags);
- smp_mb__after_clear_bit();
+ clear_bit_unlock(__LC_STARVING, &lc->flags);
}
RETURN(e->refcnt);
}
@@ -504,16 +584,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
void lc_set(struct lru_cache *lc, unsigned int enr, int index)
{
struct lc_element *e;
+ struct list_head *lh;
if (index < 0 || index >= lc->nr_elements)
return;
e = lc_element_by_index(lc, index);
- e->lc_number = enr;
+ BUG_ON(e->lc_number != e->lc_new_number);
+ BUG_ON(e->refcnt != 0);
+ e->lc_number = e->lc_new_number = enr;
hlist_del_init(&e->colision);
- hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
- list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
+ if (enr == LC_FREE)
+ lh = &lc->free;
+ else {
+ hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
+ lh = &lc->lru;
+ }
+ list_move(&e->list, lh);
}
/**
@@ -553,8 +641,10 @@ EXPORT_SYMBOL(lc_try_get);
EXPORT_SYMBOL(lc_find);
EXPORT_SYMBOL(lc_get);
EXPORT_SYMBOL(lc_put);
-EXPORT_SYMBOL(lc_changed);
+EXPORT_SYMBOL(lc_committed);
EXPORT_SYMBOL(lc_element_by_index);
EXPORT_SYMBOL(lc_index_of);
EXPORT_SYMBOL(lc_seq_printf_stats);
EXPORT_SYMBOL(lc_seq_dump_details);
+EXPORT_SYMBOL(lc_try_lock);
+EXPORT_SYMBOL(lc_is_used);
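
Editorial note, not part of the patch above: the reworked API replaces the single changing_element/lc_changed() pair with a batched "to_be_changed" list and lc_committed(). The sketch below shows how a hypothetical caller might drive it, following the new lc_get() kernel-doc. It assumes a cache built with lc_create(), that lc_try_lock_for_transaction() returns nonzero on success, and some caller-defined way to persist the transaction; a real user may batch up to max_pending_changes labels before committing, and waiting/error handling is elided.

/* Hypothetical caller (illustration only): activate label @enr and, if the
 * active set changed, commit the batched changes as one transaction. */
static struct lc_element *activate_label(struct lru_cache *lc, unsigned int enr)
{
        struct lc_element *e = lc_get(lc, enr);

        if (!e)         /* starving, pending transaction, or change limit hit */
                return NULL;

        if (e->lc_number != enr && lc_try_lock_for_transaction(lc)) {
                /* ... write the new active set to stable storage here ... */
                lc_committed(lc);
                lc_unlock(lc);
        }
        return e;       /* release later with lc_put(lc, e) */
}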
diff --git a/lib/lzo/Makefile b/lib/lzo/Makefile
index e764116ea12d..f0f7d7ca2b83 100644
--- a/lib/lzo/Makefile
+++ b/lib/lzo/Makefile
@@ -1,5 +1,5 @@
lzo_compress-objs := lzo1x_compress.o
-lzo_decompress-objs := lzo1x_decompress.o
+lzo_decompress-objs := lzo1x_decompress_safe.o
obj-$(CONFIG_LZO_COMPRESS) += lzo_compress.o
obj-$(CONFIG_LZO_DECOMPRESS) += lzo_decompress.o
diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c
index a6040990a62e..236eb21167b5 100644
--- a/lib/lzo/lzo1x_compress.c
+++ b/lib/lzo/lzo1x_compress.c
@@ -1,194 +1,243 @@
/*
- * LZO1X Compressor from MiniLZO
+ * LZO1X Compressor from LZO
*
- * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
+ * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
*
* The full LZO package can be found at:
* http://www.oberhumer.com/opensource/lzo/
*
- * Changed for kernel use by:
+ * Changed for Linux kernel use by:
* Nitin Gupta <nitingupta910@gmail.com>
* Richard Purdie <rpurdie@openedhand.com>
*/
#include <linux/module.h>
#include <linux/kernel.h>
-#include <linux/lzo.h>
#include <asm/unaligned.h>
+#include <linux/lzo.h>
#include "lzodefs.h"
static noinline size_t
-_lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
- unsigned char *out, size_t *out_len, void *wrkmem)
+lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
+ unsigned char *out, size_t *out_len,
+ size_t ti, void *wrkmem)
{
+ const unsigned char *ip;
+ unsigned char *op;
const unsigned char * const in_end = in + in_len;
- const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5;
- const unsigned char ** const dict = wrkmem;
- const unsigned char *ip = in, *ii = ip;
- const unsigned char *end, *m, *m_pos;
- size_t m_off, m_len, dindex;
- unsigned char *op = out;
+ const unsigned char * const ip_end = in + in_len - 20;
+ const unsigned char *ii;
+ lzo_dict_t * const dict = (lzo_dict_t *) wrkmem;
- ip += 4;
+ op = out;
+ ip = in;
+ ii = ip;
+ ip += ti < 4 ? 4 - ti : 0;
for (;;) {
- dindex = ((size_t)(0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK;
- m_pos = dict[dindex];
-
- if (m_pos < in)
- goto literal;
-
- if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET))
- goto literal;
-
- m_off = ip - m_pos;
- if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
- goto try_match;
-
- dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f);
- m_pos = dict[dindex];
-
- if (m_pos < in)
- goto literal;
-
- if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET))
- goto literal;
-
- m_off = ip - m_pos;
- if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
- goto try_match;
-
- goto literal;
-
-try_match:
- if (get_unaligned((const unsigned short *)m_pos)
- == get_unaligned((const unsigned short *)ip)) {
- if (likely(m_pos[2] == ip[2]))
- goto match;
- }
-
+ const unsigned char *m_pos;
+ size_t t, m_len, m_off;
+ u32 dv;
literal:
- dict[dindex] = ip;
- ++ip;
+ ip += 1 + ((ip - ii) >> 5);
+next:
if (unlikely(ip >= ip_end))
break;
- continue;
-
-match:
- dict[dindex] = ip;
- if (ip != ii) {
- size_t t = ip - ii;
+ dv = get_unaligned_le32(ip);
+ t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK;
+ m_pos = in + dict[t];
+ dict[t] = (lzo_dict_t) (ip - in);
+ if (unlikely(dv != get_unaligned_le32(m_pos)))
+ goto literal;
+ ii -= ti;
+ ti = 0;
+ t = ip - ii;
+ if (t != 0) {
if (t <= 3) {
op[-2] |= t;
- } else if (t <= 18) {
+ COPY4(op, ii);
+ op += t;
+ } else if (t <= 16) {
*op++ = (t - 3);
+ COPY8(op, ii);
+ COPY8(op + 8, ii + 8);
+ op += t;
} else {
- size_t tt = t - 18;
-
- *op++ = 0;
- while (tt > 255) {
- tt -= 255;
+ if (t <= 18) {
+ *op++ = (t - 3);
+ } else {
+ size_t tt = t - 18;
*op++ = 0;
+ while (unlikely(tt > 255)) {
+ tt -= 255;
+ *op++ = 0;
+ }
+ *op++ = tt;
}
- *op++ = tt;
+ do {
+ COPY8(op, ii);
+ COPY8(op + 8, ii + 8);
+ op += 16;
+ ii += 16;
+ t -= 16;
+ } while (t >= 16);
+ if (t > 0) do {
+ *op++ = *ii++;
+ } while (--t > 0);
}
- do {
- *op++ = *ii++;
- } while (--t > 0);
}
- ip += 3;
- if (m_pos[3] != *ip++ || m_pos[4] != *ip++
- || m_pos[5] != *ip++ || m_pos[6] != *ip++
- || m_pos[7] != *ip++ || m_pos[8] != *ip++) {
- --ip;
- m_len = ip - ii;
+ m_len = 4;
+ {
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64)
+ u64 v;
+ v = get_unaligned((const u64 *) (ip + m_len)) ^
+ get_unaligned((const u64 *) (m_pos + m_len));
+ if (unlikely(v == 0)) {
+ do {
+ m_len += 8;
+ v = get_unaligned((const u64 *) (ip + m_len)) ^
+ get_unaligned((const u64 *) (m_pos + m_len));
+ if (unlikely(ip + m_len >= ip_end))
+ goto m_len_done;
+ } while (v == 0);
+ }
+# if defined(__LITTLE_ENDIAN)
+ m_len += (unsigned) __builtin_ctzll(v) / 8;
+# elif defined(__BIG_ENDIAN)
+ m_len += (unsigned) __builtin_clzll(v) / 8;
+# else
+# error "missing endian definition"
+# endif
+#elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32)
+ u32 v;
+ v = get_unaligned((const u32 *) (ip + m_len)) ^
+ get_unaligned((const u32 *) (m_pos + m_len));
+ if (unlikely(v == 0)) {
+ do {
+ m_len += 4;
+ v = get_unaligned((const u32 *) (ip + m_len)) ^
+ get_unaligned((const u32 *) (m_pos + m_len));
+ if (v != 0)
+ break;
+ m_len += 4;
+ v = get_unaligned((const u32 *) (ip + m_len)) ^
+ get_unaligned((const u32 *) (m_pos + m_len));
+ if (unlikely(ip + m_len >= ip_end))
+ goto m_len_done;
+ } while (v == 0);
+ }
+# if defined(__LITTLE_ENDIAN)
+ m_len += (unsigned) __builtin_ctz(v) / 8;
+# elif defined(__BIG_ENDIAN)
+ m_len += (unsigned) __builtin_clz(v) / 8;
+# else
+# error "missing endian definition"
+# endif
+#else
+ if (unlikely(ip[m_len] == m_pos[m_len])) {
+ do {
+ m_len += 1;
+ if (ip[m_len] != m_pos[m_len])
+ break;
+ m_len += 1;
+ if (ip[m_len] != m_pos[m_len])
+ break;
+ m_len += 1;
+ if (ip[m_len] != m_pos[m_len])
+ break;
+ m_len += 1;
+ if (ip[m_len] != m_pos[m_len])
+ break;
+ m_len += 1;
+ if (ip[m_len] != m_pos[m_len])
+ break;
+ m_len += 1;
+ if (ip[m_len] != m_pos[m_len])
+ break;
+ m_len += 1;
+ if (ip[m_len] != m_pos[m_len])
+ break;
+ m_len += 1;
+ if (unlikely(ip + m_len >= ip_end))
+ goto m_len_done;
+ } while (ip[m_len] == m_pos[m_len]);
+ }
+#endif
+ }
+m_len_done:
- if (m_off <= M2_MAX_OFFSET) {
- m_off -= 1;
- *op++ = (((m_len - 1) << 5)
- | ((m_off & 7) << 2));
- *op++ = (m_off >> 3);
- } else if (m_off <= M3_MAX_OFFSET) {
- m_off -= 1;
+ m_off = ip - m_pos;
+ ip += m_len;
+ ii = ip;
+ if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) {
+ m_off -= 1;
+ *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2));
+ *op++ = (m_off >> 3);
+ } else if (m_off <= M3_MAX_OFFSET) {
+ m_off -= 1;
+ if (m_len <= M3_MAX_LEN)
*op++ = (M3_MARKER | (m_len - 2));
- goto m3_m4_offset;
- } else {
- m_off -= 0x4000;
-
- *op++ = (M4_MARKER | ((m_off & 0x4000) >> 11)
- | (m_len - 2));
- goto m3_m4_offset;
+ else {
+ m_len -= M3_MAX_LEN;
+ *op++ = M3_MARKER | 0;
+ while (unlikely(m_len > 255)) {
+ m_len -= 255;
+ *op++ = 0;
+ }
+ *op++ = (m_len);
}
+ *op++ = (m_off << 2);
+ *op++ = (m_off >> 6);
} else {
- end = in_end;
- m = m_pos + M2_MAX_LEN + 1;
-
- while (ip < end && *m == *ip) {
- m++;
- ip++;
- }
- m_len = ip - ii;
-
- if (m_off <= M3_MAX_OFFSET) {
- m_off -= 1;
- if (m_len <= 33) {
- *op++ = (M3_MARKER | (m_len - 2));
- } else {
- m_len -= 33;
- *op++ = M3_MARKER | 0;
- goto m3_m4_len;
- }
- } else {
- m_off -= 0x4000;
- if (m_len <= M4_MAX_LEN) {
- *op++ = (M4_MARKER
- | ((m_off & 0x4000) >> 11)
+ m_off -= 0x4000;
+ if (m_len <= M4_MAX_LEN)
+ *op++ = (M4_MARKER | ((m_off >> 11) & 8)
| (m_len - 2));
- } else {
- m_len -= M4_MAX_LEN;
- *op++ = (M4_MARKER
- | ((m_off & 0x4000) >> 11));
-m3_m4_len:
- while (m_len > 255) {
- m_len -= 255;
- *op++ = 0;
- }
-
- *op++ = (m_len);
+ else {
+ m_len -= M4_MAX_LEN;
+ *op++ = (M4_MARKER | ((m_off >> 11) & 8));
+ while (unlikely(m_len > 255)) {
+ m_len -= 255;
+ *op++ = 0;
}
+ *op++ = (m_len);
}
-m3_m4_offset:
- *op++ = ((m_off & 63) << 2);
+ *op++ = (m_off << 2);
*op++ = (m_off >> 6);
}
-
- ii = ip;
- if (unlikely(ip >= ip_end))
- break;
+ goto next;
}
-
*out_len = op - out;
- return in_end - ii;
+ return in_end - (ii - ti);
}
-int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out,
- size_t *out_len, void *wrkmem)
+int lzo1x_1_compress(const unsigned char *in, size_t in_len,
+ unsigned char *out, size_t *out_len,
+ void *wrkmem)
{
- const unsigned char *ii;
+ const unsigned char *ip = in;
unsigned char *op = out;
- size_t t;
+ size_t l = in_len;
+ size_t t = 0;
- if (unlikely(in_len <= M2_MAX_LEN + 5)) {
- t = in_len;
- } else {
- t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem);
+ while (l > 20) {
+ size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1);
+ uintptr_t ll_end = (uintptr_t) ip + ll;
+ if ((ll_end + ((t + ll) >> 5)) <= ll_end)
+ break;
+ BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS);
+ memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t));
+ t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem);
+ ip += ll;
op += *out_len;
+ l -= ll;
}
+ t += l;
if (t > 0) {
- ii = in + in_len - t;
+ const unsigned char *ii = in + in_len - t;
if (op == out && t <= 238) {
*op++ = (17 + t);
@@ -198,16 +247,21 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out,
*op++ = (t - 3);
} else {
size_t tt = t - 18;
-
*op++ = 0;
while (tt > 255) {
tt -= 255;
*op++ = 0;
}
-
*op++ = tt;
}
- do {
+ if (t >= 16) do {
+ COPY8(op, ii);
+ COPY8(op + 8, ii + 8);
+ op += 16;
+ ii += 16;
+ t -= 16;
+ } while (t >= 16);
+ if (t > 0) do {
*op++ = *ii++;
} while (--t > 0);
}
@@ -223,4 +277,3 @@ EXPORT_SYMBOL_GPL(lzo1x_1_compress);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("LZO1X-1 Compressor");
-
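
The compressor rewrite keeps the existing in-kernel entry point. As an editorial illustration (not part of the patch), a hypothetical caller only needs the public <linux/lzo.h> interface: LZO1X_1_MEM_COMPRESS is the 16 KiB scratch dictionary that the new BUILD_BUG_ON in lzo1x_1_compress() checks the 2^13-entry, 16-bit dictionary against, and lzo1x_worst_compress() bounds the output size.

/* Hypothetical caller (illustration only): one-shot compression using the
 * public API.  dst must hold at least lzo1x_worst_compress(src_len) bytes. */
#include <linux/errno.h>
#include <linux/lzo.h>
#include <linux/vmalloc.h>

static int compress_buf(const void *src, size_t src_len,
                        void *dst, size_t *dst_len)
{
        void *wrkmem = vmalloc(LZO1X_1_MEM_COMPRESS);  /* per-call scratch dictionary */
        int ret;

        if (!wrkmem)
                return -ENOMEM;
        ret = lzo1x_1_compress(src, src_len, dst, dst_len, wrkmem);
        vfree(wrkmem);
        return ret == LZO_E_OK ? 0 : -EINVAL;
}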
diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c
deleted file mode 100644
index f2fd09850223..000000000000
--- a/lib/lzo/lzo1x_decompress.c
+++ /dev/null
@@ -1,255 +0,0 @@
-/*
- * LZO1X Decompressor from MiniLZO
- *
- * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
- *
- * The full LZO package can be found at:
- * http://www.oberhumer.com/opensource/lzo/
- *
- * Changed for kernel use by:
- * Nitin Gupta <nitingupta910@gmail.com>
- * Richard Purdie <rpurdie@openedhand.com>
- */
-
-#ifndef STATIC
-#include <linux/module.h>
-#include <linux/kernel.h>
-#endif
-
-#include <asm/unaligned.h>
-#include <linux/lzo.h>
-#include "lzodefs.h"
-
-#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x))
-#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x))
-#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op)
-
-#define COPY4(dst, src) \
- put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst))
-
-int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
- unsigned char *out, size_t *out_len)
-{
- const unsigned char * const ip_end = in + in_len;
- unsigned char * const op_end = out + *out_len;
- const unsigned char *ip = in, *m_pos;
- unsigned char *op = out;
- size_t t;
-
- *out_len = 0;
-
- if (*ip > 17) {
- t = *ip++ - 17;
- if (t < 4)
- goto match_next;
- if (HAVE_OP(t, op_end, op))
- goto output_overrun;
- if (HAVE_IP(t + 1, ip_end, ip))
- goto input_overrun;
- do {
- *op++ = *ip++;
- } while (--t > 0);
- goto first_literal_run;
- }
-
- while ((ip < ip_end)) {
- t = *ip++;
- if (t >= 16)
- goto match;
- if (t == 0) {
- if (HAVE_IP(1, ip_end, ip))
- goto input_overrun;
- while (*ip == 0) {
- t += 255;
- ip++;
- if (HAVE_IP(1, ip_end, ip))
- goto input_overrun;
- }
- t += 15 + *ip++;
- }
- if (HAVE_OP(t + 3, op_end, op))
- goto output_overrun;
- if (HAVE_IP(t + 4, ip_end, ip))
- goto input_overrun;
-
- COPY4(op, ip);
- op += 4;
- ip += 4;
- if (--t > 0) {
- if (t >= 4) {
- do {
- COPY4(op, ip);
- op += 4;
- ip += 4;
- t -= 4;
- } while (t >= 4);
- if (t > 0) {
- do {
- *op++ = *ip++;
- } while (--t > 0);
- }
- } else {
- do {
- *op++ = *ip++;
- } while (--t > 0);
- }
- }
-
-first_literal_run:
- t = *ip++;
- if (t >= 16)
- goto match;
- m_pos = op - (1 + M2_MAX_OFFSET);
- m_pos -= t >> 2;
- m_pos -= *ip++ << 2;
-
- if (HAVE_LB(m_pos, out, op))
- goto lookbehind_overrun;
-
- if (HAVE_OP(3, op_end, op))
- goto output_overrun;
- *op++ = *m_pos++;
- *op++ = *m_pos++;
- *op++ = *m_pos;
-
- goto match_done;
-
- do {
-match:
- if (t >= 64) {
- m_pos = op - 1;
- m_pos -= (t >> 2) & 7;
- m_pos -= *ip++ << 3;
- t = (t >> 5) - 1;
- if (HAVE_LB(m_pos, out, op))
- goto lookbehind_overrun;
- if (HAVE_OP(t + 3 - 1, op_end, op))
- goto output_overrun;
- goto copy_match;
- } else if (t >= 32) {
- t &= 31;
- if (t == 0) {
- if (HAVE_IP(1, ip_end, ip))
- goto input_overrun;
- while (*ip == 0) {
- t += 255;
- ip++;
- if (HAVE_IP(1, ip_end, ip))
- goto input_overrun;
- }
- t += 31 + *ip++;
- }
- m_pos = op - 1;
- m_pos -= get_unaligned_le16(ip) >> 2;
- ip += 2;
- } else if (t >= 16) {
- m_pos = op;
- m_pos -= (t & 8) << 11;
-
- t &= 7;
- if (t == 0) {
- if (HAVE_IP(1, ip_end, ip))
- goto input_overrun;
- while (*ip == 0) {
- t += 255;
- ip++;
- if (HAVE_IP(1, ip_end, ip))
- goto input_overrun;
- }
- t += 7 + *ip++;
- }
- m_pos -= get_unaligned_le16(ip) >> 2;
- ip += 2;
- if (m_pos == op)
- goto eof_found;
- m_pos -= 0x4000;
- } else {
- m_pos = op - 1;
- m_pos -= t >> 2;
- m_pos -= *ip++ << 2;
-
- if (HAVE_LB(m_pos, out, op))
- goto lookbehind_overrun;
- if (HAVE_OP(2, op_end, op))
- goto output_overrun;
-
- *op++ = *m_pos++;
- *op++ = *m_pos;
- goto match_done;
- }
-
- if (HAVE_LB(m_pos, out, op))
- goto lookbehind_overrun;
- if (HAVE_OP(t + 3 - 1, op_end, op))
- goto output_overrun;
-
- if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) {
- COPY4(op, m_pos);
- op += 4;
- m_pos += 4;
- t -= 4 - (3 - 1);
- do {
- COPY4(op, m_pos);
- op += 4;
- m_pos += 4;
- t -= 4;
- } while (t >= 4);
- if (t > 0)
- do {
- *op++ = *m_pos++;
- } while (--t > 0);
- } else {
-copy_match:
- *op++ = *m_pos++;
- *op++ = *m_pos++;
- do {
- *op++ = *m_pos++;
- } while (--t > 0);
- }
-match_done:
- t = ip[-2] & 3;
- if (t == 0)
- break;
-match_next:
- if (HAVE_OP(t, op_end, op))
- goto output_overrun;
- if (HAVE_IP(t + 1, ip_end, ip))
- goto input_overrun;
-
- *op++ = *ip++;
- if (t > 1) {
- *op++ = *ip++;
- if (t > 2)
- *op++ = *ip++;
- }
-
- t = *ip++;
- } while (ip < ip_end);
- }
-
- *out_len = op - out;
- return LZO_E_EOF_NOT_FOUND;
-
-eof_found:
- *out_len = op - out;
- return (ip == ip_end ? LZO_E_OK :
- (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));
-input_overrun:
- *out_len = op - out;
- return LZO_E_INPUT_OVERRUN;
-
-output_overrun:
- *out_len = op - out;
- return LZO_E_OUTPUT_OVERRUN;
-
-lookbehind_overrun:
- *out_len = op - out;
- return LZO_E_LOOKBEHIND_OVERRUN;
-}
-#ifndef STATIC
-EXPORT_SYMBOL_GPL(lzo1x_decompress_safe);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("LZO1X Decompressor");
-
-#endif
diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c
new file mode 100644
index 000000000000..569985d522d5
--- /dev/null
+++ b/lib/lzo/lzo1x_decompress_safe.c
@@ -0,0 +1,237 @@
+/*
+ * LZO1X Decompressor from LZO
+ *
+ * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
+ *
+ * The full LZO package can be found at:
+ * http://www.oberhumer.com/opensource/lzo/
+ *
+ * Changed for Linux kernel use by:
+ * Nitin Gupta <nitingupta910@gmail.com>
+ * Richard Purdie <rpurdie@openedhand.com>
+ */
+
+#ifndef STATIC
+#include <linux/module.h>
+#include <linux/kernel.h>
+#endif
+#include <asm/unaligned.h>
+#include <linux/lzo.h>
+#include "lzodefs.h"
+
+#define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x))
+#define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x))
+#define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun
+#define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun
+#define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun
+
+int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
+ unsigned char *out, size_t *out_len)
+{
+ unsigned char *op;
+ const unsigned char *ip;
+ size_t t, next;
+ size_t state = 0;
+ const unsigned char *m_pos;
+ const unsigned char * const ip_end = in + in_len;
+ unsigned char * const op_end = out + *out_len;
+
+ op = out;
+ ip = in;
+
+ if (unlikely(in_len < 3))
+ goto input_overrun;
+ if (*ip > 17) {
+ t = *ip++ - 17;
+ if (t < 4) {
+ next = t;
+ goto match_next;
+ }
+ goto copy_literal_run;
+ }
+
+ for (;;) {
+ t = *ip++;
+ if (t < 16) {
+ if (likely(state == 0)) {
+ if (unlikely(t == 0)) {
+ while (unlikely(*ip == 0)) {
+ t += 255;
+ ip++;
+ NEED_IP(1);
+ }
+ t += 15 + *ip++;
+ }
+ t += 3;
+copy_literal_run:
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) {
+ const unsigned char *ie = ip + t;
+ unsigned char *oe = op + t;
+ do {
+ COPY8(op, ip);
+ op += 8;
+ ip += 8;
+ COPY8(op, ip);
+ op += 8;
+ ip += 8;
+ } while (ip < ie);
+ ip = ie;
+ op = oe;
+ } else
+#endif
+ {
+ NEED_OP(t);
+ NEED_IP(t + 3);
+ do {
+ *op++ = *ip++;
+ } while (--t > 0);
+ }
+ state = 4;
+ continue;
+ } else if (state != 4) {
+ next = t & 3;
+ m_pos = op - 1;
+ m_pos -= t >> 2;
+ m_pos -= *ip++ << 2;
+ TEST_LB(m_pos);
+ NEED_OP(2);
+ op[0] = m_pos[0];
+ op[1] = m_pos[1];
+ op += 2;
+ goto match_next;
+ } else {
+ next = t & 3;
+ m_pos = op - (1 + M2_MAX_OFFSET);
+ m_pos -= t >> 2;
+ m_pos -= *ip++ << 2;
+ t = 3;
+ }
+ } else if (t >= 64) {
+ next = t & 3;
+ m_pos = op - 1;
+ m_pos -= (t >> 2) & 7;
+ m_pos -= *ip++ << 3;
+ t = (t >> 5) - 1 + (3 - 1);
+ } else if (t >= 32) {
+ t = (t & 31) + (3 - 1);
+ if (unlikely(t == 2)) {
+ while (unlikely(*ip == 0)) {
+ t += 255;
+ ip++;
+ NEED_IP(1);
+ }
+ t += 31 + *ip++;
+ NEED_IP(2);
+ }
+ m_pos = op - 1;
+ next = get_unaligned_le16(ip);
+ ip += 2;
+ m_pos -= next >> 2;
+ next &= 3;
+ } else {
+ m_pos = op;
+ m_pos -= (t & 8) << 11;
+ t = (t & 7) + (3 - 1);
+ if (unlikely(t == 2)) {
+ while (unlikely(*ip == 0)) {
+ t += 255;
+ ip++;
+ NEED_IP(1);
+ }
+ t += 7 + *ip++;
+ NEED_IP(2);
+ }
+ next = get_unaligned_le16(ip);
+ ip += 2;
+ m_pos -= next >> 2;
+ next &= 3;
+ if (m_pos == op)
+ goto eof_found;
+ m_pos -= 0x4000;
+ }
+ TEST_LB(m_pos);
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ if (op - m_pos >= 8) {
+ unsigned char *oe = op + t;
+ if (likely(HAVE_OP(t + 15))) {
+ do {
+ COPY8(op, m_pos);
+ op += 8;
+ m_pos += 8;
+ COPY8(op, m_pos);
+ op += 8;
+ m_pos += 8;
+ } while (op < oe);
+ op = oe;
+ if (HAVE_IP(6)) {
+ state = next;
+ COPY4(op, ip);
+ op += next;
+ ip += next;
+ continue;
+ }
+ } else {
+ NEED_OP(t);
+ do {
+ *op++ = *m_pos++;
+ } while (op < oe);
+ }
+ } else
+#endif
+ {
+ unsigned char *oe = op + t;
+ NEED_OP(t);
+ op[0] = m_pos[0];
+ op[1] = m_pos[1];
+ op += 2;
+ m_pos += 2;
+ do {
+ *op++ = *m_pos++;
+ } while (op < oe);
+ }
+match_next:
+ state = next;
+ t = next;
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ if (likely(HAVE_IP(6) && HAVE_OP(4))) {
+ COPY4(op, ip);
+ op += t;
+ ip += t;
+ } else
+#endif
+ {
+ NEED_IP(t + 3);
+ NEED_OP(t);
+ while (t > 0) {
+ *op++ = *ip++;
+ t--;
+ }
+ }
+ }
+
+eof_found:
+ *out_len = op - out;
+ return (t != 3 ? LZO_E_ERROR :
+ ip == ip_end ? LZO_E_OK :
+ ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN);
+
+input_overrun:
+ *out_len = op - out;
+ return LZO_E_INPUT_OVERRUN;
+
+output_overrun:
+ *out_len = op - out;
+ return LZO_E_OUTPUT_OVERRUN;
+
+lookbehind_overrun:
+ *out_len = op - out;
+ return LZO_E_LOOKBEHIND_OVERRUN;
+}
+#ifndef STATIC
+EXPORT_SYMBOL_GPL(lzo1x_decompress_safe);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("LZO1X Decompressor");
+
+#endif
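
Note that the bounds-check helpers changed sense: the old HAVE_IP()/HAVE_OP() returned true on overrun, while the new ones return true when enough input/output space remains and are wrapped by NEED_IP()/NEED_OP(). The calling convention of lzo1x_decompress_safe() is unchanged; the sketch below is editorial, not part of the patch, and shows that *out_len carries the destination capacity in and the number of bytes produced out.

/* Hypothetical caller (illustration only): safe decompression into a
 * fixed-size buffer. */
#include <linux/errno.h>
#include <linux/lzo.h>

static int decompress_buf(const void *src, size_t src_len,
                          void *dst, size_t dst_size, size_t *produced)
{
        size_t out_len = dst_size;              /* in: capacity, out: bytes written */
        int ret = lzo1x_decompress_safe(src, src_len, dst, &out_len);

        if (ret != LZO_E_OK)                    /* corrupt, truncated, or dst too small */
                return -EINVAL;
        *produced = out_len;
        return 0;
}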
diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h
index b6d482c492ef..6710b83ce72e 100644
--- a/lib/lzo/lzodefs.h
+++ b/lib/lzo/lzodefs.h
@@ -1,19 +1,37 @@
/*
* lzodefs.h -- architecture, OS and compiler specific defines
*
- * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
+ * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
*
* The full LZO package can be found at:
* http://www.oberhumer.com/opensource/lzo/
*
- * Changed for kernel use by:
+ * Changed for Linux kernel use by:
* Nitin Gupta <nitingupta910@gmail.com>
* Richard Purdie <rpurdie@openedhand.com>
*/
-#define LZO_VERSION 0x2020
-#define LZO_VERSION_STRING "2.02"
-#define LZO_VERSION_DATE "Oct 17 2005"
+
+#define COPY4(dst, src) \
+ put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst))
+#if defined(__x86_64__)
+#define COPY8(dst, src) \
+ put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst))
+#else
+#define COPY8(dst, src) \
+ COPY4(dst, src); COPY4((dst) + 4, (src) + 4)
+#endif
+
+#if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN)
+#error "conflicting endian definitions"
+#elif defined(__x86_64__)
+#define LZO_USE_CTZ64 1
+#define LZO_USE_CTZ32 1
+#elif defined(__i386__) || defined(__powerpc__)
+#define LZO_USE_CTZ32 1
+#elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5)
+#define LZO_USE_CTZ32 1
+#endif
#define M1_MAX_OFFSET 0x0400
#define M2_MAX_OFFSET 0x0800
@@ -34,10 +52,8 @@
#define M3_MARKER 32
#define M4_MARKER 16
-#define D_BITS 14
-#define D_MASK ((1u << D_BITS) - 1)
+#define lzo_dict_t unsigned short
+#define D_BITS 13
+#define D_SIZE (1u << D_BITS)
+#define D_MASK (D_SIZE - 1)
#define D_HIGH ((D_MASK >> 1) + 1)
-
-#define DX2(p, s1, s2) (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \
- << (s1)) ^ (p)[0])
-#define DX3(p, s1, s2, s3) ((DX2((p)+1, s2, s3) << (s1)) ^ (p)[0])
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
index 678ce4f1e124..095ab157a521 100644
--- a/lib/mpi/longlong.h
+++ b/lib/mpi/longlong.h
@@ -641,7 +641,14 @@ do { \
************** MIPS *****************
***************************************/
#if defined(__mips__) && W_TYPE_SIZE == 32
-#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4
+#define umul_ppmm(w1, w0, u, v) \
+do { \
+ UDItype __ll = (UDItype)(u) * (v); \
+ w1 = __ll >> 32; \
+ w0 = __ll; \
+} while (0)
+#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("multu %2,%3" \
: "=l" ((USItype)(w0)), \
@@ -666,7 +673,15 @@ do { \
************** MIPS/64 **************
***************************************/
#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
-#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4
+#define umul_ppmm(w1, w0, u, v) \
+do { \
+ typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \
+ __ll_UTItype __ll = (__ll_UTItype)(u) * (v); \
+ w1 = __ll >> 64; \
+ w0 = __ll; \
+} while (0)
+#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("dmultu %2,%3" \
: "=l" ((UDItype)(w0)), \
diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h
index 77adcf6bc257..60cf765628e9 100644
--- a/lib/mpi/mpi-internal.h
+++ b/lib/mpi/mpi-internal.h
@@ -65,10 +65,6 @@
typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */
typedef int mpi_size_t; /* (must be a signed type) */
-#define ABS(x) (x >= 0 ? x : -x)
-#define MIN(l, o) ((l) < (o) ? (l) : (o))
-#define MAX(h, i) ((h) > (i) ? (h) : (i))
-
static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
{
if (a->alloced < b)
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 3962b7f7fe3f..5f9c44cdf1f5 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -52,7 +52,7 @@ MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes)
else
nbits = 0;
- nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
+ nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
val = mpi_alloc(nlimbs);
if (!val)
return NULL;
@@ -96,8 +96,8 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
buffer += 2;
nread = 2;
- nbytes = (nbits + 7) / 8;
- nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
+ nbytes = DIV_ROUND_UP(nbits, 8);
+ nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
val = mpi_alloc(nlimbs);
if (!val)
return NULL;
@@ -193,7 +193,7 @@ int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign)
int nlimbs;
int i;
- nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
+ nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
if (RESIZE_IF_NEEDED(a, nlimbs) < 0)
return -ENOMEM;
a->sign = sign;
diff --git a/lib/pSeries-reconfig-notifier-error-inject.c b/lib/of-reconfig-notifier-error-inject.c
index 7f7c98dcd5c4..8dc79861758a 100644
--- a/lib/pSeries-reconfig-notifier-error-inject.c
+++ b/lib/of-reconfig-notifier-error-inject.c
@@ -1,20 +1,20 @@
#include <linux/kernel.h>
#include <linux/module.h>
-
-#include <asm/pSeries_reconfig.h>
+#include <linux/of.h>
#include "notifier-error-inject.h"
static int priority;
module_param(priority, int, 0);
-MODULE_PARM_DESC(priority, "specify pSeries reconfig notifier priority");
+MODULE_PARM_DESC(priority, "specify OF reconfig notifier priority");
static struct notifier_err_inject reconfig_err_inject = {
.actions = {
- { NOTIFIER_ERR_INJECT_ACTION(PSERIES_RECONFIG_ADD) },
- { NOTIFIER_ERR_INJECT_ACTION(PSERIES_RECONFIG_REMOVE) },
- { NOTIFIER_ERR_INJECT_ACTION(PSERIES_DRCONF_MEM_ADD) },
- { NOTIFIER_ERR_INJECT_ACTION(PSERIES_DRCONF_MEM_REMOVE) },
+ { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_ATTACH_NODE) },
+ { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_DETACH_NODE) },
+ { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_ADD_PROPERTY) },
+ { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_REMOVE_PROPERTY) },
+ { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_UPDATE_PROPERTY) },
{}
}
};
@@ -25,12 +25,12 @@ static int err_inject_init(void)
{
int err;
- dir = notifier_err_inject_init("pSeries-reconfig",
+ dir = notifier_err_inject_init("OF-reconfig",
notifier_err_inject_dir, &reconfig_err_inject, priority);
if (IS_ERR(dir))
return PTR_ERR(dir);
- err = pSeries_reconfig_notifier_register(&reconfig_err_inject.nb);
+ err = of_reconfig_notifier_register(&reconfig_err_inject.nb);
if (err)
debugfs_remove_recursive(dir);
@@ -39,13 +39,13 @@ static int err_inject_init(void)
static void err_inject_exit(void)
{
- pSeries_reconfig_notifier_unregister(&reconfig_err_inject.nb);
+ of_reconfig_notifier_unregister(&reconfig_err_inject.nb);
debugfs_remove_recursive(dir);
}
module_init(err_inject_init);
module_exit(err_inject_exit);
-MODULE_DESCRIPTION("pSeries reconfig notifier error injection module");
+MODULE_DESCRIPTION("OF reconfig notifier error injection module");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
diff --git a/lib/parser.c b/lib/parser.c
index 52cfa69f73df..807b2aaa33fa 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -157,7 +157,7 @@ static int match_number(substring_t *s, int *result, int base)
*
* Description: Attempts to parse the &substring_t @s as a decimal integer. On
* success, sets @result to the integer represented by the string and returns 0.
- * Returns either -ENOMEM or -EINVAL on failure.
+ * Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
*/
int match_int(substring_t *s, int *result)
{
@@ -171,7 +171,7 @@ int match_int(substring_t *s, int *result)
*
* Description: Attempts to parse the &substring_t @s as an octal integer. On
* success, sets @result to the integer represented by the string and returns
- * 0. Returns either -ENOMEM or -EINVAL on failure.
+ * 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
*/
int match_octal(substring_t *s, int *result)
{
@@ -185,7 +185,7 @@ int match_octal(substring_t *s, int *result)
*
* Description: Attempts to parse the &substring_t @s as a hexadecimal integer.
* On success, sets @result to the integer represented by the string and
- * returns 0. Returns either -ENOMEM or -EINVAL on failure.
+ * returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
*/
int match_hex(substring_t *s, int *result)
{
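
With the updated kernel-doc, callers of match_int()/match_octal()/match_hex() should treat -ERANGE as a possible failure alongside -ENOMEM and -EINVAL. The following option parser is a hypothetical illustration only, not part of the patch, using the standard <linux/parser.h> pattern.

#include <linux/errno.h>
#include <linux/parser.h>

enum { Opt_size, Opt_err };

static const match_table_t tokens = {
        { Opt_size, "size=%d" },
        { Opt_err,  NULL }
};

/* Returns 0 on success; -EINVAL, -ENOMEM, or (now documented) -ERANGE. */
static int parse_size_option(char *opt, int *size)
{
        substring_t args[MAX_OPT_ARGS];

        if (match_token(opt, tokens, args) != Opt_size)
                return -EINVAL;
        return match_int(&args[0], size);
}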
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
new file mode 100644
index 000000000000..652a8ee8efe9
--- /dev/null
+++ b/lib/percpu-rwsem.c
@@ -0,0 +1,165 @@
+#include <linux/atomic.h>
+#include <linux/rwsem.h>
+#include <linux/percpu.h>
+#include <linux/wait.h>
+#include <linux/lockdep.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+
+int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
+ const char *name, struct lock_class_key *rwsem_key)
+{
+ brw->fast_read_ctr = alloc_percpu(int);
+ if (unlikely(!brw->fast_read_ctr))
+ return -ENOMEM;
+
+ /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
+ __init_rwsem(&brw->rw_sem, name, rwsem_key);
+ atomic_set(&brw->write_ctr, 0);
+ atomic_set(&brw->slow_read_ctr, 0);
+ init_waitqueue_head(&brw->write_waitq);
+ return 0;
+}
+
+void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
+{
+ free_percpu(brw->fast_read_ctr);
+ brw->fast_read_ctr = NULL; /* catch use after free bugs */
+}
+
+/*
+ * This is the fast-path for down_read/up_read, it only needs to ensure
+ * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
+ * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
+ * serialize with the preempt-disabled section below.
+ *
+ * The nontrivial part is that we should guarantee acquire/release semantics
+ * in the following cases:
+ *
+ * R_W: down_write() comes after up_read(), the writer should see all
+ * changes done by the reader
+ * or
+ * W_R: down_read() comes after up_write(), the reader should see all
+ * changes done by the writer
+ *
+ * If this helper fails the callers rely on the normal rw_semaphore and
+ * atomic_dec_and_test(), so in this case we have the necessary barriers.
+ *
+ * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
+ * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
+ * reader inside the critical section. See the comments in down_write and
+ * up_write below.
+ */
+static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
+{
+ bool success = false;
+
+ preempt_disable();
+ if (likely(!atomic_read(&brw->write_ctr))) {
+ __this_cpu_add(*brw->fast_read_ctr, val);
+ success = true;
+ }
+ preempt_enable();
+
+ return success;
+}
+
+/*
+ * Like the normal down_read() this is not recursive; the writer can
+ * come after the first percpu_down_read() and create a deadlock.
+ *
+ * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep,
+ * percpu_up_read() does rwsem_release(). This pairs with the usage
+ * of ->rw_sem in percpu_down/up_write().
+ */
+void percpu_down_read(struct percpu_rw_semaphore *brw)
+{
+ might_sleep();
+ if (likely(update_fast_ctr(brw, +1))) {
+ rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
+ return;
+ }
+
+ down_read(&brw->rw_sem);
+ atomic_inc(&brw->slow_read_ctr);
+ /* avoid up_read()->rwsem_release() */
+ __up_read(&brw->rw_sem);
+}
+
+void percpu_up_read(struct percpu_rw_semaphore *brw)
+{
+ rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
+
+ if (likely(update_fast_ctr(brw, -1)))
+ return;
+
+ /* false-positive is possible but harmless */
+ if (atomic_dec_and_test(&brw->slow_read_ctr))
+ wake_up_all(&brw->write_waitq);
+}
+
+static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
+{
+ unsigned int sum = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ sum += per_cpu(*brw->fast_read_ctr, cpu);
+ per_cpu(*brw->fast_read_ctr, cpu) = 0;
+ }
+
+ return sum;
+}
+
+/*
+ * A writer increments ->write_ctr to force the readers to switch to the
+ * slow mode, note the atomic_read() check in update_fast_ctr().
+ *
+ * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
+ * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
+ * counter it represents the number of active readers.
+ *
+ * Finally the writer takes ->rw_sem for writing and blocks the new readers,
+ * then waits until the slow counter becomes zero.
+ */
+void percpu_down_write(struct percpu_rw_semaphore *brw)
+{
+ /* tell update_fast_ctr() there is a pending writer */
+ atomic_inc(&brw->write_ctr);
+ /*
+ * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
+ * so that update_fast_ctr() can't succeed.
+ *
+ * 2. Ensures we see the result of every previous this_cpu_add() in
+ * update_fast_ctr().
+ *
+ * 3. Ensures that if any reader has exited its critical section via
+ * fast-path, it executes a full memory barrier before we return.
+ * See R_W case in the comment above update_fast_ctr().
+ */
+ synchronize_sched_expedited();
+
+ /* exclude other writers, and block the new readers completely */
+ down_write(&brw->rw_sem);
+
+ /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
+ atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
+
+ /* wait for all readers to complete their percpu_up_read() */
+ wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
+}
+
+void percpu_up_write(struct percpu_rw_semaphore *brw)
+{
+ /* release the lock, but the readers can't use the fast-path */
+ up_write(&brw->rw_sem);
+ /*
+ * Insert the barrier before the next fast-path in down_read,
+ * see W_R case in the comment above update_fast_ctr().
+ */
+ synchronize_sched_expedited();
+ /* the last writer unblocks update_fast_ctr() */
+ atomic_dec(&brw->write_ctr);
+}
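
The comments above describe the split between the per-cpu reader fast path and the writer's synchronize_sched_expedited()-based slow path. A hypothetical user of the new primitive, for illustration only and not part of the patch, assuming the percpu_init_rwsem() convenience wrapper declared in <linux/percpu-rwsem.h>:

#include <linux/percpu-rwsem.h>

static struct percpu_rw_semaphore state_sem;

static int state_init(void)
{
        /* allocates the per-cpu fast counters; may fail with -ENOMEM */
        return percpu_init_rwsem(&state_sem);
}

static void state_read(void)
{
        percpu_down_read(&state_sem);   /* usually just a per-cpu increment */
        /* ... read side, may sleep ... */
        percpu_up_read(&state_sem);
}

static void state_update(void)
{
        percpu_down_write(&state_sem);  /* forces readers onto the rwsem slow path */
        /* ... exclusive update, all readers drained ... */
        percpu_up_write(&state_sem);
}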
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index de06dfe165b8..9f7c184725d7 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -1,8 +1,11 @@
obj-$(CONFIG_RAID6_PQ) += raid6_pq.o
-raid6_pq-y += algos.o recov.o recov_ssse3.o tables.o int1.o int2.o int4.o \
- int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \
- altivec8.o mmx.o sse1.o sse2.o
+raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
+ int8.o int16.o int32.o
+
+raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+
hostprogs-y += mktables
quiet_cmd_unroll = UNROLL $@
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 589f5f50ad2e..6d7316fe9f30 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -45,11 +45,20 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_sse1x2,
&raid6_sse2x1,
&raid6_sse2x2,
+#ifdef CONFIG_AS_AVX2
+ &raid6_avx2x1,
+ &raid6_avx2x2,
+#endif
#endif
#if defined(__x86_64__) && !defined(__arch_um__)
&raid6_sse2x1,
&raid6_sse2x2,
&raid6_sse2x4,
+#ifdef CONFIG_AS_AVX2
+ &raid6_avx2x1,
+ &raid6_avx2x2,
+ &raid6_avx2x4,
+#endif
#endif
#ifdef CONFIG_ALTIVEC
&raid6_altivec1,
@@ -72,6 +81,9 @@ EXPORT_SYMBOL_GPL(raid6_datap_recov);
const struct raid6_recov_calls *const raid6_recov_algos[] = {
#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+#ifdef CONFIG_AS_AVX2
+ &raid6_recov_avx2,
+#endif
&raid6_recov_ssse3,
#endif
&raid6_recov_intx1,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index b71012b756f4..7cc12b532e95 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,13 +24,10 @@
#include <linux/raid/pq.h>
-#ifdef CONFIG_ALTIVEC
-
#include <altivec.h>
#ifdef __KERNEL__
# include <asm/cputable.h>
# include <asm/switch_to.h>
-#endif
/*
* This is the C data type to use. We use a vector of
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c
new file mode 100644
index 000000000000..bc3b1dd436eb
--- /dev/null
+++ b/lib/raid6/avx2.c
@@ -0,0 +1,251 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
+ *
+ * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
+ * (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * AVX2 implementation of RAID-6 syndrome functions
+ *
+ */
+
+#ifdef CONFIG_AS_AVX2
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+static const struct raid6_avx2_constants {
+ u64 x1d[4];
+} raid6_avx2_constants __aligned(32) = {
+ { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
+ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
+};
+
+static int raid6_have_avx2(void)
+{
+ return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX);
+}
+
+/*
+ * Plain AVX2 implementation
+ */
+static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+ asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* Zero temp */
+
+ for (d = 0; d < bytes; d += 32) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+ asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
+ asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
+ asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
+ asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
+ for (z = z0-2; z >= 0; z--) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+ asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
+ asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+ asm volatile("vpand %ymm0,%ymm5,%ymm5");
+ asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+ asm volatile("vpxor %ymm6,%ymm2,%ymm2");
+ asm volatile("vpxor %ymm6,%ymm4,%ymm4");
+ asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
+ }
+ asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
+ asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+ asm volatile("vpand %ymm0,%ymm5,%ymm5");
+ asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+ asm volatile("vpxor %ymm6,%ymm2,%ymm2");
+ asm volatile("vpxor %ymm6,%ymm4,%ymm4");
+
+ asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+ asm volatile("vpxor %ymm2,%ymm2,%ymm2");
+ asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+ asm volatile("vpxor %ymm4,%ymm4,%ymm4");
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx2x1 = {
+ raid6_avx21_gen_syndrome,
+ raid6_have_avx2,
+ "avx2x1",
+ 1 /* Has cache hints */
+};
+
+/*
+ * Unrolled-by-2 AVX2 implementation
+ */
+static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+ asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */
+
+ /* We uniformly assume a single prefetch covers at least 32 bytes */
+ for (d = 0; d < bytes; d += 64) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+ asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
+ asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
+ asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
+ asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */
+ asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */
+ for (z = z0-1; z >= 0; z--) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
+ asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
+ asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
+ asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+ asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
+ asm volatile("vpand %ymm0,%ymm5,%ymm5");
+ asm volatile("vpand %ymm0,%ymm7,%ymm7");
+ asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+ asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+ asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
+ asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
+ asm volatile("vpxor %ymm5,%ymm2,%ymm2");
+ asm volatile("vpxor %ymm7,%ymm3,%ymm3");
+ asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+ asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+ }
+ asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+ asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
+ asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+ asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx2x2 = {
+ raid6_avx22_gen_syndrome,
+ raid6_have_avx2,
+ "avx2x2",
+ 1 /* Has cache hints */
+};
+
+#ifdef CONFIG_X86_64
+
+/*
+ * Unrolled-by-4 AVX2 implementation
+ */
+static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+ asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */
+ asm volatile("vpxor %ymm2,%ymm2,%ymm2"); /* P[0] */
+ asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* P[1] */
+ asm volatile("vpxor %ymm4,%ymm4,%ymm4"); /* Q[0] */
+ asm volatile("vpxor %ymm6,%ymm6,%ymm6"); /* Q[1] */
+ asm volatile("vpxor %ymm10,%ymm10,%ymm10"); /* P[2] */
+ asm volatile("vpxor %ymm11,%ymm11,%ymm11"); /* P[3] */
+ asm volatile("vpxor %ymm12,%ymm12,%ymm12"); /* Q[2] */
+ asm volatile("vpxor %ymm14,%ymm14,%ymm14"); /* Q[3] */
+
+ for (d = 0; d < bytes; d += 128) {
+ for (z = z0; z >= 0; z--) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
+ asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
+ asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
+ asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
+ asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
+ asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+ asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
+ asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
+ asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
+ asm volatile("vpand %ymm0,%ymm5,%ymm5");
+ asm volatile("vpand %ymm0,%ymm7,%ymm7");
+ asm volatile("vpand %ymm0,%ymm13,%ymm13");
+ asm volatile("vpand %ymm0,%ymm15,%ymm15");
+ asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+ asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+ asm volatile("vpxor %ymm13,%ymm12,%ymm12");
+ asm volatile("vpxor %ymm15,%ymm14,%ymm14");
+ asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
+ asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
+ asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
+ asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
+ asm volatile("vpxor %ymm5,%ymm2,%ymm2");
+ asm volatile("vpxor %ymm7,%ymm3,%ymm3");
+ asm volatile("vpxor %ymm13,%ymm10,%ymm10");
+ asm volatile("vpxor %ymm15,%ymm11,%ymm11");
+ asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+ asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+ asm volatile("vpxor %ymm13,%ymm12,%ymm12");
+ asm volatile("vpxor %ymm15,%ymm14,%ymm14");
+ }
+ asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+ asm volatile("vpxor %ymm2,%ymm2,%ymm2");
+ asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
+ asm volatile("vpxor %ymm3,%ymm3,%ymm3");
+ asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
+ asm volatile("vpxor %ymm10,%ymm10,%ymm10");
+ asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
+ asm volatile("vpxor %ymm11,%ymm11,%ymm11");
+ asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+ asm volatile("vpxor %ymm4,%ymm4,%ymm4");
+ asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
+ asm volatile("vpxor %ymm6,%ymm6,%ymm6");
+ asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
+ asm volatile("vpxor %ymm12,%ymm12,%ymm12");
+ asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
+ asm volatile("vpxor %ymm14,%ymm14,%ymm14");
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx2x4 = {
+ raid6_avx24_gen_syndrome,
+ raid6_have_avx2,
+ "avx2x4",
+ 1 /* Has cache hints */
+};
+#endif
+
+#endif /* CONFIG_AS_AVX2 */
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
index 279347f23094..590c71c9e200 100644
--- a/lib/raid6/mmx.c
+++ b/lib/raid6/mmx.c
@@ -16,7 +16,7 @@
* MMX implementation of RAID-6 syndrome functions
*/
-#if defined(__i386__) && !defined(__arch_um__)
+#ifdef CONFIG_X86_32
#include <linux/raid/pq.h>
#include "x86.h"
diff --git a/lib/raid6/recov_avx2.c b/lib/raid6/recov_avx2.c
new file mode 100644
index 000000000000..e1eea433a493
--- /dev/null
+++ b/lib/raid6/recov_avx2.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#ifdef CONFIG_AS_AVX2
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+static int raid6_has_avx2(void)
+{
+ return boot_cpu_has(X86_FEATURE_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX);
+}
+
+static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila,
+ int failb, void **ptrs)
+{
+ u8 *p, *q, *dp, *dq;
+ const u8 *pbmul; /* P multiplier table for B data */
+ const u8 *qmul; /* Q multiplier table (for both) */
+ const u8 x0f = 0x0f;
+
+ p = (u8 *)ptrs[disks-2];
+ q = (u8 *)ptrs[disks-1];
+
+ /* Compute syndrome with zero for the missing data pages
+ Use the dead data pages as temporary storage for
+ delta p and delta q */
+ dp = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks-2] = dp;
+ dq = (u8 *)ptrs[failb];
+ ptrs[failb] = (void *)raid6_empty_zero_page;
+ ptrs[disks-1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dp;
+ ptrs[failb] = dq;
+ ptrs[disks-2] = p;
+ ptrs[disks-1] = q;
+
+ /* Now, pick the proper data tables */
+ pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
+ raid6_gfexp[failb]]];
+
+ kernel_fpu_begin();
+
+ /* ymm0 = x0f[16] */
+ asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
+
+ while (bytes) {
+#ifdef CONFIG_X86_64
+ asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0]));
+ asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32]));
+ asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0]));
+ asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32]));
+ asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0]));
+ asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32]));
+ asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0]));
+ asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32]));
+
+ /*
+ * 1 = dq[0] ^ q[0]
+ * 9 = dq[32] ^ q[32]
+ * 0 = dp[0] ^ p[0]
+ * 8 = dp[32] ^ p[32]
+ */
+
+ asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
+ asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
+
+ asm volatile("vpsraw $4, %ymm1, %ymm3");
+ asm volatile("vpsraw $4, %ymm9, %ymm12");
+ asm volatile("vpand %ymm7, %ymm1, %ymm1");
+ asm volatile("vpand %ymm7, %ymm9, %ymm9");
+ asm volatile("vpand %ymm7, %ymm3, %ymm3");
+ asm volatile("vpand %ymm7, %ymm12, %ymm12");
+ asm volatile("vpshufb %ymm9, %ymm4, %ymm14");
+ asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
+ asm volatile("vpshufb %ymm12, %ymm5, %ymm15");
+ asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
+ asm volatile("vpxor %ymm14, %ymm15, %ymm15");
+ asm volatile("vpxor %ymm4, %ymm5, %ymm5");
+
+ /*
+ * 5 = qx[0]
+ * 15 = qx[32]
+ */
+
+ asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
+ asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
+ asm volatile("vpsraw $4, %ymm0, %ymm2");
+ asm volatile("vpsraw $4, %ymm8, %ymm6");
+ asm volatile("vpand %ymm7, %ymm0, %ymm3");
+ asm volatile("vpand %ymm7, %ymm8, %ymm14");
+ asm volatile("vpand %ymm7, %ymm2, %ymm2");
+ asm volatile("vpand %ymm7, %ymm6, %ymm6");
+ asm volatile("vpshufb %ymm14, %ymm4, %ymm12");
+ asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
+ asm volatile("vpshufb %ymm6, %ymm1, %ymm13");
+ asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
+ asm volatile("vpxor %ymm4, %ymm1, %ymm1");
+ asm volatile("vpxor %ymm12, %ymm13, %ymm13");
+
+ /*
+ * 1 = pbmul[px[0]]
+ * 13 = pbmul[px[32]]
+ */
+ asm volatile("vpxor %ymm5, %ymm1, %ymm1");
+ asm volatile("vpxor %ymm15, %ymm13, %ymm13");
+
+ /*
+ * 1 = db = DQ
+ * 13 = db[32] = DQ[32]
+ */
+ asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+ asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32]));
+ asm volatile("vpxor %ymm1, %ymm0, %ymm0");
+ asm volatile("vpxor %ymm13, %ymm8, %ymm8");
+
+ asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
+ asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32]));
+
+ bytes -= 64;
+ p += 64;
+ q += 64;
+ dp += 64;
+ dq += 64;
+#else
+ asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q));
+ asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p));
+ asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq));
+ asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp));
+
+ /* 1 = dq ^ q; 0 = dp ^ p */
+
+ asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
+ asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
+
+ /*
+ * 1 = dq ^ q
+ * 3 = dq ^ p >> 4
+ */
+ asm volatile("vpsraw $4, %ymm1, %ymm3");
+ asm volatile("vpand %ymm7, %ymm1, %ymm1");
+ asm volatile("vpand %ymm7, %ymm3, %ymm3");
+ asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
+ asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
+ asm volatile("vpxor %ymm4, %ymm5, %ymm5");
+
+ /* 5 = qx */
+
+ asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
+ asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
+
+ asm volatile("vpsraw $4, %ymm0, %ymm2");
+ asm volatile("vpand %ymm7, %ymm0, %ymm3");
+ asm volatile("vpand %ymm7, %ymm2, %ymm2");
+ asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
+ asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
+ asm volatile("vpxor %ymm4, %ymm1, %ymm1");
+
+ /* 1 = pbmul[px] */
+ asm volatile("vpxor %ymm5, %ymm1, %ymm1");
+ /* 1 = db = DQ */
+ asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+
+ asm volatile("vpxor %ymm1, %ymm0, %ymm0");
+ asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
+
+ bytes -= 32;
+ p += 32;
+ q += 32;
+ dp += 32;
+ dq += 32;
+#endif
+ }
+
+ kernel_fpu_end();
+}
+
+static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila,
+ void **ptrs)
+{
+ u8 *p, *q, *dq;
+ const u8 *qmul; /* Q multiplier table */
+ const u8 x0f = 0x0f;
+
+ p = (u8 *)ptrs[disks-2];
+ q = (u8 *)ptrs[disks-1];
+
+ /* Compute syndrome with zero for the missing data page
+ Use the dead data page as temporary storage for delta q */
+ dq = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks-1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dq;
+ ptrs[disks-1] = q;
+
+ /* Now, pick the proper data tables */
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+ kernel_fpu_begin();
+
+ asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
+
+ while (bytes) {
+#ifdef CONFIG_X86_64
+ asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
+ asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32]));
+ asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
+ asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32]));
+
+ /*
+ * 3 = q[0] ^ dq[0]
+ * 8 = q[32] ^ dq[32]
+ */
+ asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
+ asm volatile("vmovapd %ymm0, %ymm13");
+ asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
+ asm volatile("vmovapd %ymm1, %ymm14");
+
+ asm volatile("vpsraw $4, %ymm3, %ymm6");
+ asm volatile("vpsraw $4, %ymm8, %ymm12");
+ asm volatile("vpand %ymm7, %ymm3, %ymm3");
+ asm volatile("vpand %ymm7, %ymm8, %ymm8");
+ asm volatile("vpand %ymm7, %ymm6, %ymm6");
+ asm volatile("vpand %ymm7, %ymm12, %ymm12");
+ asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
+ asm volatile("vpshufb %ymm8, %ymm13, %ymm13");
+ asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
+ asm volatile("vpshufb %ymm12, %ymm14, %ymm14");
+ asm volatile("vpxor %ymm0, %ymm1, %ymm1");
+ asm volatile("vpxor %ymm13, %ymm14, %ymm14");
+
+ /*
+ * 1 = qmul[q[0] ^ dq[0]]
+ * 14 = qmul[q[32] ^ dq[32]]
+ */
+ asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
+ asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32]));
+ asm volatile("vpxor %ymm1, %ymm2, %ymm2");
+ asm volatile("vpxor %ymm14, %ymm12, %ymm12");
+
+ /*
+ * 2 = p[0] ^ qmul[q[0] ^ dq[0]]
+ * 12 = p[32] ^ qmul[q[32] ^ dq[32]]
+ */
+
+ asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+ asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32]));
+ asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
+ asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32]));
+
+ bytes -= 64;
+ p += 64;
+ q += 64;
+ dq += 64;
+#else
+ asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
+ asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
+
+ /* 3 = q ^ dq */
+
+ asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
+ asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
+
+ asm volatile("vpsraw $4, %ymm3, %ymm6");
+ asm volatile("vpand %ymm7, %ymm3, %ymm3");
+ asm volatile("vpand %ymm7, %ymm6, %ymm6");
+ asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
+ asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
+ asm volatile("vpxor %ymm0, %ymm1, %ymm1");
+
+ /* 1 = qmul[q ^ dq] */
+
+ asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
+ asm volatile("vpxor %ymm1, %ymm2, %ymm2");
+
+ /* 2 = p ^ qmul[q ^ dq] */
+
+ asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+ asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
+
+ bytes -= 32;
+ p += 32;
+ q += 32;
+ dq += 32;
+#endif
+ }
+
+ kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_avx2 = {
+ .data2 = raid6_2data_recov_avx2,
+ .datap = raid6_datap_recov_avx2,
+ .valid = raid6_has_avx2,
+#ifdef CONFIG_X86_64
+ .name = "avx2x2",
+#else
+ .name = "avx2x1",
+#endif
+ .priority = 2,
+};
+
+#else
+#warning "your version of binutils lacks AVX2 support"
+#endif
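
For reference, the recovery math that the AVX2 loops above vectorize reduces to a simple per-byte formula: the failed data block is D = qmul[Q ^ Q'], where Q' is the Q syndrome recomputed with the failed block zeroed, and P is then repaired as P ^= D. Below is a scalar sketch under the assumption of a flat 256-entry GF(2^8) multiplication table (the SIMD code uses the split low/high-nibble form of the same table so it can use vpshufb); it is illustrative only, not part of the patch.

#include <stddef.h>

/*
 * Scalar equivalent of the data+P recovery above, one byte at a time.
 * q[]  - original Q syndrome
 * dq[] - Q syndrome recomputed with the failed data block zeroed
 *        (reused as the output buffer for the rebuilt data)
 * p[]  - P stripe to be repaired
 * qmul - assumed 256-entry GF(2^8) multiply table for the inverse
 *        coefficient of the failed disk
 */
static void datap_recov_scalar(size_t bytes, const unsigned char *qmul,
                               unsigned char *p, unsigned char *q,
                               unsigned char *dq)
{
        size_t i;

        for (i = 0; i < bytes; i++) {
                unsigned char d = qmul[q[i] ^ dq[i]];   /* recovered byte */

                dq[i] = d;      /* rebuilt data block */
                p[i] ^= d;      /* repaired P stripe  */
        }
}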
diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c
index ecb710c0b4d9..a9168328f03b 100644
--- a/lib/raid6/recov_ssse3.c
+++ b/lib/raid6/recov_ssse3.c
@@ -7,8 +7,6 @@
* of the License.
*/
-#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
-
#include <linux/raid/pq.h>
#include "x86.h"
@@ -332,5 +330,3 @@ const struct raid6_recov_calls raid6_recov_ssse3 = {
#endif
.priority = 1,
};
-
-#endif
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
index 10dd91948c07..f76297139445 100644
--- a/lib/raid6/sse1.c
+++ b/lib/raid6/sse1.c
@@ -21,7 +21,7 @@
* worthwhile as a separate implementation.
*/
-#if defined(__i386__) && !defined(__arch_um__)
+#ifdef CONFIG_X86_32
#include <linux/raid/pq.h>
#include "x86.h"
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
index bc2d57daa589..85b82c85f28e 100644
--- a/lib/raid6/sse2.c
+++ b/lib/raid6/sse2.c
@@ -17,8 +17,6 @@
*
*/
-#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
-
#include <linux/raid/pq.h>
#include "x86.h"
@@ -159,9 +157,7 @@ const struct raid6_calls raid6_sse2x2 = {
1 /* Has cache hints */
};
-#endif
-
-#if defined(__x86_64__) && !defined(__arch_um__)
+#ifdef CONFIG_X86_64
/*
* Unrolled-by-4 SSE2 implementation
@@ -259,4 +255,4 @@ const struct raid6_calls raid6_sse2x4 = {
1 /* Has cache hints */
};
-#endif
+#endif /* CONFIG_X86_64 */
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index c76151d94764..087332dbf8aa 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -10,6 +10,31 @@ LD = ld
AWK = awk -f
AR = ar
RANLIB = ranlib
+OBJS = int1.o int2.o int4.o int8.o int16.o int32.o recov.o algos.o tables.o
+
+ARCH := $(shell uname -m 2>/dev/null | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+ CFLAGS += -DCONFIG_X86_32
+ IS_X86 = yes
+endif
+ifeq ($(ARCH),x86_64)
+ CFLAGS += -DCONFIG_X86_64
+ IS_X86 = yes
+endif
+
+ifeq ($(IS_X86),yes)
+ OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o
+ CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \
+ gcc -c -x assembler - >&/dev/null && \
+ rm ./-.o && echo -DCONFIG_AS_AVX2=1)
+else
+ HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\
+ gcc -c -x c - >&/dev/null && \
+ rm ./-.o && echo yes)
+ ifeq ($(HAS_ALTIVEC),yes)
+ OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+ endif
+endif
.c.o:
$(CC) $(CFLAGS) -c -o $@ $<
@@ -22,9 +47,7 @@ RANLIB = ranlib
all: raid6.a raid6test
-raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \
- altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o algos.o \
- tables.o
+raid6.a: $(OBJS)
rm -f $@
$(AR) cq $@ $^
$(RANLIB) $@
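
The CONFIG_AS_AVX2 probe above works by handing the assembler a single AVX2 instruction and only adding the define when it assembles; the "rm ./-.o" cleans up the object file that compiling from stdin leaves behind. An equivalent stand-alone check, written with an explicit output path instead of the stray -.o file (shell, for illustration only):

        echo "vpbroadcastb %xmm0, %ymm1" | \
                gcc -c -x assembler -o /dev/null - 2>/dev/null \
                && echo "-DCONFIG_AS_AVX2=1"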
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index d55d63232c55..b7595484a815 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -45,19 +45,23 @@ static inline void kernel_fpu_end(void)
#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
/* Should work well enough on modern CPUs for testing */
static inline int boot_cpu_has(int flag)
{
- u32 eax = (flag & 0x20) ? 0x80000001 : 1;
- u32 ecx, edx;
+ u32 eax, ebx, ecx, edx;
+
+ eax = (flag & 0x100) ? 7 :
+ (flag & 0x20) ? 0x80000001 : 1;
+ ecx = 0;
asm volatile("cpuid"
- : "+a" (eax), "=d" (edx), "=c" (ecx)
- : : "ebx");
+ : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx));
- return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1;
+ return ((flag & 0x100 ? ebx :
+ (flag & 0x80) ? ecx : edx) >> (flag & 31)) & 1;
}
#endif /* ndef __KERNEL__ */
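
The user-space boot_cpu_has() above relies on the flag encoding word*32 + bit: bit 0x100 set in the flag value selects CPUID leaf 7 (result in EBX), 0x80 selects ECX, 0x20 selects the extended leaf 0x80000001, and the low five bits give the bit position. A minimal stand-alone decode of X86_FEATURE_AVX2 under that scheme (plain arithmetic, no cpuid executed; illustration only):

#include <stdio.h>

#define X86_FEATURE_AVX2 (9*32 + 5)     /* 0x125 */

int main(void)
{
        int flag = X86_FEATURE_AVX2;
        unsigned int leaf = (flag & 0x100) ? 7 :
                            (flag & 0x20) ? 0x80000001 : 1;
        const char *reg = (flag & 0x100) ? "ebx" :
                          (flag & 0x80) ? "ecx" : "edx";

        /* Prints "leaf=0x7 reg=ebx bit=5", i.e. CPUID.7:EBX bit 5 (AVX2). */
        printf("leaf=%#x reg=%s bit=%d\n", leaf, reg, flag & 31);
        return 0;
}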
diff --git a/lib/random32.c b/lib/random32.c
index 938bde5876ac..52280d5526be 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -42,13 +42,13 @@
static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
/**
- * prandom32 - seeded pseudo-random number generator.
+ * prandom_u32_state - seeded pseudo-random number generator.
* @state: pointer to state structure holding seeded state.
*
* This is used for pseudo-randomness with no outside seeding.
- * For more random results, use random32().
+ * For more random results, use prandom_u32().
*/
-u32 prandom32(struct rnd_state *state)
+u32 prandom_u32_state(struct rnd_state *state)
{
#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
@@ -58,32 +58,81 @@ u32 prandom32(struct rnd_state *state)
return (state->s1 ^ state->s2 ^ state->s3);
}
-EXPORT_SYMBOL(prandom32);
+EXPORT_SYMBOL(prandom_u32_state);
/**
- * random32 - pseudo random number generator
+ * prandom_u32 - pseudo random number generator
*
* A 32 bit pseudo-random number is generated using a fast
* algorithm suitable for simulation. This algorithm is NOT
* considered safe for cryptographic use.
*/
-u32 random32(void)
+u32 prandom_u32(void)
{
unsigned long r;
struct rnd_state *state = &get_cpu_var(net_rand_state);
- r = prandom32(state);
+ r = prandom_u32_state(state);
put_cpu_var(state);
return r;
}
-EXPORT_SYMBOL(random32);
+EXPORT_SYMBOL(prandom_u32);
+
+/**
+ * prandom_bytes_state - get the requested number of pseudo-random bytes
+ *
+ * @state: pointer to state structure holding seeded state.
+ * @buf: where to copy the pseudo-random bytes to
+ * @bytes: the requested number of bytes
+ *
+ * This is used for pseudo-randomness with no outside seeding.
+ * For more random results, use prandom_bytes().
+ */
+void prandom_bytes_state(struct rnd_state *state, void *buf, int bytes)
+{
+ unsigned char *p = buf;
+ int i;
+
+ for (i = 0; i < round_down(bytes, sizeof(u32)); i += sizeof(u32)) {
+ u32 random = prandom_u32_state(state);
+ int j;
+
+ for (j = 0; j < sizeof(u32); j++) {
+ p[i + j] = random;
+ random >>= BITS_PER_BYTE;
+ }
+ }
+ if (i < bytes) {
+ u32 random = prandom_u32_state(state);
+
+ for (; i < bytes; i++) {
+ p[i] = random;
+ random >>= BITS_PER_BYTE;
+ }
+ }
+}
+EXPORT_SYMBOL(prandom_bytes_state);
+
+/**
+ * prandom_bytes - get the requested number of pseudo-random bytes
+ * @buf: where to copy the pseudo-random bytes to
+ * @bytes: the requested number of bytes
+ */
+void prandom_bytes(void *buf, int bytes)
+{
+ struct rnd_state *state = &get_cpu_var(net_rand_state);
+
+ prandom_bytes_state(state, buf, bytes);
+ put_cpu_var(state);
+}
+EXPORT_SYMBOL(prandom_bytes);
/**
- * srandom32 - add entropy to pseudo random number generator
+ * prandom_seed - add entropy to pseudo random number generator
* @seed: seed value
*
- * Add some additional seeding to the random32() pool.
+ * Add some additional seeding to the prandom pool.
*/
-void srandom32(u32 entropy)
+void prandom_seed(u32 entropy)
{
int i;
/*
@@ -95,13 +144,13 @@ void srandom32(u32 entropy)
state->s1 = __seed(state->s1 ^ entropy, 1);
}
}
-EXPORT_SYMBOL(srandom32);
+EXPORT_SYMBOL(prandom_seed);
/*
* Generate some initially weak seeding values to allow
- * to start the random32() engine.
+ * to start the prandom_u32() engine.
*/
-static int __init random32_init(void)
+static int __init prandom_init(void)
{
int i;
@@ -114,22 +163,22 @@ static int __init random32_init(void)
state->s3 = __seed(LCG(state->s2), 15);
/* "warm it up" */
- prandom32(state);
- prandom32(state);
- prandom32(state);
- prandom32(state);
- prandom32(state);
- prandom32(state);
+ prandom_u32_state(state);
+ prandom_u32_state(state);
+ prandom_u32_state(state);
+ prandom_u32_state(state);
+ prandom_u32_state(state);
+ prandom_u32_state(state);
}
return 0;
}
-core_initcall(random32_init);
+core_initcall(prandom_init);
/*
* Generate better values after random number generator
* is fully initialized.
*/
-static int __init random32_reseed(void)
+static int __init prandom_reseed(void)
{
int i;
@@ -143,8 +192,8 @@ static int __init random32_reseed(void)
state->s3 = __seed(seeds[2], 15);
/* mix it in */
- prandom32(state);
+ prandom_u32_state(state);
}
return 0;
}
-late_initcall(random32_reseed);
+late_initcall(prandom_reseed);
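
A short usage sketch of the renamed interfaces, for a caller that wants a reproducible pseudo-random byte stream (the helper name and seed value are made up for illustration; prandom_seed_state() is the existing per-state seeding helper used by the rbtree test below):

#include <linux/random.h>

/* Hypothetical helper: fill a buffer with a reproducible test pattern. */
static void fill_test_pattern(void *buf, int len)
{
        struct rnd_state state;

        /* Same seed => same byte sequence on every run. */
        prandom_seed_state(&state, 0x123456789abcdefULL);
        prandom_bytes_state(&state, buf, len);
}

Callers that just want non-reproducible, non-cryptographic randomness use the global-state variants prandom_u32() and prandom_bytes() instead.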
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 4f56a11d67fa..c0e31fe2fabf 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -194,8 +194,12 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
}
}
-__always_inline void
-__rb_erase_color(struct rb_node *parent, struct rb_root *root,
+/*
+ * Inline version for rb_erase() use - we want to be able to inline
+ * and eliminate the dummy_rotate callback there
+ */
+static __always_inline void
+____rb_erase_color(struct rb_node *parent, struct rb_root *root,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
{
struct rb_node *node = NULL, *sibling, *tmp1, *tmp2;
@@ -355,6 +359,13 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
}
}
}
+
+/* Non-inline version for rb_erase_augmented() use */
+void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+ ____rb_erase_color(parent, root, augment_rotate);
+}
EXPORT_SYMBOL(__rb_erase_color);
/*
@@ -380,7 +391,10 @@ EXPORT_SYMBOL(rb_insert_color);
void rb_erase(struct rb_node *node, struct rb_root *root)
{
- rb_erase_augmented(node, root, &dummy_callbacks);
+ struct rb_node *rebalance;
+ rebalance = __rb_erase_augmented(node, root, &dummy_callbacks);
+ if (rebalance)
+ ____rb_erase_color(rebalance, root, dummy_rotate);
}
EXPORT_SYMBOL(rb_erase);
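
The pattern above — an __always_inline worker taking a callback, a no-op callback for the common case, and a thin exported wrapper — is what lets the compiler drop the indirect dummy_rotate call from rb_erase() entirely while keeping __rb_erase_color() available to augmented users. A generic, self-contained sketch of the same trick, with hypothetical names:

#define __always_inline inline __attribute__((always_inline))

static void noop_cb(int old_val, int new_val) { }

static __always_inline void worker(int arg, void (*cb)(int, int))
{
        /* ...real work...; invokes cb() at the rebalance points */
        cb(arg, arg + 1);
}

/* Out-of-line entry point: callers with a real callback pay the indirect call. */
void worker_with_callback(int arg, void (*cb)(int, int))
{
        worker(arg, cb);
}

/* Hot path: worker() is inlined here and noop_cb() is optimized away. */
void hot_path(int arg)
{
        worker(arg, noop_cb);
}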
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c
index 268b23951fec..af38aedbd874 100644
--- a/lib/rbtree_test.c
+++ b/lib/rbtree_test.c
@@ -96,8 +96,8 @@ static void init(void)
{
int i;
for (i = 0; i < NODES; i++) {
- nodes[i].key = prandom32(&rnd);
- nodes[i].val = prandom32(&rnd);
+ nodes[i].key = prandom_u32_state(&rnd);
+ nodes[i].val = prandom_u32_state(&rnd);
}
}
@@ -118,7 +118,7 @@ static void check(int nr_nodes)
{
struct rb_node *rb;
int count = 0;
- int blacks;
+ int blacks = 0;
u32 prev_key = 0;
for (rb = rb_first(&root); rb; rb = rb_next(rb)) {
@@ -155,7 +155,7 @@ static int rbtree_test_init(void)
printk(KERN_ALERT "rbtree testing");
- prandom32_seed(&rnd, 3141592653589793238ULL);
+ prandom_seed_state(&rnd, 3141592653589793238ULL);
init();
time1 = get_cycles();
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index 7e0d6a58fc83..7542afbb22b3 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -73,20 +73,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
goto dont_wake_writers;
}
- /* if we are allowed to wake writers try to grant a single write lock
- * if there's a writer at the front of the queue
- * - we leave the 'waiting count' incremented to signify potential
- * contention
+ /*
+ * Since we support write lock stealing, we can't set sem->activity
+ * to -1 here to indicate that the lock was granted. Instead, we wake
+ * the writer up and let it try to take the lock again itself.
*/
if (waiter->flags & RWSEM_WAITING_FOR_WRITE) {
- sem->activity = -1;
- list_del(&waiter->list);
- tsk = waiter->task;
- /* Don't touch waiter after ->task has been NULLed */
- smp_mb();
- waiter->task = NULL;
- wake_up_process(tsk);
- put_task_struct(tsk);
+ wake_up_process(waiter->task);
goto out;
}
@@ -121,18 +114,10 @@ static inline struct rw_semaphore *
__rwsem_wake_one_writer(struct rw_semaphore *sem)
{
struct rwsem_waiter *waiter;
- struct task_struct *tsk;
-
- sem->activity = -1;
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
- list_del(&waiter->list);
+ wake_up_process(waiter->task);
- tsk = waiter->task;
- smp_mb();
- waiter->task = NULL;
- wake_up_process(tsk);
- put_task_struct(tsk);
return sem;
}
@@ -204,7 +189,6 @@ int __down_read_trylock(struct rw_semaphore *sem)
/*
* get a write lock on the semaphore
- * - we increment the waiting count anyway to indicate an exclusive lock
*/
void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
{
@@ -214,37 +198,32 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
raw_spin_lock_irqsave(&sem->wait_lock, flags);
- if (sem->activity == 0 && list_empty(&sem->wait_list)) {
- /* granted */
- sem->activity = -1;
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
- goto out;
- }
-
- tsk = current;
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-
/* set up my own style of waitqueue */
+ tsk = current;
waiter.task = tsk;
waiter.flags = RWSEM_WAITING_FOR_WRITE;
- get_task_struct(tsk);
-
list_add_tail(&waiter.list, &sem->wait_list);
- /* we don't need to touch the semaphore struct anymore */
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- /* wait to be given the lock */
+ /* wait for someone to release the lock */
for (;;) {
- if (!waiter.task)
+ /*
+ * This is the key to write lock stealing: the task already on the
+ * CPU takes the lock as soon as it becomes free, rather than going
+ * to sleep and waiting to be woken as the head of the wait list.
+ */
+ if (sem->activity == 0)
break;
- schedule();
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+ schedule();
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
}
+ /* got the lock */
+ sem->activity = -1;
+ list_del(&waiter.list);
- tsk->state = TASK_RUNNING;
- out:
- ;
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
void __sched __down_write(struct rw_semaphore *sem)
@@ -262,8 +241,8 @@ int __down_write_trylock(struct rw_semaphore *sem)
raw_spin_lock_irqsave(&sem->wait_lock, flags);
- if (sem->activity == 0 && list_empty(&sem->wait_list)) {
- /* granted */
+ if (sem->activity == 0) {
+ /* got the lock */
sem->activity = -1;
ret = 1;
}
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 8337e1b9bb8d..ad5e0df16ab4 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -2,6 +2,8 @@
*
* Written by David Howells (dhowells@redhat.com).
* Derived from arch/i386/kernel/semaphore.c
+ *
+ * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
*/
#include <linux/rwsem.h>
#include <linux/sched.h>
@@ -60,7 +62,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
struct rwsem_waiter *waiter;
struct task_struct *tsk;
struct list_head *next;
- signed long oldcount, woken, loop, adjustment;
+ signed long woken, loop, adjustment;
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
@@ -72,30 +74,8 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
*/
goto out;
- /* There's a writer at the front of the queue - try to grant it the
- * write lock. However, we only wake this writer if we can transition
- * the active part of the count from 0 -> 1
- */
- adjustment = RWSEM_ACTIVE_WRITE_BIAS;
- if (waiter->list.next == &sem->wait_list)
- adjustment -= RWSEM_WAITING_BIAS;
-
- try_again_write:
- oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
- if (oldcount & RWSEM_ACTIVE_MASK)
- /* Someone grabbed the sem already */
- goto undo_write;
-
- /* We must be careful not to touch 'waiter' after we set ->task = NULL.
- * It is an allocated on the waiter's stack and may become invalid at
- * any time after that point (due to a wakeup from another source).
- */
- list_del(&waiter->list);
- tsk = waiter->task;
- smp_mb();
- waiter->task = NULL;
- wake_up_process(tsk);
- put_task_struct(tsk);
+ /* Wake up the writing waiter and let the task grab the sem: */
+ wake_up_process(waiter->task);
goto out;
readers_only:
@@ -157,12 +137,40 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
out:
return sem;
+}
+
+/* Try to get write sem, caller holds sem->wait_lock: */
+static int try_get_writer_sem(struct rw_semaphore *sem,
+ struct rwsem_waiter *waiter)
+{
+ struct rwsem_waiter *fwaiter;
+ long oldcount, adjustment;
- /* undo the change to the active count, but check for a transition
- * 1->0 */
- undo_write:
+ /* only steal when first waiter is writing */
+ fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+ if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE))
+ return 0;
+
+ adjustment = RWSEM_ACTIVE_WRITE_BIAS;
+ /* Only one waiter in the queue: */
+ if (fwaiter == waiter && waiter->list.next == &sem->wait_list)
+ adjustment -= RWSEM_WAITING_BIAS;
+
+try_again_write:
+ oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
+ if (!(oldcount & RWSEM_ACTIVE_MASK)) {
+ /* No active lock: */
+ struct task_struct *tsk = waiter->task;
+
+ list_del(&waiter->list);
+ smp_mb();
+ put_task_struct(tsk);
+ tsk->state = TASK_RUNNING;
+ return 1;
+ }
+ /* someone grabbed the sem already */
if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
- goto out;
+ return 0;
goto try_again_write;
}
@@ -210,6 +218,15 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
for (;;) {
if (!waiter.task)
break;
+
+ raw_spin_lock_irq(&sem->wait_lock);
+ /* Try to get the writer sem, may steal from the head writer: */
+ if (flags == RWSEM_WAITING_FOR_WRITE)
+ if (try_get_writer_sem(sem, &waiter)) {
+ raw_spin_unlock_irq(&sem->wait_lock);
+ return sem;
+ }
+ raw_spin_unlock_irq(&sem->wait_lock);
schedule();
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
}
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 3675452b23ca..b83c144d731f 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -248,7 +248,8 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
unsigned int left;
#ifndef ARCH_HAS_SG_CHAIN
- BUG_ON(nents > max_ents);
+ if (WARN_ON_ONCE(nents > max_ents))
+ return -EINVAL;
#endif
memset(table, 0, sizeof(*table));
@@ -393,6 +394,44 @@ int sg_alloc_table_from_pages(struct sg_table *sgt,
}
EXPORT_SYMBOL(sg_alloc_table_from_pages);
+void __sg_page_iter_start(struct sg_page_iter *piter,
+ struct scatterlist *sglist, unsigned int nents,
+ unsigned long pgoffset)
+{
+ piter->__pg_advance = 0;
+ piter->__nents = nents;
+
+ piter->page = NULL;
+ piter->sg = sglist;
+ piter->sg_pgoffset = pgoffset;
+}
+EXPORT_SYMBOL(__sg_page_iter_start);
+
+static int sg_page_count(struct scatterlist *sg)
+{
+ return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT;
+}
+
+bool __sg_page_iter_next(struct sg_page_iter *piter)
+{
+ if (!piter->__nents || !piter->sg)
+ return false;
+
+ piter->sg_pgoffset += piter->__pg_advance;
+ piter->__pg_advance = 1;
+
+ while (piter->sg_pgoffset >= sg_page_count(piter->sg)) {
+ piter->sg_pgoffset -= sg_page_count(piter->sg);
+ piter->sg = sg_next(piter->sg);
+ if (!--piter->__nents || !piter->sg)
+ return false;
+ }
+ piter->page = nth_page(sg_page(piter->sg), piter->sg_pgoffset);
+
+ return true;
+}
+EXPORT_SYMBOL(__sg_page_iter_next);
+
/**
* sg_miter_start - start mapping iteration over a sg list
* @miter: sg mapping iter to be started
@@ -410,9 +449,7 @@ void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
{
memset(miter, 0, sizeof(struct sg_mapping_iter));
- miter->__sg = sgl;
- miter->__nents = nents;
- miter->__offset = 0;
+ __sg_page_iter_start(&miter->piter, sgl, nents, 0);
WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
miter->__flags = flags;
}
@@ -437,36 +474,35 @@ EXPORT_SYMBOL(sg_miter_start);
*/
bool sg_miter_next(struct sg_mapping_iter *miter)
{
- unsigned int off, len;
-
- /* check for end and drop resources from the last iteration */
- if (!miter->__nents)
- return false;
-
sg_miter_stop(miter);
- /* get to the next sg if necessary. __offset is adjusted by stop */
- while (miter->__offset == miter->__sg->length) {
- if (--miter->__nents) {
- miter->__sg = sg_next(miter->__sg);
- miter->__offset = 0;
- } else
+ /*
+ * Get to the next page if necessary.
+ * __remaining and __offset are adjusted by sg_miter_stop
+ */
+ if (!miter->__remaining) {
+ struct scatterlist *sg;
+ unsigned long pgoffset;
+
+ if (!__sg_page_iter_next(&miter->piter))
return false;
- }
- /* map the next page */
- off = miter->__sg->offset + miter->__offset;
- len = miter->__sg->length - miter->__offset;
+ sg = miter->piter.sg;
+ pgoffset = miter->piter.sg_pgoffset;
- miter->page = nth_page(sg_page(miter->__sg), off >> PAGE_SHIFT);
- off &= ~PAGE_MASK;
- miter->length = min_t(unsigned int, len, PAGE_SIZE - off);
- miter->consumed = miter->length;
+ miter->__offset = pgoffset ? 0 : sg->offset;
+ miter->__remaining = sg->offset + sg->length -
+ (pgoffset << PAGE_SHIFT) - miter->__offset;
+ miter->__remaining = min_t(unsigned long, miter->__remaining,
+ PAGE_SIZE - miter->__offset);
+ }
+ miter->page = miter->piter.page;
+ miter->consumed = miter->length = miter->__remaining;
if (miter->__flags & SG_MITER_ATOMIC)
- miter->addr = kmap_atomic(miter->page) + off;
+ miter->addr = kmap_atomic(miter->page) + miter->__offset;
else
- miter->addr = kmap(miter->page) + off;
+ miter->addr = kmap(miter->page) + miter->__offset;
return true;
}
@@ -493,6 +529,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
/* drop resources from the last iteration */
if (miter->addr) {
miter->__offset += miter->consumed;
+ miter->__remaining -= miter->consumed;
if (miter->__flags & SG_MITER_TO_SG)
flush_kernel_dcache_page(miter->page);
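
The new page iterator factors the per-page walk out of the mapping iterator: __sg_page_iter_start() primes it, and each __sg_page_iter_next() call advances piter->page, piter->sg and piter->sg_pgoffset by one page, spilling over into the next scatterlist entry as needed. A minimal sketch of walking every page of a table with the two exported helpers (kernel context assumed; the walker function itself is hypothetical):

#include <linux/scatterlist.h>

/* Hypothetical walker: count the pages backing an sg_table. */
static unsigned int count_sg_pages(struct sg_table *sgt)
{
        struct sg_page_iter piter;
        unsigned int npages = 0;

        __sg_page_iter_start(&piter, sgt->sgl, sgt->nents, 0);
        while (__sg_page_iter_next(&piter)) {
                struct page *page = piter.page; /* current page */

                (void)page;
                npages++;
        }
        return npages;
}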
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index f114bf6a8e13..bfe02b8fc55b 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -57,7 +57,7 @@ int swiotlb_force;
* swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
* API.
*/
-static char *io_tlb_start, *io_tlb_end;
+static phys_addr_t io_tlb_start, io_tlb_end;
/*
* The number of IO TLB blocks (in groups of 64) between io_tlb_start and
@@ -70,7 +70,7 @@ static unsigned long io_tlb_nslabs;
*/
static unsigned long io_tlb_overflow = 32*1024;
-static void *io_tlb_overflow_buffer;
+static phys_addr_t io_tlb_overflow_buffer;
/*
* This is a free list describing the number of free entries available from
@@ -122,30 +122,49 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
return phys_to_dma(hwdev, virt_to_phys(address));
}
+static bool no_iotlb_memory;
+
void swiotlb_print_info(void)
{
unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
- phys_addr_t pstart, pend;
+ unsigned char *vstart, *vend;
+
+ if (no_iotlb_memory) {
+ pr_warn("software IO TLB: No low mem\n");
+ return;
+ }
- pstart = virt_to_phys(io_tlb_start);
- pend = virt_to_phys(io_tlb_end);
+ vstart = phys_to_virt(io_tlb_start);
+ vend = phys_to_virt(io_tlb_end);
printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n",
- (unsigned long long)pstart, (unsigned long long)pend - 1,
- bytes >> 20, io_tlb_start, io_tlb_end - 1);
+ (unsigned long long)io_tlb_start,
+ (unsigned long long)io_tlb_end,
+ bytes >> 20, vstart, vend - 1);
}
-void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
+int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
{
+ void *v_overflow_buffer;
unsigned long i, bytes;
bytes = nslabs << IO_TLB_SHIFT;
io_tlb_nslabs = nslabs;
- io_tlb_start = tlb;
+ io_tlb_start = __pa(tlb);
io_tlb_end = io_tlb_start + bytes;
/*
+ * Get the overflow emergency buffer
+ */
+ v_overflow_buffer = alloc_bootmem_low_pages_nopanic(
+ PAGE_ALIGN(io_tlb_overflow));
+ if (!v_overflow_buffer)
+ return -ENOMEM;
+
+ io_tlb_overflow_buffer = __pa(v_overflow_buffer);
+
+ /*
* Allocate and initialize the free list array. This array is used
* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
* between io_tlb_start and io_tlb_end.
@@ -156,23 +175,22 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
io_tlb_index = 0;
io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
- /*
- * Get the overflow emergency buffer
- */
- io_tlb_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow));
- if (!io_tlb_overflow_buffer)
- panic("Cannot allocate SWIOTLB overflow buffer!\n");
if (verbose)
swiotlb_print_info();
+
+ return 0;
}
/*
* Statically reserve bounce buffer space and initialize bounce buffer data
* structures for the software IO TLB used to implement the DMA API.
*/
-static void __init
-swiotlb_init_with_default_size(size_t default_size, int verbose)
+void __init
+swiotlb_init(int verbose)
{
+ /* default to 64MB */
+ size_t default_size = 64UL<<20;
+ unsigned char *vstart;
unsigned long bytes;
if (!io_tlb_nslabs) {
@@ -182,20 +200,16 @@ swiotlb_init_with_default_size(size_t default_size, int verbose)
bytes = io_tlb_nslabs << IO_TLB_SHIFT;
- /*
- * Get IO TLB memory from the low pages
- */
- io_tlb_start = alloc_bootmem_low_pages(PAGE_ALIGN(bytes));
- if (!io_tlb_start)
- panic("Cannot allocate SWIOTLB buffer");
-
- swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose);
-}
+ /* Get IO TLB memory from the low pages */
+ vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes));
+ if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
+ return;
-void __init
-swiotlb_init(int verbose)
-{
- swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */
+ if (io_tlb_start)
+ free_bootmem(io_tlb_start,
+ PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+ pr_warn("Cannot allocate SWIOTLB buffer");
+ no_iotlb_memory = true;
}
/*
@@ -207,6 +221,7 @@ int
swiotlb_late_init_with_default_size(size_t default_size)
{
unsigned long bytes, req_nslabs = io_tlb_nslabs;
+ unsigned char *vstart = NULL;
unsigned int order;
int rc = 0;
@@ -223,14 +238,14 @@ swiotlb_late_init_with_default_size(size_t default_size)
bytes = io_tlb_nslabs << IO_TLB_SHIFT;
while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
- io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
- order);
- if (io_tlb_start)
+ vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
+ order);
+ if (vstart)
break;
order--;
}
- if (!io_tlb_start) {
+ if (!vstart) {
io_tlb_nslabs = req_nslabs;
return -ENOMEM;
}
@@ -239,9 +254,9 @@ swiotlb_late_init_with_default_size(size_t default_size)
"for software IO TLB\n", (PAGE_SIZE << order) >> 20);
io_tlb_nslabs = SLABS_PER_PAGE << order;
}
- rc = swiotlb_late_init_with_tbl(io_tlb_start, io_tlb_nslabs);
+ rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
if (rc)
- free_pages((unsigned long)io_tlb_start, order);
+ free_pages((unsigned long)vstart, order);
return rc;
}
@@ -249,14 +264,25 @@ int
swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
{
unsigned long i, bytes;
+ unsigned char *v_overflow_buffer;
bytes = nslabs << IO_TLB_SHIFT;
io_tlb_nslabs = nslabs;
- io_tlb_start = tlb;
+ io_tlb_start = virt_to_phys(tlb);
io_tlb_end = io_tlb_start + bytes;
- memset(io_tlb_start, 0, bytes);
+ memset(tlb, 0, bytes);
+
+ /*
+ * Get the overflow emergency buffer
+ */
+ v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
+ get_order(io_tlb_overflow));
+ if (!v_overflow_buffer)
+ goto cleanup2;
+
+ io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
/*
* Allocate and initialize the free list array. This array is used
@@ -266,7 +292,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
get_order(io_tlb_nslabs * sizeof(int)));
if (!io_tlb_list)
- goto cleanup2;
+ goto cleanup3;
for (i = 0; i < io_tlb_nslabs; i++)
io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
@@ -277,18 +303,10 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
get_order(io_tlb_nslabs *
sizeof(phys_addr_t)));
if (!io_tlb_orig_addr)
- goto cleanup3;
+ goto cleanup4;
memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t));
- /*
- * Get the overflow emergency buffer
- */
- io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
- get_order(io_tlb_overflow));
- if (!io_tlb_overflow_buffer)
- goto cleanup4;
-
swiotlb_print_info();
late_alloc = 1;
@@ -296,42 +314,42 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
return 0;
cleanup4:
- free_pages((unsigned long)io_tlb_orig_addr,
- get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
- io_tlb_orig_addr = NULL;
-cleanup3:
free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
sizeof(int)));
io_tlb_list = NULL;
+cleanup3:
+ free_pages((unsigned long)v_overflow_buffer,
+ get_order(io_tlb_overflow));
+ io_tlb_overflow_buffer = 0;
cleanup2:
- io_tlb_end = NULL;
- io_tlb_start = NULL;
+ io_tlb_end = 0;
+ io_tlb_start = 0;
io_tlb_nslabs = 0;
return -ENOMEM;
}
void __init swiotlb_free(void)
{
- if (!io_tlb_overflow_buffer)
+ if (!io_tlb_orig_addr)
return;
if (late_alloc) {
- free_pages((unsigned long)io_tlb_overflow_buffer,
+ free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
get_order(io_tlb_overflow));
free_pages((unsigned long)io_tlb_orig_addr,
get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
sizeof(int)));
- free_pages((unsigned long)io_tlb_start,
+ free_pages((unsigned long)phys_to_virt(io_tlb_start),
get_order(io_tlb_nslabs << IO_TLB_SHIFT));
} else {
- free_bootmem_late(__pa(io_tlb_overflow_buffer),
+ free_bootmem_late(io_tlb_overflow_buffer,
PAGE_ALIGN(io_tlb_overflow));
free_bootmem_late(__pa(io_tlb_orig_addr),
PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
free_bootmem_late(__pa(io_tlb_list),
PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
- free_bootmem_late(__pa(io_tlb_start),
+ free_bootmem_late(io_tlb_start,
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
}
io_tlb_nslabs = 0;
@@ -339,21 +357,21 @@ void __init swiotlb_free(void)
static int is_swiotlb_buffer(phys_addr_t paddr)
{
- return paddr >= virt_to_phys(io_tlb_start) &&
- paddr < virt_to_phys(io_tlb_end);
+ return paddr >= io_tlb_start && paddr < io_tlb_end;
}
/*
* Bounce: copy the swiotlb buffer back to the original dma location
*/
-void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
- enum dma_data_direction dir)
+static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir)
{
- unsigned long pfn = PFN_DOWN(phys);
+ unsigned long pfn = PFN_DOWN(orig_addr);
+ unsigned char *vaddr = phys_to_virt(tlb_addr);
if (PageHighMem(pfn_to_page(pfn))) {
/* The buffer does not have a mapping. Map it in and copy */
- unsigned int offset = phys & ~PAGE_MASK;
+ unsigned int offset = orig_addr & ~PAGE_MASK;
char *buffer;
unsigned int sz = 0;
unsigned long flags;
@@ -364,38 +382,40 @@ void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
local_irq_save(flags);
buffer = kmap_atomic(pfn_to_page(pfn));
if (dir == DMA_TO_DEVICE)
- memcpy(dma_addr, buffer + offset, sz);
+ memcpy(vaddr, buffer + offset, sz);
else
- memcpy(buffer + offset, dma_addr, sz);
+ memcpy(buffer + offset, vaddr, sz);
kunmap_atomic(buffer);
local_irq_restore(flags);
size -= sz;
pfn++;
- dma_addr += sz;
+ vaddr += sz;
offset = 0;
}
+ } else if (dir == DMA_TO_DEVICE) {
+ memcpy(vaddr, phys_to_virt(orig_addr), size);
} else {
- if (dir == DMA_TO_DEVICE)
- memcpy(dma_addr, phys_to_virt(phys), size);
- else
- memcpy(phys_to_virt(phys), dma_addr, size);
+ memcpy(phys_to_virt(orig_addr), vaddr, size);
}
}
-EXPORT_SYMBOL_GPL(swiotlb_bounce);
-void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
- phys_addr_t phys, size_t size,
- enum dma_data_direction dir)
+phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
+ dma_addr_t tbl_dma_addr,
+ phys_addr_t orig_addr, size_t size,
+ enum dma_data_direction dir)
{
unsigned long flags;
- char *dma_addr;
+ phys_addr_t tlb_addr;
unsigned int nslots, stride, index, wrap;
int i;
unsigned long mask;
unsigned long offset_slots;
unsigned long max_slots;
+ if (no_iotlb_memory)
+ panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+
mask = dma_get_seg_boundary(hwdev);
tbl_dma_addr &= mask;
@@ -453,7 +473,7 @@ void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
io_tlb_list[i] = 0;
for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
io_tlb_list[i] = ++count;
- dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+ tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
/*
* Update the indices to avoid searching in the next
@@ -471,7 +491,7 @@ void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
not_found:
spin_unlock_irqrestore(&io_tlb_lock, flags);
- return NULL;
+ return SWIOTLB_MAP_ERROR;
found:
spin_unlock_irqrestore(&io_tlb_lock, flags);
@@ -481,11 +501,11 @@ found:
* needed.
*/
for (i = 0; i < nslots; i++)
- io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT);
+ io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
- swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
+ swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
- return dma_addr;
+ return tlb_addr;
}
EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
@@ -493,11 +513,10 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
 * Allocates bounce buffer and returns its physical address.
*/
-static void *
-map_single(struct device *hwdev, phys_addr_t phys, size_t size,
- enum dma_data_direction dir)
+phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+ enum dma_data_direction dir)
{
- dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start);
+ dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir);
}
@@ -505,20 +524,19 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size,
/*
 * tlb_addr is the physical address of the bounce buffer to unmap.
*/
-void
-swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
- enum dma_data_direction dir)
+void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir)
{
unsigned long flags;
int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
- phys_addr_t phys = io_tlb_orig_addr[index];
+ int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ phys_addr_t orig_addr = io_tlb_orig_addr[index];
/*
* First, sync the memory before unmapping the entry
*/
- if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
- swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
+ if (orig_addr && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+ swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
/*
* Return the buffer to the free list by setting the corresponding
@@ -547,26 +565,27 @@ swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
}
EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
-void
-swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
- enum dma_data_direction dir,
- enum dma_sync_target target)
+void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir,
+ enum dma_sync_target target)
{
- int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
- phys_addr_t phys = io_tlb_orig_addr[index];
+ int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ phys_addr_t orig_addr = io_tlb_orig_addr[index];
- phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
+ orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
switch (target) {
case SYNC_FOR_CPU:
if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
+ swiotlb_bounce(orig_addr, tlb_addr,
+ size, DMA_FROM_DEVICE);
else
BUG_ON(dir != DMA_TO_DEVICE);
break;
case SYNC_FOR_DEVICE:
if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
+ swiotlb_bounce(orig_addr, tlb_addr,
+ size, DMA_TO_DEVICE);
else
BUG_ON(dir != DMA_FROM_DEVICE);
break;
@@ -589,12 +608,15 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_mask = hwdev->coherent_dma_mask;
ret = (void *)__get_free_pages(flags, order);
- if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) {
- /*
- * The allocated memory isn't reachable by the device.
- */
- free_pages((unsigned long) ret, order);
- ret = NULL;
+ if (ret) {
+ dev_addr = swiotlb_virt_to_bus(hwdev, ret);
+ if (dev_addr + size - 1 > dma_mask) {
+ /*
+ * The allocated memory isn't reachable by the device.
+ */
+ free_pages((unsigned long) ret, order);
+ ret = NULL;
+ }
}
if (!ret) {
/*
@@ -602,25 +624,29 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
* GFP_DMA memory; fall back on map_single(), which
* will grab memory from the lowest available address range.
*/
- ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
- if (!ret)
+ phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
+ if (paddr == SWIOTLB_MAP_ERROR)
return NULL;
- }
- memset(ret, 0, size);
- dev_addr = swiotlb_virt_to_bus(hwdev, ret);
+ ret = phys_to_virt(paddr);
+ dev_addr = phys_to_dma(hwdev, paddr);
- /* Confirm address can be DMA'd by device */
- if (dev_addr + size - 1 > dma_mask) {
- printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
- (unsigned long long)dma_mask,
- (unsigned long long)dev_addr);
+ /* Confirm address can be DMA'd by device */
+ if (dev_addr + size - 1 > dma_mask) {
+ printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
+ (unsigned long long)dma_mask,
+ (unsigned long long)dev_addr);
- /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
- swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
- return NULL;
+ /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
+ swiotlb_tbl_unmap_single(hwdev, paddr,
+ size, DMA_TO_DEVICE);
+ return NULL;
+ }
}
+
*dma_handle = dev_addr;
+ memset(ret, 0, size);
+
return ret;
}
EXPORT_SYMBOL(swiotlb_alloc_coherent);
@@ -636,7 +662,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
free_pages((unsigned long)vaddr, get_order(size));
else
/* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
- swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
+ swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE);
}
EXPORT_SYMBOL(swiotlb_free_coherent);
@@ -677,9 +703,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- phys_addr_t phys = page_to_phys(page) + offset;
+ phys_addr_t map, phys = page_to_phys(page) + offset;
dma_addr_t dev_addr = phys_to_dma(dev, phys);
- void *map;
BUG_ON(dir == DMA_NONE);
/*
@@ -690,23 +715,19 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
return dev_addr;
- /*
- * Oh well, have to allocate and map a bounce buffer.
- */
+ /* Oh well, have to allocate and map a bounce buffer. */
map = map_single(dev, phys, size, dir);
- if (!map) {
+ if (map == SWIOTLB_MAP_ERROR) {
swiotlb_full(dev, size, dir, 1);
- map = io_tlb_overflow_buffer;
+ return phys_to_dma(dev, io_tlb_overflow_buffer);
}
- dev_addr = swiotlb_virt_to_bus(dev, map);
+ dev_addr = phys_to_dma(dev, map);
- /*
- * Ensure that the address returned is DMA'ble
- */
+ /* Ensure that the address returned is DMA'ble */
if (!dma_capable(dev, dev_addr, size)) {
swiotlb_tbl_unmap_single(dev, map, size, dir);
- dev_addr = swiotlb_virt_to_bus(dev, io_tlb_overflow_buffer);
+ return phys_to_dma(dev, io_tlb_overflow_buffer);
}
return dev_addr;
@@ -729,7 +750,7 @@ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(paddr)) {
- swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
+ swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
return;
}
@@ -773,8 +794,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(paddr)) {
- swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
- target);
+ swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
return;
}
@@ -831,9 +851,9 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
if (swiotlb_force ||
!dma_capable(hwdev, dev_addr, sg->length)) {
- void *map = map_single(hwdev, sg_phys(sg),
- sg->length, dir);
- if (!map) {
+ phys_addr_t map = map_single(hwdev, sg_phys(sg),
+ sg->length, dir);
+ if (map == SWIOTLB_MAP_ERROR) {
/* Don't panic here, we expect map_sg users
to do proper error handling. */
swiotlb_full(hwdev, sg->length, dir, 0);
@@ -842,7 +862,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
sgl[0].dma_length = 0;
return 0;
}
- sg->dma_address = swiotlb_virt_to_bus(hwdev, map);
+ sg->dma_address = phys_to_dma(hwdev, map);
} else
sg->dma_address = dev_addr;
sg->dma_length = sg->length;
@@ -925,7 +945,7 @@ EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
int
swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
- return (dma_addr == swiotlb_virt_to_bus(hwdev, io_tlb_overflow_buffer));
+ return (dma_addr == phys_to_dma(hwdev, io_tlb_overflow_buffer));
}
EXPORT_SYMBOL(swiotlb_dma_mapping_error);
@@ -938,6 +958,6 @@ EXPORT_SYMBOL(swiotlb_dma_mapping_error);
int
swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
- return swiotlb_virt_to_bus(hwdev, io_tlb_end - 1) <= mask;
+ return phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
}
EXPORT_SYMBOL(swiotlb_dma_supported);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 39c99fea7c03..0d62fd700f68 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -23,12 +23,12 @@
#include <linux/ctype.h>
#include <linux/kernel.h>
#include <linux/kallsyms.h>
+#include <linux/math64.h>
#include <linux/uaccess.h>
#include <linux/ioport.h>
#include <net/addrconf.h>
#include <asm/page.h> /* for PAGE_SIZE */
-#include <asm/div64.h>
#include <asm/sections.h> /* for dereference_function_descriptor() */
#include "kstrtox.h"
@@ -38,6 +38,8 @@
* @cp: The start of the string
* @endp: A pointer to the end of the parsed string will be placed here
* @base: The number base to use
+ *
+ * This function is obsolete. Please use kstrtoull instead.
*/
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
{
@@ -61,6 +63,8 @@ EXPORT_SYMBOL(simple_strtoull);
* @cp: The start of the string
* @endp: A pointer to the end of the parsed string will be placed here
* @base: The number base to use
+ *
+ * This function is obsolete. Please use kstrtoul instead.
*/
unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
{
@@ -73,6 +77,8 @@ EXPORT_SYMBOL(simple_strtoul);
* @cp: The start of the string
* @endp: A pointer to the end of the parsed string will be placed here
* @base: The number base to use
+ *
+ * This function is obsolete. Please use kstrtol instead.
*/
long simple_strtol(const char *cp, char **endp, unsigned int base)
{
@@ -88,6 +94,8 @@ EXPORT_SYMBOL(simple_strtol);
* @cp: The start of the string
* @endp: A pointer to the end of the parsed string will be placed here
* @base: The number base to use
+ *
+ * This function is obsolete. Please use kstrtoll instead.
*/
long long simple_strtoll(const char *cp, char **endp, unsigned int base)
{
@@ -1022,6 +1030,7 @@ int kptr_restrict __read_mostly;
* N no separator
* The maximum supported length is 64 bytes of the input. Consider
* to use print_hex_dump() for the larger input.
+ * - 'a' For a phys_addr_t type and its derivative types (passed by reference)
*
* Note: The difference between 'S' and 'F' is that on ia64 and ppc64
* function pointers are really function descriptors, which contain a
@@ -1112,6 +1121,12 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
return netdev_feature_string(buf, end, ptr, spec);
}
break;
+ case 'a':
+ spec.flags |= SPECIAL | SMALL | ZEROPAD;
+ spec.field_width = sizeof(phys_addr_t) * 2 + 2;
+ spec.base = 16;
+ return number(buf, end,
+ (unsigned long long) *((phys_addr_t *)ptr), spec);
}
spec.flags |= SMALL;
if (spec.field_width == -1) {
@@ -1485,7 +1500,10 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
num = va_arg(args, long);
break;
case FORMAT_TYPE_SIZE_T:
- num = va_arg(args, size_t);
+ if (spec.flags & SIGN)
+ num = va_arg(args, ssize_t);
+ else
+ num = va_arg(args, size_t);
break;
case FORMAT_TYPE_PTRDIFF:
num = va_arg(args, ptrdiff_t);
@@ -2013,7 +2031,11 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
char digit;
int num = 0;
u8 qualifier;
- u8 base;
+ unsigned int base;
+ union {
+ long long s;
+ unsigned long long u;
+ } val;
s16 field_width;
bool is_sign;
@@ -2053,8 +2075,11 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
/* get field width */
field_width = -1;
- if (isdigit(*fmt))
+ if (isdigit(*fmt)) {
field_width = skip_atoi(&fmt);
+ if (field_width <= 0)
+ break;
+ }
/* get conversion qualifier */
qualifier = -1;
@@ -2154,58 +2179,61 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
|| (base == 0 && !isdigit(digit)))
break;
+ if (is_sign)
+ val.s = qualifier != 'L' ?
+ simple_strtol(str, &next, base) :
+ simple_strtoll(str, &next, base);
+ else
+ val.u = qualifier != 'L' ?
+ simple_strtoul(str, &next, base) :
+ simple_strtoull(str, &next, base);
+
+ if (field_width > 0 && next - str > field_width) {
+ if (base == 0)
+ _parse_integer_fixup_radix(str, &base);
+ while (next - str > field_width) {
+ if (is_sign)
+ val.s = div_s64(val.s, base);
+ else
+ val.u = div_u64(val.u, base);
+ --next;
+ }
+ }
+
switch (qualifier) {
case 'H': /* that's 'hh' in format */
- if (is_sign) {
- signed char *s = (signed char *)va_arg(args, signed char *);
- *s = (signed char)simple_strtol(str, &next, base);
- } else {
- unsigned char *s = (unsigned char *)va_arg(args, unsigned char *);
- *s = (unsigned char)simple_strtoul(str, &next, base);
- }
+ if (is_sign)
+ *va_arg(args, signed char *) = val.s;
+ else
+ *va_arg(args, unsigned char *) = val.u;
break;
case 'h':
- if (is_sign) {
- short *s = (short *)va_arg(args, short *);
- *s = (short)simple_strtol(str, &next, base);
- } else {
- unsigned short *s = (unsigned short *)va_arg(args, unsigned short *);
- *s = (unsigned short)simple_strtoul(str, &next, base);
- }
+ if (is_sign)
+ *va_arg(args, short *) = val.s;
+ else
+ *va_arg(args, unsigned short *) = val.u;
break;
case 'l':
- if (is_sign) {
- long *l = (long *)va_arg(args, long *);
- *l = simple_strtol(str, &next, base);
- } else {
- unsigned long *l = (unsigned long *)va_arg(args, unsigned long *);
- *l = simple_strtoul(str, &next, base);
- }
+ if (is_sign)
+ *va_arg(args, long *) = val.s;
+ else
+ *va_arg(args, unsigned long *) = val.u;
break;
case 'L':
- if (is_sign) {
- long long *l = (long long *)va_arg(args, long long *);
- *l = simple_strtoll(str, &next, base);
- } else {
- unsigned long long *l = (unsigned long long *)va_arg(args, unsigned long long *);
- *l = simple_strtoull(str, &next, base);
- }
+ if (is_sign)
+ *va_arg(args, long long *) = val.s;
+ else
+ *va_arg(args, unsigned long long *) = val.u;
break;
case 'Z':
case 'z':
- {
- size_t *s = (size_t *)va_arg(args, size_t *);
- *s = (size_t)simple_strtoul(str, &next, base);
- }
- break;
+ *va_arg(args, size_t *) = val.u;
+ break;
default:
- if (is_sign) {
- int *i = (int *)va_arg(args, int *);
- *i = (int)simple_strtol(str, &next, base);
- } else {
- unsigned int *i = (unsigned int *)va_arg(args, unsigned int*);
- *i = (unsigned int)simple_strtoul(str, &next, base);
- }
+ if (is_sign)
+ *va_arg(args, int *) = val.s;
+ else
+ *va_arg(args, unsigned int *) = val.u;
break;
}
num++;
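
Two behaviors above benefit from a worked example: the new %pa specifier prints a phys_addr_t passed by reference, zero-padded to the type's width, and the vsscanf() field-width handling now parses the full number first and then divides by the base until only field_width digits have been consumed. A short kernel-context sketch (function name and values chosen for illustration):

#include <linux/kernel.h>

static void format_examples(void)
{
        phys_addr_t addr = 0x1234;
        int x;

        /* %pa prints the phys_addr_t zero-padded to its width, e.g. "0x00001234" on a 32-bit phys_addr_t. */
        printk(KERN_INFO "io at %pa\n", &addr);

        /*
         * Field width: "123" is first parsed as 123, then divided by the
         * base once per excess character, so "%2d" yields 12 and leaves
         * "3" for the next conversion.
         */
        sscanf("123", "%2d", &x);
        printk(KERN_INFO "x = %d\n", x);        /* prints 12 */
}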
diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig
index 60a6088d0e5e..08837db52d94 100644
--- a/lib/xz/Kconfig
+++ b/lib/xz/Kconfig
@@ -6,42 +6,40 @@ config XZ_DEC
the .xz file format as the container. For integrity checking,
CRC32 is supported. See Documentation/xz.txt for more information.
+if XZ_DEC
+
config XZ_DEC_X86
- bool "x86 BCJ filter decoder" if EXPERT
- default y
- depends on XZ_DEC
+ bool "x86 BCJ filter decoder"
+ default y if X86
select XZ_DEC_BCJ
config XZ_DEC_POWERPC
- bool "PowerPC BCJ filter decoder" if EXPERT
- default y
- depends on XZ_DEC
+ bool "PowerPC BCJ filter decoder"
+ default y if PPC
select XZ_DEC_BCJ
config XZ_DEC_IA64
- bool "IA-64 BCJ filter decoder" if EXPERT
- default y
- depends on XZ_DEC
+ bool "IA-64 BCJ filter decoder"
+ default y if IA64
select XZ_DEC_BCJ
config XZ_DEC_ARM
- bool "ARM BCJ filter decoder" if EXPERT
- default y
- depends on XZ_DEC
+ bool "ARM BCJ filter decoder"
+ default y if ARM
select XZ_DEC_BCJ
config XZ_DEC_ARMTHUMB
- bool "ARM-Thumb BCJ filter decoder" if EXPERT
- default y
- depends on XZ_DEC
+ bool "ARM-Thumb BCJ filter decoder"
+ default y if (ARM && ARM_THUMB)
select XZ_DEC_BCJ
config XZ_DEC_SPARC
- bool "SPARC BCJ filter decoder" if EXPERT
- default y
- depends on XZ_DEC
+ bool "SPARC BCJ filter decoder"
+ default y if SPARC
select XZ_DEC_BCJ
+endif
+
config XZ_DEC_BCJ
bool
default n