diff options
Diffstat (limited to 'lib')
43 files changed, 2044 insertions, 730 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 4b31a46fb307..3958dc4389f9 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -42,6 +42,9 @@ config GENERIC_IO config STMP_DEVICE bool +config PERCPU_RWSEM + boolean + config CRC_CCITT tristate "CRC-CCITT functions" help @@ -319,7 +322,7 @@ config CPUMASK_OFFSTACK config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS - depends on EXPERIMENTAL && BROKEN + depends on BROKEN config CPU_RMAP bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e458782f3c52..e4a7f808fa06 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -134,7 +134,7 @@ config DEBUG_SECTION_MISMATCH any use of code/data previously in these sections would most likely result in an oops. In the code, functions and variables are annotated with - __init, __devinit, etc. (see the full list in include/linux/init.h), + __init, __cpuinit, etc. (see the full list in include/linux/init.h), which results in the code/data being placed in specific sections. The section mismatch analysis is always performed after a full kernel build, and enabling this option causes the following @@ -243,8 +243,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE default 1 if BOOTPARAM_SOFTLOCKUP_PANIC config PANIC_ON_OOPS - bool "Panic on Oops" if EXPERT - default n + bool "Panic on Oops" help Say Y here to enable the kernel to panic when it oopses. This has the same effect as setting oops=panic on the kernel command @@ -455,7 +454,7 @@ config HAVE_DEBUG_KMEMLEAK config DEBUG_KMEMLEAK bool "Kernel memory leak detector" - depends on DEBUG_KERNEL && EXPERIMENTAL && HAVE_DEBUG_KMEMLEAK + depends on DEBUG_KERNEL && HAVE_DEBUG_KMEMLEAK select DEBUG_FS select STACKTRACE if STACKTRACE_SUPPORT select KALLSYMS @@ -605,61 +604,6 @@ config PROVE_LOCKING For more details, see Documentation/lockdep-design.txt. -config PROVE_RCU - bool "RCU debugging: prove RCU correctness" - depends on PROVE_LOCKING - default n - help - This feature enables lockdep extensions that check for correct - use of RCU APIs. This is currently under development. Say Y - if you want to debug RCU usage or help work on the PROVE_RCU - feature. - - Say N if you are unsure. - -config PROVE_RCU_REPEATEDLY - bool "RCU debugging: don't disable PROVE_RCU on first splat" - depends on PROVE_RCU - default n - help - By itself, PROVE_RCU will disable checking upon issuing the - first warning (or "splat"). This feature prevents such - disabling, allowing multiple RCU-lockdep warnings to be printed - on a single reboot. - - Say Y to allow multiple RCU-lockdep warnings per boot. - - Say N if you are unsure. - -config PROVE_RCU_DELAY - bool "RCU debugging: preemptible RCU race provocation" - depends on DEBUG_KERNEL && PREEMPT_RCU - default n - help - There is a class of races that involve an unlikely preemption - of __rcu_read_unlock() just after ->rcu_read_lock_nesting has - been set to INT_MIN. This feature inserts a delay at that - point to increase the probability of these races. - - Say Y to increase probability of preemption of __rcu_read_unlock(). - - Say N if you are unsure. - -config SPARSE_RCU_POINTER - bool "RCU debugging: sparse-based checks for pointer usage" - default n - help - This feature enables the __rcu sparse annotation for - RCU-protected pointers. This annotation will cause sparse - to flag any non-RCU used of annotated pointers. This can be - helpful when debugging RCU usage. Please note that this feature - is not intended to enforce code cleanliness; it is instead merely - a debugging aid. - - Say Y to make sparse flag questionable use of RCU-protected pointers - - Say N if you are unsure. - config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT @@ -937,6 +881,63 @@ config BOOT_PRINTK_DELAY BOOT_PRINTK_DELAY also may cause LOCKUP_DETECTOR to detect what it believes to be lockup conditions. +menu "RCU Debugging" + +config PROVE_RCU + bool "RCU debugging: prove RCU correctness" + depends on PROVE_LOCKING + default n + help + This feature enables lockdep extensions that check for correct + use of RCU APIs. This is currently under development. Say Y + if you want to debug RCU usage or help work on the PROVE_RCU + feature. + + Say N if you are unsure. + +config PROVE_RCU_REPEATEDLY + bool "RCU debugging: don't disable PROVE_RCU on first splat" + depends on PROVE_RCU + default n + help + By itself, PROVE_RCU will disable checking upon issuing the + first warning (or "splat"). This feature prevents such + disabling, allowing multiple RCU-lockdep warnings to be printed + on a single reboot. + + Say Y to allow multiple RCU-lockdep warnings per boot. + + Say N if you are unsure. + +config PROVE_RCU_DELAY + bool "RCU debugging: preemptible RCU race provocation" + depends on DEBUG_KERNEL && PREEMPT_RCU + default n + help + There is a class of races that involve an unlikely preemption + of __rcu_read_unlock() just after ->rcu_read_lock_nesting has + been set to INT_MIN. This feature inserts a delay at that + point to increase the probability of these races. + + Say Y to increase probability of preemption of __rcu_read_unlock(). + + Say N if you are unsure. + +config SPARSE_RCU_POINTER + bool "RCU debugging: sparse-based checks for pointer usage" + default n + help + This feature enables the __rcu sparse annotation for + RCU-protected pointers. This annotation will cause sparse + to flag any non-RCU used of annotated pointers. This can be + helpful when debugging RCU usage. Please note that this feature + is not intended to enforce code cleanliness; it is instead merely + a debugging aid. + + Say Y to make sparse flag questionable use of RCU-protected pointers + + Say N if you are unsure. + config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL @@ -970,7 +971,7 @@ config RCU_TORTURE_TEST_RUNNABLE config RCU_CPU_STALL_TIMEOUT int "RCU CPU stall timeout in seconds" - depends on TREE_RCU || TREE_PREEMPT_RCU + depends on RCU_STALL_COMMON range 3 300 default 21 help @@ -1008,6 +1009,7 @@ config RCU_CPU_STALL_INFO config RCU_TRACE bool "Enable tracing for RCU" depends on DEBUG_KERNEL + select TRACE_CLOCK help This option provides tracing in RCU which presents stats in debugfs for debugging RCU implementation. @@ -1015,6 +1017,8 @@ config RCU_TRACE Say Y here if you want to enable RCU tracing Say N if you are unsure. +endmenu # "RCU Debugging" + config KPROBES_SANITY_TEST bool "Kprobes sanity tests" depends on DEBUG_KERNEL @@ -1192,14 +1196,14 @@ config MEMORY_NOTIFIER_ERROR_INJECT If unsure, say N. -config PSERIES_RECONFIG_NOTIFIER_ERROR_INJECT - tristate "pSeries reconfig notifier error injection module" - depends on PPC_PSERIES && NOTIFIER_ERROR_INJECTION +config OF_RECONFIG_NOTIFIER_ERROR_INJECT + tristate "OF reconfig notifier error injection module" + depends on OF_DYNAMIC && NOTIFIER_ERROR_INJECTION help This option provides the ability to inject artificial errors to - pSeries reconfig notifier chain callbacks. It is controlled + OF reconfig notifier chain callbacks. It is controlled through debugfs interface under - /sys/kernel/debug/notifier-error-inject/pSeries-reconfig/ + /sys/kernel/debug/notifier-error-inject/OF-reconfig/ If the notifier call chain should be failed with some events notified, write the error code to "actions/<notifier event>/error". diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index 43cb93fa2651..dbb58ae1b8e0 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -5,7 +5,7 @@ config HAVE_ARCH_KGDB menuconfig KGDB bool "KGDB: kernel debugger" depends on HAVE_ARCH_KGDB - depends on DEBUG_KERNEL && EXPERIMENTAL + depends on DEBUG_KERNEL help If you say Y here, it will be possible to remotely debug the kernel using gdb. It is recommended but not required, that @@ -22,6 +22,7 @@ config KGDB_SERIAL_CONSOLE tristate "KGDB: use kgdb over the serial console" select CONSOLE_POLL select MAGIC_SYSRQ + depends on TTY default y help Share a serial console with kgdb. Sysrq-g must be used diff --git a/lib/Makefile b/lib/Makefile index e3723c7527da..02ed6c04cd7d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -12,7 +12,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ idr.o int_sqrt.o extable.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ - is_single_threaded.o plist.o decompress.o kobject_uevent.o + is_single_threaded.o plist.o decompress.o kobject_uevent.o \ + earlycpio.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o @@ -39,6 +40,7 @@ obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o +lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o @@ -93,8 +95,8 @@ obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o -obj-$(CONFIG_PSERIES_RECONFIG_NOTIFIER_ERROR_INJECT) += \ - pSeries-reconfig-notifier-error-inject.o +obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \ + of-reconfig-notifier-error-inject.o lib-$(CONFIG_GENERIC_BUG) += bug.o diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index 5293d2433029..11b9b01fda6b 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -81,7 +81,7 @@ next_tag: goto next_tag; } - if (unlikely((tag & 0x1f) == 0x1f)) { + if (unlikely((tag & 0x1f) == ASN1_LONG_TAG)) { do { if (unlikely(datalen - dp < 2)) goto data_overrun_error; @@ -96,7 +96,7 @@ next_tag: goto next_tag; } - if (unlikely(len == 0x80)) { + if (unlikely(len == ASN1_INDEFINITE_LENGTH)) { /* Indefinite length */ if (unlikely((tag & ASN1_CONS_BIT) == ASN1_PRIM << 5)) goto indefinite_len_primitive; @@ -222,7 +222,7 @@ next_op: if (unlikely(dp >= datalen - 1)) goto data_overrun_error; tag = data[dp++]; - if (unlikely((tag & 0x1f) == 0x1f)) + if (unlikely((tag & 0x1f) == ASN1_LONG_TAG)) goto long_tag_not_supported; if (op & ASN1_OP_MATCH__ANY) { @@ -254,7 +254,7 @@ next_op: len = data[dp++]; if (len > 0x7f) { - if (unlikely(len == 0x80)) { + if (unlikely(len == ASN1_INDEFINITE_LENGTH)) { /* Indefinite length */ if (unlikely(!(tag & ASN1_CONS_BIT))) goto indefinite_len_primitive; diff --git a/lib/atomic64.c b/lib/atomic64.c index 978537809d84..08a4f068e61e 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -31,7 +31,11 @@ static union { raw_spinlock_t lock; char pad[L1_CACHE_BYTES]; -} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp; +} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = { + [0 ... (NR_LOCKS - 1)] = { + .lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock), + }, +}; static inline raw_spinlock_t *lock_addr(const atomic64_t *v) { @@ -173,14 +177,3 @@ int atomic64_add_unless(atomic64_t *v, long long a, long long u) return ret; } EXPORT_SYMBOL(atomic64_add_unless); - -static int init_atomic64_lock(void) -{ - int i; - - for (i = 0; i < NR_LOCKS; ++i) - raw_spin_lock_init(&atomic64_lock[i].lock); - return 0; -} - -pure_initcall(init_atomic64_lock); diff --git a/lib/bug.c b/lib/bug.c index a28c1415357c..d0cdf14c651a 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -55,6 +55,7 @@ static inline unsigned long bug_addr(const struct bug_entry *bug) } #ifdef CONFIG_MODULES +/* Updates are protected by module mutex */ static LIST_HEAD(module_bug_list); static const struct bug_entry *module_find_bug(unsigned long bugaddr) diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c index 145dec5267c9..5fbed5caba6e 100644 --- a/lib/cpu_rmap.c +++ b/lib/cpu_rmap.c @@ -45,6 +45,7 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags) if (!rmap) return NULL; + kref_init(&rmap->refcount); rmap->obj = (void **)((char *)rmap + obj_offset); /* Initially assign CPUs to objects on a rota, since we have @@ -63,6 +64,35 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags) } EXPORT_SYMBOL(alloc_cpu_rmap); +/** + * cpu_rmap_release - internal reclaiming helper called from kref_put + * @ref: kref to struct cpu_rmap + */ +static void cpu_rmap_release(struct kref *ref) +{ + struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount); + kfree(rmap); +} + +/** + * cpu_rmap_get - internal helper to get new ref on a cpu_rmap + * @rmap: reverse-map allocated with alloc_cpu_rmap() + */ +static inline void cpu_rmap_get(struct cpu_rmap *rmap) +{ + kref_get(&rmap->refcount); +} + +/** + * cpu_rmap_put - release ref on a cpu_rmap + * @rmap: reverse-map allocated with alloc_cpu_rmap() + */ +int cpu_rmap_put(struct cpu_rmap *rmap) +{ + return kref_put(&rmap->refcount, cpu_rmap_release); +} +EXPORT_SYMBOL(cpu_rmap_put); + /* Reevaluate nearest object for given CPU, comparing with the given * neighbours at the given distance. */ @@ -197,8 +227,7 @@ struct irq_glue { * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL * - * Must be called in process context, before freeing the IRQs, and - * without holding any locks required by global workqueue items. + * Must be called in process context, before freeing the IRQs. */ void free_irq_cpu_rmap(struct cpu_rmap *rmap) { @@ -212,12 +241,18 @@ void free_irq_cpu_rmap(struct cpu_rmap *rmap) glue = rmap->obj[index]; irq_set_affinity_notifier(glue->notify.irq, NULL); } - irq_run_affinity_notifiers(); - kfree(rmap); + cpu_rmap_put(rmap); } EXPORT_SYMBOL(free_irq_cpu_rmap); +/** + * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated + * @notify: struct irq_affinity_notify passed by irq/manage.c + * @mask: cpu mask for new SMP affinity + * + * This is executed in workqueue context. + */ static void irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask) { @@ -230,10 +265,16 @@ irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask) pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc); } +/** + * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem + * @ref: kref to struct irq_affinity_notify passed by irq/manage.c + */ static void irq_cpu_rmap_release(struct kref *ref) { struct irq_glue *glue = container_of(ref, struct irq_glue, notify.kref); + + cpu_rmap_put(glue->rmap); kfree(glue); } @@ -258,10 +299,13 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq) glue->notify.notify = irq_cpu_rmap_notify; glue->notify.release = irq_cpu_rmap_release; glue->rmap = rmap; + cpu_rmap_get(rmap); glue->index = cpu_rmap_add(rmap, glue); rc = irq_set_affinity_notifier(irq, &glue->notify); - if (rc) + if (rc) { + cpu_rmap_put(glue->rmap); kfree(glue); + } return rc; } EXPORT_SYMBOL(irq_cpu_rmap_add); diff --git a/lib/devres.c b/lib/devres.c index 80b9c76d436a..88ad75952a76 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -1,3 +1,4 @@ +#include <linux/err.h> #include <linux/pci.h> #include <linux/io.h> #include <linux/gfp.h> @@ -86,22 +87,24 @@ void devm_iounmap(struct device *dev, void __iomem *addr) EXPORT_SYMBOL(devm_iounmap); /** - * devm_request_and_ioremap() - Check, request region, and ioremap resource - * @dev: Generic device to handle the resource for + * devm_ioremap_resource() - check, request region, and ioremap resource + * @dev: generic device to handle the resource for * @res: resource to be handled * - * Takes all necessary steps to ioremap a mem resource. Uses managed device, so - * everything is undone on driver detach. Checks arguments, so you can feed - * it the result from e.g. platform_get_resource() directly. Returns the - * remapped pointer or NULL on error. Usage example: + * Checks that a resource is a valid memory region, requests the memory region + * and ioremaps it either as cacheable or as non-cacheable memory depending on + * the resource's flags. All operations are managed and will be undone on + * driver detach. + * + * Returns a pointer to the remapped memory or an ERR_PTR() encoded error code + * on failure. Usage example: * * res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - * base = devm_request_and_ioremap(&pdev->dev, res); - * if (!base) - * return -EADDRNOTAVAIL; + * base = devm_ioremap_resource(&pdev->dev, res); + * if (IS_ERR(base)) + * return PTR_ERR(base); */ -void __iomem *devm_request_and_ioremap(struct device *dev, - struct resource *res) +void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) { resource_size_t size; const char *name; @@ -111,7 +114,7 @@ void __iomem *devm_request_and_ioremap(struct device *dev, if (!res || resource_type(res) != IORESOURCE_MEM) { dev_err(dev, "invalid resource\n"); - return NULL; + return ERR_PTR(-EINVAL); } size = resource_size(res); @@ -119,7 +122,7 @@ void __iomem *devm_request_and_ioremap(struct device *dev, if (!devm_request_mem_region(dev, res->start, size, name)) { dev_err(dev, "can't request region for resource %pR\n", res); - return NULL; + return ERR_PTR(-EBUSY); } if (res->flags & IORESOURCE_CACHEABLE) @@ -130,10 +133,39 @@ void __iomem *devm_request_and_ioremap(struct device *dev, if (!dest_ptr) { dev_err(dev, "ioremap failed for resource %pR\n", res); devm_release_mem_region(dev, res->start, size); + dest_ptr = ERR_PTR(-ENOMEM); } return dest_ptr; } +EXPORT_SYMBOL(devm_ioremap_resource); + +/** + * devm_request_and_ioremap() - Check, request region, and ioremap resource + * @dev: Generic device to handle the resource for + * @res: resource to be handled + * + * Takes all necessary steps to ioremap a mem resource. Uses managed device, so + * everything is undone on driver detach. Checks arguments, so you can feed + * it the result from e.g. platform_get_resource() directly. Returns the + * remapped pointer or NULL on error. Usage example: + * + * res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + * base = devm_request_and_ioremap(&pdev->dev, res); + * if (!base) + * return -EADDRNOTAVAIL; + */ +void __iomem *devm_request_and_ioremap(struct device *device, + struct resource *res) +{ + void __iomem *dest_ptr; + + dest_ptr = devm_ioremap_resource(device, res); + if (IS_ERR(dest_ptr)) + return NULL; + + return dest_ptr; +} EXPORT_SYMBOL(devm_request_and_ioremap); #ifdef CONFIG_HAS_IOPORT diff --git a/lib/digsig.c b/lib/digsig.c index 8c0e62975c88..2f31e6a45f0a 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -30,11 +30,10 @@ static struct crypto_shash *shash; -static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg, - unsigned long msglen, - unsigned long modulus_bitlen, - unsigned char *out, - unsigned long *outlen) +static const char *pkcs_1_v1_5_decode_emsa(const unsigned char *msg, + unsigned long msglen, + unsigned long modulus_bitlen, + unsigned long *outlen) { unsigned long modulus_len, ps_len, i; @@ -42,11 +41,11 @@ static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg, /* test message size */ if ((msglen > modulus_len) || (modulus_len < 11)) - return -EINVAL; + return NULL; /* separate encoded message */ - if ((msg[0] != 0x00) || (msg[1] != (unsigned char)1)) - return -EINVAL; + if (msg[0] != 0x00 || msg[1] != 0x01) + return NULL; for (i = 2; i < modulus_len - 1; i++) if (msg[i] != 0xFF) @@ -56,19 +55,13 @@ static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg, if (msg[i] != 0) /* There was no octet with hexadecimal value 0x00 to separate ps from m. */ - return -EINVAL; + return NULL; ps_len = i - 2; - if (*outlen < (msglen - (2 + ps_len + 1))) { - *outlen = msglen - (2 + ps_len + 1); - return -EOVERFLOW; - } - *outlen = (msglen - (2 + ps_len + 1)); - memcpy(out, &msg[2 + ps_len + 1], *outlen); - return 0; + return msg + 2 + ps_len + 1; } /* @@ -83,7 +76,8 @@ static int digsig_verify_rsa(struct key *key, unsigned long mlen, mblen; unsigned nret, l; int head, i; - unsigned char *out1 = NULL, *out2 = NULL; + unsigned char *out1 = NULL; + const char *m; MPI in = NULL, res = NULL, pkey[2]; uint8_t *p, *datap, *endp; struct user_key_payload *ukp; @@ -120,7 +114,7 @@ static int digsig_verify_rsa(struct key *key, } mblen = mpi_get_nbits(pkey[0]); - mlen = (mblen + 7)/8; + mlen = DIV_ROUND_UP(mblen, 8); if (mlen == 0) goto err; @@ -129,10 +123,6 @@ static int digsig_verify_rsa(struct key *key, if (!out1) goto err; - out2 = kzalloc(mlen, GFP_KERNEL); - if (!out2) - goto err; - nret = siglen; in = mpi_read_from_buffer(sig, &nret); if (!in) @@ -162,18 +152,17 @@ static int digsig_verify_rsa(struct key *key, memset(out1, 0, head); memcpy(out1 + head, p, l); - err = pkcs_1_v1_5_decode_emsa(out1, len, mblen, out2, &len); - if (err) - goto err; + kfree(p); + + m = pkcs_1_v1_5_decode_emsa(out1, len, mblen, &len); - if (len != hlen || memcmp(out2, h, hlen)) + if (!m || len != hlen || memcmp(m, h, hlen)) err = -EINVAL; err: mpi_free(in); mpi_free(res); kfree(out1); - kfree(out2); while (--i >= 0) mpi_free(pkey[i]); err1: diff --git a/lib/dma-debug.c b/lib/dma-debug.c index d84beb994f36..5e396accd3d0 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -45,6 +45,12 @@ enum { dma_debug_coherent, }; +enum map_err_types { + MAP_ERR_CHECK_NOT_APPLICABLE, + MAP_ERR_NOT_CHECKED, + MAP_ERR_CHECKED, +}; + #define DMA_DEBUG_STACKTRACE_ENTRIES 5 struct dma_debug_entry { @@ -57,6 +63,7 @@ struct dma_debug_entry { int direction; int sg_call_ents; int sg_mapped_ents; + enum map_err_types map_err_type; #ifdef CONFIG_STACKTRACE struct stack_trace stacktrace; unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; @@ -114,6 +121,12 @@ static struct device_driver *current_driver __read_mostly; static DEFINE_RWLOCK(driver_name_lock); +static const char *const maperr2str[] = { + [MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable", + [MAP_ERR_NOT_CHECKED] = "dma map error not checked", + [MAP_ERR_CHECKED] = "dma map error checked", +}; + static const char *type2name[4] = { "single", "page", "scather-gather", "coherent" }; @@ -376,11 +389,12 @@ void debug_dma_dump_mappings(struct device *dev) list_for_each_entry(entry, &bucket->list, list) { if (!dev || dev == entry->dev) { dev_info(entry->dev, - "%s idx %d P=%Lx D=%Lx L=%Lx %s\n", + "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n", type2name[entry->type], idx, (unsigned long long)entry->paddr, entry->dev_addr, entry->size, - dir2name[entry->direction]); + dir2name[entry->direction], + maperr2str[entry->map_err_type]); } } @@ -844,16 +858,16 @@ static void check_unmap(struct dma_debug_entry *ref) struct hash_bucket *bucket; unsigned long flags; - if (dma_mapping_error(ref->dev, ref->dev_addr)) { - err_printk(ref->dev, NULL, "DMA-API: device driver tries " - "to free an invalid DMA memory address\n"); - return; - } - bucket = get_hash_bucket(ref, &flags); entry = bucket_find_exact(bucket, ref); if (!entry) { + if (dma_mapping_error(ref->dev, ref->dev_addr)) { + err_printk(ref->dev, NULL, + "DMA-API: device driver tries " + "to free an invalid DMA memory address\n"); + return; + } err_printk(ref->dev, NULL, "DMA-API: device driver tries " "to free DMA memory it has not allocated " "[device address=0x%016llx] [size=%llu bytes]\n", @@ -910,6 +924,15 @@ static void check_unmap(struct dma_debug_entry *ref) dir2name[ref->direction]); } + if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { + err_printk(ref->dev, entry, + "DMA-API: device driver failed to check map error" + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped as %s]", + ref->dev_addr, ref->size, + type2name[entry->type]); + } + hash_bucket_del(entry); dma_entry_free(entry); @@ -1017,7 +1040,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, if (unlikely(global_disable)) return; - if (unlikely(dma_mapping_error(dev, dma_addr))) + if (dma_mapping_error(dev, dma_addr)) return; entry = dma_entry_alloc(); @@ -1030,6 +1053,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->dev_addr = dma_addr; entry->size = size; entry->direction = direction; + entry->map_err_type = MAP_ERR_NOT_CHECKED; if (map_single) entry->type = dma_debug_single; @@ -1045,6 +1069,30 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, } EXPORT_SYMBOL(debug_dma_map_page); +void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + struct dma_debug_entry ref; + struct dma_debug_entry *entry; + struct hash_bucket *bucket; + unsigned long flags; + + if (unlikely(global_disable)) + return; + + ref.dev = dev; + ref.dev_addr = dma_addr; + bucket = get_hash_bucket(&ref, &flags); + entry = bucket_find_exact(bucket, &ref); + + if (!entry) + goto out; + + entry->map_err_type = MAP_ERR_CHECKED; +out: + put_hash_bucket(bucket, &flags); +} +EXPORT_SYMBOL(debug_dma_mapping_error); + void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction, bool map_single) { diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index e7f7d993357a..5276b99ca650 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -59,16 +59,9 @@ struct ddebug_iter { static DEFINE_MUTEX(ddebug_lock); static LIST_HEAD(ddebug_tables); -static int verbose = 0; +static int verbose; module_param(verbose, int, 0644); -/* Return the last part of a pathname */ -static inline const char *basename(const char *path) -{ - const char *tail = strrchr(path, '/'); - return tail ? tail+1 : path; -} - /* Return the path relative to source root */ static inline const char *trim_prefix(const char *path) { @@ -107,24 +100,32 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf, return buf; } -#define vpr_info(fmt, ...) \ - if (verbose) do { pr_info(fmt, ##__VA_ARGS__); } while (0) - -#define vpr_info_dq(q, msg) \ +#define vpr_info(fmt, ...) \ do { \ - /* trim last char off format print */ \ - vpr_info("%s: func=\"%s\" file=\"%s\" " \ - "module=\"%s\" format=\"%.*s\" " \ - "lineno=%u-%u", \ - msg, \ - q->function ? q->function : "", \ - q->filename ? q->filename : "", \ - q->module ? q->module : "", \ - (int)(q->format ? strlen(q->format) - 1 : 0), \ - q->format ? q->format : "", \ - q->first_lineno, q->last_lineno); \ + if (verbose) \ + pr_info(fmt, ##__VA_ARGS__); \ } while (0) +static void vpr_info_dq(const struct ddebug_query *query, const char *msg) +{ + /* trim any trailing newlines */ + int fmtlen = 0; + + if (query->format) { + fmtlen = strlen(query->format); + while (fmtlen && query->format[fmtlen - 1] == '\n') + fmtlen--; + } + + vpr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" lineno=%u-%u\n", + msg, + query->function ? query->function : "", + query->filename ? query->filename : "", + query->module ? query->module : "", + fmtlen, query->format ? query->format : "", + query->first_lineno, query->last_lineno); +} + /* * Search the tables for _ddebug's which match the given `query' and * apply the `flags' and `mask' to them. Returns number of matching @@ -148,13 +149,13 @@ static int ddebug_change(const struct ddebug_query *query, if (query->module && strcmp(query->module, dt->mod_name)) continue; - for (i = 0 ; i < dt->num_ddebugs ; i++) { + for (i = 0; i < dt->num_ddebugs; i++) { struct _ddebug *dp = &dt->ddebugs[i]; /* match against the source filename */ if (query->filename && strcmp(query->filename, dp->filename) && - strcmp(query->filename, basename(dp->filename)) && + strcmp(query->filename, kbasename(dp->filename)) && strcmp(query->filename, trim_prefix(dp->filename))) continue; @@ -183,10 +184,10 @@ static int ddebug_change(const struct ddebug_query *query, continue; dp->flags = newflags; vpr_info("changed %s:%d [%s]%s =%s\n", - trim_prefix(dp->filename), dp->lineno, - dt->mod_name, dp->function, - ddebug_describe_flags(dp, flagbuf, - sizeof(flagbuf))); + trim_prefix(dp->filename), dp->lineno, + dt->mod_name, dp->function, + ddebug_describe_flags(dp, flagbuf, + sizeof(flagbuf))); } } mutex_unlock(&ddebug_lock); @@ -220,19 +221,23 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) /* find `end' of word, whitespace separated or quoted */ if (*buf == '"' || *buf == '\'') { int quote = *buf++; - for (end = buf ; *end && *end != quote ; end++) + for (end = buf; *end && *end != quote; end++) ; - if (!*end) + if (!*end) { + pr_err("unclosed quote: %s\n", buf); return -EINVAL; /* unclosed quote */ + } } else { - for (end = buf ; *end && !isspace(*end) ; end++) + for (end = buf; *end && !isspace(*end); end++) ; BUG_ON(end == buf); } /* `buf' is start of word, `end' is one past its end */ - if (nwords == maxwords) + if (nwords == maxwords) { + pr_err("too many words, legal max <=%d\n", maxwords); return -EINVAL; /* ran out of words[] before bytes */ + } if (*end) *end++ = '\0'; /* terminate the word */ words[nwords++] = buf; @@ -242,7 +247,7 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) if (verbose) { int i; pr_info("split into words:"); - for (i = 0 ; i < nwords ; i++) + for (i = 0; i < nwords; i++) pr_cont(" \"%s\"", words[i]); pr_cont("\n"); } @@ -264,7 +269,11 @@ static inline int parse_lineno(const char *str, unsigned int *val) return 0; } *val = simple_strtoul(str, &end, 10); - return end == NULL || end == str || *end != '\0' ? -EINVAL : 0; + if (end == NULL || end == str || *end != '\0') { + pr_err("bad line-number: %s\n", str); + return -EINVAL; + } + return 0; } /* @@ -293,11 +302,11 @@ static char *unescape(char *str) in += 2; continue; } else if (isodigit(in[1]) && - isodigit(in[2]) && - isodigit(in[3])) { - *out++ = ((in[1] - '0')<<6) | - ((in[2] - '0')<<3) | - (in[3] - '0'); + isodigit(in[2]) && + isodigit(in[3])) { + *out++ = (((in[1] - '0') << 6) | + ((in[2] - '0') << 3) | + (in[3] - '0')); in += 4; continue; } @@ -315,8 +324,8 @@ static int check_set(const char **dest, char *src, char *name) if (*dest) { rc = -EINVAL; - pr_err("match-spec:%s val:%s overridden by %s", - name, *dest, src); + pr_err("match-spec:%s val:%s overridden by %s\n", + name, *dest, src); } *dest = src; return rc; @@ -344,40 +353,46 @@ static int ddebug_parse_query(char *words[], int nwords, int rc; /* check we have an even number of words */ - if (nwords % 2 != 0) + if (nwords % 2 != 0) { + pr_err("expecting pairs of match-spec <value>\n"); return -EINVAL; + } memset(query, 0, sizeof(*query)); if (modname) /* support $modname.dyndbg=<multiple queries> */ query->module = modname; - for (i = 0 ; i < nwords ; i += 2) { - if (!strcmp(words[i], "func")) + for (i = 0; i < nwords; i += 2) { + if (!strcmp(words[i], "func")) { rc = check_set(&query->function, words[i+1], "func"); - else if (!strcmp(words[i], "file")) + } else if (!strcmp(words[i], "file")) { rc = check_set(&query->filename, words[i+1], "file"); - else if (!strcmp(words[i], "module")) + } else if (!strcmp(words[i], "module")) { rc = check_set(&query->module, words[i+1], "module"); - else if (!strcmp(words[i], "format")) + } else if (!strcmp(words[i], "format")) { rc = check_set(&query->format, unescape(words[i+1]), - "format"); - else if (!strcmp(words[i], "line")) { + "format"); + } else if (!strcmp(words[i], "line")) { char *first = words[i+1]; char *last = strchr(first, '-'); if (query->first_lineno || query->last_lineno) { - pr_err("match-spec:line given 2 times\n"); + pr_err("match-spec: line used 2x\n"); return -EINVAL; } if (last) *last++ = '\0'; - if (parse_lineno(first, &query->first_lineno) < 0) + if (parse_lineno(first, &query->first_lineno) < 0) { + pr_err("line-number is <0\n"); return -EINVAL; + } if (last) { /* range <first>-<last> */ if (parse_lineno(last, &query->last_lineno) < query->first_lineno) { - pr_err("last-line < 1st-line\n"); + pr_err("last-line:%d < 1st-line:%d\n", + query->last_lineno, + query->first_lineno); return -EINVAL; } } else { @@ -413,19 +428,22 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, op = *str++; break; default: + pr_err("bad flag-op %c, at start of %s\n", *str, str); return -EINVAL; } vpr_info("op='%c'\n", op); - for ( ; *str ; ++str) { + for (; *str ; ++str) { for (i = ARRAY_SIZE(opt_array) - 1; i >= 0; i--) { if (*str == opt_array[i].opt_char) { flags |= opt_array[i].flag; break; } } - if (i < 0) + if (i < 0) { + pr_err("unknown flag '%c' in \"%s\"\n", *str, str); return -EINVAL; + } } vpr_info("flags=0x%x\n", flags); @@ -457,16 +475,22 @@ static int ddebug_exec_query(char *query_string, const char *modname) char *words[MAXWORDS]; nwords = ddebug_tokenize(query_string, words, MAXWORDS); - if (nwords <= 0) + if (nwords <= 0) { + pr_err("tokenize failed\n"); return -EINVAL; - if (ddebug_parse_query(words, nwords-1, &query, modname)) + } + /* check flags 1st (last arg) so query is pairs of spec,val */ + if (ddebug_parse_flags(words[nwords-1], &flags, &mask)) { + pr_err("flags parse failed\n"); return -EINVAL; - if (ddebug_parse_flags(words[nwords-1], &flags, &mask)) + } + if (ddebug_parse_query(words, nwords-1, &query, modname)) { + pr_err("query parse failed\n"); return -EINVAL; - + } /* actually go and implement the change */ nfound = ddebug_change(&query, flags, mask); - vpr_info_dq((&query), (nfound) ? "applied" : "no-match"); + vpr_info_dq(&query, nfound ? "applied" : "no-match"); return nfound; } @@ -495,8 +519,9 @@ static int ddebug_exec_queries(char *query, const char *modname) if (rc < 0) { errs++; exitcode = rc; - } else + } else { nfound += rc; + } i++; } vpr_info("processed %d queries, with %d matches, %d errs\n", @@ -772,7 +797,7 @@ static void *ddebug_proc_next(struct seq_file *m, void *p, loff_t *pos) struct _ddebug *dp; vpr_info("called m=%p p=%p *pos=%lld\n", - m, p, (unsigned long long)*pos); + m, p, (unsigned long long)*pos); if (p == SEQ_START_TOKEN) dp = ddebug_iter_first(iter); @@ -798,14 +823,14 @@ static int ddebug_proc_show(struct seq_file *m, void *p) if (p == SEQ_START_TOKEN) { seq_puts(m, - "# filename:lineno [module]function flags format\n"); + "# filename:lineno [module]function flags format\n"); return 0; } seq_printf(m, "%s:%u [%s]%s =%s \"", - trim_prefix(dp->filename), dp->lineno, - iter->table->mod_name, dp->function, - ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf))); + trim_prefix(dp->filename), dp->lineno, + iter->table->mod_name, dp->function, + ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf))); seq_escape(m, dp->format, "\t\r\n\""); seq_puts(m, "\"\n"); @@ -852,7 +877,7 @@ static int ddebug_proc_open(struct inode *inode, struct file *file) kfree(iter); return err; } - ((struct seq_file *) file->private_data)->private = iter; + ((struct seq_file *)file->private_data)->private = iter; return 0; } @@ -1009,8 +1034,7 @@ static int __init dynamic_debug_init(void) int verbose_bytes = 0; if (__start___verbose == __stop___verbose) { - pr_warn("_ddebug table is empty in a " - "CONFIG_DYNAMIC_DEBUG build"); + pr_warn("_ddebug table is empty in a CONFIG_DYNAMIC_DEBUG build\n"); return 1; } iter = __start___verbose; @@ -1037,18 +1061,16 @@ static int __init dynamic_debug_init(void) goto out_err; ddebug_init_success = 1; - vpr_info("%d modules, %d entries and %d bytes in ddebug tables," - " %d bytes in (readonly) verbose section\n", - modct, entries, (int)( modct * sizeof(struct ddebug_table)), - verbose_bytes + (int)(__stop___verbose - __start___verbose)); + vpr_info("%d modules, %d entries and %d bytes in ddebug tables, %d bytes in (readonly) verbose section\n", + modct, entries, (int)(modct * sizeof(struct ddebug_table)), + verbose_bytes + (int)(__stop___verbose - __start___verbose)); /* apply ddebug_query boot param, dont unload tables on err */ if (ddebug_setup_string[0] != '\0') { - pr_warn("ddebug_query param name is deprecated," - " change it to dyndbg\n"); + pr_warn("ddebug_query param name is deprecated, change it to dyndbg\n"); ret = ddebug_exec_queries(ddebug_setup_string, NULL); if (ret < 0) - pr_warn("Invalid ddebug boot param %s", + pr_warn("Invalid ddebug boot param %s\n", ddebug_setup_string); else pr_info("%d changes by ddebug_query\n", ret); diff --git a/lib/earlycpio.c b/lib/earlycpio.c new file mode 100644 index 000000000000..8078ef49cb79 --- /dev/null +++ b/lib/earlycpio.c @@ -0,0 +1,145 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2012 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available + * under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * ----------------------------------------------------------------------- */ + +/* + * earlycpio.c + * + * Find a specific cpio member; must precede any compressed content. + * This is used to locate data items in the initramfs used by the + * kernel itself during early boot (before the main initramfs is + * decompressed.) It is the responsibility of the initramfs creator + * to ensure that these items are uncompressed at the head of the + * blob. Depending on the boot loader or package tool that may be a + * separate file or part of the same file. + */ + +#include <linux/earlycpio.h> +#include <linux/kernel.h> +#include <linux/string.h> + +enum cpio_fields { + C_MAGIC, + C_INO, + C_MODE, + C_UID, + C_GID, + C_NLINK, + C_MTIME, + C_FILESIZE, + C_MAJ, + C_MIN, + C_RMAJ, + C_RMIN, + C_NAMESIZE, + C_CHKSUM, + C_NFIELDS +}; + +/** + * cpio_data find_cpio_data - Search for files in an uncompressed cpio + * @path: The directory to search for, including a slash at the end + * @data: Pointer to the the cpio archive or a header inside + * @len: Remaining length of the cpio based on data pointer + * @offset: When a matching file is found, this is the offset to the + * beginning of the cpio. It can be used to iterate through + * the cpio to find all files inside of a directory path + * + * @return: struct cpio_data containing the address, length and + * filename (with the directory path cut off) of the found file. + * If you search for a filename and not for files in a directory, + * pass the absolute path of the filename in the cpio and make sure + * the match returned an empty filename string. + */ + +struct cpio_data __cpuinit find_cpio_data(const char *path, void *data, + size_t len, long *offset) +{ + const size_t cpio_header_len = 8*C_NFIELDS - 2; + struct cpio_data cd = { NULL, 0, "" }; + const char *p, *dptr, *nptr; + unsigned int ch[C_NFIELDS], *chp, v; + unsigned char c, x; + size_t mypathsize = strlen(path); + int i, j; + + p = data; + + while (len > cpio_header_len) { + if (!*p) { + /* All cpio headers need to be 4-byte aligned */ + p += 4; + len -= 4; + continue; + } + + j = 6; /* The magic field is only 6 characters */ + chp = ch; + for (i = C_NFIELDS; i; i--) { + v = 0; + while (j--) { + v <<= 4; + c = *p++; + + x = c - '0'; + if (x < 10) { + v += x; + continue; + } + + x = (c | 0x20) - 'a'; + if (x < 6) { + v += x + 10; + continue; + } + + goto quit; /* Invalid hexadecimal */ + } + *chp++ = v; + j = 8; /* All other fields are 8 characters */ + } + + if ((ch[C_MAGIC] - 0x070701) > 1) + goto quit; /* Invalid magic */ + + len -= cpio_header_len; + + dptr = PTR_ALIGN(p + ch[C_NAMESIZE], 4); + nptr = PTR_ALIGN(dptr + ch[C_FILESIZE], 4); + + if (nptr > p + len || dptr < p || nptr < dptr) + goto quit; /* Buffer overrun */ + + if ((ch[C_MODE] & 0170000) == 0100000 && + ch[C_NAMESIZE] >= mypathsize && + !memcmp(p, path, mypathsize)) { + *offset = (long)nptr - (long)data; + if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) { + pr_warn( + "File %s exceeding MAX_CPIO_FILE_NAME [%d]\n", + p, MAX_CPIO_FILE_NAME); + } + strlcpy(cd.name, p + mypathsize, MAX_CPIO_FILE_NAME); + + cd.data = (void *)dptr; + cd.size = ch[C_FILESIZE]; + return cd; /* Found it! */ + } + len -= (nptr - p); + p = nptr; + } + +quit: + return cd; +} diff --git a/lib/hexdump.c b/lib/hexdump.c index 6540d657dca4..3f0494c9d57a 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -227,6 +227,7 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, } EXPORT_SYMBOL(print_hex_dump); +#if !defined(CONFIG_DYNAMIC_DEBUG) /** * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params * @prefix_str: string to prefix each line with; @@ -246,4 +247,5 @@ void print_hex_dump_bytes(const char *prefix_str, int prefix_type, buf, len, true); } EXPORT_SYMBOL(print_hex_dump_bytes); -#endif +#endif /* !defined(CONFIG_DYNAMIC_DEBUG) */ +#endif /* defined(CONFIG_PRINTK) */ diff --git a/lib/interval_tree_test_main.c b/lib/interval_tree_test_main.c index b25903987f7a..245900b98c8e 100644 --- a/lib/interval_tree_test_main.c +++ b/lib/interval_tree_test_main.c @@ -30,7 +30,8 @@ static void init(void) { int i; for (i = 0; i < NODES; i++) { - u32 a = prandom32(&rnd), b = prandom32(&rnd); + u32 a = prandom_u32_state(&rnd); + u32 b = prandom_u32_state(&rnd); if (a <= b) { nodes[i].start = a; nodes[i].last = b; @@ -40,7 +41,7 @@ static void init(void) } } for (i = 0; i < SEARCHES; i++) - queries[i] = prandom32(&rnd); + queries[i] = prandom_u32_state(&rnd); } static int interval_tree_test_init(void) @@ -51,7 +52,7 @@ static int interval_tree_test_init(void) printk(KERN_ALERT "interval tree insert/remove"); - prandom32_seed(&rnd, 3141592653589793238ULL); + prandom_seed_state(&rnd, 3141592653589793238ULL); init(); time1 = get_cycles(); diff --git a/lib/kstrtox.c b/lib/kstrtox.c index c3615eab0cc3..f78ae0c0c4e2 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -104,6 +104,22 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) return 0; } +/** + * kstrtoull - convert a string to an unsigned long long + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign, but not a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Used as a replacement for the obsolete simple_strtoull. Return code must + * be checked. + */ int kstrtoull(const char *s, unsigned int base, unsigned long long *res) { if (s[0] == '+') @@ -112,6 +128,22 @@ int kstrtoull(const char *s, unsigned int base, unsigned long long *res) } EXPORT_SYMBOL(kstrtoull); +/** + * kstrtoll - convert a string to a long long + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign or a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Used as a replacement for the obsolete simple_strtoull. Return code must + * be checked. + */ int kstrtoll(const char *s, unsigned int base, long long *res) { unsigned long long tmp; @@ -168,6 +200,22 @@ int _kstrtol(const char *s, unsigned int base, long *res) } EXPORT_SYMBOL(_kstrtol); +/** + * kstrtouint - convert a string to an unsigned int + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign, but not a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Used as a replacement for the obsolete simple_strtoull. Return code must + * be checked. + */ int kstrtouint(const char *s, unsigned int base, unsigned int *res) { unsigned long long tmp; @@ -183,6 +231,22 @@ int kstrtouint(const char *s, unsigned int base, unsigned int *res) } EXPORT_SYMBOL(kstrtouint); +/** + * kstrtoint - convert a string to an int + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign or a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Used as a replacement for the obsolete simple_strtoull. Return code must + * be checked. + */ int kstrtoint(const char *s, unsigned int base, int *res) { long long tmp; diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 7aae0f2a5e0a..c3eb261a7df3 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -47,10 +47,10 @@ __setup("debug_locks_verbose=", setup_debug_locks_verbose); * Normal standalone locks, for the circular and irq-context * dependency tests: */ -static DEFINE_SPINLOCK(lock_A); -static DEFINE_SPINLOCK(lock_B); -static DEFINE_SPINLOCK(lock_C); -static DEFINE_SPINLOCK(lock_D); +static DEFINE_RAW_SPINLOCK(lock_A); +static DEFINE_RAW_SPINLOCK(lock_B); +static DEFINE_RAW_SPINLOCK(lock_C); +static DEFINE_RAW_SPINLOCK(lock_D); static DEFINE_RWLOCK(rwlock_A); static DEFINE_RWLOCK(rwlock_B); @@ -73,12 +73,12 @@ static DECLARE_RWSEM(rwsem_D); * but X* and Y* are different classes. We do this so that * we do not trigger a real lockup: */ -static DEFINE_SPINLOCK(lock_X1); -static DEFINE_SPINLOCK(lock_X2); -static DEFINE_SPINLOCK(lock_Y1); -static DEFINE_SPINLOCK(lock_Y2); -static DEFINE_SPINLOCK(lock_Z1); -static DEFINE_SPINLOCK(lock_Z2); +static DEFINE_RAW_SPINLOCK(lock_X1); +static DEFINE_RAW_SPINLOCK(lock_X2); +static DEFINE_RAW_SPINLOCK(lock_Y1); +static DEFINE_RAW_SPINLOCK(lock_Y2); +static DEFINE_RAW_SPINLOCK(lock_Z1); +static DEFINE_RAW_SPINLOCK(lock_Z2); static DEFINE_RWLOCK(rwlock_X1); static DEFINE_RWLOCK(rwlock_X2); @@ -107,10 +107,10 @@ static DECLARE_RWSEM(rwsem_Z2); */ #define INIT_CLASS_FUNC(class) \ static noinline void \ -init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \ - struct rw_semaphore *rwsem) \ +init_class_##class(raw_spinlock_t *lock, rwlock_t *rwlock, \ + struct mutex *mutex, struct rw_semaphore *rwsem)\ { \ - spin_lock_init(lock); \ + raw_spin_lock_init(lock); \ rwlock_init(rwlock); \ mutex_init(mutex); \ init_rwsem(rwsem); \ @@ -168,10 +168,10 @@ static void init_shared_classes(void) * Shortcuts for lock/unlock API variants, to keep * the testcases compact: */ -#define L(x) spin_lock(&lock_##x) -#define U(x) spin_unlock(&lock_##x) +#define L(x) raw_spin_lock(&lock_##x) +#define U(x) raw_spin_unlock(&lock_##x) #define LU(x) L(x); U(x) -#define SI(x) spin_lock_init(&lock_##x) +#define SI(x) raw_spin_lock_init(&lock_##x) #define WL(x) write_lock(&rwlock_##x) #define WU(x) write_unlock(&rwlock_##x) @@ -911,7 +911,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) #define I2(x) \ do { \ - spin_lock_init(&lock_##x); \ + raw_spin_lock_init(&lock_##x); \ rwlock_init(&rwlock_##x); \ mutex_init(&mutex_##x); \ init_rwsem(&rwsem_##x); \ diff --git a/lib/lru_cache.c b/lib/lru_cache.c index a07e7268d7ed..d71d89498943 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -44,8 +44,8 @@ MODULE_LICENSE("GPL"); } while (0) #define RETURN(x...) do { \ - clear_bit(__LC_PARANOIA, &lc->flags); \ - smp_mb__after_clear_bit(); return x ; } while (0) + clear_bit_unlock(__LC_PARANOIA, &lc->flags); \ + return x ; } while (0) /* BUG() if e is not one of the elements tracked by lc */ #define PARANOIA_LC_ELEMENT(lc, e) do { \ @@ -55,9 +55,40 @@ MODULE_LICENSE("GPL"); BUG_ON(i >= lc_->nr_elements); \ BUG_ON(lc_->lc_element[i] != e_); } while (0) + +/* We need to atomically + * - try to grab the lock (set LC_LOCKED) + * - only if there is no pending transaction + * (neither LC_DIRTY nor LC_STARVING is set) + * Because of PARANOIA_ENTRY() above abusing lc->flags as well, + * it is not sufficient to just say + * return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED); + */ +int lc_try_lock(struct lru_cache *lc) +{ + unsigned long val; + do { + val = cmpxchg(&lc->flags, 0, LC_LOCKED); + } while (unlikely (val == LC_PARANOIA)); + /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */ + return 0 == val; +#if 0 + /* Alternative approach, spin in case someone enters or leaves a + * PARANOIA_ENTRY()/RETURN() section. */ + unsigned long old, new, val; + do { + old = lc->flags & LC_PARANOIA; + new = old | LC_LOCKED; + val = cmpxchg(&lc->flags, old, new); + } while (unlikely (val == (old ^ LC_PARANOIA))); + return old == val; +#endif +} + /** * lc_create - prepares to track objects in an active set * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details + * @max_pending_changes: maximum changes to accumulate until a transaction is required * @e_count: number of elements allowed to be active simultaneously * @e_size: size of the tracked objects * @e_off: offset to the &struct lc_element member in a tracked object @@ -66,6 +97,7 @@ MODULE_LICENSE("GPL"); * or NULL on (allocation) failure. */ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned max_pending_changes, unsigned e_count, size_t e_size, size_t e_off) { struct hlist_head *slot = NULL; @@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); INIT_LIST_HEAD(&lc->free); + INIT_LIST_HEAD(&lc->to_be_changed); lc->name = name; lc->element_size = e_size; lc->element_off = e_off; lc->nr_elements = e_count; - lc->new_number = LC_FREE; + lc->max_pending_changes = max_pending_changes; lc->lc_cache = cache; lc->lc_element = element; lc->lc_slot = slot; @@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, e = p + e_off; e->lc_index = i; e->lc_number = LC_FREE; + e->lc_new_number = LC_FREE; list_add(&e->list, &lc->free); element[i] = e; } @@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc) INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); INIT_LIST_HEAD(&lc->free); + INIT_LIST_HEAD(&lc->to_be_changed); lc->used = 0; lc->hits = 0; lc->misses = 0; lc->starving = 0; - lc->dirty = 0; + lc->locked = 0; lc->changed = 0; + lc->pending_changes = 0; lc->flags = 0; - lc->changing_element = NULL; - lc->new_number = LC_FREE; memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); for (i = 0; i < lc->nr_elements; i++) { @@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc) /* re-init it */ e->lc_index = i; e->lc_number = LC_FREE; + e->lc_new_number = LC_FREE; list_add(&e->list, &lc->free); } } @@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) /* NOTE: * total calls to lc_get are * (starving + hits + misses) - * misses include "dirty" count (update from an other thread in + * misses include "locked" count (update from an other thread in * progress) and "changed", when this in fact lead to an successful * update of the cache. */ return seq_printf(seq, "\t%s: used:%u/%u " - "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", + "hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", lc->name, lc->used, lc->nr_elements, - lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); + lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); } static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) @@ -224,16 +259,8 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) } -/** - * lc_find - find element by label, if present in the hash table - * @lc: The lru_cache object - * @enr: element number - * - * Returns the pointer to an element, if the element with the requested - * "label" or element number is present in the hash table, - * or NULL if not found. Does not change the refcnt. - */ -struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) +static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, + bool include_changing) { struct hlist_node *n; struct lc_element *e; @@ -241,29 +268,48 @@ struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) BUG_ON(!lc); BUG_ON(!lc->nr_elements); hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { - if (e->lc_number == enr) + /* "about to be changed" elements, pending transaction commit, + * are hashed by their "new number". "Normal" elements have + * lc_number == lc_new_number. */ + if (e->lc_new_number != enr) + continue; + if (e->lc_new_number == e->lc_number || include_changing) return e; + break; } return NULL; } -/* returned element will be "recycled" immediately */ -static struct lc_element *lc_evict(struct lru_cache *lc) +/** + * lc_find - find element by label, if present in the hash table + * @lc: The lru_cache object + * @enr: element number + * + * Returns the pointer to an element, if the element with the requested + * "label" or element number is present in the hash table, + * or NULL if not found. Does not change the refcnt. + * Ignores elements that are "about to be used", i.e. not yet in the active + * set, but still pending transaction commit. + */ +struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) { - struct list_head *n; - struct lc_element *e; - - if (list_empty(&lc->lru)) - return NULL; - - n = lc->lru.prev; - e = list_entry(n, struct lc_element, list); - - PARANOIA_LC_ELEMENT(lc, e); + return __lc_find(lc, enr, 0); +} - list_del(&e->list); - hlist_del(&e->colision); - return e; +/** + * lc_is_used - find element by label + * @lc: The lru_cache object + * @enr: element number + * + * Returns true, if the element with the requested "label" or element number is + * present in the hash table, and is used (refcnt > 0). + * Also finds elements that are not _currently_ used but only "about to be + * used", i.e. on the "to_be_changed" list, pending transaction commit. + */ +bool lc_is_used(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e = __lc_find(lc, enr, 1); + return e && e->refcnt; } /** @@ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e) PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt); - e->lc_number = LC_FREE; + e->lc_number = e->lc_new_number = LC_FREE; hlist_del_init(&e->colision); list_move(&e->list, &lc->free); RETURN(); } -static struct lc_element *lc_get_unused_element(struct lru_cache *lc) +static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number) { struct list_head *n; + struct lc_element *e; + + if (!list_empty(&lc->free)) + n = lc->free.next; + else if (!list_empty(&lc->lru)) + n = lc->lru.prev; + else + return NULL; + + e = list_entry(n, struct lc_element, list); + PARANOIA_LC_ELEMENT(lc, e); - if (list_empty(&lc->free)) - return lc_evict(lc); + e->lc_new_number = new_number; + if (!hlist_unhashed(&e->colision)) + __hlist_del(&e->colision); + hlist_add_head(&e->colision, lc_hash_slot(lc, new_number)); + list_move(&e->list, &lc->to_be_changed); - n = lc->free.next; - list_del(n); - return list_entry(n, struct lc_element, list); + return e; } static int lc_unused_element_available(struct lru_cache *lc) @@ -308,45 +366,7 @@ static int lc_unused_element_available(struct lru_cache *lc) return 0; } - -/** - * lc_get - get element by label, maybe change the active set - * @lc: the lru cache to operate on - * @enr: the label to look up - * - * Finds an element in the cache, increases its usage count, - * "touches" and returns it. - * - * In case the requested number is not present, it needs to be added to the - * cache. Therefore it is possible that an other element becomes evicted from - * the cache. In either case, the user is notified so he is able to e.g. keep - * a persistent log of the cache changes, and therefore the objects in use. - * - * Return values: - * NULL - * The cache was marked %LC_STARVING, - * or the requested label was not in the active set - * and a changing transaction is still pending (@lc was marked %LC_DIRTY). - * Or no unused or free element could be recycled (@lc will be marked as - * %LC_STARVING, blocking further lc_get() operations). - * - * pointer to the element with the REQUESTED element number. - * In this case, it can be used right away - * - * pointer to an UNUSED element with some different element number, - * where that different number may also be %LC_FREE. - * - * In this case, the cache is marked %LC_DIRTY (blocking further changes), - * and the returned element pointer is removed from the lru list and - * hash collision chains. The user now should do whatever housekeeping - * is necessary. - * Then he must call lc_changed(lc,element_pointer), to finish - * the change. - * - * NOTE: The user needs to check the lc_number on EACH use, so he recognizes - * any cache set change. - */ -struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) +static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool may_change) { struct lc_element *e; @@ -356,8 +376,12 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) RETURN(NULL); } - e = lc_find(lc, enr); - if (e) { + e = __lc_find(lc, enr, 1); + /* if lc_new_number != lc_number, + * this enr is currently being pulled in already, + * and will be available once the pending transaction + * has been committed. */ + if (e && e->lc_new_number == e->lc_number) { ++lc->hits; if (e->refcnt++ == 0) lc->used++; @@ -366,6 +390,26 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) } ++lc->misses; + if (!may_change) + RETURN(NULL); + + /* It has been found above, but on the "to_be_changed" list, not yet + * committed. Don't pull it in twice, wait for the transaction, then + * try again */ + if (e) + RETURN(NULL); + + /* To avoid races with lc_try_lock(), first, mark us dirty + * (using test_and_set_bit, as it implies memory barriers), ... */ + test_and_set_bit(__LC_DIRTY, &lc->flags); + + /* ... only then check if it is locked anyways. If lc_unlock clears + * the dirty bit again, that's not a problem, we will come here again. + */ + if (test_bit(__LC_LOCKED, &lc->flags)) { + ++lc->locked; + RETURN(NULL); + } /* In case there is nothing available and we can not kick out * the LRU element, we have to wait ... @@ -375,71 +419,109 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) RETURN(NULL); } - /* it was not present in the active set. - * we are going to recycle an unused (or even "free") element. - * user may need to commit a transaction to record that change. - * we serialize on flags & TF_DIRTY */ - if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { - ++lc->dirty; + /* It was not present in the active set. We are going to recycle an + * unused (or even "free") element, but we won't accumulate more than + * max_pending_changes changes. */ + if (lc->pending_changes >= lc->max_pending_changes) RETURN(NULL); - } - e = lc_get_unused_element(lc); + e = lc_prepare_for_change(lc, enr); BUG_ON(!e); clear_bit(__LC_STARVING, &lc->flags); BUG_ON(++e->refcnt != 1); lc->used++; - - lc->changing_element = e; - lc->new_number = enr; + lc->pending_changes++; RETURN(e); } -/* similar to lc_get, - * but only gets a new reference on an existing element. - * you either get the requested element, or NULL. - * will be consolidated into one function. +/** + * lc_get - get element by label, maybe change the active set + * @lc: the lru cache to operate on + * @enr: the label to look up + * + * Finds an element in the cache, increases its usage count, + * "touches" and returns it. + * + * In case the requested number is not present, it needs to be added to the + * cache. Therefore it is possible that an other element becomes evicted from + * the cache. In either case, the user is notified so he is able to e.g. keep + * a persistent log of the cache changes, and therefore the objects in use. + * + * Return values: + * NULL + * The cache was marked %LC_STARVING, + * or the requested label was not in the active set + * and a changing transaction is still pending (@lc was marked %LC_DIRTY). + * Or no unused or free element could be recycled (@lc will be marked as + * %LC_STARVING, blocking further lc_get() operations). + * + * pointer to the element with the REQUESTED element number. + * In this case, it can be used right away + * + * pointer to an UNUSED element with some different element number, + * where that different number may also be %LC_FREE. + * + * In this case, the cache is marked %LC_DIRTY, + * so lc_try_lock() will no longer succeed. + * The returned element pointer is moved to the "to_be_changed" list, + * and registered with the new element number on the hash collision chains, + * so it is possible to pick it up from lc_is_used(). + * Up to "max_pending_changes" (see lc_create()) can be accumulated. + * The user now should do whatever housekeeping is necessary, + * typically serialize on lc_try_lock_for_transaction(), then call + * lc_committed(lc) and lc_unlock(), to finish the change. + * + * NOTE: The user needs to check the lc_number on EACH use, so he recognizes + * any cache set change. */ -struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) +struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) { - struct lc_element *e; - - PARANOIA_ENTRY(); - if (lc->flags & LC_STARVING) { - ++lc->starving; - RETURN(NULL); - } + return __lc_get(lc, enr, 1); +} - e = lc_find(lc, enr); - if (e) { - ++lc->hits; - if (e->refcnt++ == 0) - lc->used++; - list_move(&e->list, &lc->in_use); /* Not evictable... */ - } - RETURN(e); +/** + * lc_try_get - get element by label, if present; do not change the active set + * @lc: the lru cache to operate on + * @enr: the label to look up + * + * Finds an element in the cache, increases its usage count, + * "touches" and returns it. + * + * Return values: + * NULL + * The cache was marked %LC_STARVING, + * or the requested label was not in the active set + * + * pointer to the element with the REQUESTED element number. + * In this case, it can be used right away + */ +struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) +{ + return __lc_get(lc, enr, 0); } /** - * lc_changed - tell @lc that the change has been recorded + * lc_committed - tell @lc that pending changes have been recorded * @lc: the lru cache to operate on - * @e: the element pending label change + * + * User is expected to serialize on explicit lc_try_lock_for_transaction() + * before the transaction is started, and later needs to lc_unlock() explicitly + * as well. */ -void lc_changed(struct lru_cache *lc, struct lc_element *e) +void lc_committed(struct lru_cache *lc) { + struct lc_element *e, *tmp; + PARANOIA_ENTRY(); - BUG_ON(e != lc->changing_element); - PARANOIA_LC_ELEMENT(lc, e); - ++lc->changed; - e->lc_number = lc->new_number; - list_add(&e->list, &lc->in_use); - hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); - lc->changing_element = NULL; - lc->new_number = LC_FREE; - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) { + /* count number of changes, not number of transactions */ + ++lc->changed; + e->lc_number = e->lc_new_number; + list_move(&e->list, &lc->in_use); + } + lc->pending_changes = 0; RETURN(); } @@ -458,13 +540,12 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) PARANOIA_ENTRY(); PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt == 0); - BUG_ON(e == lc->changing_element); + BUG_ON(e->lc_number != e->lc_new_number); if (--e->refcnt == 0) { /* move it to the front of LRU. */ list_move(&e->list, &lc->lru); lc->used--; - clear_bit(__LC_STARVING, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_STARVING, &lc->flags); } RETURN(e->refcnt); } @@ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) void lc_set(struct lru_cache *lc, unsigned int enr, int index) { struct lc_element *e; + struct list_head *lh; if (index < 0 || index >= lc->nr_elements) return; e = lc_element_by_index(lc, index); - e->lc_number = enr; + BUG_ON(e->lc_number != e->lc_new_number); + BUG_ON(e->refcnt != 0); + e->lc_number = e->lc_new_number = enr; hlist_del_init(&e->colision); - hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); - list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); + if (enr == LC_FREE) + lh = &lc->free; + else { + hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); + lh = &lc->lru; + } + list_move(&e->list, lh); } /** @@ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get); EXPORT_SYMBOL(lc_find); EXPORT_SYMBOL(lc_get); EXPORT_SYMBOL(lc_put); -EXPORT_SYMBOL(lc_changed); +EXPORT_SYMBOL(lc_committed); EXPORT_SYMBOL(lc_element_by_index); EXPORT_SYMBOL(lc_index_of); EXPORT_SYMBOL(lc_seq_printf_stats); EXPORT_SYMBOL(lc_seq_dump_details); +EXPORT_SYMBOL(lc_try_lock); +EXPORT_SYMBOL(lc_is_used); diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h index 77adcf6bc257..60cf765628e9 100644 --- a/lib/mpi/mpi-internal.h +++ b/lib/mpi/mpi-internal.h @@ -65,10 +65,6 @@ typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */ typedef int mpi_size_t; /* (must be a signed type) */ -#define ABS(x) (x >= 0 ? x : -x) -#define MIN(l, o) ((l) < (o) ? (l) : (o)) -#define MAX(h, i) ((h) > (i) ? (h) : (i)) - static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) { if (a->alloced < b) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 3962b7f7fe3f..5f9c44cdf1f5 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -52,7 +52,7 @@ MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes) else nbits = 0; - nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB; + nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); if (!val) return NULL; @@ -96,8 +96,8 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) buffer += 2; nread = 2; - nbytes = (nbits + 7) / 8; - nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB; + nbytes = DIV_ROUND_UP(nbits, 8); + nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); if (!val) return NULL; @@ -193,7 +193,7 @@ int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign) int nlimbs; int i; - nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB; + nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); if (RESIZE_IF_NEEDED(a, nlimbs) < 0) return -ENOMEM; a->sign = sign; diff --git a/lib/pSeries-reconfig-notifier-error-inject.c b/lib/of-reconfig-notifier-error-inject.c index 7f7c98dcd5c4..8dc79861758a 100644 --- a/lib/pSeries-reconfig-notifier-error-inject.c +++ b/lib/of-reconfig-notifier-error-inject.c @@ -1,20 +1,20 @@ #include <linux/kernel.h> #include <linux/module.h> - -#include <asm/pSeries_reconfig.h> +#include <linux/of.h> #include "notifier-error-inject.h" static int priority; module_param(priority, int, 0); -MODULE_PARM_DESC(priority, "specify pSeries reconfig notifier priority"); +MODULE_PARM_DESC(priority, "specify OF reconfig notifier priority"); static struct notifier_err_inject reconfig_err_inject = { .actions = { - { NOTIFIER_ERR_INJECT_ACTION(PSERIES_RECONFIG_ADD) }, - { NOTIFIER_ERR_INJECT_ACTION(PSERIES_RECONFIG_REMOVE) }, - { NOTIFIER_ERR_INJECT_ACTION(PSERIES_DRCONF_MEM_ADD) }, - { NOTIFIER_ERR_INJECT_ACTION(PSERIES_DRCONF_MEM_REMOVE) }, + { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_ATTACH_NODE) }, + { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_DETACH_NODE) }, + { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_ADD_PROPERTY) }, + { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_REMOVE_PROPERTY) }, + { NOTIFIER_ERR_INJECT_ACTION(OF_RECONFIG_UPDATE_PROPERTY) }, {} } }; @@ -25,12 +25,12 @@ static int err_inject_init(void) { int err; - dir = notifier_err_inject_init("pSeries-reconfig", + dir = notifier_err_inject_init("OF-reconfig", notifier_err_inject_dir, &reconfig_err_inject, priority); if (IS_ERR(dir)) return PTR_ERR(dir); - err = pSeries_reconfig_notifier_register(&reconfig_err_inject.nb); + err = of_reconfig_notifier_register(&reconfig_err_inject.nb); if (err) debugfs_remove_recursive(dir); @@ -39,13 +39,13 @@ static int err_inject_init(void) static void err_inject_exit(void) { - pSeries_reconfig_notifier_unregister(&reconfig_err_inject.nb); + of_reconfig_notifier_unregister(&reconfig_err_inject.nb); debugfs_remove_recursive(dir); } module_init(err_inject_init); module_exit(err_inject_exit); -MODULE_DESCRIPTION("pSeries reconfig notifier error injection module"); +MODULE_DESCRIPTION("OF reconfig notifier error injection module"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>"); diff --git a/lib/parser.c b/lib/parser.c index 52cfa69f73df..807b2aaa33fa 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -157,7 +157,7 @@ static int match_number(substring_t *s, int *result, int base) * * Description: Attempts to parse the &substring_t @s as a decimal integer. On * success, sets @result to the integer represented by the string and returns 0. - * Returns either -ENOMEM or -EINVAL on failure. + * Returns -ENOMEM, -EINVAL, or -ERANGE on failure. */ int match_int(substring_t *s, int *result) { @@ -171,7 +171,7 @@ int match_int(substring_t *s, int *result) * * Description: Attempts to parse the &substring_t @s as an octal integer. On * success, sets @result to the integer represented by the string and returns - * 0. Returns either -ENOMEM or -EINVAL on failure. + * 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure. */ int match_octal(substring_t *s, int *result) { @@ -185,7 +185,7 @@ int match_octal(substring_t *s, int *result) * * Description: Attempts to parse the &substring_t @s as a hexadecimal integer. * On success, sets @result to the integer represented by the string and - * returns 0. Returns either -ENOMEM or -EINVAL on failure. + * returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure. */ int match_hex(substring_t *s, int *result) { diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c new file mode 100644 index 000000000000..652a8ee8efe9 --- /dev/null +++ b/lib/percpu-rwsem.c @@ -0,0 +1,165 @@ +#include <linux/atomic.h> +#include <linux/rwsem.h> +#include <linux/percpu.h> +#include <linux/wait.h> +#include <linux/lockdep.h> +#include <linux/percpu-rwsem.h> +#include <linux/rcupdate.h> +#include <linux/sched.h> +#include <linux/errno.h> + +int __percpu_init_rwsem(struct percpu_rw_semaphore *brw, + const char *name, struct lock_class_key *rwsem_key) +{ + brw->fast_read_ctr = alloc_percpu(int); + if (unlikely(!brw->fast_read_ctr)) + return -ENOMEM; + + /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ + __init_rwsem(&brw->rw_sem, name, rwsem_key); + atomic_set(&brw->write_ctr, 0); + atomic_set(&brw->slow_read_ctr, 0); + init_waitqueue_head(&brw->write_waitq); + return 0; +} + +void percpu_free_rwsem(struct percpu_rw_semaphore *brw) +{ + free_percpu(brw->fast_read_ctr); + brw->fast_read_ctr = NULL; /* catch use after free bugs */ +} + +/* + * This is the fast-path for down_read/up_read, it only needs to ensure + * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the + * fast per-cpu counter. The writer uses synchronize_sched_expedited() to + * serialize with the preempt-disabled section below. + * + * The nontrivial part is that we should guarantee acquire/release semantics + * in case when + * + * R_W: down_write() comes after up_read(), the writer should see all + * changes done by the reader + * or + * W_R: down_read() comes after up_write(), the reader should see all + * changes done by the writer + * + * If this helper fails the callers rely on the normal rw_semaphore and + * atomic_dec_and_test(), so in this case we have the necessary barriers. + * + * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or + * __this_cpu_add() below can be reordered with any LOAD/STORE done by the + * reader inside the critical section. See the comments in down_write and + * up_write below. + */ +static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) +{ + bool success = false; + + preempt_disable(); + if (likely(!atomic_read(&brw->write_ctr))) { + __this_cpu_add(*brw->fast_read_ctr, val); + success = true; + } + preempt_enable(); + + return success; +} + +/* + * Like the normal down_read() this is not recursive, the writer can + * come after the first percpu_down_read() and create the deadlock. + * + * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep, + * percpu_up_read() does rwsem_release(). This pairs with the usage + * of ->rw_sem in percpu_down/up_write(). + */ +void percpu_down_read(struct percpu_rw_semaphore *brw) +{ + might_sleep(); + if (likely(update_fast_ctr(brw, +1))) { + rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_); + return; + } + + down_read(&brw->rw_sem); + atomic_inc(&brw->slow_read_ctr); + /* avoid up_read()->rwsem_release() */ + __up_read(&brw->rw_sem); +} + +void percpu_up_read(struct percpu_rw_semaphore *brw) +{ + rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_); + + if (likely(update_fast_ctr(brw, -1))) + return; + + /* false-positive is possible but harmless */ + if (atomic_dec_and_test(&brw->slow_read_ctr)) + wake_up_all(&brw->write_waitq); +} + +static int clear_fast_ctr(struct percpu_rw_semaphore *brw) +{ + unsigned int sum = 0; + int cpu; + + for_each_possible_cpu(cpu) { + sum += per_cpu(*brw->fast_read_ctr, cpu); + per_cpu(*brw->fast_read_ctr, cpu) = 0; + } + + return sum; +} + +/* + * A writer increments ->write_ctr to force the readers to switch to the + * slow mode, note the atomic_read() check in update_fast_ctr(). + * + * After that the readers can only inc/dec the slow ->slow_read_ctr counter, + * ->fast_read_ctr is stable. Once the writer moves its sum into the slow + * counter it represents the number of active readers. + * + * Finally the writer takes ->rw_sem for writing and blocks the new readers, + * then waits until the slow counter becomes zero. + */ +void percpu_down_write(struct percpu_rw_semaphore *brw) +{ + /* tell update_fast_ctr() there is a pending writer */ + atomic_inc(&brw->write_ctr); + /* + * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read + * so that update_fast_ctr() can't succeed. + * + * 2. Ensures we see the result of every previous this_cpu_add() in + * update_fast_ctr(). + * + * 3. Ensures that if any reader has exited its critical section via + * fast-path, it executes a full memory barrier before we return. + * See R_W case in the comment above update_fast_ctr(). + */ + synchronize_sched_expedited(); + + /* exclude other writers, and block the new readers completely */ + down_write(&brw->rw_sem); + + /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */ + atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr); + + /* wait for all readers to complete their percpu_up_read() */ + wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr)); +} + +void percpu_up_write(struct percpu_rw_semaphore *brw) +{ + /* release the lock, but the readers can't use the fast-path */ + up_write(&brw->rw_sem); + /* + * Insert the barrier before the next fast-path in down_read, + * see W_R case in the comment above update_fast_ctr(). + */ + synchronize_sched_expedited(); + /* the last writer unblocks update_fast_ctr() */ + atomic_dec(&brw->write_ctr); +} diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index de06dfe165b8..9f7c184725d7 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -1,8 +1,11 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o -raid6_pq-y += algos.o recov.o recov_ssse3.o tables.o int1.o int2.o int4.o \ - int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \ - altivec8.o mmx.o sse1.o sse2.o +raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ + int8.o int16.o int32.o + +raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o +raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o + hostprogs-y += mktables quiet_cmd_unroll = UNROLL $@ diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 589f5f50ad2e..6d7316fe9f30 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -45,11 +45,20 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_sse1x2, &raid6_sse2x1, &raid6_sse2x2, +#ifdef CONFIG_AS_AVX2 + &raid6_avx2x1, + &raid6_avx2x2, +#endif #endif #if defined(__x86_64__) && !defined(__arch_um__) &raid6_sse2x1, &raid6_sse2x2, &raid6_sse2x4, +#ifdef CONFIG_AS_AVX2 + &raid6_avx2x1, + &raid6_avx2x2, + &raid6_avx2x4, +#endif #endif #ifdef CONFIG_ALTIVEC &raid6_altivec1, @@ -72,6 +81,9 @@ EXPORT_SYMBOL_GPL(raid6_datap_recov); const struct raid6_recov_calls *const raid6_recov_algos[] = { #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) +#ifdef CONFIG_AS_AVX2 + &raid6_recov_avx2, +#endif &raid6_recov_ssse3, #endif &raid6_recov_intx1, diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc index b71012b756f4..7cc12b532e95 100644 --- a/lib/raid6/altivec.uc +++ b/lib/raid6/altivec.uc @@ -24,13 +24,10 @@ #include <linux/raid/pq.h> -#ifdef CONFIG_ALTIVEC - #include <altivec.h> #ifdef __KERNEL__ # include <asm/cputable.h> # include <asm/switch_to.h> -#endif /* * This is the C data type to use. We use a vector of diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c new file mode 100644 index 000000000000..bc3b1dd436eb --- /dev/null +++ b/lib/raid6/avx2.c @@ -0,0 +1,251 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 2012 Intel Corporation + * Author: Yuanhan Liu <yuanhan.liu@linux.intel.com> + * + * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * AVX2 implementation of RAID-6 syndrome functions + * + */ + +#ifdef CONFIG_AS_AVX2 + +#include <linux/raid/pq.h> +#include "x86.h" + +static const struct raid6_avx2_constants { + u64 x1d[4]; +} raid6_avx2_constants __aligned(32) = { + { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, + 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,}, +}; + +static int raid6_have_avx2(void) +{ + return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX); +} + +/* + * Plain AVX2 implementation + */ +static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); + asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* Zero temp */ + + for (d = 0; d < bytes; d += 32) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */ + asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); + asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */ + asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d])); + for (z = z0-2; z >= 0; z--) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5"); + asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); + asm volatile("vpand %ymm0,%ymm5,%ymm5"); + asm volatile("vpxor %ymm5,%ymm4,%ymm4"); + asm volatile("vpxor %ymm6,%ymm2,%ymm2"); + asm volatile("vpxor %ymm6,%ymm4,%ymm4"); + asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d])); + } + asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5"); + asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); + asm volatile("vpand %ymm0,%ymm5,%ymm5"); + asm volatile("vpxor %ymm5,%ymm4,%ymm4"); + asm volatile("vpxor %ymm6,%ymm2,%ymm2"); + asm volatile("vpxor %ymm6,%ymm4,%ymm4"); + + asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); + asm volatile("vpxor %ymm2,%ymm2,%ymm2"); + asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); + asm volatile("vpxor %ymm4,%ymm4,%ymm4"); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_avx2x1 = { + raid6_avx21_gen_syndrome, + raid6_have_avx2, + "avx2x1", + 1 /* Has cache hints */ +}; + +/* + * Unrolled-by-2 AVX2 implementation + */ +static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); + asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */ + + /* We uniformly assume a single prefetch covers at least 32 bytes */ + for (d = 0; d < bytes; d += 64) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32])); + asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */ + asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */ + asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */ + asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */ + for (z = z0-1; z >= 0; z--) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32])); + asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5"); + asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7"); + asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); + asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); + asm volatile("vpand %ymm0,%ymm5,%ymm5"); + asm volatile("vpand %ymm0,%ymm7,%ymm7"); + asm volatile("vpxor %ymm5,%ymm4,%ymm4"); + asm volatile("vpxor %ymm7,%ymm6,%ymm6"); + asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d])); + asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32])); + asm volatile("vpxor %ymm5,%ymm2,%ymm2"); + asm volatile("vpxor %ymm7,%ymm3,%ymm3"); + asm volatile("vpxor %ymm5,%ymm4,%ymm4"); + asm volatile("vpxor %ymm7,%ymm6,%ymm6"); + } + asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); + asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32])); + asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); + asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_avx2x2 = { + raid6_avx22_gen_syndrome, + raid6_have_avx2, + "avx2x2", + 1 /* Has cache hints */ +}; + +#ifdef CONFIG_X86_64 + +/* + * Unrolled-by-4 AVX2 implementation + */ +static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); + asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */ + asm volatile("vpxor %ymm2,%ymm2,%ymm2"); /* P[0] */ + asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* P[1] */ + asm volatile("vpxor %ymm4,%ymm4,%ymm4"); /* Q[0] */ + asm volatile("vpxor %ymm6,%ymm6,%ymm6"); /* Q[1] */ + asm volatile("vpxor %ymm10,%ymm10,%ymm10"); /* P[2] */ + asm volatile("vpxor %ymm11,%ymm11,%ymm11"); /* P[3] */ + asm volatile("vpxor %ymm12,%ymm12,%ymm12"); /* Q[2] */ + asm volatile("vpxor %ymm14,%ymm14,%ymm14"); /* Q[3] */ + + for (d = 0; d < bytes; d += 128) { + for (z = z0; z >= 0; z--) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32])); + asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64])); + asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96])); + asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5"); + asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7"); + asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13"); + asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15"); + asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); + asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); + asm volatile("vpaddb %ymm12,%ymm12,%ymm12"); + asm volatile("vpaddb %ymm14,%ymm14,%ymm14"); + asm volatile("vpand %ymm0,%ymm5,%ymm5"); + asm volatile("vpand %ymm0,%ymm7,%ymm7"); + asm volatile("vpand %ymm0,%ymm13,%ymm13"); + asm volatile("vpand %ymm0,%ymm15,%ymm15"); + asm volatile("vpxor %ymm5,%ymm4,%ymm4"); + asm volatile("vpxor %ymm7,%ymm6,%ymm6"); + asm volatile("vpxor %ymm13,%ymm12,%ymm12"); + asm volatile("vpxor %ymm15,%ymm14,%ymm14"); + asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d])); + asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32])); + asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64])); + asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96])); + asm volatile("vpxor %ymm5,%ymm2,%ymm2"); + asm volatile("vpxor %ymm7,%ymm3,%ymm3"); + asm volatile("vpxor %ymm13,%ymm10,%ymm10"); + asm volatile("vpxor %ymm15,%ymm11,%ymm11"); + asm volatile("vpxor %ymm5,%ymm4,%ymm4"); + asm volatile("vpxor %ymm7,%ymm6,%ymm6"); + asm volatile("vpxor %ymm13,%ymm12,%ymm12"); + asm volatile("vpxor %ymm15,%ymm14,%ymm14"); + } + asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); + asm volatile("vpxor %ymm2,%ymm2,%ymm2"); + asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32])); + asm volatile("vpxor %ymm3,%ymm3,%ymm3"); + asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64])); + asm volatile("vpxor %ymm10,%ymm10,%ymm10"); + asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96])); + asm volatile("vpxor %ymm11,%ymm11,%ymm11"); + asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); + asm volatile("vpxor %ymm4,%ymm4,%ymm4"); + asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32])); + asm volatile("vpxor %ymm6,%ymm6,%ymm6"); + asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64])); + asm volatile("vpxor %ymm12,%ymm12,%ymm12"); + asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96])); + asm volatile("vpxor %ymm14,%ymm14,%ymm14"); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_avx2x4 = { + raid6_avx24_gen_syndrome, + raid6_have_avx2, + "avx2x4", + 1 /* Has cache hints */ +}; +#endif + +#endif /* CONFIG_AS_AVX2 */ diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c index 279347f23094..590c71c9e200 100644 --- a/lib/raid6/mmx.c +++ b/lib/raid6/mmx.c @@ -16,7 +16,7 @@ * MMX implementation of RAID-6 syndrome functions */ -#if defined(__i386__) && !defined(__arch_um__) +#ifdef CONFIG_X86_32 #include <linux/raid/pq.h> #include "x86.h" diff --git a/lib/raid6/recov_avx2.c b/lib/raid6/recov_avx2.c new file mode 100644 index 000000000000..e1eea433a493 --- /dev/null +++ b/lib/raid6/recov_avx2.c @@ -0,0 +1,323 @@ +/* + * Copyright (C) 2012 Intel Corporation + * Author: Jim Kukunas <james.t.kukunas@linux.intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#if CONFIG_AS_AVX2 + +#include <linux/raid/pq.h> +#include "x86.h" + +static int raid6_has_avx2(void) +{ + return boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX); +} + +static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila, + int failb, void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + const u8 x0f = 0x0f; + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data pages + Use the dead data pages as temporary storage for + delta p and delta q */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks-2] = p; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ + raid6_gfexp[failb]]]; + + kernel_fpu_begin(); + + /* ymm0 = x0f[16] */ + asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f)); + + while (bytes) { +#ifdef CONFIG_X86_64 + asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0])); + asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32])); + asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0])); + asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32])); + asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0])); + asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32])); + asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0])); + asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32])); + + /* + * 1 = dq[0] ^ q[0] + * 9 = dq[32] ^ q[32] + * 0 = dp[0] ^ p[0] + * 8 = dp[32] ^ p[32] + */ + + asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0])); + asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16])); + + asm volatile("vpsraw $4, %ymm1, %ymm3"); + asm volatile("vpsraw $4, %ymm9, %ymm12"); + asm volatile("vpand %ymm7, %ymm1, %ymm1"); + asm volatile("vpand %ymm7, %ymm9, %ymm9"); + asm volatile("vpand %ymm7, %ymm3, %ymm3"); + asm volatile("vpand %ymm7, %ymm12, %ymm12"); + asm volatile("vpshufb %ymm9, %ymm4, %ymm14"); + asm volatile("vpshufb %ymm1, %ymm4, %ymm4"); + asm volatile("vpshufb %ymm12, %ymm5, %ymm15"); + asm volatile("vpshufb %ymm3, %ymm5, %ymm5"); + asm volatile("vpxor %ymm14, %ymm15, %ymm15"); + asm volatile("vpxor %ymm4, %ymm5, %ymm5"); + + /* + * 5 = qx[0] + * 15 = qx[32] + */ + + asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0])); + asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16])); + asm volatile("vpsraw $4, %ymm0, %ymm2"); + asm volatile("vpsraw $4, %ymm8, %ymm6"); + asm volatile("vpand %ymm7, %ymm0, %ymm3"); + asm volatile("vpand %ymm7, %ymm8, %ymm14"); + asm volatile("vpand %ymm7, %ymm2, %ymm2"); + asm volatile("vpand %ymm7, %ymm6, %ymm6"); + asm volatile("vpshufb %ymm14, %ymm4, %ymm12"); + asm volatile("vpshufb %ymm3, %ymm4, %ymm4"); + asm volatile("vpshufb %ymm6, %ymm1, %ymm13"); + asm volatile("vpshufb %ymm2, %ymm1, %ymm1"); + asm volatile("vpxor %ymm4, %ymm1, %ymm1"); + asm volatile("vpxor %ymm12, %ymm13, %ymm13"); + + /* + * 1 = pbmul[px[0]] + * 13 = pbmul[px[32]] + */ + asm volatile("vpxor %ymm5, %ymm1, %ymm1"); + asm volatile("vpxor %ymm15, %ymm13, %ymm13"); + + /* + * 1 = db = DQ + * 13 = db[32] = DQ[32] + */ + asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0])); + asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32])); + asm volatile("vpxor %ymm1, %ymm0, %ymm0"); + asm volatile("vpxor %ymm13, %ymm8, %ymm8"); + + asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0])); + asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32])); + + bytes -= 64; + p += 64; + q += 64; + dp += 64; + dq += 64; +#else + asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q)); + asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p)); + asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq)); + asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp)); + + /* 1 = dq ^ q; 0 = dp ^ p */ + + asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0])); + asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16])); + + /* + * 1 = dq ^ q + * 3 = dq ^ p >> 4 + */ + asm volatile("vpsraw $4, %ymm1, %ymm3"); + asm volatile("vpand %ymm7, %ymm1, %ymm1"); + asm volatile("vpand %ymm7, %ymm3, %ymm3"); + asm volatile("vpshufb %ymm1, %ymm4, %ymm4"); + asm volatile("vpshufb %ymm3, %ymm5, %ymm5"); + asm volatile("vpxor %ymm4, %ymm5, %ymm5"); + + /* 5 = qx */ + + asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0])); + asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16])); + + asm volatile("vpsraw $4, %ymm0, %ymm2"); + asm volatile("vpand %ymm7, %ymm0, %ymm3"); + asm volatile("vpand %ymm7, %ymm2, %ymm2"); + asm volatile("vpshufb %ymm3, %ymm4, %ymm4"); + asm volatile("vpshufb %ymm2, %ymm1, %ymm1"); + asm volatile("vpxor %ymm4, %ymm1, %ymm1"); + + /* 1 = pbmul[px] */ + asm volatile("vpxor %ymm5, %ymm1, %ymm1"); + /* 1 = db = DQ */ + asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0])); + + asm volatile("vpxor %ymm1, %ymm0, %ymm0"); + asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0])); + + bytes -= 32; + p += 32; + q += 32; + dp += 32; + dq += 32; +#endif + } + + kernel_fpu_end(); +} + +static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila, + void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + const u8 x0f = 0x0f; + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data page + Use the dead data page as temporary storage for delta q */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_fpu_begin(); + + asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f)); + + while (bytes) { +#ifdef CONFIG_X86_64 + asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0])); + asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32])); + asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0])); + asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32])); + + /* + * 3 = q[0] ^ dq[0] + * 8 = q[32] ^ dq[32] + */ + asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0])); + asm volatile("vmovapd %ymm0, %ymm13"); + asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16])); + asm volatile("vmovapd %ymm1, %ymm14"); + + asm volatile("vpsraw $4, %ymm3, %ymm6"); + asm volatile("vpsraw $4, %ymm8, %ymm12"); + asm volatile("vpand %ymm7, %ymm3, %ymm3"); + asm volatile("vpand %ymm7, %ymm8, %ymm8"); + asm volatile("vpand %ymm7, %ymm6, %ymm6"); + asm volatile("vpand %ymm7, %ymm12, %ymm12"); + asm volatile("vpshufb %ymm3, %ymm0, %ymm0"); + asm volatile("vpshufb %ymm8, %ymm13, %ymm13"); + asm volatile("vpshufb %ymm6, %ymm1, %ymm1"); + asm volatile("vpshufb %ymm12, %ymm14, %ymm14"); + asm volatile("vpxor %ymm0, %ymm1, %ymm1"); + asm volatile("vpxor %ymm13, %ymm14, %ymm14"); + + /* + * 1 = qmul[q[0] ^ dq[0]] + * 14 = qmul[q[32] ^ dq[32]] + */ + asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0])); + asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32])); + asm volatile("vpxor %ymm1, %ymm2, %ymm2"); + asm volatile("vpxor %ymm14, %ymm12, %ymm12"); + + /* + * 2 = p[0] ^ qmul[q[0] ^ dq[0]] + * 12 = p[32] ^ qmul[q[32] ^ dq[32]] + */ + + asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0])); + asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32])); + asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0])); + asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32])); + + bytes -= 64; + p += 64; + q += 64; + dq += 64; +#else + asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0])); + asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0])); + + /* 3 = q ^ dq */ + + asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0])); + asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16])); + + asm volatile("vpsraw $4, %ymm3, %ymm6"); + asm volatile("vpand %ymm7, %ymm3, %ymm3"); + asm volatile("vpand %ymm7, %ymm6, %ymm6"); + asm volatile("vpshufb %ymm3, %ymm0, %ymm0"); + asm volatile("vpshufb %ymm6, %ymm1, %ymm1"); + asm volatile("vpxor %ymm0, %ymm1, %ymm1"); + + /* 1 = qmul[q ^ dq] */ + + asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0])); + asm volatile("vpxor %ymm1, %ymm2, %ymm2"); + + /* 2 = p ^ qmul[q ^ dq] */ + + asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0])); + asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0])); + + bytes -= 32; + p += 32; + q += 32; + dq += 32; +#endif + } + + kernel_fpu_end(); +} + +const struct raid6_recov_calls raid6_recov_avx2 = { + .data2 = raid6_2data_recov_avx2, + .datap = raid6_datap_recov_avx2, + .valid = raid6_has_avx2, +#ifdef CONFIG_X86_64 + .name = "avx2x2", +#else + .name = "avx2x1", +#endif + .priority = 2, +}; + +#else +#warning "your version of binutils lacks AVX2 support" +#endif diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c index ecb710c0b4d9..a9168328f03b 100644 --- a/lib/raid6/recov_ssse3.c +++ b/lib/raid6/recov_ssse3.c @@ -7,8 +7,6 @@ * of the License. */ -#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) - #include <linux/raid/pq.h> #include "x86.h" @@ -332,5 +330,3 @@ const struct raid6_recov_calls raid6_recov_ssse3 = { #endif .priority = 1, }; - -#endif diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c index 10dd91948c07..f76297139445 100644 --- a/lib/raid6/sse1.c +++ b/lib/raid6/sse1.c @@ -21,7 +21,7 @@ * worthwhile as a separate implementation. */ -#if defined(__i386__) && !defined(__arch_um__) +#ifdef CONFIG_X86_32 #include <linux/raid/pq.h> #include "x86.h" diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c index bc2d57daa589..85b82c85f28e 100644 --- a/lib/raid6/sse2.c +++ b/lib/raid6/sse2.c @@ -17,8 +17,6 @@ * */ -#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) - #include <linux/raid/pq.h> #include "x86.h" @@ -159,9 +157,7 @@ const struct raid6_calls raid6_sse2x2 = { 1 /* Has cache hints */ }; -#endif - -#if defined(__x86_64__) && !defined(__arch_um__) +#ifdef CONFIG_X86_64 /* * Unrolled-by-4 SSE2 implementation @@ -259,4 +255,4 @@ const struct raid6_calls raid6_sse2x4 = { 1 /* Has cache hints */ }; -#endif +#endif /* CONFIG_X86_64 */ diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index c76151d94764..087332dbf8aa 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -10,6 +10,31 @@ LD = ld AWK = awk -f AR = ar RANLIB = ranlib +OBJS = int1.o int2.o int4.o int8.o int16.o int32.o recov.o algos.o tables.o + +ARCH := $(shell uname -m 2>/dev/null | sed -e /s/i.86/i386/) +ifeq ($(ARCH),i386) + CFLAGS += -DCONFIG_X86_32 + IS_X86 = yes +endif +ifeq ($(ARCH),x86_64) + CFLAGS += -DCONFIG_X86_64 + IS_X86 = yes +endif + +ifeq ($(IS_X86),yes) + OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o + CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ + gcc -c -x assembler - >&/dev/null && \ + rm ./-.o && echo -DCONFIG_AS_AVX2=1) +else + HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\ + gcc -c -x c - >&/dev/null && \ + rm ./-.o && echo yes) + ifeq ($(HAS_ALTIVEC),yes) + OBJS += altivec1.o altivec2.o altivec4.o altivec8.o + endif +endif .c.o: $(CC) $(CFLAGS) -c -o $@ $< @@ -22,9 +47,7 @@ RANLIB = ranlib all: raid6.a raid6test -raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \ - altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o algos.o \ - tables.o +raid6.a: $(OBJS) rm -f $@ $(AR) cq $@ $^ $(RANLIB) $@ diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h index d55d63232c55..b7595484a815 100644 --- a/lib/raid6/x86.h +++ b/lib/raid6/x86.h @@ -45,19 +45,23 @@ static inline void kernel_fpu_end(void) #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ #define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ +#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ /* Should work well enough on modern CPUs for testing */ static inline int boot_cpu_has(int flag) { - u32 eax = (flag & 0x20) ? 0x80000001 : 1; - u32 ecx, edx; + u32 eax, ebx, ecx, edx; + + eax = (flag & 0x100) ? 7 : + (flag & 0x20) ? 0x80000001 : 1; + ecx = 0; asm volatile("cpuid" - : "+a" (eax), "=d" (edx), "=c" (ecx) - : : "ebx"); + : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx)); - return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1; + return ((flag & 0x100 ? ebx : + (flag & 0x80) ? ecx : edx) >> (flag & 31)) & 1; } #endif /* ndef __KERNEL__ */ diff --git a/lib/random32.c b/lib/random32.c index 938bde5876ac..52280d5526be 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -42,13 +42,13 @@ static DEFINE_PER_CPU(struct rnd_state, net_rand_state); /** - * prandom32 - seeded pseudo-random number generator. + * prandom_u32_state - seeded pseudo-random number generator. * @state: pointer to state structure holding seeded state. * * This is used for pseudo-randomness with no outside seeding. - * For more random results, use random32(). + * For more random results, use prandom_u32(). */ -u32 prandom32(struct rnd_state *state) +u32 prandom_u32_state(struct rnd_state *state) { #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) @@ -58,32 +58,81 @@ u32 prandom32(struct rnd_state *state) return (state->s1 ^ state->s2 ^ state->s3); } -EXPORT_SYMBOL(prandom32); +EXPORT_SYMBOL(prandom_u32_state); /** - * random32 - pseudo random number generator + * prandom_u32 - pseudo random number generator * * A 32 bit pseudo-random number is generated using a fast * algorithm suitable for simulation. This algorithm is NOT * considered safe for cryptographic use. */ -u32 random32(void) +u32 prandom_u32(void) { unsigned long r; struct rnd_state *state = &get_cpu_var(net_rand_state); - r = prandom32(state); + r = prandom_u32_state(state); put_cpu_var(state); return r; } -EXPORT_SYMBOL(random32); +EXPORT_SYMBOL(prandom_u32); + +/* + * prandom_bytes_state - get the requested number of pseudo-random bytes + * + * @state: pointer to state structure holding seeded state. + * @buf: where to copy the pseudo-random bytes to + * @bytes: the requested number of bytes + * + * This is used for pseudo-randomness with no outside seeding. + * For more random results, use prandom_bytes(). + */ +void prandom_bytes_state(struct rnd_state *state, void *buf, int bytes) +{ + unsigned char *p = buf; + int i; + + for (i = 0; i < round_down(bytes, sizeof(u32)); i += sizeof(u32)) { + u32 random = prandom_u32_state(state); + int j; + + for (j = 0; j < sizeof(u32); j++) { + p[i + j] = random; + random >>= BITS_PER_BYTE; + } + } + if (i < bytes) { + u32 random = prandom_u32_state(state); + + for (; i < bytes; i++) { + p[i] = random; + random >>= BITS_PER_BYTE; + } + } +} +EXPORT_SYMBOL(prandom_bytes_state); + +/** + * prandom_bytes - get the requested number of pseudo-random bytes + * @buf: where to copy the pseudo-random bytes to + * @bytes: the requested number of bytes + */ +void prandom_bytes(void *buf, int bytes) +{ + struct rnd_state *state = &get_cpu_var(net_rand_state); + + prandom_bytes_state(state, buf, bytes); + put_cpu_var(state); +} +EXPORT_SYMBOL(prandom_bytes); /** - * srandom32 - add entropy to pseudo random number generator + * prandom_seed - add entropy to pseudo random number generator * @seed: seed value * - * Add some additional seeding to the random32() pool. + * Add some additional seeding to the prandom pool. */ -void srandom32(u32 entropy) +void prandom_seed(u32 entropy) { int i; /* @@ -95,13 +144,13 @@ void srandom32(u32 entropy) state->s1 = __seed(state->s1 ^ entropy, 1); } } -EXPORT_SYMBOL(srandom32); +EXPORT_SYMBOL(prandom_seed); /* * Generate some initially weak seeding values to allow - * to start the random32() engine. + * to start the prandom_u32() engine. */ -static int __init random32_init(void) +static int __init prandom_init(void) { int i; @@ -114,22 +163,22 @@ static int __init random32_init(void) state->s3 = __seed(LCG(state->s2), 15); /* "warm it up" */ - prandom32(state); - prandom32(state); - prandom32(state); - prandom32(state); - prandom32(state); - prandom32(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); } return 0; } -core_initcall(random32_init); +core_initcall(prandom_init); /* * Generate better values after random number generator * is fully initialized. */ -static int __init random32_reseed(void) +static int __init prandom_reseed(void) { int i; @@ -143,8 +192,8 @@ static int __init random32_reseed(void) state->s3 = __seed(seeds[2], 15); /* mix it in */ - prandom32(state); + prandom_u32_state(state); } return 0; } -late_initcall(random32_reseed); +late_initcall(prandom_reseed); diff --git a/lib/rbtree.c b/lib/rbtree.c index 4f56a11d67fa..c0e31fe2fabf 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -194,8 +194,12 @@ __rb_insert(struct rb_node *node, struct rb_root *root, } } -__always_inline void -__rb_erase_color(struct rb_node *parent, struct rb_root *root, +/* + * Inline version for rb_erase() use - we want to be able to inline + * and eliminate the dummy_rotate callback there + */ +static __always_inline void +____rb_erase_color(struct rb_node *parent, struct rb_root *root, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) { struct rb_node *node = NULL, *sibling, *tmp1, *tmp2; @@ -355,6 +359,13 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root, } } } + +/* Non-inline version for rb_erase_augmented() use */ +void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + ____rb_erase_color(parent, root, augment_rotate); +} EXPORT_SYMBOL(__rb_erase_color); /* @@ -380,7 +391,10 @@ EXPORT_SYMBOL(rb_insert_color); void rb_erase(struct rb_node *node, struct rb_root *root) { - rb_erase_augmented(node, root, &dummy_callbacks); + struct rb_node *rebalance; + rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); + if (rebalance) + ____rb_erase_color(rebalance, root, dummy_rotate); } EXPORT_SYMBOL(rb_erase); diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 268b23951fec..af38aedbd874 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -96,8 +96,8 @@ static void init(void) { int i; for (i = 0; i < NODES; i++) { - nodes[i].key = prandom32(&rnd); - nodes[i].val = prandom32(&rnd); + nodes[i].key = prandom_u32_state(&rnd); + nodes[i].val = prandom_u32_state(&rnd); } } @@ -118,7 +118,7 @@ static void check(int nr_nodes) { struct rb_node *rb; int count = 0; - int blacks; + int blacks = 0; u32 prev_key = 0; for (rb = rb_first(&root); rb; rb = rb_next(rb)) { @@ -155,7 +155,7 @@ static int rbtree_test_init(void) printk(KERN_ALERT "rbtree testing"); - prandom32_seed(&rnd, 3141592653589793238ULL); + prandom_seed_state(&rnd, 3141592653589793238ULL); init(); time1 = get_cycles(); diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index 7e0d6a58fc83..7542afbb22b3 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c @@ -73,20 +73,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) goto dont_wake_writers; } - /* if we are allowed to wake writers try to grant a single write lock - * if there's a writer at the front of the queue - * - we leave the 'waiting count' incremented to signify potential - * contention + /* + * as we support write lock stealing, we can't set sem->activity + * to -1 here to indicate we get the lock. Instead, we wake it up + * to let it go get it again. */ if (waiter->flags & RWSEM_WAITING_FOR_WRITE) { - sem->activity = -1; - list_del(&waiter->list); - tsk = waiter->task; - /* Don't touch waiter after ->task has been NULLed */ - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); + wake_up_process(waiter->task); goto out; } @@ -121,18 +114,10 @@ static inline struct rw_semaphore * __rwsem_wake_one_writer(struct rw_semaphore *sem) { struct rwsem_waiter *waiter; - struct task_struct *tsk; - - sem->activity = -1; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - list_del(&waiter->list); + wake_up_process(waiter->task); - tsk = waiter->task; - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); return sem; } @@ -204,7 +189,6 @@ int __down_read_trylock(struct rw_semaphore *sem) /* * get a write lock on the semaphore - * - we increment the waiting count anyway to indicate an exclusive lock */ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) { @@ -214,37 +198,32 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity == 0 && list_empty(&sem->wait_list)) { - /* granted */ - sem->activity = -1; - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - goto out; - } - - tsk = current; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - /* set up my own style of waitqueue */ + tsk = current; waiter.task = tsk; waiter.flags = RWSEM_WAITING_FOR_WRITE; - get_task_struct(tsk); - list_add_tail(&waiter.list, &sem->wait_list); - /* we don't need to touch the semaphore struct anymore */ - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - /* wait to be given the lock */ + /* wait for someone to release the lock */ for (;;) { - if (!waiter.task) + /* + * That is the key to support write lock stealing: allows the + * task already on CPU to get the lock soon rather than put + * itself into sleep and waiting for system woke it or someone + * else in the head of the wait list up. + */ + if (sem->activity == 0) break; - schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + schedule(); + raw_spin_lock_irqsave(&sem->wait_lock, flags); } + /* got the lock */ + sem->activity = -1; + list_del(&waiter.list); - tsk->state = TASK_RUNNING; - out: - ; + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } void __sched __down_write(struct rw_semaphore *sem) @@ -262,8 +241,8 @@ int __down_write_trylock(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity == 0 && list_empty(&sem->wait_list)) { - /* granted */ + if (sem->activity == 0) { + /* got the lock */ sem->activity = -1; ret = 1; } diff --git a/lib/rwsem.c b/lib/rwsem.c index 8337e1b9bb8d..ad5e0df16ab4 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -2,6 +2,8 @@ * * Written by David Howells (dhowells@redhat.com). * Derived from arch/i386/kernel/semaphore.c + * + * Writer lock-stealing by Alex Shi <alex.shi@intel.com> */ #include <linux/rwsem.h> #include <linux/sched.h> @@ -60,7 +62,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) struct rwsem_waiter *waiter; struct task_struct *tsk; struct list_head *next; - signed long oldcount, woken, loop, adjustment; + signed long woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) @@ -72,30 +74,8 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) */ goto out; - /* There's a writer at the front of the queue - try to grant it the - * write lock. However, we only wake this writer if we can transition - * the active part of the count from 0 -> 1 - */ - adjustment = RWSEM_ACTIVE_WRITE_BIAS; - if (waiter->list.next == &sem->wait_list) - adjustment -= RWSEM_WAITING_BIAS; - - try_again_write: - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; - if (oldcount & RWSEM_ACTIVE_MASK) - /* Someone grabbed the sem already */ - goto undo_write; - - /* We must be careful not to touch 'waiter' after we set ->task = NULL. - * It is an allocated on the waiter's stack and may become invalid at - * any time after that point (due to a wakeup from another source). - */ - list_del(&waiter->list); - tsk = waiter->task; - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); + /* Wake up the writing waiter and let the task grab the sem: */ + wake_up_process(waiter->task); goto out; readers_only: @@ -157,12 +137,40 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) out: return sem; +} + +/* Try to get write sem, caller holds sem->wait_lock: */ +static int try_get_writer_sem(struct rw_semaphore *sem, + struct rwsem_waiter *waiter) +{ + struct rwsem_waiter *fwaiter; + long oldcount, adjustment; - /* undo the change to the active count, but check for a transition - * 1->0 */ - undo_write: + /* only steal when first waiter is writing */ + fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); + if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE)) + return 0; + + adjustment = RWSEM_ACTIVE_WRITE_BIAS; + /* Only one waiter in the queue: */ + if (fwaiter == waiter && waiter->list.next == &sem->wait_list) + adjustment -= RWSEM_WAITING_BIAS; + +try_again_write: + oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + if (!(oldcount & RWSEM_ACTIVE_MASK)) { + /* No active lock: */ + struct task_struct *tsk = waiter->task; + + list_del(&waiter->list); + smp_mb(); + put_task_struct(tsk); + tsk->state = TASK_RUNNING; + return 1; + } + /* some one grabbed the sem already */ if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) - goto out; + return 0; goto try_again_write; } @@ -210,6 +218,15 @@ rwsem_down_failed_common(struct rw_semaphore *sem, for (;;) { if (!waiter.task) break; + + raw_spin_lock_irq(&sem->wait_lock); + /* Try to get the writer sem, may steal from the head writer: */ + if (flags == RWSEM_WAITING_FOR_WRITE) + if (try_get_writer_sem(sem, &waiter)) { + raw_spin_unlock_irq(&sem->wait_lock); + return sem; + } + raw_spin_unlock_irq(&sem->wait_lock); schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); } diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 3675452b23ca..7874b01e816e 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -248,7 +248,8 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, unsigned int left; #ifndef ARCH_HAS_SG_CHAIN - BUG_ON(nents > max_ents); + if (WARN_ON_ONCE(nents > max_ents)) + return -EINVAL; #endif memset(table, 0, sizeof(*table)); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index f114bf6a8e13..bfe02b8fc55b 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -57,7 +57,7 @@ int swiotlb_force; * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this * API. */ -static char *io_tlb_start, *io_tlb_end; +static phys_addr_t io_tlb_start, io_tlb_end; /* * The number of IO TLB blocks (in groups of 64) between io_tlb_start and @@ -70,7 +70,7 @@ static unsigned long io_tlb_nslabs; */ static unsigned long io_tlb_overflow = 32*1024; -static void *io_tlb_overflow_buffer; +static phys_addr_t io_tlb_overflow_buffer; /* * This is a free list describing the number of free entries available from @@ -122,30 +122,49 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, return phys_to_dma(hwdev, virt_to_phys(address)); } +static bool no_iotlb_memory; + void swiotlb_print_info(void) { unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; - phys_addr_t pstart, pend; + unsigned char *vstart, *vend; + + if (no_iotlb_memory) { + pr_warn("software IO TLB: No low mem\n"); + return; + } - pstart = virt_to_phys(io_tlb_start); - pend = virt_to_phys(io_tlb_end); + vstart = phys_to_virt(io_tlb_start); + vend = phys_to_virt(io_tlb_end); printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n", - (unsigned long long)pstart, (unsigned long long)pend - 1, - bytes >> 20, io_tlb_start, io_tlb_end - 1); + (unsigned long long)io_tlb_start, + (unsigned long long)io_tlb_end, + bytes >> 20, vstart, vend - 1); } -void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) +int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) { + void *v_overflow_buffer; unsigned long i, bytes; bytes = nslabs << IO_TLB_SHIFT; io_tlb_nslabs = nslabs; - io_tlb_start = tlb; + io_tlb_start = __pa(tlb); io_tlb_end = io_tlb_start + bytes; /* + * Get the overflow emergency buffer + */ + v_overflow_buffer = alloc_bootmem_low_pages_nopanic( + PAGE_ALIGN(io_tlb_overflow)); + if (!v_overflow_buffer) + return -ENOMEM; + + io_tlb_overflow_buffer = __pa(v_overflow_buffer); + + /* * Allocate and initialize the free list array. This array is used * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE * between io_tlb_start and io_tlb_end. @@ -156,23 +175,22 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) io_tlb_index = 0; io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); - /* - * Get the overflow emergency buffer - */ - io_tlb_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow)); - if (!io_tlb_overflow_buffer) - panic("Cannot allocate SWIOTLB overflow buffer!\n"); if (verbose) swiotlb_print_info(); + + return 0; } /* * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. */ -static void __init -swiotlb_init_with_default_size(size_t default_size, int verbose) +void __init +swiotlb_init(int verbose) { + /* default to 64MB */ + size_t default_size = 64UL<<20; + unsigned char *vstart; unsigned long bytes; if (!io_tlb_nslabs) { @@ -182,20 +200,16 @@ swiotlb_init_with_default_size(size_t default_size, int verbose) bytes = io_tlb_nslabs << IO_TLB_SHIFT; - /* - * Get IO TLB memory from the low pages - */ - io_tlb_start = alloc_bootmem_low_pages(PAGE_ALIGN(bytes)); - if (!io_tlb_start) - panic("Cannot allocate SWIOTLB buffer"); - - swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose); -} + /* Get IO TLB memory from the low pages */ + vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes)); + if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) + return; -void __init -swiotlb_init(int verbose) -{ - swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */ + if (io_tlb_start) + free_bootmem(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + pr_warn("Cannot allocate SWIOTLB buffer"); + no_iotlb_memory = true; } /* @@ -207,6 +221,7 @@ int swiotlb_late_init_with_default_size(size_t default_size) { unsigned long bytes, req_nslabs = io_tlb_nslabs; + unsigned char *vstart = NULL; unsigned int order; int rc = 0; @@ -223,14 +238,14 @@ swiotlb_late_init_with_default_size(size_t default_size) bytes = io_tlb_nslabs << IO_TLB_SHIFT; while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, - order); - if (io_tlb_start) + vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, + order); + if (vstart) break; order--; } - if (!io_tlb_start) { + if (!vstart) { io_tlb_nslabs = req_nslabs; return -ENOMEM; } @@ -239,9 +254,9 @@ swiotlb_late_init_with_default_size(size_t default_size) "for software IO TLB\n", (PAGE_SIZE << order) >> 20); io_tlb_nslabs = SLABS_PER_PAGE << order; } - rc = swiotlb_late_init_with_tbl(io_tlb_start, io_tlb_nslabs); + rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); if (rc) - free_pages((unsigned long)io_tlb_start, order); + free_pages((unsigned long)vstart, order); return rc; } @@ -249,14 +264,25 @@ int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) { unsigned long i, bytes; + unsigned char *v_overflow_buffer; bytes = nslabs << IO_TLB_SHIFT; io_tlb_nslabs = nslabs; - io_tlb_start = tlb; + io_tlb_start = virt_to_phys(tlb); io_tlb_end = io_tlb_start + bytes; - memset(io_tlb_start, 0, bytes); + memset(tlb, 0, bytes); + + /* + * Get the overflow emergency buffer + */ + v_overflow_buffer = (void *)__get_free_pages(GFP_DMA, + get_order(io_tlb_overflow)); + if (!v_overflow_buffer) + goto cleanup2; + + io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); /* * Allocate and initialize the free list array. This array is used @@ -266,7 +292,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, get_order(io_tlb_nslabs * sizeof(int))); if (!io_tlb_list) - goto cleanup2; + goto cleanup3; for (i = 0; i < io_tlb_nslabs; i++) io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); @@ -277,18 +303,10 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) get_order(io_tlb_nslabs * sizeof(phys_addr_t))); if (!io_tlb_orig_addr) - goto cleanup3; + goto cleanup4; memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t)); - /* - * Get the overflow emergency buffer - */ - io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA, - get_order(io_tlb_overflow)); - if (!io_tlb_overflow_buffer) - goto cleanup4; - swiotlb_print_info(); late_alloc = 1; @@ -296,42 +314,42 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) return 0; cleanup4: - free_pages((unsigned long)io_tlb_orig_addr, - get_order(io_tlb_nslabs * sizeof(phys_addr_t))); - io_tlb_orig_addr = NULL; -cleanup3: free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * sizeof(int))); io_tlb_list = NULL; +cleanup3: + free_pages((unsigned long)v_overflow_buffer, + get_order(io_tlb_overflow)); + io_tlb_overflow_buffer = 0; cleanup2: - io_tlb_end = NULL; - io_tlb_start = NULL; + io_tlb_end = 0; + io_tlb_start = 0; io_tlb_nslabs = 0; return -ENOMEM; } void __init swiotlb_free(void) { - if (!io_tlb_overflow_buffer) + if (!io_tlb_orig_addr) return; if (late_alloc) { - free_pages((unsigned long)io_tlb_overflow_buffer, + free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer), get_order(io_tlb_overflow)); free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs * sizeof(phys_addr_t))); free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * sizeof(int))); - free_pages((unsigned long)io_tlb_start, + free_pages((unsigned long)phys_to_virt(io_tlb_start), get_order(io_tlb_nslabs << IO_TLB_SHIFT)); } else { - free_bootmem_late(__pa(io_tlb_overflow_buffer), + free_bootmem_late(io_tlb_overflow_buffer, PAGE_ALIGN(io_tlb_overflow)); free_bootmem_late(__pa(io_tlb_orig_addr), PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); free_bootmem_late(__pa(io_tlb_list), PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); - free_bootmem_late(__pa(io_tlb_start), + free_bootmem_late(io_tlb_start, PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); } io_tlb_nslabs = 0; @@ -339,21 +357,21 @@ void __init swiotlb_free(void) static int is_swiotlb_buffer(phys_addr_t paddr) { - return paddr >= virt_to_phys(io_tlb_start) && - paddr < virt_to_phys(io_tlb_end); + return paddr >= io_tlb_start && paddr < io_tlb_end; } /* * Bounce: copy the swiotlb buffer back to the original dma location */ -void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, - enum dma_data_direction dir) +static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir) { - unsigned long pfn = PFN_DOWN(phys); + unsigned long pfn = PFN_DOWN(orig_addr); + unsigned char *vaddr = phys_to_virt(tlb_addr); if (PageHighMem(pfn_to_page(pfn))) { /* The buffer does not have a mapping. Map it in and copy */ - unsigned int offset = phys & ~PAGE_MASK; + unsigned int offset = orig_addr & ~PAGE_MASK; char *buffer; unsigned int sz = 0; unsigned long flags; @@ -364,38 +382,40 @@ void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, local_irq_save(flags); buffer = kmap_atomic(pfn_to_page(pfn)); if (dir == DMA_TO_DEVICE) - memcpy(dma_addr, buffer + offset, sz); + memcpy(vaddr, buffer + offset, sz); else - memcpy(buffer + offset, dma_addr, sz); + memcpy(buffer + offset, vaddr, sz); kunmap_atomic(buffer); local_irq_restore(flags); size -= sz; pfn++; - dma_addr += sz; + vaddr += sz; offset = 0; } + } else if (dir == DMA_TO_DEVICE) { + memcpy(vaddr, phys_to_virt(orig_addr), size); } else { - if (dir == DMA_TO_DEVICE) - memcpy(dma_addr, phys_to_virt(phys), size); - else - memcpy(phys_to_virt(phys), dma_addr, size); + memcpy(phys_to_virt(orig_addr), vaddr, size); } } -EXPORT_SYMBOL_GPL(swiotlb_bounce); -void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, - phys_addr_t phys, size_t size, - enum dma_data_direction dir) +phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, + dma_addr_t tbl_dma_addr, + phys_addr_t orig_addr, size_t size, + enum dma_data_direction dir) { unsigned long flags; - char *dma_addr; + phys_addr_t tlb_addr; unsigned int nslots, stride, index, wrap; int i; unsigned long mask; unsigned long offset_slots; unsigned long max_slots; + if (no_iotlb_memory) + panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); + mask = dma_get_seg_boundary(hwdev); tbl_dma_addr &= mask; @@ -453,7 +473,7 @@ void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, io_tlb_list[i] = 0; for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) io_tlb_list[i] = ++count; - dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); + tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); /* * Update the indices to avoid searching in the next @@ -471,7 +491,7 @@ void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, not_found: spin_unlock_irqrestore(&io_tlb_lock, flags); - return NULL; + return SWIOTLB_MAP_ERROR; found: spin_unlock_irqrestore(&io_tlb_lock, flags); @@ -481,11 +501,11 @@ found: * needed. */ for (i = 0; i < nslots; i++) - io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT); + io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) - swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE); + swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); - return dma_addr; + return tlb_addr; } EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); @@ -493,11 +513,10 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); * Allocates bounce buffer and returns its kernel virtual address. */ -static void * -map_single(struct device *hwdev, phys_addr_t phys, size_t size, - enum dma_data_direction dir) +phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size, + enum dma_data_direction dir) { - dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start); + dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start); return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir); } @@ -505,20 +524,19 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size, /* * dma_addr is the kernel virtual address of the bounce buffer to unmap. */ -void -swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size, - enum dma_data_direction dir) +void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir) { unsigned long flags; int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; - phys_addr_t phys = io_tlb_orig_addr[index]; + int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; + phys_addr_t orig_addr = io_tlb_orig_addr[index]; /* * First, sync the memory before unmapping the entry */ - if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) - swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE); + if (orig_addr && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) + swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); /* * Return the buffer to the free list by setting the corresponding @@ -547,26 +565,27 @@ swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size, } EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single); -void -swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size, - enum dma_data_direction dir, - enum dma_sync_target target) +void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + enum dma_sync_target target) { - int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; - phys_addr_t phys = io_tlb_orig_addr[index]; + int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; + phys_addr_t orig_addr = io_tlb_orig_addr[index]; - phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1)); + orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); switch (target) { case SYNC_FOR_CPU: if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE); + swiotlb_bounce(orig_addr, tlb_addr, + size, DMA_FROM_DEVICE); else BUG_ON(dir != DMA_TO_DEVICE); break; case SYNC_FOR_DEVICE: if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE); + swiotlb_bounce(orig_addr, tlb_addr, + size, DMA_TO_DEVICE); else BUG_ON(dir != DMA_FROM_DEVICE); break; @@ -589,12 +608,15 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_mask = hwdev->coherent_dma_mask; ret = (void *)__get_free_pages(flags, order); - if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) { - /* - * The allocated memory isn't reachable by the device. - */ - free_pages((unsigned long) ret, order); - ret = NULL; + if (ret) { + dev_addr = swiotlb_virt_to_bus(hwdev, ret); + if (dev_addr + size - 1 > dma_mask) { + /* + * The allocated memory isn't reachable by the device. + */ + free_pages((unsigned long) ret, order); + ret = NULL; + } } if (!ret) { /* @@ -602,25 +624,29 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, * GFP_DMA memory; fall back on map_single(), which * will grab memory from the lowest available address range. */ - ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); - if (!ret) + phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE); + if (paddr == SWIOTLB_MAP_ERROR) return NULL; - } - memset(ret, 0, size); - dev_addr = swiotlb_virt_to_bus(hwdev, ret); + ret = phys_to_virt(paddr); + dev_addr = phys_to_dma(hwdev, paddr); - /* Confirm address can be DMA'd by device */ - if (dev_addr + size - 1 > dma_mask) { - printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", - (unsigned long long)dma_mask, - (unsigned long long)dev_addr); + /* Confirm address can be DMA'd by device */ + if (dev_addr + size - 1 > dma_mask) { + printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", + (unsigned long long)dma_mask, + (unsigned long long)dev_addr); - /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); - return NULL; + /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ + swiotlb_tbl_unmap_single(hwdev, paddr, + size, DMA_TO_DEVICE); + return NULL; + } } + *dma_handle = dev_addr; + memset(ret, 0, size); + return ret; } EXPORT_SYMBOL(swiotlb_alloc_coherent); @@ -636,7 +662,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, get_order(size)); else /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */ - swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); + swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE); } EXPORT_SYMBOL(swiotlb_free_coherent); @@ -677,9 +703,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, struct dma_attrs *attrs) { - phys_addr_t phys = page_to_phys(page) + offset; + phys_addr_t map, phys = page_to_phys(page) + offset; dma_addr_t dev_addr = phys_to_dma(dev, phys); - void *map; BUG_ON(dir == DMA_NONE); /* @@ -690,23 +715,19 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, if (dma_capable(dev, dev_addr, size) && !swiotlb_force) return dev_addr; - /* - * Oh well, have to allocate and map a bounce buffer. - */ + /* Oh well, have to allocate and map a bounce buffer. */ map = map_single(dev, phys, size, dir); - if (!map) { + if (map == SWIOTLB_MAP_ERROR) { swiotlb_full(dev, size, dir, 1); - map = io_tlb_overflow_buffer; + return phys_to_dma(dev, io_tlb_overflow_buffer); } - dev_addr = swiotlb_virt_to_bus(dev, map); + dev_addr = phys_to_dma(dev, map); - /* - * Ensure that the address returned is DMA'ble - */ + /* Ensure that the address returned is DMA'ble */ if (!dma_capable(dev, dev_addr, size)) { swiotlb_tbl_unmap_single(dev, map, size, dir); - dev_addr = swiotlb_virt_to_bus(dev, io_tlb_overflow_buffer); + return phys_to_dma(dev, io_tlb_overflow_buffer); } return dev_addr; @@ -729,7 +750,7 @@ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(paddr)) { - swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir); + swiotlb_tbl_unmap_single(hwdev, paddr, size, dir); return; } @@ -773,8 +794,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(paddr)) { - swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir, - target); + swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); return; } @@ -831,9 +851,9 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, if (swiotlb_force || !dma_capable(hwdev, dev_addr, sg->length)) { - void *map = map_single(hwdev, sg_phys(sg), - sg->length, dir); - if (!map) { + phys_addr_t map = map_single(hwdev, sg_phys(sg), + sg->length, dir); + if (map == SWIOTLB_MAP_ERROR) { /* Don't panic here, we expect map_sg users to do proper error handling. */ swiotlb_full(hwdev, sg->length, dir, 0); @@ -842,7 +862,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, sgl[0].dma_length = 0; return 0; } - sg->dma_address = swiotlb_virt_to_bus(hwdev, map); + sg->dma_address = phys_to_dma(hwdev, map); } else sg->dma_address = dev_addr; sg->dma_length = sg->length; @@ -925,7 +945,7 @@ EXPORT_SYMBOL(swiotlb_sync_sg_for_device); int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) { - return (dma_addr == swiotlb_virt_to_bus(hwdev, io_tlb_overflow_buffer)); + return (dma_addr == phys_to_dma(hwdev, io_tlb_overflow_buffer)); } EXPORT_SYMBOL(swiotlb_dma_mapping_error); @@ -938,6 +958,6 @@ EXPORT_SYMBOL(swiotlb_dma_mapping_error); int swiotlb_dma_supported(struct device *hwdev, u64 mask) { - return swiotlb_virt_to_bus(hwdev, io_tlb_end - 1) <= mask; + return phys_to_dma(hwdev, io_tlb_end - 1) <= mask; } EXPORT_SYMBOL(swiotlb_dma_supported); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 39c99fea7c03..0d62fd700f68 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -23,12 +23,12 @@ #include <linux/ctype.h> #include <linux/kernel.h> #include <linux/kallsyms.h> +#include <linux/math64.h> #include <linux/uaccess.h> #include <linux/ioport.h> #include <net/addrconf.h> #include <asm/page.h> /* for PAGE_SIZE */ -#include <asm/div64.h> #include <asm/sections.h> /* for dereference_function_descriptor() */ #include "kstrtox.h" @@ -38,6 +38,8 @@ * @cp: The start of the string * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use + * + * This function is obsolete. Please use kstrtoull instead. */ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) { @@ -61,6 +63,8 @@ EXPORT_SYMBOL(simple_strtoull); * @cp: The start of the string * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use + * + * This function is obsolete. Please use kstrtoul instead. */ unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base) { @@ -73,6 +77,8 @@ EXPORT_SYMBOL(simple_strtoul); * @cp: The start of the string * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use + * + * This function is obsolete. Please use kstrtol instead. */ long simple_strtol(const char *cp, char **endp, unsigned int base) { @@ -88,6 +94,8 @@ EXPORT_SYMBOL(simple_strtol); * @cp: The start of the string * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use + * + * This function is obsolete. Please use kstrtoll instead. */ long long simple_strtoll(const char *cp, char **endp, unsigned int base) { @@ -1022,6 +1030,7 @@ int kptr_restrict __read_mostly; * N no separator * The maximum supported length is 64 bytes of the input. Consider * to use print_hex_dump() for the larger input. + * - 'a' For a phys_addr_t type and its derivative types (passed by reference) * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a @@ -1112,6 +1121,12 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return netdev_feature_string(buf, end, ptr, spec); } break; + case 'a': + spec.flags |= SPECIAL | SMALL | ZEROPAD; + spec.field_width = sizeof(phys_addr_t) * 2 + 2; + spec.base = 16; + return number(buf, end, + (unsigned long long) *((phys_addr_t *)ptr), spec); } spec.flags |= SMALL; if (spec.field_width == -1) { @@ -1485,7 +1500,10 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) num = va_arg(args, long); break; case FORMAT_TYPE_SIZE_T: - num = va_arg(args, size_t); + if (spec.flags & SIGN) + num = va_arg(args, ssize_t); + else + num = va_arg(args, size_t); break; case FORMAT_TYPE_PTRDIFF: num = va_arg(args, ptrdiff_t); @@ -2013,7 +2031,11 @@ int vsscanf(const char *buf, const char *fmt, va_list args) char digit; int num = 0; u8 qualifier; - u8 base; + unsigned int base; + union { + long long s; + unsigned long long u; + } val; s16 field_width; bool is_sign; @@ -2053,8 +2075,11 @@ int vsscanf(const char *buf, const char *fmt, va_list args) /* get field width */ field_width = -1; - if (isdigit(*fmt)) + if (isdigit(*fmt)) { field_width = skip_atoi(&fmt); + if (field_width <= 0) + break; + } /* get conversion qualifier */ qualifier = -1; @@ -2154,58 +2179,61 @@ int vsscanf(const char *buf, const char *fmt, va_list args) || (base == 0 && !isdigit(digit))) break; + if (is_sign) + val.s = qualifier != 'L' ? + simple_strtol(str, &next, base) : + simple_strtoll(str, &next, base); + else + val.u = qualifier != 'L' ? + simple_strtoul(str, &next, base) : + simple_strtoull(str, &next, base); + + if (field_width > 0 && next - str > field_width) { + if (base == 0) + _parse_integer_fixup_radix(str, &base); + while (next - str > field_width) { + if (is_sign) + val.s = div_s64(val.s, base); + else + val.u = div_u64(val.u, base); + --next; + } + } + switch (qualifier) { case 'H': /* that's 'hh' in format */ - if (is_sign) { - signed char *s = (signed char *)va_arg(args, signed char *); - *s = (signed char)simple_strtol(str, &next, base); - } else { - unsigned char *s = (unsigned char *)va_arg(args, unsigned char *); - *s = (unsigned char)simple_strtoul(str, &next, base); - } + if (is_sign) + *va_arg(args, signed char *) = val.s; + else + *va_arg(args, unsigned char *) = val.u; break; case 'h': - if (is_sign) { - short *s = (short *)va_arg(args, short *); - *s = (short)simple_strtol(str, &next, base); - } else { - unsigned short *s = (unsigned short *)va_arg(args, unsigned short *); - *s = (unsigned short)simple_strtoul(str, &next, base); - } + if (is_sign) + *va_arg(args, short *) = val.s; + else + *va_arg(args, unsigned short *) = val.u; break; case 'l': - if (is_sign) { - long *l = (long *)va_arg(args, long *); - *l = simple_strtol(str, &next, base); - } else { - unsigned long *l = (unsigned long *)va_arg(args, unsigned long *); - *l = simple_strtoul(str, &next, base); - } + if (is_sign) + *va_arg(args, long *) = val.s; + else + *va_arg(args, unsigned long *) = val.u; break; case 'L': - if (is_sign) { - long long *l = (long long *)va_arg(args, long long *); - *l = simple_strtoll(str, &next, base); - } else { - unsigned long long *l = (unsigned long long *)va_arg(args, unsigned long long *); - *l = simple_strtoull(str, &next, base); - } + if (is_sign) + *va_arg(args, long long *) = val.s; + else + *va_arg(args, unsigned long long *) = val.u; break; case 'Z': case 'z': - { - size_t *s = (size_t *)va_arg(args, size_t *); - *s = (size_t)simple_strtoul(str, &next, base); - } - break; + *va_arg(args, size_t *) = val.u; + break; default: - if (is_sign) { - int *i = (int *)va_arg(args, int *); - *i = (int)simple_strtol(str, &next, base); - } else { - unsigned int *i = (unsigned int *)va_arg(args, unsigned int*); - *i = (unsigned int)simple_strtoul(str, &next, base); - } + if (is_sign) + *va_arg(args, int *) = val.s; + else + *va_arg(args, unsigned int *) = val.u; break; } num++; diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index 60a6088d0e5e..82a04d7ba99e 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -6,42 +6,40 @@ config XZ_DEC the .xz file format as the container. For integrity checking, CRC32 is supported. See Documentation/xz.txt for more information. +if XZ_DEC + config XZ_DEC_X86 - bool "x86 BCJ filter decoder" if EXPERT - default y - depends on XZ_DEC + bool "x86 BCJ filter decoder" + default y if X86 select XZ_DEC_BCJ config XZ_DEC_POWERPC - bool "PowerPC BCJ filter decoder" if EXPERT - default y - depends on XZ_DEC + bool "PowerPC BCJ filter decoder" + default y if POWERPC select XZ_DEC_BCJ config XZ_DEC_IA64 - bool "IA-64 BCJ filter decoder" if EXPERT - default y - depends on XZ_DEC + bool "IA-64 BCJ filter decoder" + default y if IA64 select XZ_DEC_BCJ config XZ_DEC_ARM - bool "ARM BCJ filter decoder" if EXPERT - default y - depends on XZ_DEC + bool "ARM BCJ filter decoder" + default y if ARM select XZ_DEC_BCJ config XZ_DEC_ARMTHUMB - bool "ARM-Thumb BCJ filter decoder" if EXPERT - default y - depends on XZ_DEC + bool "ARM-Thumb BCJ filter decoder" + default y if (ARM && ARM_THUMB) select XZ_DEC_BCJ config XZ_DEC_SPARC - bool "SPARC BCJ filter decoder" if EXPERT - default y - depends on XZ_DEC + bool "SPARC BCJ filter decoder" + default y if SPARC select XZ_DEC_BCJ +endif + config XZ_DEC_BCJ bool default n |