diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig | 3 | ||||
-rw-r--r-- | lib/Kconfig.debug | 53 | ||||
-rw-r--r-- | lib/Kconfig.kmemcheck | 3 | ||||
-rw-r--r-- | lib/Makefile | 2 | ||||
-rw-r--r-- | lib/debugobjects.c | 1 | ||||
-rw-r--r-- | lib/decompress_inflate.c | 8 | ||||
-rw-r--r-- | lib/decompress_unlzma.c | 10 | ||||
-rw-r--r-- | lib/dma-debug.c | 6 | ||||
-rw-r--r-- | lib/fault-inject.c | 1 | ||||
-rw-r--r-- | lib/flex_array.c | 121 | ||||
-rw-r--r-- | lib/inflate.c | 2 | ||||
-rw-r--r-- | lib/is_single_threaded.c | 61 | ||||
-rw-r--r-- | lib/kernel_lock.c | 20 | ||||
-rw-r--r-- | lib/lru_cache.c | 560 | ||||
-rw-r--r-- | lib/radix-tree.c | 5 | ||||
-rw-r--r-- | lib/ratelimit.c | 45 | ||||
-rw-r--r-- | lib/string.c | 20 | ||||
-rw-r--r-- | lib/swiotlb.c | 162 | ||||
-rw-r--r-- | lib/vsprintf.c | 236 | ||||
-rw-r--r-- | lib/zlib_deflate/deflate.c | 4 |
20 files changed, 1070 insertions, 253 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index bb1326d3839c..1cfe51628e1b 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -200,4 +200,7 @@ config NLATTR config GENERIC_ATOMIC64 bool +config LRU_CACHE + tristate + endmenu diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 12327b2bb785..0b8f35918a36 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -50,6 +50,14 @@ config MAGIC_SYSRQ keys are documented in <file:Documentation/sysrq.txt>. Don't say Y unless you really know what this hack does. +config STRIP_ASM_SYMS + bool "Strip assembler-generated symbols during link" + default n + help + Strip internal assembler-generated symbols during a link (symbols + that look like '.Lxxx') so they don't pollute the output of + get_wchan() and suchlike. + config UNUSED_SYMBOLS bool "Enable unused/obsolete exported symbols" default y if X86 @@ -338,8 +346,9 @@ config SLUB_STATS config DEBUG_KMEMLEAK bool "Kernel memory leak detector" - depends on DEBUG_KERNEL && EXPERIMENTAL && (X86 || ARM) && \ - !MEMORY_HOTPLUG + depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \ + (X86 || ARM || PPC || S390) + select DEBUG_FS if SYSFS select STACKTRACE if STACKTRACE_SUPPORT select KALLSYMS @@ -362,7 +371,7 @@ config DEBUG_KMEMLEAK config DEBUG_KMEMLEAK_EARLY_LOG_SIZE int "Maximum kmemleak early log entries" depends on DEBUG_KMEMLEAK - range 200 2000 + range 200 40000 default 400 help Kmemleak must track all the memory allocations to avoid @@ -383,7 +392,7 @@ config DEBUG_KMEMLEAK_TEST config DEBUG_PREEMPT bool "Debug preemptible kernel" - depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64) + depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT default y help If you say Y here then the kernel will use a debug variant of the @@ -653,6 +662,21 @@ config DEBUG_NOTIFIERS This is a relatively cheap check but if you care about maximum performance, say N. +config DEBUG_CREDENTIALS + bool "Debug credential management" + depends on DEBUG_KERNEL + help + Enable this to turn on some debug checking for credential + management. The additional code keeps track of the number of + pointers from task_structs to any given cred struct, and checks to + see that this number never exceeds the usage count of the cred + struct. + + Furthermore, if SELinux is enabled, this also checks that the + security pointer in the cred struct is never seen to be invalid. + + If unsure, say N. + # # Select this config option from the architecture Kconfig, if it # it is preferred to always offer frame pointers as a config @@ -725,8 +749,8 @@ config RCU_TORTURE_TEST_RUNNABLE config RCU_CPU_STALL_DETECTOR bool "Check for stalled CPUs delaying RCU grace periods" - depends on CLASSIC_RCU || TREE_RCU - default n + depends on TREE_RCU || TREE_PREEMPT_RCU + default y help This option causes RCU to printk information on which CPUs are delaying the current grace period, but only when @@ -790,6 +814,21 @@ config DEBUG_BLOCK_EXT_DEVT Say N if you are unsure. +config DEBUG_FORCE_WEAK_PER_CPU + bool "Force weak per-cpu definitions" + depends on DEBUG_KERNEL + help + s390 and alpha require percpu variables in modules to be + defined weak to work around addressing range issue which + puts the following two restrictions on percpu variable + definitions. + + 1. percpu symbols must be unique whether static or not + 2. percpu variables can't be defined inside a function + + To ensure that generic code follows the above rules, this + option forces all percpu variables to be defined as weak. + config LKDTM tristate "Linux Kernel Dump Test Tool Module" depends on DEBUG_KERNEL @@ -873,7 +912,7 @@ config LATENCYTOP config SYSCTL_SYSCALL_CHECK bool "Sysctl checks" - depends on SYSCTL_SYSCALL + depends on SYSCTL ---help--- sys_sysctl uses binary paths that have been found challenging to properly maintain and use. This enables checks that help diff --git a/lib/Kconfig.kmemcheck b/lib/Kconfig.kmemcheck index 603c81b66549..846e039a86b4 100644 --- a/lib/Kconfig.kmemcheck +++ b/lib/Kconfig.kmemcheck @@ -1,6 +1,8 @@ config HAVE_ARCH_KMEMCHECK bool +if HAVE_ARCH_KMEMCHECK + menuconfig KMEMCHECK bool "kmemcheck: trap use of uninitialized memory" depends on DEBUG_KERNEL @@ -89,3 +91,4 @@ config KMEMCHECK_BITOPS_OK accesses where not all the bits are initialized at the same time. This may also hide some real bugs. +endif diff --git a/lib/Makefile b/lib/Makefile index 2e78277eff9d..347ad8db29d3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -91,6 +91,8 @@ obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o obj-$(CONFIG_NLATTR) += nlattr.o +obj-$(CONFIG_LRU_CACHE) += lru_cache.o + obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o obj-$(CONFIG_GENERIC_CSUM) += checksum.o diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 2755a3bd16a1..eae56fddfa3b 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -9,6 +9,7 @@ */ #include <linux/debugobjects.h> #include <linux/interrupt.h> +#include <linux/sched.h> #include <linux/seq_file.h> #include <linux/debugfs.h> #include <linux/hash.h> diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index 68dfce59c1b8..fc686c7a0a0d 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -27,6 +27,11 @@ #define GZIP_IOBUF_SIZE (16*1024) +static int nofill(void *buffer, unsigned int len) +{ + return -1; +} + /* Included from initramfs et al code */ STATIC int INIT gunzip(unsigned char *buf, int len, int(*fill)(void*, unsigned int), @@ -76,6 +81,9 @@ STATIC int INIT gunzip(unsigned char *buf, int len, goto gunzip_nomem4; } + if (!fill) + fill = nofill; + if (len == 0) len = fill(zbuf, GZIP_IOBUF_SIZE); diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 0b954e04bd30..ca82fde81c8f 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -82,6 +82,11 @@ struct rc { #define RC_MODEL_TOTAL_BITS 11 +static int nofill(void *buffer, unsigned int len) +{ + return -1; +} + /* Called twice: once at startup and once in rc_normalize() */ static void INIT rc_read(struct rc *rc) { @@ -97,7 +102,10 @@ static inline void INIT rc_init(struct rc *rc, int (*fill)(void*, unsigned int), char *buffer, int buffer_size) { - rc->fill = fill; + if (fill) + rc->fill = fill; + else + rc->fill = nofill; rc->buffer = (uint8_t *)buffer; rc->buffer_size = buffer_size; rc->buffer_end = rc->buffer + rc->buffer_size; diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 58a9f9fc609a..ce6b7eabf674 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -819,9 +819,11 @@ static void check_unmap(struct dma_debug_entry *ref) err_printk(ref->dev, entry, "DMA-API: device driver frees " "DMA memory with different CPU address " "[device address=0x%016llx] [size=%llu bytes] " - "[cpu alloc address=%p] [cpu free address=%p]", + "[cpu alloc address=0x%016llx] " + "[cpu free address=0x%016llx]", ref->dev_addr, ref->size, - (void *)entry->paddr, (void *)ref->paddr); + (unsigned long long)entry->paddr, + (unsigned long long)ref->paddr); } if (ref->sg_call_ents && ref->type == dma_debug_sg && diff --git a/lib/fault-inject.c b/lib/fault-inject.c index f97af55bdd96..7e65af70635e 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -1,6 +1,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/random.h> +#include <linux/sched.h> #include <linux/stat.h> #include <linux/types.h> #include <linux/fs.h> diff --git a/lib/flex_array.c b/lib/flex_array.c index 7baed2fc3bc8..66eef2e4483e 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -28,23 +28,6 @@ struct flex_array_part { char elements[FLEX_ARRAY_PART_SIZE]; }; -static inline int __elements_per_part(int element_size) -{ - return FLEX_ARRAY_PART_SIZE / element_size; -} - -static inline int bytes_left_in_base(void) -{ - int element_offset = offsetof(struct flex_array, parts); - int bytes_left = FLEX_ARRAY_BASE_SIZE - element_offset; - return bytes_left; -} - -static inline int nr_base_part_ptrs(void) -{ - return bytes_left_in_base() / sizeof(struct flex_array_part *); -} - /* * If a user requests an allocation which is small * enough, we may simply use the space in the @@ -54,7 +37,7 @@ static inline int nr_base_part_ptrs(void) static inline int elements_fit_in_base(struct flex_array *fa) { int data_size = fa->element_size * fa->total_nr_elements; - if (data_size <= bytes_left_in_base()) + if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT) return 1; return 0; } @@ -63,6 +46,7 @@ static inline int elements_fit_in_base(struct flex_array *fa) * flex_array_alloc - allocate a new flexible array * @element_size: the size of individual elements in the array * @total: total number of elements that this should hold + * @flags: page allocation flags to use for base array * * Note: all locking must be provided by the caller. * @@ -103,7 +87,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags) { struct flex_array *ret; - int max_size = nr_base_part_ptrs() * __elements_per_part(element_size); + int max_size = FLEX_ARRAY_NR_BASE_PTRS * + FLEX_ARRAY_ELEMENTS_PER_PART(element_size); /* max_size will end up 0 if element_size > PAGE_SIZE */ if (total > max_size) @@ -113,17 +98,21 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total, return NULL; ret->element_size = element_size; ret->total_nr_elements = total; + if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO)) + memset(ret->parts[0], FLEX_ARRAY_FREE, + FLEX_ARRAY_BASE_BYTES_LEFT); return ret; } static int fa_element_to_part_nr(struct flex_array *fa, unsigned int element_nr) { - return element_nr / __elements_per_part(fa->element_size); + return element_nr / FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size); } /** * flex_array_free_parts - just free the second-level pages + * @fa: the flex array from which to free parts * * This is to be used in cases where the base 'struct flex_array' * has been statically allocated and should not be free. @@ -131,11 +120,10 @@ static int fa_element_to_part_nr(struct flex_array *fa, void flex_array_free_parts(struct flex_array *fa) { int part_nr; - int max_part = nr_base_part_ptrs(); if (elements_fit_in_base(fa)) return; - for (part_nr = 0; part_nr < max_part; part_nr++) + for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) kfree(fa->parts[part_nr]); } @@ -150,7 +138,8 @@ static unsigned int index_inside_part(struct flex_array *fa, { unsigned int part_offset; - part_offset = element_nr % __elements_per_part(fa->element_size); + part_offset = element_nr % + FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size); return part_offset * fa->element_size; } @@ -159,15 +148,12 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) { struct flex_array_part *part = fa->parts[part_nr]; if (!part) { - /* - * This leaves the part pages uninitialized - * and with potentially random data, just - * as if the user had kmalloc()'d the whole. - * __GFP_ZERO can be used to zero it. - */ - part = kmalloc(FLEX_ARRAY_PART_SIZE, flags); + part = kmalloc(sizeof(struct flex_array_part), flags); if (!part) return NULL; + if (!(flags & __GFP_ZERO)) + memset(part, FLEX_ARRAY_FREE, + sizeof(struct flex_array_part)); fa->parts[part_nr] = part; } return part; @@ -175,9 +161,12 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) /** * flex_array_put - copy data into the array at @element_nr - * @src: address of data to copy into the array + * @fa: the flex array to copy data into * @element_nr: index of the position in which to insert * the new element. + * @src: address of data to copy into the array + * @flags: page allocation flags to use for array expansion + * * * Note that this *copies* the contents of @src into * the array. If you are trying to store an array of @@ -207,9 +196,38 @@ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, } /** + * flex_array_clear - clear element in array at @element_nr + * @fa: the flex array of the element. + * @element_nr: index of the position to clear. + * + * Locking must be provided by the caller. + */ +int flex_array_clear(struct flex_array *fa, unsigned int element_nr) +{ + int part_nr = fa_element_to_part_nr(fa, element_nr); + struct flex_array_part *part; + void *dst; + + if (element_nr >= fa->total_nr_elements) + return -ENOSPC; + if (elements_fit_in_base(fa)) + part = (struct flex_array_part *)&fa->parts[0]; + else { + part = fa->parts[part_nr]; + if (!part) + return -EINVAL; + } + dst = &part->elements[index_inside_part(fa, element_nr)]; + memset(dst, FLEX_ARRAY_FREE, fa->element_size); + return 0; +} + +/** * flex_array_prealloc - guarantee that array space exists + * @fa: the flex array for which to preallocate parts * @start: index of first array element for which space is allocated * @end: index of last (inclusive) element for which space is allocated + * @flags: page allocation flags * * This will guarantee that no future calls to flex_array_put() * will allocate memory. It can be used if you are expecting to @@ -242,6 +260,7 @@ int flex_array_prealloc(struct flex_array *fa, unsigned int start, /** * flex_array_get - pull data back out of the array + * @fa: the flex array from which to extract data * @element_nr: index of the element to fetch from the array * * Returns a pointer to the data at index @element_nr. Note @@ -266,3 +285,43 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr) } return &part->elements[index_inside_part(fa, element_nr)]; } + +static int part_is_free(struct flex_array_part *part) +{ + int i; + + for (i = 0; i < sizeof(struct flex_array_part); i++) + if (part->elements[i] != FLEX_ARRAY_FREE) + return 0; + return 1; +} + +/** + * flex_array_shrink - free unused second-level pages + * @fa: the flex array to shrink + * + * Frees all second-level pages that consist solely of unused + * elements. Returns the number of pages freed. + * + * Locking must be provided by the caller. + */ +int flex_array_shrink(struct flex_array *fa) +{ + struct flex_array_part *part; + int part_nr; + int ret = 0; + + if (elements_fit_in_base(fa)) + return ret; + for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) { + part = fa->parts[part_nr]; + if (!part) + continue; + if (part_is_free(part)) { + fa->parts[part_nr] = NULL; + kfree(part); + ret++; + } + } + return ret; +} diff --git a/lib/inflate.c b/lib/inflate.c index 1a8e8a978128..d10255973a9f 100644 --- a/lib/inflate.c +++ b/lib/inflate.c @@ -7,7 +7,7 @@ * Adapted for booting Linux by Hannu Savolainen 1993 * based on gzip-1.0.3 * - * Nicolas Pitre <nico@cam.org>, 1999/04/14 : + * Nicolas Pitre <nico@fluxnic.net>, 1999/04/14 : * Little mods for all variable to reside either into rodata or bss segments * by marking constant variables with 'const' and initializing all the others * at run-time only. This allows for the kernel uncompressor to run diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c index f1ed2fe76c65..bd2bea963364 100644 --- a/lib/is_single_threaded.c +++ b/lib/is_single_threaded.c @@ -12,34 +12,47 @@ #include <linux/sched.h> -/** - * is_single_threaded - Determine if a thread group is single-threaded or not - * @p: A task in the thread group in question - * - * This returns true if the thread group to which a task belongs is single - * threaded, false if it is not. +/* + * Returns true if the task does not share ->mm with another thread/process. */ -bool is_single_threaded(struct task_struct *p) +bool current_is_single_threaded(void) { - struct task_struct *g, *t; - struct mm_struct *mm = p->mm; + struct task_struct *task = current; + struct mm_struct *mm = task->mm; + struct task_struct *p, *t; + bool ret; - if (atomic_read(&p->signal->count) != 1) - goto no; + if (atomic_read(&task->signal->live) != 1) + return false; - if (atomic_read(&p->mm->mm_users) != 1) { - read_lock(&tasklist_lock); - do_each_thread(g, t) { - if (t->mm == mm && t != p) - goto no_unlock; - } while_each_thread(g, t); - read_unlock(&tasklist_lock); - } + if (atomic_read(&mm->mm_users) == 1) + return true; - return true; + ret = false; + rcu_read_lock(); + for_each_process(p) { + if (unlikely(p->flags & PF_KTHREAD)) + continue; + if (unlikely(p == task->group_leader)) + continue; + + t = p; + do { + if (unlikely(t->mm == mm)) + goto found; + if (likely(t->mm)) + break; + /* + * t->mm == NULL. Make sure next_thread/next_task + * will see other CLONE_VM tasks which might be + * forked before exiting. + */ + smp_rmb(); + } while_each_thread(p, t); + } + ret = true; +found: + rcu_read_unlock(); -no_unlock: - read_unlock(&tasklist_lock); -no: - return false; + return ret; } diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index 39f1029e3525..4ebfa5a164d7 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -5,10 +5,13 @@ * relegated to obsolescence, but used by various less * important (or lazy) subsystems. */ -#include <linux/smp_lock.h> #include <linux/module.h> #include <linux/kallsyms.h> #include <linux/semaphore.h> +#include <linux/smp_lock.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/bkl.h> /* * The 'big kernel lock' @@ -113,21 +116,26 @@ static inline void __unlock_kernel(void) * This cannot happen asynchronously, so we only need to * worry about other CPU's. */ -void __lockfunc lock_kernel(void) +void __lockfunc _lock_kernel(const char *func, const char *file, int line) { - int depth = current->lock_depth+1; + int depth = current->lock_depth + 1; + + trace_lock_kernel(func, file, line); + if (likely(!depth)) __lock_kernel(); current->lock_depth = depth; } -void __lockfunc unlock_kernel(void) +void __lockfunc _unlock_kernel(const char *func, const char *file, int line) { BUG_ON(current->lock_depth < 0); if (likely(--current->lock_depth < 0)) __unlock_kernel(); + + trace_unlock_kernel(func, file, line); } -EXPORT_SYMBOL(lock_kernel); -EXPORT_SYMBOL(unlock_kernel); +EXPORT_SYMBOL(_lock_kernel); +EXPORT_SYMBOL(_unlock_kernel); diff --git a/lib/lru_cache.c b/lib/lru_cache.c new file mode 100644 index 000000000000..270de9d31b8c --- /dev/null +++ b/lib/lru_cache.c @@ -0,0 +1,560 @@ +/* + lru_cache.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>. + Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include <linux/module.h> +#include <linux/bitops.h> +#include <linux/slab.h> +#include <linux/string.h> /* for memset */ +#include <linux/seq_file.h> /* for seq_printf */ +#include <linux/lru_cache.h> + +MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " + "Lars Ellenberg <lars@linbit.com>"); +MODULE_DESCRIPTION("lru_cache - Track sets of hot objects"); +MODULE_LICENSE("GPL"); + +/* this is developers aid only. + * it catches concurrent access (lack of locking on the users part) */ +#define PARANOIA_ENTRY() do { \ + BUG_ON(!lc); \ + BUG_ON(!lc->nr_elements); \ + BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \ +} while (0) + +#define RETURN(x...) do { \ + clear_bit(__LC_PARANOIA, &lc->flags); \ + smp_mb__after_clear_bit(); return x ; } while (0) + +/* BUG() if e is not one of the elements tracked by lc */ +#define PARANOIA_LC_ELEMENT(lc, e) do { \ + struct lru_cache *lc_ = (lc); \ + struct lc_element *e_ = (e); \ + unsigned i = e_->lc_index; \ + BUG_ON(i >= lc_->nr_elements); \ + BUG_ON(lc_->lc_element[i] != e_); } while (0) + +/** + * lc_create - prepares to track objects in an active set + * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details + * @e_count: number of elements allowed to be active simultaneously + * @e_size: size of the tracked objects + * @e_off: offset to the &struct lc_element member in a tracked object + * + * Returns a pointer to a newly initialized struct lru_cache on success, + * or NULL on (allocation) failure. + */ +struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned e_count, size_t e_size, size_t e_off) +{ + struct hlist_head *slot = NULL; + struct lc_element **element = NULL; + struct lru_cache *lc; + struct lc_element *e; + unsigned cache_obj_size = kmem_cache_size(cache); + unsigned i; + + WARN_ON(cache_obj_size < e_size); + if (cache_obj_size < e_size) + return NULL; + + /* e_count too big; would probably fail the allocation below anyways. + * for typical use cases, e_count should be few thousand at most. */ + if (e_count > LC_MAX_ACTIVE) + return NULL; + + slot = kzalloc(e_count * sizeof(struct hlist_head*), GFP_KERNEL); + if (!slot) + goto out_fail; + element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL); + if (!element) + goto out_fail; + + lc = kzalloc(sizeof(*lc), GFP_KERNEL); + if (!lc) + goto out_fail; + + INIT_LIST_HEAD(&lc->in_use); + INIT_LIST_HEAD(&lc->lru); + INIT_LIST_HEAD(&lc->free); + + lc->name = name; + lc->element_size = e_size; + lc->element_off = e_off; + lc->nr_elements = e_count; + lc->new_number = LC_FREE; + lc->lc_cache = cache; + lc->lc_element = element; + lc->lc_slot = slot; + + /* preallocate all objects */ + for (i = 0; i < e_count; i++) { + void *p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) + break; + memset(p, 0, lc->element_size); + e = p + e_off; + e->lc_index = i; + e->lc_number = LC_FREE; + list_add(&e->list, &lc->free); + element[i] = e; + } + if (i == e_count) + return lc; + + /* else: could not allocate all elements, give up */ + for (i--; i; i--) { + void *p = element[i]; + kmem_cache_free(cache, p - e_off); + } + kfree(lc); +out_fail: + kfree(element); + kfree(slot); + return NULL; +} + +void lc_free_by_index(struct lru_cache *lc, unsigned i) +{ + void *p = lc->lc_element[i]; + WARN_ON(!p); + if (p) { + p -= lc->element_off; + kmem_cache_free(lc->lc_cache, p); + } +} + +/** + * lc_destroy - frees memory allocated by lc_create() + * @lc: the lru cache to destroy + */ +void lc_destroy(struct lru_cache *lc) +{ + unsigned i; + if (!lc) + return; + for (i = 0; i < lc->nr_elements; i++) + lc_free_by_index(lc, i); + kfree(lc->lc_element); + kfree(lc->lc_slot); + kfree(lc); +} + +/** + * lc_reset - does a full reset for @lc and the hash table slots. + * @lc: the lru cache to operate on + * + * It is roughly the equivalent of re-allocating a fresh lru_cache object, + * basically a short cut to lc_destroy(lc); lc = lc_create(...); + */ +void lc_reset(struct lru_cache *lc) +{ + unsigned i; + + INIT_LIST_HEAD(&lc->in_use); + INIT_LIST_HEAD(&lc->lru); + INIT_LIST_HEAD(&lc->free); + lc->used = 0; + lc->hits = 0; + lc->misses = 0; + lc->starving = 0; + lc->dirty = 0; + lc->changed = 0; + lc->flags = 0; + lc->changing_element = NULL; + lc->new_number = LC_FREE; + memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); + + for (i = 0; i < lc->nr_elements; i++) { + struct lc_element *e = lc->lc_element[i]; + void *p = e; + p -= lc->element_off; + memset(p, 0, lc->element_size); + /* re-init it */ + e->lc_index = i; + e->lc_number = LC_FREE; + list_add(&e->list, &lc->free); + } +} + +/** + * lc_seq_printf_stats - print stats about @lc into @seq + * @seq: the seq_file to print into + * @lc: the lru cache to print statistics of + */ +size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) +{ + /* NOTE: + * total calls to lc_get are + * (starving + hits + misses) + * misses include "dirty" count (update from an other thread in + * progress) and "changed", when this in fact lead to an successful + * update of the cache. + */ + return seq_printf(seq, "\t%s: used:%u/%u " + "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", + lc->name, lc->used, lc->nr_elements, + lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); +} + +static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) +{ + return lc->lc_slot + (enr % lc->nr_elements); +} + + +/** + * lc_find - find element by label, if present in the hash table + * @lc: The lru_cache object + * @enr: element number + * + * Returns the pointer to an element, if the element with the requested + * "label" or element number is present in the hash table, + * or NULL if not found. Does not change the refcnt. + */ +struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) +{ + struct hlist_node *n; + struct lc_element *e; + + BUG_ON(!lc); + BUG_ON(!lc->nr_elements); + hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { + if (e->lc_number == enr) + return e; + } + return NULL; +} + +/* returned element will be "recycled" immediately */ +static struct lc_element *lc_evict(struct lru_cache *lc) +{ + struct list_head *n; + struct lc_element *e; + + if (list_empty(&lc->lru)) + return NULL; + + n = lc->lru.prev; + e = list_entry(n, struct lc_element, list); + + PARANOIA_LC_ELEMENT(lc, e); + + list_del(&e->list); + hlist_del(&e->colision); + return e; +} + +/** + * lc_del - removes an element from the cache + * @lc: The lru_cache object + * @e: The element to remove + * + * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list, + * sets @e->enr to %LC_FREE. + */ +void lc_del(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + PARANOIA_LC_ELEMENT(lc, e); + BUG_ON(e->refcnt); + + e->lc_number = LC_FREE; + hlist_del_init(&e->colision); + list_move(&e->list, &lc->free); + RETURN(); +} + +static struct lc_element *lc_get_unused_element(struct lru_cache *lc) +{ + struct list_head *n; + + if (list_empty(&lc->free)) + return lc_evict(lc); + + n = lc->free.next; + list_del(n); + return list_entry(n, struct lc_element, list); +} + +static int lc_unused_element_available(struct lru_cache *lc) +{ + if (!list_empty(&lc->free)) + return 1; /* something on the free list */ + if (!list_empty(&lc->lru)) + return 1; /* something to evict */ + + return 0; +} + + +/** + * lc_get - get element by label, maybe change the active set + * @lc: the lru cache to operate on + * @enr: the label to look up + * + * Finds an element in the cache, increases its usage count, + * "touches" and returns it. + * + * In case the requested number is not present, it needs to be added to the + * cache. Therefore it is possible that an other element becomes evicted from + * the cache. In either case, the user is notified so he is able to e.g. keep + * a persistent log of the cache changes, and therefore the objects in use. + * + * Return values: + * NULL + * The cache was marked %LC_STARVING, + * or the requested label was not in the active set + * and a changing transaction is still pending (@lc was marked %LC_DIRTY). + * Or no unused or free element could be recycled (@lc will be marked as + * %LC_STARVING, blocking further lc_get() operations). + * + * pointer to the element with the REQUESTED element number. + * In this case, it can be used right away + * + * pointer to an UNUSED element with some different element number, + * where that different number may also be %LC_FREE. + * + * In this case, the cache is marked %LC_DIRTY (blocking further changes), + * and the returned element pointer is removed from the lru list and + * hash collision chains. The user now should do whatever housekeeping + * is necessary. + * Then he must call lc_changed(lc,element_pointer), to finish + * the change. + * + * NOTE: The user needs to check the lc_number on EACH use, so he recognizes + * any cache set change. + */ +struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e; + + PARANOIA_ENTRY(); + if (lc->flags & LC_STARVING) { + ++lc->starving; + RETURN(NULL); + } + + e = lc_find(lc, enr); + if (e) { + ++lc->hits; + if (e->refcnt++ == 0) + lc->used++; + list_move(&e->list, &lc->in_use); /* Not evictable... */ + RETURN(e); + } + + ++lc->misses; + + /* In case there is nothing available and we can not kick out + * the LRU element, we have to wait ... + */ + if (!lc_unused_element_available(lc)) { + __set_bit(__LC_STARVING, &lc->flags); + RETURN(NULL); + } + + /* it was not present in the active set. + * we are going to recycle an unused (or even "free") element. + * user may need to commit a transaction to record that change. + * we serialize on flags & TF_DIRTY */ + if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { + ++lc->dirty; + RETURN(NULL); + } + + e = lc_get_unused_element(lc); + BUG_ON(!e); + + clear_bit(__LC_STARVING, &lc->flags); + BUG_ON(++e->refcnt != 1); + lc->used++; + + lc->changing_element = e; + lc->new_number = enr; + + RETURN(e); +} + +/* similar to lc_get, + * but only gets a new reference on an existing element. + * you either get the requested element, or NULL. + * will be consolidated into one function. + */ +struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e; + + PARANOIA_ENTRY(); + if (lc->flags & LC_STARVING) { + ++lc->starving; + RETURN(NULL); + } + + e = lc_find(lc, enr); + if (e) { + ++lc->hits; + if (e->refcnt++ == 0) + lc->used++; + list_move(&e->list, &lc->in_use); /* Not evictable... */ + } + RETURN(e); +} + +/** + * lc_changed - tell @lc that the change has been recorded + * @lc: the lru cache to operate on + * @e: the element pending label change + */ +void lc_changed(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + BUG_ON(e != lc->changing_element); + PARANOIA_LC_ELEMENT(lc, e); + ++lc->changed; + e->lc_number = lc->new_number; + list_add(&e->list, &lc->in_use); + hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); + lc->changing_element = NULL; + lc->new_number = LC_FREE; + clear_bit(__LC_DIRTY, &lc->flags); + smp_mb__after_clear_bit(); + RETURN(); +} + + +/** + * lc_put - give up refcnt of @e + * @lc: the lru cache to operate on + * @e: the element to put + * + * If refcnt reaches zero, the element is moved to the lru list, + * and a %LC_STARVING (if set) is cleared. + * Returns the new (post-decrement) refcnt. + */ +unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + PARANOIA_LC_ELEMENT(lc, e); + BUG_ON(e->refcnt == 0); + BUG_ON(e == lc->changing_element); + if (--e->refcnt == 0) { + /* move it to the front of LRU. */ + list_move(&e->list, &lc->lru); + lc->used--; + clear_bit(__LC_STARVING, &lc->flags); + smp_mb__after_clear_bit(); + } + RETURN(e->refcnt); +} + +/** + * lc_element_by_index + * @lc: the lru cache to operate on + * @i: the index of the element to return + */ +struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i) +{ + BUG_ON(i >= lc->nr_elements); + BUG_ON(lc->lc_element[i] == NULL); + BUG_ON(lc->lc_element[i]->lc_index != i); + return lc->lc_element[i]; +} + +/** + * lc_index_of + * @lc: the lru cache to operate on + * @e: the element to query for its index position in lc->element + */ +unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_LC_ELEMENT(lc, e); + return e->lc_index; +} + +/** + * lc_set - associate index with label + * @lc: the lru cache to operate on + * @enr: the label to set + * @index: the element index to associate label with. + * + * Used to initialize the active set to some previously recorded state. + */ +void lc_set(struct lru_cache *lc, unsigned int enr, int index) +{ + struct lc_element *e; + + if (index < 0 || index >= lc->nr_elements) + return; + + e = lc_element_by_index(lc, index); + e->lc_number = enr; + + hlist_del_init(&e->colision); + hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); + list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); +} + +/** + * lc_dump - Dump a complete LRU cache to seq in textual form. + * @lc: the lru cache to operate on + * @seq: the &struct seq_file pointer to seq_printf into + * @utext: user supplied "heading" or other info + * @detail: function pointer the user may provide to dump further details + * of the object the lc_element is embedded in. + */ +void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, + void (*detail) (struct seq_file *, struct lc_element *)) +{ + unsigned int nr_elements = lc->nr_elements; + struct lc_element *e; + int i; + + seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext); + for (i = 0; i < nr_elements; i++) { + e = lc_element_by_index(lc, i); + if (e->lc_number == LC_FREE) { + seq_printf(seq, "\t%2d: FREE\n", i); + } else { + seq_printf(seq, "\t%2d: %4u %4u ", i, + e->lc_number, e->refcnt); + detail(seq, e); + } + } +} + +EXPORT_SYMBOL(lc_create); +EXPORT_SYMBOL(lc_reset); +EXPORT_SYMBOL(lc_destroy); +EXPORT_SYMBOL(lc_set); +EXPORT_SYMBOL(lc_del); +EXPORT_SYMBOL(lc_try_get); +EXPORT_SYMBOL(lc_find); +EXPORT_SYMBOL(lc_get); +EXPORT_SYMBOL(lc_put); +EXPORT_SYMBOL(lc_changed); +EXPORT_SYMBOL(lc_element_by_index); +EXPORT_SYMBOL(lc_index_of); +EXPORT_SYMBOL(lc_seq_printf_stats); +EXPORT_SYMBOL(lc_seq_dump_details); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 23abbd93cae1..92cdd9936e3d 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -200,6 +200,9 @@ radix_tree_node_free(struct radix_tree_node *node) * ensure that the addition of a single element in the tree cannot fail. On * success, return zero, with preemption disabled. On error, return -ENOMEM * with preemption not disabled. + * + * To make use of this facility, the radix tree must be initialised without + * __GFP_WAIT being passed to INIT_RADIX_TREE(). */ int radix_tree_preload(gfp_t gfp_mask) { @@ -543,7 +546,6 @@ out: } EXPORT_SYMBOL(radix_tree_tag_clear); -#ifndef __KERNEL__ /* Only the test harness uses this at present */ /** * radix_tree_tag_get - get a tag on a radix tree node * @root: radix tree root @@ -606,7 +608,6 @@ int radix_tree_tag_get(struct radix_tree_root *root, } } EXPORT_SYMBOL(radix_tree_tag_get); -#endif /** * radix_tree_next_hole - find the next hole (not-present entry) diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 26187edcc7ea..09f5ce1810dc 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -7,15 +7,12 @@ * parameter. Now every user can use their own standalone ratelimit_state. * * This file is released under the GPLv2. - * */ -#include <linux/kernel.h> +#include <linux/ratelimit.h> #include <linux/jiffies.h> #include <linux/module.h> -static DEFINE_SPINLOCK(ratelimit_lock); - /* * __ratelimit - rate limiting * @rs: ratelimit_state data @@ -23,35 +20,43 @@ static DEFINE_SPINLOCK(ratelimit_lock); * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks * in every @rs->ratelimit_jiffies */ -int __ratelimit(struct ratelimit_state *rs) +int ___ratelimit(struct ratelimit_state *rs, const char *func) { unsigned long flags; + int ret; if (!rs->interval) return 1; - spin_lock_irqsave(&ratelimit_lock, flags); + /* + * If we contend on this state's lock then almost + * by definition we are too busy to print a message, + * in addition to the one that will be printed by + * the entity that is holding the lock already: + */ + if (!spin_trylock_irqsave(&rs->lock, flags)) + return 1; + if (!rs->begin) rs->begin = jiffies; if (time_is_before_jiffies(rs->begin + rs->interval)) { if (rs->missed) printk(KERN_WARNING "%s: %d callbacks suppressed\n", - __func__, rs->missed); - rs->begin = 0; + func, rs->missed); + rs->begin = 0; rs->printed = 0; - rs->missed = 0; + rs->missed = 0; } - if (rs->burst && rs->burst > rs->printed) - goto print; - - rs->missed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 0; + if (rs->burst && rs->burst > rs->printed) { + rs->printed++; + ret = 1; + } else { + rs->missed++; + ret = 0; + } + spin_unlock_irqrestore(&rs->lock, flags); -print: - rs->printed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 1; + return ret; } -EXPORT_SYMBOL(__ratelimit); +EXPORT_SYMBOL(___ratelimit); diff --git a/lib/string.c b/lib/string.c index b19b87af65a3..e96421ab9a9a 100644 --- a/lib/string.c +++ b/lib/string.c @@ -246,13 +246,17 @@ EXPORT_SYMBOL(strlcat); #undef strcmp int strcmp(const char *cs, const char *ct) { - signed char __res; + unsigned char c1, c2; while (1) { - if ((__res = *cs - *ct++) != 0 || !*cs++) + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) break; } - return __res; + return 0; } EXPORT_SYMBOL(strcmp); #endif @@ -266,14 +270,18 @@ EXPORT_SYMBOL(strcmp); */ int strncmp(const char *cs, const char *ct, size_t count) { - signed char __res = 0; + unsigned char c1, c2; while (count) { - if ((__res = *cs - *ct++) != 0 || !*cs++) + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) break; count--; } - return __res; + return 0; } EXPORT_SYMBOL(strncmp); #endif diff --git a/lib/swiotlb.c b/lib/swiotlb.c index bffe6d7ef9d9..795472d8ae24 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -97,6 +97,8 @@ static phys_addr_t *io_tlb_orig_addr; */ static DEFINE_SPINLOCK(io_tlb_lock); +static int late_alloc; + static int __init setup_io_tlb_npages(char *str) { @@ -109,55 +111,22 @@ setup_io_tlb_npages(char *str) ++str; if (!strcmp(str, "force")) swiotlb_force = 1; + return 1; } __setup("swiotlb=", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? */ -void * __weak __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) -{ - return alloc_bootmem_low_pages(size); -} - -void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs) -{ - return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); -} - -dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) -{ - return paddr; -} - -phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) -{ - return baddr; -} - +/* Note that this doesn't work with highmem page */ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, volatile void *address) { - return swiotlb_phys_to_bus(hwdev, virt_to_phys(address)); -} - -void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address) -{ - return phys_to_virt(swiotlb_bus_to_phys(hwdev, address)); -} - -int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev, - dma_addr_t addr, size_t size) -{ - return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); -} - -int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) -{ - return 0; + return phys_to_dma(hwdev, virt_to_phys(address)); } -static void swiotlb_print_info(unsigned long bytes) +void swiotlb_print_info(void) { + unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; phys_addr_t pstart, pend; pstart = virt_to_phys(io_tlb_start); @@ -175,7 +144,7 @@ static void swiotlb_print_info(unsigned long bytes) * structures for the software IO TLB used to implement the DMA API. */ void __init -swiotlb_init_with_default_size(size_t default_size) +swiotlb_init_with_default_size(size_t default_size, int verbose) { unsigned long i, bytes; @@ -189,7 +158,7 @@ swiotlb_init_with_default_size(size_t default_size) /* * Get IO TLB memory from the low pages */ - io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs); + io_tlb_start = alloc_bootmem_low_pages(bytes); if (!io_tlb_start) panic("Cannot allocate SWIOTLB buffer"); io_tlb_end = io_tlb_start + bytes; @@ -211,14 +180,14 @@ swiotlb_init_with_default_size(size_t default_size) io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); if (!io_tlb_overflow_buffer) panic("Cannot allocate SWIOTLB overflow buffer!\n"); - - swiotlb_print_info(bytes); + if (verbose) + swiotlb_print_info(); } void __init -swiotlb_init(void) +swiotlb_init(int verbose) { - swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ + swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */ } /* @@ -245,7 +214,8 @@ swiotlb_late_init_with_default_size(size_t default_size) bytes = io_tlb_nslabs << IO_TLB_SHIFT; while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - io_tlb_start = swiotlb_alloc(order, io_tlb_nslabs); + io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, + order); if (io_tlb_start) break; order--; @@ -294,7 +264,9 @@ swiotlb_late_init_with_default_size(size_t default_size) if (!io_tlb_overflow_buffer) goto cleanup4; - swiotlb_print_info(bytes); + swiotlb_print_info(); + + late_alloc = 1; return 0; @@ -315,20 +287,36 @@ cleanup1: return -ENOMEM; } -static inline int -address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size) +void __init swiotlb_free(void) { - return swiotlb_arch_address_needs_mapping(hwdev, addr, size); -} + if (!io_tlb_overflow_buffer) + return; -static inline int range_needs_mapping(phys_addr_t paddr, size_t size) -{ - return swiotlb_force || swiotlb_arch_range_needs_mapping(paddr, size); + if (late_alloc) { + free_pages((unsigned long)io_tlb_overflow_buffer, + get_order(io_tlb_overflow)); + free_pages((unsigned long)io_tlb_orig_addr, + get_order(io_tlb_nslabs * sizeof(phys_addr_t))); + free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * + sizeof(int))); + free_pages((unsigned long)io_tlb_start, + get_order(io_tlb_nslabs << IO_TLB_SHIFT)); + } else { + free_bootmem_late(__pa(io_tlb_overflow_buffer), + io_tlb_overflow); + free_bootmem_late(__pa(io_tlb_orig_addr), + io_tlb_nslabs * sizeof(phys_addr_t)); + free_bootmem_late(__pa(io_tlb_list), + io_tlb_nslabs * sizeof(int)); + free_bootmem_late(__pa(io_tlb_start), + io_tlb_nslabs << IO_TLB_SHIFT); + } } -static int is_swiotlb_buffer(char *addr) +static int is_swiotlb_buffer(phys_addr_t paddr) { - return addr >= io_tlb_start && addr < io_tlb_end; + return paddr >= virt_to_phys(io_tlb_start) && + paddr < virt_to_phys(io_tlb_end); } /* @@ -561,9 +549,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_mask = hwdev->coherent_dma_mask; ret = (void *)__get_free_pages(flags, order); - if (ret && - !is_buffer_dma_capable(dma_mask, swiotlb_virt_to_bus(hwdev, ret), - size)) { + if (ret && swiotlb_virt_to_bus(hwdev, ret) + size > dma_mask) { /* * The allocated memory isn't reachable by the device. */ @@ -585,7 +571,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, dev_addr = swiotlb_virt_to_bus(hwdev, ret); /* Confirm address can be DMA'd by device */ - if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) { + if (dev_addr + size > dma_mask) { printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", (unsigned long long)dma_mask, (unsigned long long)dev_addr); @@ -601,11 +587,13 @@ EXPORT_SYMBOL(swiotlb_alloc_coherent); void swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dma_handle) + dma_addr_t dev_addr) { + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); + WARN_ON(irqs_disabled()); - if (!is_swiotlb_buffer(vaddr)) - free_pages((unsigned long) vaddr, get_order(size)); + if (!is_swiotlb_buffer(paddr)) + free_pages((unsigned long)vaddr, get_order(size)); else /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); @@ -625,12 +613,15 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at " "device %s\n", size, dev ? dev_name(dev) : "?"); - if (size > io_tlb_overflow && do_panic) { - if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) - panic("DMA: Memory would be corrupted\n"); - if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) - panic("DMA: Random memory would be DMAed\n"); - } + if (size <= io_tlb_overflow || !do_panic) + return; + + if (dir == DMA_BIDIRECTIONAL) + panic("DMA: Random memory could be DMA accessed\n"); + if (dir == DMA_FROM_DEVICE) + panic("DMA: Random memory could be DMA written\n"); + if (dir == DMA_TO_DEVICE) + panic("DMA: Random memory could be DMA read\n"); } /* @@ -646,7 +637,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, struct dma_attrs *attrs) { phys_addr_t phys = page_to_phys(page) + offset; - dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys); + dma_addr_t dev_addr = phys_to_dma(dev, phys); void *map; BUG_ON(dir == DMA_NONE); @@ -655,8 +646,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, * we can safely return the device addr and not worry about bounce * buffering it. */ - if (!address_needs_mapping(dev, dev_addr, size) && - !range_needs_mapping(phys, size)) + if (dma_capable(dev, dev_addr, size) && !swiotlb_force) return dev_addr; /* @@ -673,7 +663,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, /* * Ensure that the address returned is DMA'ble */ - if (address_needs_mapping(dev, dev_addr, size)) + if (!dma_capable(dev, dev_addr, size)) panic("map_single: bounce buffer is not DMA'ble"); return dev_addr; @@ -691,19 +681,25 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir) { - char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr); + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); - if (is_swiotlb_buffer(dma_addr)) { - do_unmap_single(hwdev, dma_addr, size, dir); + if (is_swiotlb_buffer(paddr)) { + do_unmap_single(hwdev, phys_to_virt(paddr), size, dir); return; } if (dir != DMA_FROM_DEVICE) return; - dma_mark_clean(dma_addr, size); + /* + * phys_to_virt doesn't work with hihgmem page but we could + * call dma_mark_clean() with hihgmem page here. However, we + * are fine since dma_mark_clean() is null on POWERPC. We can + * make dma_mark_clean() take a physical address if necessary. + */ + dma_mark_clean(phys_to_virt(paddr), size); } void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, @@ -728,19 +724,19 @@ static void swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir, int target) { - char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr); + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); - if (is_swiotlb_buffer(dma_addr)) { - sync_single(hwdev, dma_addr, size, dir, target); + if (is_swiotlb_buffer(paddr)) { + sync_single(hwdev, phys_to_virt(paddr), size, dir, target); return; } if (dir != DMA_FROM_DEVICE) return; - dma_mark_clean(dma_addr, size); + dma_mark_clean(phys_to_virt(paddr), size); } void @@ -817,10 +813,10 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, for_each_sg(sgl, sg, nelems, i) { phys_addr_t paddr = sg_phys(sg); - dma_addr_t dev_addr = swiotlb_phys_to_bus(hwdev, paddr); + dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); - if (range_needs_mapping(paddr, sg->length) || - address_needs_mapping(hwdev, dev_addr, sg->length)) { + if (swiotlb_force || + !dma_capable(hwdev, dev_addr, sg->length)) { void *map = map_single(hwdev, sg_phys(sg), sg->length, dir); if (!map) { diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 756ccafa9cec..33bed5e67a21 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -25,6 +25,7 @@ #include <linux/kallsyms.h> #include <linux/uaccess.h> #include <linux/ioport.h> +#include <net/addrconf.h> #include <asm/page.h> /* for PAGE_SIZE */ #include <asm/div64.h> @@ -580,7 +581,7 @@ static char *symbol_string(char *buf, char *end, void *ptr, unsigned long value = (unsigned long) ptr; #ifdef CONFIG_KALLSYMS char sym[KSYM_SYMBOL_LEN]; - if (ext != 'f') + if (ext != 'f' && ext != 's') sprint_symbol(sym, value); else kallsyms_lookup(value, NULL, NULL, NULL, sym); @@ -630,60 +631,161 @@ static char *resource_string(char *buf, char *end, struct resource *res, } static char *mac_address_string(char *buf, char *end, u8 *addr, - struct printf_spec spec) + struct printf_spec spec, const char *fmt) { - char mac_addr[6 * 3]; /* (6 * 2 hex digits), 5 colons and trailing zero */ + char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")]; char *p = mac_addr; int i; for (i = 0; i < 6; i++) { p = pack_hex_byte(p, addr[i]); - if (!(spec.flags & SPECIAL) && i != 5) + if (fmt[0] == 'M' && i != 5) *p++ = ':'; } *p = '\0'; - spec.flags &= ~SPECIAL; return string(buf, end, mac_addr, spec); } -static char *ip6_addr_string(char *buf, char *end, u8 *addr, - struct printf_spec spec) +static char *ip4_string(char *p, const u8 *addr, bool leading_zeros) { - char ip6_addr[8 * 5]; /* (8 * 4 hex digits), 7 colons and trailing zero */ - char *p = ip6_addr; int i; + for (i = 0; i < 4; i++) { + char temp[3]; /* hold each IP quad in reverse order */ + int digits = put_dec_trunc(temp, addr[i]) - temp; + if (leading_zeros) { + if (digits < 3) + *p++ = '0'; + if (digits < 2) + *p++ = '0'; + } + /* reverse the digits in the quad */ + while (digits--) + *p++ = temp[digits]; + if (i < 3) + *p++ = '.'; + } + + *p = '\0'; + return p; +} + +static char *ip6_compressed_string(char *p, const char *addr) +{ + int i; + int j; + int range; + unsigned char zerolength[8]; + int longest = 1; + int colonpos = -1; + u16 word; + u8 hi; + u8 lo; + bool needcolon = false; + bool useIPv4; + struct in6_addr in6; + + memcpy(&in6, addr, sizeof(struct in6_addr)); + + useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6); + + memset(zerolength, 0, sizeof(zerolength)); + + if (useIPv4) + range = 6; + else + range = 8; + + /* find position of longest 0 run */ + for (i = 0; i < range; i++) { + for (j = i; j < range; j++) { + if (in6.s6_addr16[j] != 0) + break; + zerolength[i]++; + } + } + for (i = 0; i < range; i++) { + if (zerolength[i] > longest) { + longest = zerolength[i]; + colonpos = i; + } + } + + /* emit address */ + for (i = 0; i < range; i++) { + if (i == colonpos) { + if (needcolon || i == 0) + *p++ = ':'; + *p++ = ':'; + needcolon = false; + i += longest - 1; + continue; + } + if (needcolon) { + *p++ = ':'; + needcolon = false; + } + /* hex u16 without leading 0s */ + word = ntohs(in6.s6_addr16[i]); + hi = word >> 8; + lo = word & 0xff; + if (hi) { + if (hi > 0x0f) + p = pack_hex_byte(p, hi); + else + *p++ = hex_asc_lo(hi); + } + if (hi || lo > 0x0f) + p = pack_hex_byte(p, lo); + else + *p++ = hex_asc_lo(lo); + needcolon = true; + } + + if (useIPv4) { + if (needcolon) + *p++ = ':'; + p = ip4_string(p, &in6.s6_addr[12], false); + } + + *p = '\0'; + return p; +} + +static char *ip6_string(char *p, const char *addr, const char *fmt) +{ + int i; for (i = 0; i < 8; i++) { - p = pack_hex_byte(p, addr[2 * i]); - p = pack_hex_byte(p, addr[2 * i + 1]); - if (!(spec.flags & SPECIAL) && i != 7) + p = pack_hex_byte(p, *addr++); + p = pack_hex_byte(p, *addr++); + if (fmt[0] == 'I' && i != 7) *p++ = ':'; } + *p = '\0'; - spec.flags &= ~SPECIAL; + return p; +} + +static char *ip6_addr_string(char *buf, char *end, const u8 *addr, + struct printf_spec spec, const char *fmt) +{ + char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")]; + + if (fmt[0] == 'I' && fmt[2] == 'c') + ip6_compressed_string(ip6_addr, addr); + else + ip6_string(ip6_addr, addr, fmt); return string(buf, end, ip6_addr, spec); } -static char *ip4_addr_string(char *buf, char *end, u8 *addr, - struct printf_spec spec) +static char *ip4_addr_string(char *buf, char *end, const u8 *addr, + struct printf_spec spec, const char *fmt) { - char ip4_addr[4 * 4]; /* (4 * 3 decimal digits), 3 dots and trailing zero */ - char temp[3]; /* hold each IP quad in reverse order */ - char *p = ip4_addr; - int i, digits; + char ip4_addr[sizeof("255.255.255.255")]; - for (i = 0; i < 4; i++) { - digits = put_dec_trunc(temp, addr[i]) - temp; - /* reverse the digits in the quad */ - while (digits--) - *p++ = temp[digits]; - if (i != 3) - *p++ = '.'; - } - *p = '\0'; - spec.flags &= ~SPECIAL; + ip4_string(ip4_addr, addr, fmt[0] == 'i'); return string(buf, end, ip4_addr, spec); } @@ -697,16 +799,21 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr, * * - 'F' For symbolic function descriptor pointers with offset * - 'f' For simple symbolic function names without offset - * - 'S' For symbolic direct pointers + * - 'S' For symbolic direct pointers with offset + * - 's' For symbolic direct pointers without offset * - 'R' For a struct resource pointer, it prints the range of * addresses (not the name nor the flags) * - 'M' For a 6-byte MAC address, it prints the address in the * usual colon-separated hex notation - * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way (dot-separated - * decimal for v4 and colon separated network-order 16 bit hex for v6) - * - 'i' [46] for 'raw' IPv4/IPv6 addresses, IPv6 omits the colons, IPv4 is - * currently the same - * + * - 'm' For a 6-byte MAC address, it prints the hex address without colons + * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way + * IPv4 uses dot-separated decimal without leading 0's (1.2.3.4) + * IPv6 uses colon separated network-order 16 bit hex with leading 0's + * - 'i' [46] for 'raw' IPv4/IPv6 addresses + * IPv6 omits the colons (01020304...0f) + * IPv4 uses dot-separated decimal with leading 0's (010.123.045.006) + * - 'I6c' for IPv6 addresses printed as specified by + * http://www.ietf.org/id/draft-kawamura-ipv6-text-representation-03.txt * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a * pointer to the real address. @@ -721,25 +828,30 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'F': case 'f': ptr = dereference_function_descriptor(ptr); + case 's': /* Fallthrough */ case 'S': return symbol_string(buf, end, ptr, spec, *fmt); case 'R': return resource_string(buf, end, ptr, spec); - case 'm': - spec.flags |= SPECIAL; - /* Fallthrough */ - case 'M': - return mac_address_string(buf, end, ptr, spec); - case 'i': - spec.flags |= SPECIAL; - /* Fallthrough */ - case 'I': - if (fmt[1] == '6') - return ip6_addr_string(buf, end, ptr, spec); - if (fmt[1] == '4') - return ip4_addr_string(buf, end, ptr, spec); - spec.flags &= ~SPECIAL; + case 'M': /* Colon separated: 00:01:02:03:04:05 */ + case 'm': /* Contiguous: 000102030405 */ + return mac_address_string(buf, end, ptr, spec, fmt); + case 'I': /* Formatted IP supported + * 4: 1.2.3.4 + * 6: 0001:0203:...:0708 + * 6c: 1::708 or 1::1.2.3.4 + */ + case 'i': /* Contiguous: + * 4: 001.002.003.004 + * 6: 000102...0f + */ + switch (fmt[1]) { + case '6': + return ip6_addr_string(buf, end, ptr, spec, fmt); + case '4': + return ip4_addr_string(buf, end, ptr, spec, fmt); + } break; } spec.flags |= SMALL; @@ -958,10 +1070,12 @@ qualifier: * @args: Arguments for the format string * * This function follows C99 vsnprintf, but has some extensions: - * %pS output the name of a text symbol + * %pS output the name of a text symbol with offset + * %ps output the name of a text symbol without offset * %pF output the name of a function pointer with its offset * %pf output the name of a function pointer without its offset * %pR output the address range in a struct resource + * %n is ignored * * The return value is the number of characters which would * be generated for the given input, excluding the trailing @@ -983,13 +1097,8 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) /* Reject out-of-range values early. Large positive sizes are used for unknown buffer sizes. */ - if (unlikely((int) size < 0)) { - /* There can be only one.. */ - static char warn = 1; - WARN_ON(warn); - warn = 0; + if (WARN_ON_ONCE((int) size < 0)) return 0; - } str = buf; end = buf + size; @@ -1417,11 +1526,7 @@ EXPORT_SYMBOL_GPL(vbin_printf); * a binary buffer that generated by vbin_printf. * * The format follows C99 vsnprintf, but has some extensions: - * %pS output the name of a text symbol - * %pF output the name of a function pointer with its offset - * %pf output the name of a function pointer without its offset - * %pR output the address range in a struct resource - * %n is ignored + * see vsnprintf comment for details. * * The return value is the number of characters which would * be generated for the given input, excluding the trailing @@ -1439,13 +1544,8 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) struct printf_spec spec = {0}; - if (unlikely((int) size < 0)) { - /* There can be only one.. */ - static char warn = 1; - WARN_ON(warn); - warn = 0; + if (WARN_ON_ONCE((int) size < 0)) return 0; - } str = buf; end = buf + size; @@ -1671,7 +1771,7 @@ int vsscanf(const char * buf, const char * fmt, va_list args) * advance both strings to next white space */ if (*fmt == '*') { - while (!isspace(*fmt) && *fmt) + while (!isspace(*fmt) && *fmt != '%' && *fmt) fmt++; while (!isspace(*str) && *str) str++; diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c index c3e4a2baf835..46a31e5f49c3 100644 --- a/lib/zlib_deflate/deflate.c +++ b/lib/zlib_deflate/deflate.c @@ -135,7 +135,7 @@ static const config configuration_table[10] = { /* =========================================================================== * Update a hash value with the given input byte - * IN assertion: all calls to to UPDATE_HASH are made with consecutive + * IN assertion: all calls to UPDATE_HASH are made with consecutive * input characters, so that a running hash key can be computed from the * previous key instead of complete recalculation each time. */ @@ -146,7 +146,7 @@ static const config configuration_table[10] = { * Insert string str in the dictionary and set match_head to the previous head * of the hash chain (the most recent string with same hash key). Return * the previous length of the hash chain. - * IN assertion: all calls to to INSERT_STRING are made with consecutive + * IN assertion: all calls to INSERT_STRING are made with consecutive * input characters and the first MIN_MATCH bytes of str are valid * (except for the last MIN_MATCH-1 bytes of the input file). */ |