From 3d071d8da1f586c24863a57349586a1611b9aa67 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Thu, 15 Dec 2016 12:42:33 +0100 Subject: random: remove stale maybe_reseed_primary_crng The function maybe_reseed_primary_crng is not used anywhere and thus can be removed. Signed-off-by: Stephan Mueller Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 1ef26403bcc8..8e5ab20848c0 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -855,13 +855,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) spin_unlock_irqrestore(&primary_crng.lock, flags); } -static inline void maybe_reseed_primary_crng(void) -{ - if (crng_init > 2 && - time_after(jiffies, primary_crng.init_time + CRNG_RESEED_INTERVAL)) - crng_reseed(&primary_crng, &input_pool); -} - static inline void crng_wait_ready(void) { wait_event_interruptible(crng_init_wait, crng_ready()); -- cgit v1.2.3 From 2e03c36f25ebb52d3358b8baebcdf96895c33a87 Mon Sep 17 00:00:00 2001 From: Stephan Müller Date: Tue, 27 Dec 2016 23:39:31 +0100 Subject: random: remove stale urandom_init_wait The urandom_init_wait wait queue is a left over from the pre-ChaCha20 times and can therefore be savely removed. Signed-off-by: Stephan Mueller Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 8e5ab20848c0..482531d87fb8 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -409,7 +409,6 @@ static struct poolinfo { */ static DECLARE_WAIT_QUEUE_HEAD(random_read_wait); static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); -static DECLARE_WAIT_QUEUE_HEAD(urandom_init_wait); static struct fasync_struct *fasync; static DEFINE_SPINLOCK(random_ready_list_lock); -- cgit v1.2.3 From 43d8a72cd985ca5279a9eb84d61fcbb3ee3d3774 Mon Sep 17 00:00:00 2001 From: Stephan Müller Date: Tue, 27 Dec 2016 23:40:59 +0100 Subject: random: remove variable limit The variable limit was used to identify the nonblocking pool's unlimited random number generation. As the nonblocking pool is a thing of the past, remove the limit variable and any conditions around it (i.e. preserve the branches for limit == 1). Signed-off-by: Stephan Mueller Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 482531d87fb8..92d6dd24c86e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -466,7 +466,6 @@ struct entropy_store { int entropy_count; int entropy_total; unsigned int initialized:1; - unsigned int limit:1; unsigned int last_data_init:1; __u8 last_data[EXTRACT_SIZE]; }; @@ -484,7 +483,6 @@ static __u32 blocking_pool_data[OUTPUT_POOL_WORDS] __latent_entropy; static struct entropy_store input_pool = { .poolinfo = &poolinfo_table[0], .name = "input", - .limit = 1, .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), .pool = input_pool_data }; @@ -492,7 +490,6 @@ static struct entropy_store input_pool = { static struct entropy_store blocking_pool = { .poolinfo = &poolinfo_table[1], .name = "blocking", - .limit = 1, .pull = &input_pool, .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock), .pool = blocking_pool_data, @@ -1212,15 +1209,6 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) r->entropy_count > r->poolinfo->poolfracbits) return; - if (r->limit == 0 && random_min_urandom_seed) { - unsigned long now = jiffies; - - if (time_before(now, - r->last_pulled + random_min_urandom_seed * HZ)) - return; - r->last_pulled = now; - } - _xfer_secondary_pool(r, nbytes); } @@ -1228,8 +1216,6 @@ static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes) { __u32 tmp[OUTPUT_POOL_WORDS]; - /* For /dev/random's pool, always leave two wakeups' worth */ - int rsvd_bytes = r->limit ? 0 : random_read_wakeup_bits / 4; int bytes = nbytes; /* pull at least as much as a wakeup */ @@ -1240,7 +1226,7 @@ static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes) trace_xfer_secondary_pool(r->name, bytes * 8, nbytes * 8, ENTROPY_BITS(r), ENTROPY_BITS(r->pull)); bytes = extract_entropy(r->pull, tmp, bytes, - random_read_wakeup_bits / 8, rsvd_bytes); + random_read_wakeup_bits / 8, 0); mix_pool_bytes(r, tmp, bytes); credit_entropy_bits(r, bytes*8); } @@ -1268,7 +1254,7 @@ static void push_to_pool(struct work_struct *work) static size_t account(struct entropy_store *r, size_t nbytes, int min, int reserved) { - int entropy_count, orig; + int entropy_count, orig, have_bytes; size_t ibytes, nfrac; BUG_ON(r->entropy_count > r->poolinfo->poolfracbits); @@ -1277,14 +1263,12 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, retry: entropy_count = orig = ACCESS_ONCE(r->entropy_count); ibytes = nbytes; - /* If limited, never pull more than available */ - if (r->limit) { - int have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); + /* never pull more than available */ + have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); - if ((have_bytes -= reserved) < 0) - have_bytes = 0; - ibytes = min_t(size_t, ibytes, have_bytes); - } + if ((have_bytes -= reserved) < 0) + have_bytes = 0; + ibytes = min_t(size_t, ibytes, have_bytes); if (ibytes < min) ibytes = 0; -- cgit v1.2.3 From 5d0e5ea343a0f70351428476bcf8715e0731f26a Mon Sep 17 00:00:00 2001 From: Stephan Müller Date: Tue, 27 Dec 2016 23:41:22 +0100 Subject: random: fix comment for unused random_min_urandom_seed The variable random_min_urandom_seed is not needed any more as it defined the reseeding behavior of the nonblocking pool. Though it is not needed any more, it is left in the code for user space interface compatibility. Signed-off-by: Stephan Mueller Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 92d6dd24c86e..9d147d456598 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -313,9 +313,7 @@ static int random_read_wakeup_bits = 64; static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; /* - * The minimum number of seconds between urandom pool reseeding. We - * do this to limit the amount of entropy that can be drained from the - * input pool even if there are heavy demands on /dev/urandom. + * Variable is currently unused by left for user space compatibility. */ static int random_min_urandom_seed = 60; -- cgit v1.2.3 From f5b98461cb8167ba362ad9f74c41d126b7becea7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 6 Jan 2017 19:32:01 +0100 Subject: random: use chacha20 for get_random_int/long Now that our crng uses chacha20, we can rely on its speedy characteristics for replacing MD5, while simultaneously achieving a higher security guarantee. Before the idea was to use these functions if you wanted random integers that aren't stupidly insecure but aren't necessarily secure either, a vague gray zone, that hopefully was "good enough" for its users. With chacha20, we can strengthen this claim, since either we're using an rdrand-like instruction, or we're using the same crng as /dev/urandom. And it's faster than what was before. We could have chosen to replace this with a SipHash-derived function, which might be slightly faster, but at the cost of having yet another RNG construction in the kernel. By moving to chacha20, we have a single RNG to analyze and verify, and we also already get good performance improvements on all platforms. Implementation-wise, rather than use a generic buffer for both get_random_int/long and memcpy based on the size needs, we use a specific buffer for 32-bit reads and for 64-bit reads. This way, we're guaranteed to always have aligned accesses on all platforms. While slightly more verbose in C, the assembly this generates is a lot simpler than otherwise. Finally, on 32-bit platforms where longs and ints are the same size, we simply alias get_random_int to get_random_long. Signed-off-by: Jason A. Donenfeld Suggested-by: Theodore Ts'o Cc: Theodore Ts'o Cc: Hannes Frederic Sowa Cc: Andy Lutomirski Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 84 ++++++++++++++++++++++++++------------------------ include/linux/random.h | 1 - init/main.c | 1 - 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 9d147d456598..b800e5479b7d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2016,63 +2016,65 @@ struct ctl_table random_table[] = { }; #endif /* CONFIG_SYSCTL */ -static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; - -int random_int_secret_init(void) -{ - get_random_bytes(random_int_secret, sizeof(random_int_secret)); - return 0; -} - -static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash) - __aligned(sizeof(unsigned long)); +struct batched_entropy { + union { + unsigned long entropy_long[CHACHA20_BLOCK_SIZE / sizeof(unsigned long)]; + unsigned int entropy_int[CHACHA20_BLOCK_SIZE / sizeof(unsigned int)]; + }; + unsigned int position; +}; /* - * Get a random word for internal kernel use only. Similar to urandom but - * with the goal of minimal entropy pool depletion. As a result, the random - * value is not cryptographically secure but for several uses the cost of - * depleting entropy is too high + * Get a random word for internal kernel use only. The quality of the random + * number is either as good as RDRAND or as good as /dev/urandom, with the + * goal of being quite fast and not depleting entropy. */ -unsigned int get_random_int(void) +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long); +unsigned long get_random_long(void) { - __u32 *hash; - unsigned int ret; + unsigned long ret; + struct batched_entropy *batch; - if (arch_get_random_int(&ret)) + if (arch_get_random_long(&ret)) return ret; - hash = get_cpu_var(get_random_int_hash); - - hash[0] += current->pid + jiffies + random_get_entropy(); - md5_transform(hash, random_int_secret); - ret = hash[0]; - put_cpu_var(get_random_int_hash); - + batch = &get_cpu_var(batched_entropy_long); + if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) { + extract_crng((u8 *)batch->entropy_long); + batch->position = 0; + } + ret = batch->entropy_long[batch->position++]; + put_cpu_var(batched_entropy_long); return ret; } -EXPORT_SYMBOL(get_random_int); +EXPORT_SYMBOL(get_random_long); -/* - * Same as get_random_int(), but returns unsigned long. - */ -unsigned long get_random_long(void) +#if BITS_PER_LONG == 32 +unsigned int get_random_int(void) { - __u32 *hash; - unsigned long ret; + return get_random_long(); +} +#else +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int); +unsigned int get_random_int(void) +{ + unsigned int ret; + struct batched_entropy *batch; - if (arch_get_random_long(&ret)) + if (arch_get_random_int(&ret)) return ret; - hash = get_cpu_var(get_random_int_hash); - - hash[0] += current->pid + jiffies + random_get_entropy(); - md5_transform(hash, random_int_secret); - ret = *(unsigned long *)hash; - put_cpu_var(get_random_int_hash); - + batch = &get_cpu_var(batched_entropy_int); + if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) { + extract_crng((u8 *)batch->entropy_int); + batch->position = 0; + } + ret = batch->entropy_int[batch->position++]; + put_cpu_var(batched_entropy_int); return ret; } -EXPORT_SYMBOL(get_random_long); +#endif +EXPORT_SYMBOL(get_random_int); /** * randomize_page - Generate a random, page aligned address diff --git a/include/linux/random.h b/include/linux/random.h index 7bd2403e4fef..16ab429735a7 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -37,7 +37,6 @@ extern void get_random_bytes(void *buf, int nbytes); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); extern void get_random_bytes_arch(void *buf, int nbytes); -extern int random_int_secret_init(void); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; diff --git a/init/main.c b/init/main.c index b0c9d6facef9..09beb7fc6e8c 100644 --- a/init/main.c +++ b/init/main.c @@ -879,7 +879,6 @@ static void __init do_basic_setup(void) do_ctors(); usermodehelper_enable(); do_initcalls(); - random_int_secret_init(); } static void __init do_pre_smp_initcalls(void) -- cgit v1.2.3 From c440408cf6901eeb2c09563397e24a9097907078 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 22 Jan 2017 16:34:08 +0100 Subject: random: convert get_random_int/long into get_random_u32/u64 Many times, when a user wants a random number, he wants a random number of a guaranteed size. So, thinking of get_random_int and get_random_long in terms of get_random_u32 and get_random_u64 makes it much easier to achieve this. It also makes the code simpler. On 32-bit platforms, get_random_int and get_random_long are both aliased to get_random_u32. On 64-bit platforms, int->u32 and long->u64. Signed-off-by: Jason A. Donenfeld Cc: Greg Kroah-Hartman Cc: Theodore Ts'o Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 55 +++++++++++++++++++++++++------------------------- include/linux/random.h | 17 ++++++++++++++-- 2 files changed, 42 insertions(+), 30 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index b800e5479b7d..066ae125f2c8 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2018,8 +2018,8 @@ struct ctl_table random_table[] = { struct batched_entropy { union { - unsigned long entropy_long[CHACHA20_BLOCK_SIZE / sizeof(unsigned long)]; - unsigned int entropy_int[CHACHA20_BLOCK_SIZE / sizeof(unsigned int)]; + u64 entropy_u64[CHACHA20_BLOCK_SIZE / sizeof(u64)]; + u32 entropy_u32[CHACHA20_BLOCK_SIZE / sizeof(u32)]; }; unsigned int position; }; @@ -2029,52 +2029,51 @@ struct batched_entropy { * number is either as good as RDRAND or as good as /dev/urandom, with the * goal of being quite fast and not depleting entropy. */ -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long); -unsigned long get_random_long(void) +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64); +u64 get_random_u64(void) { - unsigned long ret; + u64 ret; struct batched_entropy *batch; - if (arch_get_random_long(&ret)) +#if BITS_PER_LONG == 64 + if (arch_get_random_long((unsigned long *)&ret)) return ret; +#else + if (arch_get_random_long((unsigned long *)&ret) && + arch_get_random_long((unsigned long *)&ret + 1)) + return ret; +#endif - batch = &get_cpu_var(batched_entropy_long); - if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) { - extract_crng((u8 *)batch->entropy_long); + batch = &get_cpu_var(batched_entropy_u64); + if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { + extract_crng((u8 *)batch->entropy_u64); batch->position = 0; } - ret = batch->entropy_long[batch->position++]; - put_cpu_var(batched_entropy_long); + ret = batch->entropy_u64[batch->position++]; + put_cpu_var(batched_entropy_u64); return ret; } -EXPORT_SYMBOL(get_random_long); +EXPORT_SYMBOL(get_random_u64); -#if BITS_PER_LONG == 32 -unsigned int get_random_int(void) -{ - return get_random_long(); -} -#else -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int); -unsigned int get_random_int(void) +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32); +u32 get_random_u32(void) { - unsigned int ret; + u32 ret; struct batched_entropy *batch; if (arch_get_random_int(&ret)) return ret; - batch = &get_cpu_var(batched_entropy_int); - if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) { - extract_crng((u8 *)batch->entropy_int); + batch = &get_cpu_var(batched_entropy_u32); + if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { + extract_crng((u8 *)batch->entropy_u32); batch->position = 0; } - ret = batch->entropy_int[batch->position++]; - put_cpu_var(batched_entropy_int); + ret = batch->entropy_u32[batch->position++]; + put_cpu_var(batched_entropy_u32); return ret; } -#endif -EXPORT_SYMBOL(get_random_int); +EXPORT_SYMBOL(get_random_u32); /** * randomize_page - Generate a random, page aligned address diff --git a/include/linux/random.h b/include/linux/random.h index 16ab429735a7..ed5c3838780d 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -42,8 +42,21 @@ extern void get_random_bytes_arch(void *buf, int nbytes); extern const struct file_operations random_fops, urandom_fops; #endif -unsigned int get_random_int(void); -unsigned long get_random_long(void); +u32 get_random_u32(void); +u64 get_random_u64(void); +static inline unsigned int get_random_int(void) +{ + return get_random_u32(); +} +static inline unsigned long get_random_long(void) +{ +#if BITS_PER_LONG == 64 + return get_random_u64(); +#else + return get_random_u32(); +#endif +} + unsigned long randomize_page(unsigned long start, unsigned long range); u32 prandom_u32(void); -- cgit v1.2.3 From db61ffe3a71c697aaa91c42c862a5f7557a0e562 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 31 Jan 2017 14:36:07 -0200 Subject: random: move random_min_urandom_seed into CONFIG_SYSCTL ifdef block Building arm allnodefconfig causes the following build warning: drivers/char/random.c:318:12: warning: 'random_min_urandom_seed' defined but not used [-Wunused-variable] Fix the warning by moving 'random_min_urandom_seed' declaration inside the CONFIG_SYSCTL ifdef block, where it is actually used. While at it, remove the comment prior to the variable declaration. Signed-off-by: Fabio Estevam Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 066ae125f2c8..0ab024918907 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -312,11 +312,6 @@ static int random_read_wakeup_bits = 64; */ static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; -/* - * Variable is currently unused by left for user space compatibility. - */ -static int random_min_urandom_seed = 60; - /* * Originally, we used a primitive polynomial of degree .poolwords * over GF(2). The taps for various sizes are defined below. They @@ -1886,6 +1881,7 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, static int min_read_thresh = 8, min_write_thresh; static int max_read_thresh = OUTPUT_POOL_WORDS * 32; static int max_write_thresh = INPUT_POOL_WORDS * 32; +static int random_min_urandom_seed = 60; static char sysctl_bootid[16]; /* -- cgit v1.2.3 From 7e91c7df29b5e196de3dc6f086c8937973bd0b88 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 7 Feb 2017 19:58:02 +0200 Subject: swiotlb-xen: implement xen_swiotlb_dma_mmap callback This function creates userspace mapping for the DMA-coherent memory. Signed-off-by: Stefano Stabellini Signed-off-by: Oleksandr Dmytryshyn Signed-off-by: Andrii Anisov Signed-off-by: Konrad Rzeszutek Wilk --- arch/arm/xen/mm.c | 1 + drivers/xen/swiotlb-xen.c | 19 +++++++++++++++++++ include/xen/swiotlb-xen.h | 5 +++++ 3 files changed, 25 insertions(+) diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index bd62d94f8ac5..cd1684e4e864 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -198,6 +198,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { .unmap_page = xen_swiotlb_unmap_page, .dma_supported = xen_swiotlb_dma_supported, .set_dma_mask = xen_swiotlb_set_dma_mask, + .mmap = xen_swiotlb_dma_mmap, }; int __init xen_mm_init(void) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index f905d6eeb048..c7f61fc0572a 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -680,3 +680,22 @@ xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask) return 0; } EXPORT_SYMBOL_GPL(xen_swiotlb_set_dma_mask); + +/* + * Create userspace mapping for the DMA-coherent memory. + * This function should be called with the pages from the current domain only, + * passing pages mapped from other domains would lead to memory corruption. + */ +int +xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) + if (__generic_dma_ops(dev)->mmap) + return __generic_dma_ops(dev)->mmap(dev, vma, cpu_addr, + dma_addr, size, attrs); +#endif + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_dma_mmap); diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h index a0083be5d529..a315c876aaa9 100644 --- a/include/xen/swiotlb-xen.h +++ b/include/xen/swiotlb-xen.h @@ -55,4 +55,9 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask); extern int xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask); + +extern int +xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); #endif /* __LINUX_SWIOTLB_XEN_H */ -- cgit v1.2.3 From 69369f52d28a34c84acb6f2a8a585e743441566a Mon Sep 17 00:00:00 2001 From: Andrii Anisov Date: Tue, 7 Feb 2017 19:58:03 +0200 Subject: swiotlb-xen: implement xen_swiotlb_get_sgtable callback Signed-off-by: Andrii Anisov Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/arm/xen/mm.c | 1 + drivers/xen/swiotlb-xen.c | 28 ++++++++++++++++++++++++++++ include/xen/swiotlb-xen.h | 6 ++++++ 3 files changed, 35 insertions(+) diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index cd1684e4e864..76ea48a614e1 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -199,6 +199,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { .dma_supported = xen_swiotlb_dma_supported, .set_dma_mask = xen_swiotlb_set_dma_mask, .mmap = xen_swiotlb_dma_mmap, + .get_sgtable = xen_swiotlb_get_sgtable, }; int __init xen_mm_init(void) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index c7f61fc0572a..23e30b4e1fb6 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -699,3 +699,31 @@ xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); } EXPORT_SYMBOL_GPL(xen_swiotlb_dma_mmap); + +/* + * This function should be called with the pages from the current domain only, + * passing pages mapped from other domains would lead to memory corruption. + */ +int +xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size, + unsigned long attrs) +{ +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) + if (__generic_dma_ops(dev)->get_sgtable) { +#if 0 + /* + * This check verifies that the page belongs to the current domain and + * is not one mapped from another domain. + * This check is for debug only, and should not go to production build + */ + unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle)); + BUG_ON (!page_is_ram(bfn)); +#endif + return __generic_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr, + handle, size, attrs); + } +#endif + return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_get_sgtable); diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h index a315c876aaa9..1f6d78f044b6 100644 --- a/include/xen/swiotlb-xen.h +++ b/include/xen/swiotlb-xen.h @@ -2,6 +2,7 @@ #define __LINUX_SWIOTLB_XEN_H #include +#include #include extern int xen_swiotlb_init(int verbose, bool early); @@ -60,4 +61,9 @@ extern int xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); + +extern int +xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size, + unsigned long attrs); #endif /* __LINUX_SWIOTLB_XEN_H */ -- cgit v1.2.3 From f655e67ac8d797425abb0404d0878758f3f71c1a Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 16 Feb 2017 14:10:01 +0800 Subject: drm/i915/gvt: Fix check error on opregion.c As we switched to memremap for opregion, shouldn't use any __iomem for that, and move to use memcpy instead. This fixed static check errors for: CHECK drivers/gpu/drm/i915//gvt/opregion.c drivers/gpu/drm/i915//gvt/opregion.c:142:31: warning: incorrect type in argument 1 (different address spaces) drivers/gpu/drm/i915//gvt/opregion.c:142:31: expected void *addr drivers/gpu/drm/i915//gvt/opregion.c:142:31: got void [noderef] *opregion_va drivers/gpu/drm/i915//gvt/opregion.c:160:35: warning: incorrect type in assignment (different address spaces) drivers/gpu/drm/i915//gvt/opregion.c:160:35: expected void [noderef] *opregion_va drivers/gpu/drm/i915//gvt/opregion.c:160:35: got void * Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 2 +- drivers/gpu/drm/i915/gvt/opregion.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index e227caf5859e..5ee660077b0c 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -203,7 +203,7 @@ struct intel_gvt_firmware { }; struct intel_gvt_opregion { - void __iomem *opregion_va; + void *opregion_va; u32 opregion_pa; }; diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index d9fb41ab7119..5d1caf9daba9 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -27,7 +27,6 @@ static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa) { - void __iomem *host_va = vgpu->gvt->opregion.opregion_va; u8 *buf; int i; @@ -43,8 +42,8 @@ static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa) if (!vgpu_opregion(vgpu)->va) return -ENOMEM; - memcpy_fromio(vgpu_opregion(vgpu)->va, host_va, - INTEL_GVT_OPREGION_SIZE); + memcpy(vgpu_opregion(vgpu)->va, vgpu->gvt->opregion.opregion_va, + INTEL_GVT_OPREGION_SIZE); for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i; -- cgit v1.2.3 From fd64be636708d808852c4c8c1efce0a0a51c24c5 Mon Sep 17 00:00:00 2001 From: Min He Date: Fri, 17 Feb 2017 15:02:36 +0800 Subject: drm/i915/gvt: introduced failsafe mode into vgpu New failsafe mode is introduced, when we detect guest not supporting GVT-g. In failsafe mode, we will ignore all the MMIO and cfg space read/write from guest. This patch can fix the issue that when guest kernel or graphics driver version is too low, there will be a lot of kernel traces in host. V5: rebased onto latest gvt-staging V4: changed coding style by Zhenyu and Ping's advice V3: modified coding style and error messages according to Zhenyu's comment V2: 1) implemented MMIO/GTT/WP pages read/write logic; 2) used a unified function to enter failsafe mode Signed-off-by: Min He Signed-off-by: Pei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cfg_space.c | 3 ++ drivers/gpu/drm/i915/gvt/gvt.h | 6 ++++ drivers/gpu/drm/i915/gvt/handlers.c | 36 +++++++++++++++++++++ drivers/gpu/drm/i915/gvt/mmio.c | 62 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/vgpu.c | 6 +++- 5 files changed, 112 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index 4a6a2ed65732..a77e050b85a3 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -237,6 +237,9 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, { int ret; + if (vgpu->failsafe) + return 0; + if (WARN_ON(bytes > 4)) return -EINVAL; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 5ee660077b0c..48ef0771d772 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -143,6 +143,8 @@ struct intel_vgpu { int id; unsigned long handle; /* vGPU handle used by hypervisor MPT modules */ bool active; + bool pv_notified; + bool failsafe; bool resetting; void *sched_data; @@ -449,6 +451,10 @@ struct intel_gvt_ops { }; +enum { + GVT_FAILSAFE_UNSUPPORTED_GUEST, +}; + #include "mpt.h" #endif diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 1d450627ff65..6f098bb110bd 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -150,10 +150,34 @@ static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg) #define fence_num_to_offset(num) \ (num * 8 + i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0))) + +static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) +{ + switch (reason) { + case GVT_FAILSAFE_UNSUPPORTED_GUEST: + pr_err("Detected your guest driver doesn't support GVT-g.\n"); + break; + default: + break; + } + pr_err("Now vgpu %d will enter failsafe mode.\n", vgpu->id); + vgpu->failsafe = true; +} + static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, unsigned int fence_num, void *p_data, unsigned int bytes) { if (fence_num >= vgpu_fence_sz(vgpu)) { + + /* When guest access oob fence regs without access + * pv_info first, we treat guest not supporting GVT, + * and we will let vgpu enter failsafe mode. + */ + if (!vgpu->pv_notified) { + enter_failsafe_mode(vgpu, + GVT_FAILSAFE_UNSUPPORTED_GUEST); + return -EINVAL; + } gvt_err("vgpu%d: found oob fence register access\n", vgpu->id); gvt_err("vgpu%d: total fence num %d access fence num %d\n", @@ -1001,6 +1025,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, if (invalid_read) gvt_err("invalid pvinfo read: [%x:%x] = %x\n", offset, bytes, *(u32 *)p_data); + vgpu->pv_notified = true; return 0; } @@ -1318,6 +1343,17 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, bool enable_execlist; write_vreg(vgpu, offset, p_data, bytes); + + /* when PPGTT mode enabled, we will check if guest has called + * pvinfo, if not, we will treat this guest as non-gvtg-aware + * guest, and stop emulating its cfg space, mmio, gtt, etc. + */ + if (((data & _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)) || + (data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE))) + && !vgpu->pv_notified) { + enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); + return 0; + } if ((data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)) || (data & _MASKED_BIT_DISABLE(GFX_RUN_LIST_ENABLE))) { enable_execlist = !!(data & GFX_RUN_LIST_ENABLE); diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 4df078bc5d04..b2d72dad1537 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -57,6 +57,58 @@ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa) (reg >= gvt->device_info.gtt_start_offset \ && reg < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) +static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, + void *p_data, unsigned int bytes, bool read) +{ + struct intel_gvt *gvt = NULL; + void *pt = NULL; + unsigned int offset = 0; + + if (!vgpu || !p_data) + return; + + gvt = vgpu->gvt; + mutex_lock(&gvt->lock); + offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa); + if (reg_is_mmio(gvt, offset)) { + if (read) + intel_vgpu_default_mmio_read(vgpu, offset, p_data, + bytes); + else + intel_vgpu_default_mmio_write(vgpu, offset, p_data, + bytes); + } else if (reg_is_gtt(gvt, offset) && + vgpu->gtt.ggtt_mm->virtual_page_table) { + offset -= gvt->device_info.gtt_start_offset; + pt = vgpu->gtt.ggtt_mm->virtual_page_table + offset; + if (read) + memcpy(p_data, pt, bytes); + else + memcpy(pt, p_data, bytes); + + } else if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { + struct intel_vgpu_guest_page *gp; + + /* Since we enter the failsafe mode early during guest boot, + * guest may not have chance to set up its ppgtt table, so + * there should not be any wp pages for guest. Keep the wp + * related code here in case we need to handle it in furture. + */ + gp = intel_vgpu_find_guest_page(vgpu, pa >> PAGE_SHIFT); + if (gp) { + /* remove write protection to prevent furture traps */ + intel_vgpu_clean_guest_page(vgpu, gp); + if (read) + intel_gvt_hypervisor_read_gpa(vgpu, pa, + p_data, bytes); + else + intel_gvt_hypervisor_write_gpa(vgpu, pa, + p_data, bytes); + } + } + mutex_unlock(&gvt->lock); +} + /** * intel_vgpu_emulate_mmio_read - emulate MMIO read * @vgpu: a vGPU @@ -75,6 +127,11 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, unsigned int offset = 0; int ret = -EINVAL; + + if (vgpu->failsafe) { + failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, true); + return 0; + } mutex_lock(&gvt->lock); if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { @@ -188,6 +245,11 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, u32 old_vreg = 0, old_sreg = 0; int ret = -EINVAL; + if (vgpu->failsafe) { + failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, false); + return 0; + } + mutex_lock(&gvt->lock); if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 95a97aa0051e..dcfcce1dc00e 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -387,8 +387,12 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, populate_pvinfo_page(vgpu); intel_vgpu_reset_display(vgpu); - if (dmlr) + if (dmlr) { intel_vgpu_reset_cfg_space(vgpu); + /* only reset the failsafe mode when dmlr reset */ + vgpu->failsafe = false; + vgpu->pv_notified = false; + } } vgpu->resetting = false; -- cgit v1.2.3 From d1be371d4f4c12d11023c9fc795e5d460d960680 Mon Sep 17 00:00:00 2001 From: "Zhao, Xinda" Date: Fri, 17 Feb 2017 14:38:33 +0800 Subject: drm/i915/gvt: handle fence reg access during GPU reset Lots of reduntant log info will be printed out during GPU reset, including accessing untracked mmio register and fence register, variable disable_warn_untrack is added previously to handle the situation, but the accessing of fence register is ignored in the previously patch, so add it back. Besides, set the variable disable_warn_untrack to the defalut value after GPU reset is finished. Signed-off-by: Zhao, Xinda Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 15 +++++++++------ drivers/gpu/drm/i915/gvt/mmio.c | 2 ++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 6f098bb110bd..fd7e789a72c3 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -173,16 +173,19 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, * pv_info first, we treat guest not supporting GVT, * and we will let vgpu enter failsafe mode. */ - if (!vgpu->pv_notified) { + if (!vgpu->pv_notified) enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); - return -EINVAL; + + if (!vgpu->mmio.disable_warn_untrack) { + gvt_err("vgpu%d: found oob fence register access\n", + vgpu->id); + gvt_err("vgpu%d: total fence %d, access fence %d\n", + vgpu->id, vgpu_fence_sz(vgpu), + fence_num); } - gvt_err("vgpu%d: found oob fence register access\n", - vgpu->id); - gvt_err("vgpu%d: total fence num %d access fence num %d\n", - vgpu->id, vgpu_fence_sz(vgpu), fence_num); memset(p_data, 0, bytes); + return -EINVAL; } return 0; } diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index b2d72dad1537..99abb01fa9eb 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -384,6 +384,8 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu) /* set the bit 0:2(Core C-State ) to C0 */ vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0; + + vgpu->mmio.disable_warn_untrack = false; } /** -- cgit v1.2.3 From c74fd80f2f41d05f350bb478151021f88551afe8 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 13 Jan 2017 15:07:51 -0500 Subject: xen: do not re-use pirq number cached in pci device msi msg data Revert the main part of commit: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests") That commit introduced reading the pci device's msi message data to see if a pirq was previously configured for the device's msi/msix, and re-use that pirq. At the time, that was the correct behavior. However, a later change to Qemu caused it to call into the Xen hypervisor to unmap all pirqs for a pci device, when the pci device disables its MSI/MSIX vectors; specifically the Qemu commit: c976437c7dba9c7444fb41df45468968aaa326ad ("qemu-xen: free all the pirqs for msi/msix when driver unload") Once Qemu added this pirq unmapping, it was no longer correct for the kernel to re-use the pirq number cached in the pci device msi message data. All Qemu releases since 2.1.0 contain the patch that unmaps the pirqs when the pci device disables its MSI/MSIX vectors. This bug is causing failures to initialize multiple NVMe controllers under Xen, because the NVMe driver sets up a single MSIX vector for each controller (concurrently), and then after using that to talk to the controller for some configuration data, it disables the single MSIX vector and re-configures all the MSIX vectors it needs. So the MSIX setup code tries to re-use the cached pirq from the first vector for each controller, but the hypervisor has already given away that pirq to another controller, and its initialization fails. This is discussed in more detail at: https://lists.xen.org/archives/html/xen-devel/2017-01/msg00447.html Fixes: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests") Signed-off-by: Dan Streetman Reviewed-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Boris Ostrovsky --- arch/x86/pci/xen.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index e1fb269c87af..292ab0364a89 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -234,23 +234,14 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 1; for_each_pci_msi_entry(msidesc, dev) { - __pci_read_msi_msg(msidesc, &msg); - pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | - ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); - if (msg.data != XEN_PIRQ_MSI_DATA || - xen_irq_from_pirq(pirq) < 0) { - pirq = xen_allocate_pirq_msi(dev, msidesc); - if (pirq < 0) { - irq = -ENODEV; - goto error; - } - xen_msi_compose_msg(dev, pirq, &msg); - __pci_write_msi_msg(msidesc, &msg); - dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); - } else { - dev_dbg(&dev->dev, - "xen: msi already bound to pirq=%d\n", pirq); + pirq = xen_allocate_pirq_msi(dev, msidesc); + if (pirq < 0) { + irq = -ENODEV; + goto error; } + xen_msi_compose_msg(dev, pirq, &msg); + __pci_write_msi_msg(msidesc, &msg); + dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, (type == PCI_CAP_ID_MSI) ? nvec : 1, (type == PCI_CAP_ID_MSIX) ? -- cgit v1.2.3 From a33fc7a0482a40068c022aefcefd50f9f0f44f87 Mon Sep 17 00:00:00 2001 From: Min He Date: Fri, 17 Feb 2017 16:42:38 +0800 Subject: drm/i915/gvt: enter failsafe mode when guest requires more resources Windows guest will notitfy GVT-g to request more resources through g2v interface, when its resources are not enough. This patch is to handle this case and let vgpu enter failsafe mode to avoid too many error messages. Signed-off-by: Min He Signed-off-by: Pei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/handlers.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 48ef0771d772..a41b322d8957 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -453,6 +453,7 @@ struct intel_gvt_ops { enum { GVT_FAILSAFE_UNSUPPORTED_GUEST, + GVT_FAILSAFE_INSUFFICIENT_RESOURCE, }; #include "mpt.h" diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index fd7e789a72c3..e1a382645112 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -157,6 +157,8 @@ static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) case GVT_FAILSAFE_UNSUPPORTED_GUEST: pr_err("Detected your guest driver doesn't support GVT-g.\n"); break; + case GVT_FAILSAFE_INSUFFICIENT_RESOURCE: + pr_err("Graphics resource is not enough for the guest\n"); default: break; } @@ -1106,6 +1108,9 @@ static int pvinfo_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, case _vgtif_reg(execlist_context_descriptor_lo): case _vgtif_reg(execlist_context_descriptor_hi): break; + case _vgtif_reg(rsv5[0])..._vgtif_reg(rsv5[3]): + enter_failsafe_mode(vgpu, GVT_FAILSAFE_INSUFFICIENT_RESOURCE); + break; default: gvt_err("invalid pvinfo write offset %x bytes %x data %x\n", offset, bytes, data); -- cgit v1.2.3 From 9272f73f79bd780502134f227fa52fd280ecda17 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Fri, 17 Feb 2017 19:29:52 +0800 Subject: drm/i915/gvt: add a NULL pointer check to avoid kernel panic Due to the request replay, context switch interrupt may come after gvt free the workload thus can cause a kernel NULL pointer kernel panic. This patch will add a simple check to avoid this for a short term. From long term, gvt workload lifecycle doesn't match with i915 request and need to find a proper way to manage this. v4: simplify the NULL pointer check. v5: add unlikely to optimize. Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d6b6d0efdd1a..e355a82ccabd 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -139,6 +139,9 @@ static int shadow_context_status_change(struct notifier_block *nb, struct intel_vgpu_workload *workload = scheduler->current_workload[req->engine->id]; + if (unlikely(!workload)) + return NOTIFY_OK; + switch (action) { case INTEL_CONTEXT_SCHEDULE_IN: intel_gvt_load_render_mmio(workload->vgpu, -- cgit v1.2.3 From 593e59b4b941910e4f7ba87d3c02d63e38e960d0 Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Mon, 20 Feb 2017 15:51:13 +0800 Subject: drm/i915/gvt: fix unhandled mmio warnings some registers were missing or treated as BDW only. This patch is to fix it avoid unhandled mmio wanrings v2: update commit message according to zhenyu's comment Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index e1a382645112..e1abece853d4 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1519,6 +1519,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK, NULL, NULL); MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK, NULL, NULL); MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(0x2124, D_ALL, F_MODE_MASK, NULL, NULL); MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK, NULL, NULL); MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK, NULL, NULL); @@ -2390,9 +2392,9 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(CHICKEN_PIPESL_1(PIPE_A), D_BDW); - MMIO_D(CHICKEN_PIPESL_1(PIPE_B), D_BDW); - MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW); + MMIO_D(CHICKEN_PIPESL_1(PIPE_A), D_BDW_PLUS); + MMIO_D(CHICKEN_PIPESL_1(PIPE_B), D_BDW_PLUS); + MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW_PLUS); MMIO_D(WM_MISC, D_BDW); MMIO_D(BDW_EDP_PSR_BASE, D_BDW); @@ -2406,7 +2408,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN8_EU_DISABLE1, D_BDW_PLUS); MMIO_D(GEN8_EU_DISABLE2, D_BDW_PLUS); - MMIO_D(0xfdc, D_BDW); + MMIO_D(0xfdc, D_BDW_PLUS); MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_D(GEN7_ROW_CHICKEN2, D_BDW_PLUS); MMIO_D(GEN8_UCGCTL6, D_BDW_PLUS); @@ -2423,6 +2425,10 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DFH(0x24d8, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x24dc, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_D(0x22040, D_BDW_PLUS); + MMIO_D(0x44484, D_BDW_PLUS); + MMIO_D(0x4448c, D_BDW_PLUS); + MMIO_D(0x83a4, D_BDW); MMIO_D(GEN8_L3_LRA_1_GPGPU, D_BDW_PLUS); @@ -2668,6 +2674,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL); MMIO_D(0x44500, D_SKL); + MMIO_D(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS); return 0; } -- cgit v1.2.3 From d8e9b2b9097c117880dc22933239d05199c60b96 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 20 Feb 2017 14:58:25 +0100 Subject: drm/i915/gvt: Fix superfluous newline in GVT_DISPLAY_READY env var send_display_send_uevent() sends two environment variable, and the first one GVT_DISPLAY_READY is set including a new line at the end of the string; that is obviously superfluous and wrong -- at least, it *looks* so when you only read the code. However, it doesn't appear in the actual output by a (supposedly unexpected) trick. The code uses snprintf() and truncates the string in size 20 bytes. This makes the string as GVT_DISPLAY_READY=0 or ...=1 including the trailing NUL-letter. That is, the '\n' found in the format string is always cut off as a result. Although the code gives the correct result, it is confusing. This patch addresses it, just removing the superfluous '\n' from the format string for avoiding further confusion. If the argument "ready" were not a bool, the size 20 should be corrected as well. But it's a bool, so we can leave the magic number 20 as is for now. FWIW, the bug was spotted by a new GCC7 warning: drivers/gpu/drm/i915/gvt/handlers.c: In function 'pvinfo_mmio_write': drivers/gpu/drm/i915/gvt/handlers.c:1042:34: error: 'snprintf' output truncated before the last format character [-Werror=format-truncation=] snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d\n", ready); ^~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gvt/handlers.c:1042:2: note: 'snprintf' output 21 bytes into a destination of size 20 snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d\n", ready); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 04d348ae3f0a ("drm/i915/gvt: vGPU display virtualization") Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1025903 Reported-by: Richard Biener Cc: Signed-off-by: Takashi Iwai Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index e1abece853d4..a1880424ad32 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1069,7 +1069,7 @@ static int send_display_ready_uevent(struct intel_vgpu *vgpu, int ready) char vmid_str[20]; char display_ready_str[20]; - snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d\n", ready); + snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d", ready); env[0] = display_ready_str; snprintf(vmid_str, 20, "VMID=%d", vgpu->id); -- cgit v1.2.3 From 4c4b22abb3d405c5c194b02255eecc1a9eff42cf Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 21 Feb 2017 09:39:00 +0800 Subject: drm/i915/gvt: add more registers to context save/restore list the value of those registers should be applied to hardware on context restoring Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/render.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 2b3a642284b6..73f052a4f424 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -53,6 +53,14 @@ static struct render_mmio gen8_render_mmio_list[] = { {RCS, _MMIO(0x24d4), 0, false}, {RCS, _MMIO(0x24d8), 0, false}, {RCS, _MMIO(0x24dc), 0, false}, + {RCS, _MMIO(0x24e0), 0, false}, + {RCS, _MMIO(0x24e4), 0, false}, + {RCS, _MMIO(0x24e8), 0, false}, + {RCS, _MMIO(0x24ec), 0, false}, + {RCS, _MMIO(0x24f0), 0, false}, + {RCS, _MMIO(0x24f4), 0, false}, + {RCS, _MMIO(0x24f8), 0, false}, + {RCS, _MMIO(0x24fc), 0, false}, {RCS, _MMIO(0x7004), 0xffff, true}, {RCS, _MMIO(0x7008), 0xffff, true}, {RCS, _MMIO(0x7000), 0xffff, true}, @@ -76,6 +84,14 @@ static struct render_mmio gen9_render_mmio_list[] = { {RCS, _MMIO(0x24d4), 0, false}, {RCS, _MMIO(0x24d8), 0, false}, {RCS, _MMIO(0x24dc), 0, false}, + {RCS, _MMIO(0x24e0), 0, false}, + {RCS, _MMIO(0x24e4), 0, false}, + {RCS, _MMIO(0x24e8), 0, false}, + {RCS, _MMIO(0x24ec), 0, false}, + {RCS, _MMIO(0x24f0), 0, false}, + {RCS, _MMIO(0x24f4), 0, false}, + {RCS, _MMIO(0x24f8), 0, false}, + {RCS, _MMIO(0x24fc), 0, false}, {RCS, _MMIO(0x7004), 0xffff, true}, {RCS, _MMIO(0x7008), 0xffff, true}, {RCS, _MMIO(0x7000), 0xffff, true}, -- cgit v1.2.3 From e6cedfea6b8dcb205f75ea632570f52d2ffd1251 Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 21 Feb 2017 10:38:53 +0800 Subject: drm/i915/gvt: force-nopriv register handling add a whitelist to check the content of force-nonpriv registers v3: per He Min's comment, modify in_whitelist()'s return type to bool, and use negative value as the return value for failure for force_nonpriv_write(). v2: 1. split a big patch into two smaller ones per zhenyu's comment. this patch is the mmio handling part for force-nopriv registers 2. per zhenyu's comment, combine all non-priv registers into a single MMIO_DFH entry Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 74 +++++++++++++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index a1880424ad32..af0c0d1ec9a8 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -398,6 +398,74 @@ static int pipeconf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, return 0; } +/* ascendingly sorted */ +static i915_reg_t force_nonpriv_white_list[] = { + GEN9_CS_DEBUG_MODE1, //_MMIO(0x20ec) + GEN9_CTX_PREEMPT_REG,//_MMIO(0x2248) + GEN8_CS_CHICKEN1,//_MMIO(0x2580) + _MMIO(0x2690), + _MMIO(0x2694), + _MMIO(0x2698), + _MMIO(0x4de0), + _MMIO(0x4de4), + _MMIO(0x4dfc), + GEN7_COMMON_SLICE_CHICKEN1,//_MMIO(0x7010) + _MMIO(0x7014), + HDC_CHICKEN0,//_MMIO(0x7300) + GEN8_HDC_CHICKEN1,//_MMIO(0x7304) + _MMIO(0x7700), + _MMIO(0x7704), + _MMIO(0x7708), + _MMIO(0x770c), + _MMIO(0xb110), + GEN8_L3SQCREG4,//_MMIO(0xb118) + _MMIO(0xe100), + _MMIO(0xe18c), + _MMIO(0xe48c), + _MMIO(0xe5f4), +}; + +/* a simple bsearch */ +static inline bool in_whitelist(unsigned int reg) +{ + int left = 0, right = ARRAY_SIZE(force_nonpriv_white_list); + i915_reg_t *array = force_nonpriv_white_list; + + while (left < right) { + int mid = (left + right)/2; + + if (reg > array[mid].reg) + left = mid + 1; + else if (reg < array[mid].reg) + right = mid; + else + return true; + } + return false; +} + +static int force_nonpriv_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 reg_nonpriv = *(u32 *)p_data; + int ret = -EINVAL; + + if ((bytes != 4) || ((offset & (bytes - 1)) != 0)) { + gvt_err("vgpu(%d) Invalid FORCE_NONPRIV offset %x(%dB)\n", + vgpu->id, offset, bytes); + return ret; + } + + if (in_whitelist(reg_nonpriv)) { + ret = intel_vgpu_default_mmio_write(vgpu, offset, p_data, + bytes); + } else { + gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x\n", + vgpu->id, reg_nonpriv); + } + return ret; +} + static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -2420,10 +2488,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(0xb10c, D_BDW); MMIO_D(0xb110, D_BDW); - MMIO_DFH(0x24d0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x24d4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x24d8, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x24dc, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_F(0x24d0, 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, + NULL, force_nonpriv_write); MMIO_D(0x22040, D_BDW_PLUS); MMIO_D(0x44484, D_BDW_PLUS); -- cgit v1.2.3 From 187447a106fc9caca45f10413845678d3666556c Mon Sep 17 00:00:00 2001 From: Pei Zhang Date: Tue, 21 Feb 2017 21:58:14 +0800 Subject: drm/i915/gvt: add cmd_access to GEN7_HALF_SLICE_CHICKEN1 Linux guest is using this MMIO in lri command. Add cmd_access flag for this mmio in gvt to avoid error log. v2: change the mmio address to its macro name Signed-off-by: Pei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index af0c0d1ec9a8..bfe12ddb0210 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1607,7 +1607,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(0x243c, D_ALL); MMIO_DFH(0x7018, D_ALL, F_MODE_MASK, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe100, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* display */ MMIO_F(0x60220, 0x20, 0, 0, 0, D_ALL, NULL, NULL); -- cgit v1.2.3 From ec162aa84c9057e196f3a844c0a8d569f5095e6c Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 21 Feb 2017 14:00:37 +0800 Subject: drm/i915/gvt: set default value to 0 for unhandled mmio regs for a handled mmio reg, its default value is read from hardware, while for an unhandled mmio regs, its default value would be random if not explicitly set to 0 Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/firmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c index 1cb29b2d7dc6..933a7c211a1c 100644 --- a/drivers/gpu/drm/i915/gvt/firmware.c +++ b/drivers/gpu/drm/i915/gvt/firmware.c @@ -80,7 +80,7 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt) int ret; size = sizeof(*h) + info->mmio_size + info->cfg_space_size - 1; - firmware = vmalloc(size); + firmware = vzalloc(size); if (!firmware) return -ENOMEM; -- cgit v1.2.3 From bab059304314e127cf4a5330c6bd5a71fd27c022 Mon Sep 17 00:00:00 2001 From: "Zhao, Xinda" Date: Tue, 21 Feb 2017 15:07:31 +0800 Subject: drm/i915/gvt: decrease priority of output msg for untracked mmio When untracked mmio is visited, too many log info will be printed out, it may confuse the user, but most of the time, it is not the urgent case, so use gvt_dbg_mmio() instead. Signed-off-by: Zhao, Xinda Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 99abb01fa9eb..60b698cb8365 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -298,7 +298,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, mmio = intel_gvt_find_mmio_info(gvt, rounddown(offset, 4)); if (!mmio && !vgpu->mmio.disable_warn_untrack) - gvt_err("vgpu%d: write untracked MMIO %x len %d val %x\n", + gvt_dbg_mmio("vgpu%d: write untracked MMIO %x len %d val %x\n", vgpu->id, offset, bytes, *(u32 *)p_data); if (!intel_gvt_mmio_is_unalign(gvt, offset)) { -- cgit v1.2.3 From da9cc8de22aa6bd6ed51c406432d599ab520a6e3 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Tue, 21 Feb 2017 15:52:56 +0800 Subject: drm/i915/gvt: clear the vGPU reset logic Releasing shadow PPGTT pages is not enough when vGPU reset, the guest page table tracking data should has same life-cycle with all the shadow PPGTT pages; Otherwise there is no chance to re-shadow the PPGTT pages without free the guest page table tracking data. This patch clear the PPGTT reset logic and make the vGPU reset in working order. v2: refactor some logic to avoid code duplicated. v3: remove useless macro and add comments from Christophe. v4: keep reset logic in reset function. Signed-off-by: Ping Gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 28c92346db0e..b5c833287d39 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2015,6 +2015,22 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) return create_scratch_page_tree(vgpu); } +static void intel_vgpu_free_mm(struct intel_vgpu *vgpu, int type) +{ + struct list_head *pos, *n; + struct intel_vgpu_mm *mm; + + list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) { + mm = container_of(pos, struct intel_vgpu_mm, list); + if (mm->type == type) { + vgpu->gvt->gtt.mm_free_page_table(mm); + list_del(&mm->list); + list_del(&mm->lru_list); + kfree(mm); + } + } +} + /** * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virulization * @vgpu: a vGPU @@ -2027,19 +2043,11 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) */ void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu) { - struct list_head *pos, *n; - struct intel_vgpu_mm *mm; - ppgtt_free_all_shadow_page(vgpu); release_scratch_page_tree(vgpu); - list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) { - mm = container_of(pos, struct intel_vgpu_mm, list); - vgpu->gvt->gtt.mm_free_page_table(mm); - list_del(&mm->list); - list_del(&mm->lru_list); - kfree(mm); - } + intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); + intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_GGTT); } static void clean_spt_oos(struct intel_gvt *gvt) @@ -2322,6 +2330,13 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu, bool dmlr) int i; ppgtt_free_all_shadow_page(vgpu); + + /* Shadow pages are only created when there is no page + * table tracking data, so remove page tracking data after + * removing the shadow pages. + */ + intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); + if (!dmlr) return; -- cgit v1.2.3 From d8a355be0b2b5613f7b34aee1394369d45d50586 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Wed, 22 Feb 2017 11:03:24 +0800 Subject: drm/i915/gvt: refine pcode write emulation In GVT-g we always emulate as pcode read/write success and ready for access anytime, since we don't touch real physical registers here. Add 'SKL_PCODE_CDCLK_CONTROL' write emulation, without it will cause skl_set_cdclk fail in guest. Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index bfe12ddb0210..f89b183488e9 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1315,6 +1315,9 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, else *data0 = 0x61514b3d; break; + case SKL_PCODE_CDCLK_CONTROL: + *data0 = SKL_CDCLK_READY_FOR_CHANGE; + break; case 0x5: *data0 |= 0x1; break; @@ -1322,8 +1325,13 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, gvt_dbg_core("VM(%d) write %x to mailbox, return data0 %x\n", vgpu->id, value, *data0); - - value &= ~(1 << 31); + /** + * PCODE_READY clear means ready for pcode read/write, + * PCODE_ERROR_MASK clear means no error happened. In GVT-g we + * always emulate as pcode read/write success and ready for access + * anytime, since we don't touch real physical registers here. + */ + value &= ~(GEN6_PCODE_READY | GEN6_PCODE_ERROR_MASK); return intel_vgpu_default_mmio_write(vgpu, offset, &value, bytes); } -- cgit v1.2.3 From 7c28135c77414327523b89bfc3f13096e095f5ac Mon Sep 17 00:00:00 2001 From: "Zhao, Xinda" Date: Tue, 21 Feb 2017 15:54:56 +0800 Subject: drm/i915/gvt: remove unnecessary error msg from gtt write The guest VM may initialize the whole GTT table during boot up, so the warning msg in emulate_gtt_mmio_write is not necessary, it is the expected behavior and it may confuse the user if error msg is printed out, so remove the msg from emulate_gtt_mmio_write(), Signed-off-by: Zhao, Xinda Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index b5c833287d39..6a5ff23ded90 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1825,11 +1825,8 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, gma = g_gtt_index << GTT_PAGE_SHIFT; /* the VM may configure the whole GM space when ballooning is used */ - if (WARN_ONCE(!vgpu_gmadr_is_valid(vgpu, gma), - "vgpu%d: found oob ggtt write, offset %x\n", - vgpu->id, off)) { + if (!vgpu_gmadr_is_valid(vgpu, gma)) return 0; - } ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index); -- cgit v1.2.3 From 191020b670f84f5e2edfaa906c3801df20485610 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 23 Feb 2017 14:46:23 +0800 Subject: drm/i915/gvt: adjust to fixed vGPU types Previous vGPU type create tried to determine vGPU type name e.g _1, _2 based on the number of mdev devices can be created, but different type might have very different resource size depending on physical device. We need to split type name vs. actual mdev resource and create fixed vGPU type with determined size for consistence. With this we'd like to fix vGPU types for _1, _2, _4 and _8 now, each type has fixed defined resource size. Available mdev instances that could be created is determined by physical resource, and user should query for that before creating. Cc: Kevin Tian Reviewed-by: Kevin Tian Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 1 - drivers/gpu/drm/i915/gvt/vgpu.c | 58 ++++++++++++++++++++++++----------------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index a41b322d8957..faff044c6434 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -212,7 +212,6 @@ struct intel_gvt_opregion { #define NR_MAX_INTEL_VGPU_TYPES 20 struct intel_vgpu_type { char name[16]; - unsigned int max_instance; unsigned int avail_instance; unsigned int low_gm_size; unsigned int high_gm_size; diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index dcfcce1dc00e..c27c13c3cc48 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -64,6 +64,19 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) WARN_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); } +static struct { + unsigned int low_mm; + unsigned int high_mm; + unsigned int fence; + char *name; +} vgpu_types[] = { +/* Fixed vGPU type table */ + { MB_TO_BYTES(64), MB_TO_BYTES(512), 4, "8" }, + { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, "4" }, + { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, "2" }, + { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, "1" }, +}; + /** * intel_gvt_init_vgpu_types - initialize vGPU type list * @gvt : GVT device @@ -78,9 +91,8 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) unsigned int min_low; /* vGPU type name is defined as GVTg_Vx_y which contains - * physical GPU generation type and 'y' means maximum vGPU - * instances user can create on one physical GPU for this - * type. + * physical GPU generation type (e.g V4 as BDW server, V5 as + * SKL server). * * Depend on physical SKU resource, might see vGPU types like * GVTg_V4_8, GVTg_V4_4, GVTg_V4_2, etc. We can create @@ -92,7 +104,7 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) */ low_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE; high_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE; - num_types = 4; + num_types = sizeof(vgpu_types) / sizeof(vgpu_types[0]); gvt->types = kzalloc(num_types * sizeof(struct intel_vgpu_type), GFP_KERNEL); @@ -101,25 +113,24 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) min_low = MB_TO_BYTES(32); for (i = 0; i < num_types; ++i) { - if (low_avail / min_low == 0) + if (low_avail / vgpu_types[i].low_mm == 0) break; - gvt->types[i].low_gm_size = min_low; - gvt->types[i].high_gm_size = max((min_low<<3), MB_TO_BYTES(384U)); - gvt->types[i].fence = 4; - gvt->types[i].max_instance = min(low_avail / min_low, - high_avail / gvt->types[i].high_gm_size); - gvt->types[i].avail_instance = gvt->types[i].max_instance; + + gvt->types[i].low_gm_size = vgpu_types[i].low_mm; + gvt->types[i].high_gm_size = vgpu_types[i].high_mm; + gvt->types[i].fence = vgpu_types[i].fence; + gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm, + high_avail / vgpu_types[i].high_mm); if (IS_GEN8(gvt->dev_priv)) - sprintf(gvt->types[i].name, "GVTg_V4_%u", - gvt->types[i].max_instance); + sprintf(gvt->types[i].name, "GVTg_V4_%s", + vgpu_types[i].name); else if (IS_GEN9(gvt->dev_priv)) - sprintf(gvt->types[i].name, "GVTg_V5_%u", - gvt->types[i].max_instance); + sprintf(gvt->types[i].name, "GVTg_V5_%s", + vgpu_types[i].name); - min_low <<= 1; - gvt_dbg_core("type[%d]: %s max %u avail %u low %u high %u fence %u\n", - i, gvt->types[i].name, gvt->types[i].max_instance, + gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u\n", + i, gvt->types[i].name, gvt->types[i].avail_instance, gvt->types[i].low_gm_size, gvt->types[i].high_gm_size, gvt->types[i].fence); @@ -138,7 +149,7 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) { int i; unsigned int low_gm_avail, high_gm_avail, fence_avail; - unsigned int low_gm_min, high_gm_min, fence_min, total_min; + unsigned int low_gm_min, high_gm_min, fence_min; /* Need to depend on maxium hw resource size but keep on * static config for now. @@ -154,12 +165,11 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) low_gm_min = low_gm_avail / gvt->types[i].low_gm_size; high_gm_min = high_gm_avail / gvt->types[i].high_gm_size; fence_min = fence_avail / gvt->types[i].fence; - total_min = min(min(low_gm_min, high_gm_min), fence_min); - gvt->types[i].avail_instance = min(gvt->types[i].max_instance, - total_min); + gvt->types[i].avail_instance = min(min(low_gm_min, high_gm_min), + fence_min); - gvt_dbg_core("update type[%d]: %s max %u avail %u low %u high %u fence %u\n", - i, gvt->types[i].name, gvt->types[i].max_instance, + gvt_dbg_core("update type[%d]: %s avail %u low %u high %u fence %u\n", + i, gvt->types[i].name, gvt->types[i].avail_instance, gvt->types[i].low_gm_size, gvt->types[i].high_gm_size, gvt->types[i].fence); } -- cgit v1.2.3 From bca5609fcc401b0055a3c01ecc33c5f6fb67af7b Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 24 Feb 2017 10:58:20 +0800 Subject: drm/i915/gvt: Add more edid definition support We'll need to apply different resolution for vgpu types, so this adds more EDID types definition. v2: fix typo for actual 1920x1200 resolution Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 112 ++++++++++++++++++++++++------------- drivers/gpu/drm/i915/gvt/display.h | 6 ++ 2 files changed, 80 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 6d8fde880c39..d346e43656f7 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -83,44 +83,80 @@ static int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe) return 0; } +static unsigned char virtual_dp_monitor_edid[GVT_EDID_NUM][EDID_SIZE] = { + { +/* EDID with 1024x768 as its resolution */ + /*Header*/ + 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, + /* Vendor & Product Identification */ + 0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17, + /* Version & Revision */ + 0x01, 0x04, + /* Basic Display Parameters & Features */ + 0xa5, 0x34, 0x20, 0x78, 0x23, + /* Color Characteristics */ + 0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54, + /* Established Timings: maximum resolution is 1024x768 */ + 0x21, 0x08, 0x00, + /* Standard Timings. All invalid */ + 0x00, 0xc0, 0x00, 0xc0, 0x00, 0x40, 0x00, 0x80, 0x00, 0x00, + 0x00, 0x40, 0x00, 0x00, 0x00, 0x01, + /* 18 Byte Data Blocks 1: invalid */ + 0x00, 0x00, 0x80, 0xa0, 0x70, 0xb0, + 0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a, + /* 18 Byte Data Blocks 2: invalid */ + 0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + /* 18 Byte Data Blocks 3: invalid */ + 0x00, 0x00, 0x00, 0xfc, 0x00, 0x48, + 0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20, + /* 18 Byte Data Blocks 4: invalid */ + 0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30, + 0x44, 0x58, 0x51, 0x0a, 0x20, 0x20, + /* Extension Block Count */ + 0x00, + /* Checksum */ + 0xef, + }, + { /* EDID with 1920x1200 as its resolution */ -static unsigned char virtual_dp_monitor_edid[] = { - /*Header*/ - 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, - /* Vendor & Product Identification */ - 0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17, - /* Version & Revision */ - 0x01, 0x04, - /* Basic Display Parameters & Features */ - 0xa5, 0x34, 0x20, 0x78, 0x23, - /* Color Characteristics */ - 0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54, - /* Established Timings: maximum resolution is 1024x768 */ - 0x21, 0x08, 0x00, - /* - * Standard Timings. - * below new resolutions can be supported: - * 1920x1080, 1280x720, 1280x960, 1280x1024, - * 1440x900, 1600x1200, 1680x1050 - */ - 0xd1, 0xc0, 0x81, 0xc0, 0x81, 0x40, 0x81, 0x80, 0x95, 0x00, - 0xa9, 0x40, 0xb3, 0x00, 0x01, 0x01, - /* 18 Byte Data Blocks 1: max resolution is 1920x1200 */ - 0x28, 0x3c, 0x80, 0xa0, 0x70, 0xb0, - 0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a, - /* 18 Byte Data Blocks 2: invalid */ - 0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - /* 18 Byte Data Blocks 3: invalid */ - 0x00, 0x00, 0x00, 0xfc, 0x00, 0x48, - 0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20, - /* 18 Byte Data Blocks 4: invalid */ - 0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30, - 0x44, 0x58, 0x51, 0x0a, 0x20, 0x20, - /* Extension Block Count */ - 0x00, - /* Checksum */ - 0x45, + /*Header*/ + 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, + /* Vendor & Product Identification */ + 0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17, + /* Version & Revision */ + 0x01, 0x04, + /* Basic Display Parameters & Features */ + 0xa5, 0x34, 0x20, 0x78, 0x23, + /* Color Characteristics */ + 0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54, + /* Established Timings: maximum resolution is 1024x768 */ + 0x21, 0x08, 0x00, + /* + * Standard Timings. + * below new resolutions can be supported: + * 1920x1080, 1280x720, 1280x960, 1280x1024, + * 1440x900, 1600x1200, 1680x1050 + */ + 0xd1, 0xc0, 0x81, 0xc0, 0x81, 0x40, 0x81, 0x80, 0x95, 0x00, + 0xa9, 0x40, 0xb3, 0x00, 0x01, 0x01, + /* 18 Byte Data Blocks 1: max resolution is 1920x1200 */ + 0x28, 0x3c, 0x80, 0xa0, 0x70, 0xb0, + 0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a, + /* 18 Byte Data Blocks 2: invalid */ + 0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + /* 18 Byte Data Blocks 3: invalid */ + 0x00, 0x00, 0x00, 0xfc, 0x00, 0x48, + 0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20, + /* 18 Byte Data Blocks 4: invalid */ + 0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30, + 0x44, 0x58, 0x51, 0x0a, 0x20, 0x20, + /* Extension Block Count */ + 0x00, + /* Checksum */ + 0x45, + }, }; #define DPCD_HEADER_SIZE 0xb @@ -189,7 +225,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, return -ENOMEM; } - memcpy(port->edid->edid_block, virtual_dp_monitor_edid, + memcpy(port->edid->edid_block, virtual_dp_monitor_edid[GVT_EDID_1920_1200], EDID_SIZE); port->edid->data_valid = true; diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index 8b234ea961f6..84f160bb7bc2 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -154,6 +154,12 @@ struct intel_vgpu_port { int type; }; +enum intel_vgpu_edid { + GVT_EDID_1024_768, + GVT_EDID_1920_1200, + GVT_EDID_NUM, +}; + void intel_gvt_emulate_vblank(struct intel_gvt *gvt); void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt); -- cgit v1.2.3 From d1a513be1f0a25f094e1577d059b9aebaa279bb2 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 24 Feb 2017 10:58:21 +0800 Subject: drm/i915/gvt: add resolution definition for vGPU type This assigns resolution definition for each vGPU type. For smaller resource type we should limit max resolution, so e.g limit to 1024x768 for 64M type, others are still default to 1920x1200. v2: Fix for actual 1920x1200 resolution Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 15 ++++++++++----- drivers/gpu/drm/i915/gvt/display.h | 14 +++++++++++++- drivers/gpu/drm/i915/gvt/gvt.h | 2 ++ drivers/gpu/drm/i915/gvt/kvmgt.c | 8 ++++---- drivers/gpu/drm/i915/gvt/vgpu.c | 18 +++++++++++------- 5 files changed, 40 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index d346e43656f7..43e02e038375 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -211,10 +211,13 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) } static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, - int type) + int type, unsigned int resolution) { struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); + if (WARN_ON(resolution >= GVT_EDID_NUM)) + return -EINVAL; + port->edid = kzalloc(sizeof(*(port->edid)), GFP_KERNEL); if (!port->edid) return -ENOMEM; @@ -225,7 +228,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, return -ENOMEM; } - memcpy(port->edid->edid_block, virtual_dp_monitor_edid[GVT_EDID_1920_1200], + memcpy(port->edid->edid_block, virtual_dp_monitor_edid[resolution], EDID_SIZE); port->edid->data_valid = true; @@ -358,16 +361,18 @@ void intel_vgpu_clean_display(struct intel_vgpu *vgpu) * Zero on success, negative error code if failed. * */ -int intel_vgpu_init_display(struct intel_vgpu *vgpu) +int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; intel_vgpu_init_i2c_edid(vgpu); if (IS_SKYLAKE(dev_priv)) - return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D); + return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D, + resolution); else - return setup_virtual_dp_monitor(vgpu, PORT_B, GVT_DP_B); + return setup_virtual_dp_monitor(vgpu, PORT_B, GVT_DP_B, + resolution); } /** diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index 84f160bb7bc2..d73de22102e2 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -160,10 +160,22 @@ enum intel_vgpu_edid { GVT_EDID_NUM, }; +static inline char *vgpu_edid_str(enum intel_vgpu_edid id) +{ + switch (id) { + case GVT_EDID_1024_768: + return "1024x768"; + case GVT_EDID_1920_1200: + return "1920x1200"; + default: + return ""; + } +} + void intel_gvt_emulate_vblank(struct intel_gvt *gvt); void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt); -int intel_vgpu_init_display(struct intel_vgpu *vgpu); +int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution); void intel_vgpu_reset_display(struct intel_vgpu *vgpu); void intel_vgpu_clean_display(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index faff044c6434..23791920ced1 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -216,6 +216,7 @@ struct intel_vgpu_type { unsigned int low_gm_size; unsigned int high_gm_size; unsigned int fence; + enum intel_vgpu_edid resolution; }; struct intel_gvt { @@ -318,6 +319,7 @@ struct intel_vgpu_creation_params { __u64 low_gm_sz; /* in MB */ __u64 high_gm_sz; /* in MB */ __u64 fence_sz; + __u64 resolution; __s32 primary; __u64 vgpu_id; }; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 10c3a4b95a92..182914c22ac5 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -295,10 +295,10 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev, return 0; return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" - "fence: %d\n", - BYTES_TO_MB(type->low_gm_size), - BYTES_TO_MB(type->high_gm_size), - type->fence); + "fence: %d\nresolution: %s\n", + BYTES_TO_MB(type->low_gm_size), + BYTES_TO_MB(type->high_gm_size), + type->fence, vgpu_edid_str(type->resolution)); } static MDEV_TYPE_ATTR_RO(available_instance); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index c27c13c3cc48..41cfa5ccae84 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -68,13 +68,14 @@ static struct { unsigned int low_mm; unsigned int high_mm; unsigned int fence; + enum intel_vgpu_edid edid; char *name; } vgpu_types[] = { /* Fixed vGPU type table */ - { MB_TO_BYTES(64), MB_TO_BYTES(512), 4, "8" }, - { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, "4" }, - { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, "2" }, - { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, "1" }, + { MB_TO_BYTES(64), MB_TO_BYTES(512), 4, GVT_EDID_1024_768, "8" }, + { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" }, + { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" }, + { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" }, }; /** @@ -119,6 +120,7 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) gvt->types[i].low_gm_size = vgpu_types[i].low_mm; gvt->types[i].high_gm_size = vgpu_types[i].high_mm; gvt->types[i].fence = vgpu_types[i].fence; + gvt->types[i].resolution = vgpu_types[i].edid; gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm, high_avail / vgpu_types[i].high_mm); @@ -129,11 +131,12 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) sprintf(gvt->types[i].name, "GVTg_V5_%s", vgpu_types[i].name); - gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u\n", + gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u res %s\n", i, gvt->types[i].name, gvt->types[i].avail_instance, gvt->types[i].low_gm_size, - gvt->types[i].high_gm_size, gvt->types[i].fence); + gvt->types[i].high_gm_size, gvt->types[i].fence, + vgpu_edid_str(gvt->types[i].resolution)); } gvt->num_types = i; @@ -258,7 +261,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_detach_hypervisor_vgpu; - ret = intel_vgpu_init_display(vgpu); + ret = intel_vgpu_init_display(vgpu, param->resolution); if (ret) goto out_clean_gtt; @@ -322,6 +325,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, param.low_gm_sz = type->low_gm_size; param.high_gm_sz = type->high_gm_size; param.fence_sz = type->fence; + param.resolution = type->resolution; /* XXX current param based on MB */ param.low_gm_sz = BYTES_TO_MB(param.low_gm_sz); -- cgit v1.2.3 From 8bcd7c188bc479bea866d286a7dc0af9734c3c64 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Fri, 24 Feb 2017 17:07:38 +0800 Subject: drm/i915/gvt: fix pcode mailbox write emulation of BDW Add pcode mailbox write emulation in gvt for BDW, reuse emulation code of Skylake. V2: refine comments, remove duplication defination of 0x138124, add IS_SKYLAKE() check for Skylake only pcode commands. Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index f89b183488e9..ebbe74072dc3 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1304,21 +1304,24 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, u32 *data0 = &vgpu_vreg(vgpu, GEN6_PCODE_DATA); switch (cmd) { - case 0x6: - /** - * "Read memory latency" command on gen9. - * Below memory latency values are read - * from skylake platform. - */ - if (!*data0) - *data0 = 0x1e1a1100; - else - *data0 = 0x61514b3d; + case GEN9_PCODE_READ_MEM_LATENCY: + if (IS_SKYLAKE(vgpu->gvt->dev_priv)) { + /** + * "Read memory latency" command on gen9. + * Below memory latency values are read + * from skylake platform. + */ + if (!*data0) + *data0 = 0x1e1a1100; + else + *data0 = 0x61514b3d; + } break; case SKL_PCODE_CDCLK_CONTROL: - *data0 = SKL_CDCLK_READY_FOR_CHANGE; + if (IS_SKYLAKE(vgpu->gvt->dev_priv)) + *data0 = SKL_CDCLK_READY_FOR_CHANGE; break; - case 0x5: + case GEN6_PCODE_READ_RC6VIDS: *data0 |= 0x1; break; } @@ -2202,7 +2205,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_F(0x4f000, 0x90, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_D(GEN6_PCODE_MAILBOX, D_PRE_SKL); + MMIO_D(GEN6_PCODE_MAILBOX, D_PRE_BDW); MMIO_D(GEN6_PCODE_DATA, D_ALL); MMIO_D(0x13812c, D_ALL); MMIO_DH(GEN7_ERR_INT, D_ALL, NULL, NULL); @@ -2281,7 +2284,6 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(0x1a054, D_ALL); MMIO_D(0x44070, D_ALL); - MMIO_D(0x215c, D_HSW_PLUS); MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -2453,6 +2455,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN7_MISCCPCTL, D_BDW_PLUS); MMIO_D(0x1c054, D_BDW_PLUS); + MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write); + MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS); MMIO_D(GEN8_PRIVATE_PAT_HI, D_BDW_PLUS); @@ -2544,7 +2548,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(HSW_PWR_WELL_BIOS, D_SKL); MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL, NULL, skl_power_well_ctl_write); - MMIO_DH(GEN6_PCODE_MAILBOX, D_SKL, NULL, mailbox_write); MMIO_D(0xa210, D_SKL_PLUS); MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); -- cgit v1.2.3 From 41bfab35b3d0e0a85ed0c12d7bafd7484e96ca78 Mon Sep 17 00:00:00 2001 From: Pei Zhang Date: Fri, 24 Feb 2017 16:03:28 +0800 Subject: drm/i915/gvt: add some new MMIOs to cmd_access white list Guest is now acces some MMIOs (0x215c, RING_INSTPM) through command which is not originally in gvt's white list. This cause huge error log printed in gvt. This patch addes these MMIOs to the white list. V2. change the commit message content. V3. remove duplicate defination of 0x20c0. V4. refine code style. Signed-off-by: Pei Zhang Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index ebbe74072dc3..57b4d538f370 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1589,7 +1589,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) #undef RING_REG MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_RING_DFH(RING_TIMESTAMP, D_ALL, F_CMD_ACCESS, ring_timestamp_mmio_read, NULL); MMIO_RING_DFH(RING_TIMESTAMP_UDW, D_ALL, F_CMD_ACCESS, @@ -2284,7 +2285,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(0x1a054, D_ALL); MMIO_D(0x44070, D_ALL); - MMIO_D(0x215c, D_HSW_PLUS); + MMIO_DFH(0x215c, D_HSW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x12178, D_ALL, F_CMD_ACCESS, NULL, NULL); -- cgit v1.2.3 From d825adb48cf9bf9e3f5cb1d927e2827f8c2abee4 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sun, 26 Feb 2017 16:15:21 +0900 Subject: xenbus: Remove duplicate inclusion of linux/init.h This patch remove duplicate inclusion of linux/init.h in xenbus_dev_frontend.c. Confirm successfully compile after remove the line. Signed-off-by: Masanari Iida Reviewed-by: Juergen Gross --- drivers/xen/xenbus/xenbus_dev_frontend.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 4d343eed08f5..1f4733b80c87 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -55,7 +55,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 3a150df945b7408c27cad2c01a1638e8b14ac562 Mon Sep 17 00:00:00 2001 From: Chunyu Hu Date: Wed, 22 Feb 2017 08:29:26 +0800 Subject: tracing: Fix code comment for ftrace_ops_get_func() There is no function 'ftrace_ops_recurs_func' existing in the current code, it was renamed to ftrace_ops_assist_func() in commit c68c0fa29341 ("ftrace: Have ftrace_ops_get_func() handle RCU and PER_CPU flags too"). Update the comment to the correct function name. Link: http://lkml.kernel.org/r/1487723366-14463-1-git-send-email-chuhu@redhat.com Signed-off-by: Chunyu Hu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0c0609326391..fd84f2e30b6d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5487,7 +5487,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, * Normally the mcount trampoline will call the ops->func, but there * are times that it should not. For example, if the ops does not * have its own recursion protection, then it should call the - * ftrace_ops_recurs_func() instead. + * ftrace_ops_assist_func() instead. * * Returns the function that the trampoline should call for @ops. */ -- cgit v1.2.3 From dcc235279a52d49023d4f1af7c3ed468a97015ae Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Feb 2017 21:29:03 +0100 Subject: gcc-plugins: fix sancov_plugin for gcc-5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The name of the local variable was inadvertantly changed from sancov_plugin_pass_info to sancov_pass_info: scripts/gcc-plugins/sancov_plugin.c: In function ‘int plugin_init(plugin_name_args*, plugin_gcc_version*)’: scripts/gcc-plugins/sancov_plugin.c:136:67: error: ‘sancov_plugin_pass_info’ was not declared in this scope This changes the conditional reference to this variable as well. Fixes: 5a45a4c5c3f5 ("gcc-plugins: consolidate on PASS_INFO macro") Signed-off-by: Arnd Bergmann Signed-off-by: Kees Cook --- scripts/gcc-plugins/sancov_plugin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gcc-plugins/sancov_plugin.c b/scripts/gcc-plugins/sancov_plugin.c index 9b0b5cbc5b89..0f98634c20a0 100644 --- a/scripts/gcc-plugins/sancov_plugin.c +++ b/scripts/gcc-plugins/sancov_plugin.c @@ -133,7 +133,7 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gc #if BUILDING_GCC_VERSION < 6000 register_callback(plugin_name, PLUGIN_START_UNIT, &sancov_start_unit, NULL); register_callback(plugin_name, PLUGIN_REGISTER_GGC_ROOTS, NULL, (void *)>_ggc_r_gt_sancov); - register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &sancov_plugin_pass_info); + register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &sancov_pass_info); #endif return 0; -- cgit v1.2.3 From 0aa5277c3a6dbe9d5991c490ef23fe900254d931 Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 28 Feb 2017 15:39:25 +0800 Subject: drm/i915/gvt: have more registers with F_CMD_ACCESS flags set those registers are render registers, should have F_CMD_ACCESS flag set v4: rebase to lastest code base v3: per zhenyu's comments, move newly added registers to a separate patch v2: per Kevin's comments, move newly added registers to the tails of lists. Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 223 ++++++++++++++++++++---------------- 1 file changed, 123 insertions(+), 100 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 57b4d538f370..37956eee342c 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1523,6 +1523,9 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, #define MMIO_GM(reg, d, r, w) \ MMIO_F(reg, 4, F_GMADR, 0xFFFFF000, 0, d, r, w) +#define MMIO_GM_RDR(reg, d, r, w) \ + MMIO_F(reg, 4, F_GMADR | F_CMD_ACCESS, 0xFFFFF000, 0, d, r, w) + #define MMIO_RO(reg, d, f, rm, r, w) \ MMIO_F(reg, 4, F_RO | f, 0, rm, d, r, w) @@ -1542,6 +1545,9 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, #define MMIO_RING_GM(prefix, d, r, w) \ MMIO_RING_F(prefix, 4, F_GMADR, 0xFFFF0000, 0, d, r, w) +#define MMIO_RING_GM_RDR(prefix, d, r, w) \ + MMIO_RING_F(prefix, 4, F_GMADR | F_CMD_ACCESS, 0xFFFF0000, 0, d, r, w) + #define MMIO_RING_RO(prefix, d, f, rm, r, w) \ MMIO_RING_F(prefix, 4, F_RO | f, 0, rm, d, r, w) @@ -1550,45 +1556,48 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) struct drm_i915_private *dev_priv = gvt->dev_priv; int ret; - MMIO_RING_DFH(RING_IMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler); + MMIO_RING_DFH(RING_IMR, D_ALL, F_CMD_ACCESS, NULL, + intel_vgpu_reg_imr_handler); MMIO_DFH(SDEIMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler); MMIO_DFH(SDEIER, D_ALL, 0, NULL, intel_vgpu_reg_ier_handler); MMIO_DFH(SDEIIR, D_ALL, 0, NULL, intel_vgpu_reg_iir_handler); MMIO_D(SDEISR, D_ALL); - MMIO_RING_D(RING_HWSTAM, D_ALL); + MMIO_RING_DFH(RING_HWSTAM, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_GM(RENDER_HWS_PGA_GEN7, D_ALL, NULL, NULL); - MMIO_GM(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL); - MMIO_GM(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL); - MMIO_GM(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(RENDER_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL); #define RING_REG(base) (base + 0x28) - MMIO_RING_D(RING_REG, D_ALL); + MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x134) - MMIO_RING_D(RING_REG, D_ALL); + MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL); #undef RING_REG - MMIO_GM(0x2148, D_ALL, NULL, NULL); - MMIO_GM(CCID, D_ALL, NULL, NULL); - MMIO_GM(0x12198, D_ALL, NULL, NULL); + MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL); + MMIO_GM_RDR(CCID, D_ALL, NULL, NULL); + MMIO_GM_RDR(0x12198, D_ALL, NULL, NULL); MMIO_D(GEN7_CXT_SIZE, D_ALL); - MMIO_RING_D(RING_TAIL, D_ALL); - MMIO_RING_D(RING_HEAD, D_ALL); - MMIO_RING_D(RING_CTL, D_ALL); - MMIO_RING_D(RING_ACTHD, D_ALL); - MMIO_RING_GM(RING_START, D_ALL, NULL, NULL); + MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_HEAD, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_CTL, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL); /* RING MODE */ #define RING_REG(base) (base + 0x29c) - MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK, NULL, ring_mode_mmio_write); + MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, + ring_mode_mmio_write); #undef RING_REG - MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_TIMESTAMP, D_ALL, F_CMD_ACCESS, @@ -1596,28 +1605,30 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_TIMESTAMP_UDW, D_ALL, F_CMD_ACCESS, ring_timestamp_mmio_read, NULL); - MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x2124, D_ALL, F_MODE_MASK, NULL, NULL); - - MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x2088, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x2470, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_D(GAM_ECOCHK, D_ALL); - MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2124, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + + MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2088, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2470, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(0x9030, D_ALL); - MMIO_D(0x20a0, D_ALL); - MMIO_D(0x2420, D_ALL); - MMIO_D(0x2430, D_ALL); - MMIO_D(0x2434, D_ALL); - MMIO_D(0x2438, D_ALL); - MMIO_D(0x243c, D_ALL); - MMIO_DFH(0x7018, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2430, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2434, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2438, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x243c, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x7018, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2148,8 +2159,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(FORCEWAKE_ACK, D_ALL); MMIO_D(GEN6_GT_CORE_STATUS, D_ALL); MMIO_D(GEN6_GT_THREAD_STATUS_REG, D_ALL); - MMIO_D(GTFIFODBG, D_ALL); - MMIO_D(GTFIFOCTL, D_ALL); + MMIO_DFH(GTFIFODBG, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GTFIFOCTL, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DH(FORCEWAKE_MT, D_PRE_SKL, NULL, mul_force_wake_write); MMIO_DH(FORCEWAKE_ACK_HSW, D_HSW | D_BDW, NULL, NULL); MMIO_D(ECOBUS, D_ALL); @@ -2291,29 +2302,29 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(0x12178, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x1217c, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_F(0x2290, 8, 0, 0, 0, D_HSW_PLUS, NULL, NULL); - MMIO_D(GEN7_OACONTROL, D_HSW); + MMIO_F(0x2290, 8, F_CMD_ACCESS, 0, 0, D_HSW_PLUS, NULL, NULL); + MMIO_DFH(GEN7_OACONTROL, D_HSW, F_CMD_ACCESS, NULL, NULL); MMIO_D(0x2b00, D_BDW_PLUS); MMIO_D(0x2360, D_BDW_PLUS); - MMIO_F(0x5200, 32, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x5240, 32, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x5280, 16, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(0x5200, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(0x5240, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(0x5280, 16, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); MMIO_DFH(0x1c17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x1c178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(BCS_SWCTRL, D_ALL); - - MMIO_F(HS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(DS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(IA_VERTICES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(IA_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(VS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(GS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(GS_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(CL_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(CL_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(PS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(PS_DEPTH_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_DFH(BCS_SWCTRL, D_ALL, F_CMD_ACCESS, NULL, NULL); + + MMIO_F(HS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(DS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(IA_VERTICES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(IA_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(VS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(GS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(GS_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(CL_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(CL_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(PS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(PS_DEPTH_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); MMIO_DH(0x4260, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); MMIO_DH(0x4264, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); MMIO_DH(0x4268, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); @@ -2329,7 +2340,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) struct drm_i915_private *dev_priv = gvt->dev_priv; int ret; - MMIO_DH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, + MMIO_DFH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, NULL, intel_vgpu_reg_imr_handler); MMIO_DH(GEN8_GT_IMR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler); @@ -2394,24 +2405,31 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DH(GEN8_MASTER_IRQ, D_BDW_PLUS, NULL, intel_vgpu_reg_master_irq_handler); - MMIO_D(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(0x1c134, D_BDW_PLUS); - - MMIO_D(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(RING_HEAD(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_GM(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); - MMIO_D(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK, NULL, ring_mode_mmio_write); - MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK, - NULL, NULL); - MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK, - NULL, NULL); + MMIO_DFH(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x1c134, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + + MMIO_DFH(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(RING_HEAD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); + MMIO_GM_RDR(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); + MMIO_DFH(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, + ring_mode_mmio_write); + MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(RING_TIMESTAMP(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, ring_timestamp_mmio_read, NULL); - MMIO_RING_D(RING_ACTHD_UDW, D_BDW_PLUS); + MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); #define RING_REG(base) (base + 0xd0) MMIO_RING_F(RING_REG, 4, F_RO, 0, @@ -2428,13 +2446,16 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) #undef RING_REG #define RING_REG(base) (base + 0x234) - MMIO_RING_F(RING_REG, 8, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO, 0, ~0LL, D_BDW_PLUS, NULL, NULL); + MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS, + NULL, NULL); + MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO | F_CMD_ACCESS, 0, + ~0LL, D_BDW_PLUS, NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x244) - MMIO_RING_D(RING_REG, D_BDW_PLUS); - MMIO_D(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS); + MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, + NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x370) @@ -2468,8 +2489,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); #undef RING_REG - MMIO_RING_GM(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL); - MMIO_GM(0x1c080, D_BDW_PLUS, NULL, NULL); + MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL); + MMIO_GM_RDR(RING_HWS_PGA(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2490,15 +2511,17 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN8_EU_DISABLE2, D_BDW_PLUS); MMIO_D(0xfdc, D_BDW_PLUS); - MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(GEN7_ROW_CHICKEN2, D_BDW_PLUS); - MMIO_D(GEN8_UCGCTL6, D_BDW_PLUS); + MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(GEN7_ROW_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(GEN8_UCGCTL6, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0xb1f0, D_BDW); - MMIO_D(0xb1c0, D_BDW); + MMIO_DFH(0xb1f0, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xb1c0, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN8_L3SQCREG4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0xb100, D_BDW); - MMIO_D(0xb10c, D_BDW); + MMIO_DFH(0xb100, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xb10c, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(0xb110, D_BDW); MMIO_F(0x24d0, 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, @@ -2508,10 +2531,10 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(0x44484, D_BDW_PLUS); MMIO_D(0x4448c, D_BDW_PLUS); - MMIO_D(0x83a4, D_BDW); + MMIO_DFH(0x83a4, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(GEN8_L3_LRA_1_GPGPU, D_BDW_PLUS); - MMIO_D(0x8430, D_BDW); + MMIO_DFH(0x8430, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(0x110000, D_BDW_PLUS); @@ -2523,9 +2546,9 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK, NULL, NULL); + MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(0x2248, D_BDW); + MMIO_DFH(0x2248, D_BDW, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2707,15 +2730,15 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(0xd08, D_SKL); MMIO_D(0x20e0, D_SKL); - MMIO_D(0x20ec, D_SKL); + MMIO_DFH(0x20ec, D_SKL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* TRTT */ - MMIO_D(0x4de0, D_SKL); - MMIO_D(0x4de4, D_SKL); - MMIO_D(0x4de8, D_SKL); - MMIO_D(0x4dec, D_SKL); - MMIO_D(0x4df0, D_SKL); - MMIO_DH(0x4df4, D_SKL, NULL, gen9_trtte_write); + MMIO_DFH(0x4de0, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4de4, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4de8, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4dec, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4df0, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4df4, D_SKL, F_CMD_ACCESS, NULL, gen9_trtte_write); MMIO_DH(0x4dfc, D_SKL, NULL, gen9_trtt_chicken_write); MMIO_D(0x45008, D_SKL); @@ -2739,7 +2762,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(0x65f08, D_SKL); MMIO_D(0x320f0, D_SKL); - MMIO_D(_REG_VCS2_EXCC, D_SKL); + MMIO_DFH(_REG_VCS2_EXCC, D_SKL, F_CMD_ACCESS, NULL, NULL); MMIO_D(0x70034, D_SKL); MMIO_D(0x71034, D_SKL); MMIO_D(0x72034, D_SKL); @@ -2752,7 +2775,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL); MMIO_D(0x44500, D_SKL); - MMIO_D(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS); + MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } -- cgit v1.2.3 From 9112caafdf01439a6e43f4d8c09ceed7dc613d4a Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 28 Feb 2017 15:40:10 +0800 Subject: drm/i915/gvt: add more registers into handlers list those registers are render registers with F_CMD_ACCESS flag set Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 37956eee342c..ef17c38e00c4 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2332,6 +2332,17 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(0x4270, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); MMIO_DFH(0x4094, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(ARB_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_RING_GM_RDR(RING_BBADDR, D_ALL, NULL, NULL); + MMIO_DFH(0x2220, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x12220, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x22220, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_SYNC_1, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_SYNC_0, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x22178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x1a178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x1a17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2217c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2550,6 +2561,15 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DFH(0x2248, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe220, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe230, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe240, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe260, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe270, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe280, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe2a0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe2b0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe2c0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2776,6 +2796,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(0x44500, D_SKL); MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); return 0; } -- cgit v1.2.3 From 1f58af304cce9e4a25b62b3619e69c586203c8ca Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 28 Feb 2017 15:41:03 +0800 Subject: drm/i915/gvt: fix an error for one register register 0x20e0 should be mode register v2: rebased to latest code base Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index ef17c38e00c4..548aedfbd402 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2749,7 +2749,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL); MMIO_D(0xd08, D_SKL); - MMIO_D(0x20e0, D_SKL); + MMIO_DFH(0x20e0, D_SKL, F_MODE_MASK, NULL, NULL); MMIO_DFH(0x20ec, D_SKL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* TRTT */ -- cgit v1.2.3 From 858b0f571d30916bd69c45922045f24f26d6bfc9 Mon Sep 17 00:00:00 2001 From: Bing Niu Date: Tue, 28 Feb 2017 11:39:48 -0500 Subject: drm/i915/gvt: set SFUSE_STRAP properly for vitual monitor detection update the correct virtual montior connection status to vreg v2: address yulei's comment on commit message Signed-off-by: Bing Niu Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 43e02e038375..5419ae6ec633 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -176,14 +176,20 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | SDE_PORTE_HOTPLUG_SPT); - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT; + vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED; + } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT; + vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED; + } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; + vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; + } if (IS_SKYLAKE(dev_priv) && intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) { @@ -196,6 +202,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) GEN8_PORT_DP_A_HOTPLUG; else vgpu_vreg(vgpu, SDEISR) |= SDE_PORTA_HOTPLUG_SPT; + + vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED; } } -- cgit v1.2.3 From b6b6fbc8310e1b12b05717da9df6953b138f812b Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Wed, 1 Mar 2017 14:34:52 +0800 Subject: drm/i915/gvt: use pfn_valid for better checking Before get the page from pfn, use pfn_valid to check if pfn is able to translate to page structure. Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 182914c22ac5..241354890603 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -96,10 +96,10 @@ static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn, struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; dma_addr_t daddr; - page = pfn_to_page(pfn); - if (is_error_page(page)) + if (unlikely(!pfn_valid(pfn))) return -EFAULT; + page = pfn_to_page(pfn); daddr = dma_map_page(dev, page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, daddr)) -- cgit v1.2.3 From 9dcfe2c75b51f454f39c2de4756e841228865b47 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Mar 2017 09:25:55 +0100 Subject: locking/refcounts: Change WARN() to WARN_ONCE() Linus noticed that the new refcount.h APIs used WARN(), which would turn into a dmesg DoS if it triggers frequently on some buggy driver. So make sure we only warn once. These warnings are never supposed to happen, so it's typically not a problem to lose subsequent warnings. Suggested-by: Linus Torvalds Cc: Peter Zijlstra (Intel) Cc: Elena Reshetova Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/CA+55aFzbYUTZ=oqZ2YgDjY0C2_n6ODhTfqj6V+m5xWmDxsuB0w@mail.gmail.com Signed-off-by: Ingo Molnar --- lib/refcount.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/refcount.c b/lib/refcount.c index 1d33366189d1..aa09ad3c30b0 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -58,7 +58,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r) val = old; } - WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); return true; } @@ -66,7 +66,7 @@ EXPORT_SYMBOL_GPL(refcount_add_not_zero); void refcount_add(unsigned int i, refcount_t *r) { - WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); + WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); } EXPORT_SYMBOL_GPL(refcount_add); @@ -97,7 +97,7 @@ bool refcount_inc_not_zero(refcount_t *r) val = old; } - WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); return true; } @@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(refcount_inc_not_zero); */ void refcount_inc(refcount_t *r) { - WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); + WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); } EXPORT_SYMBOL_GPL(refcount_inc); @@ -125,7 +125,7 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r) new = val - i; if (new > val) { - WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n"); return false; } @@ -164,7 +164,7 @@ EXPORT_SYMBOL_GPL(refcount_dec_and_test); void refcount_dec(refcount_t *r) { - WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); + WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); } EXPORT_SYMBOL_GPL(refcount_dec); @@ -204,7 +204,7 @@ bool refcount_dec_not_one(refcount_t *r) new = val - 1; if (new > val) { - WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n"); return true; } -- cgit v1.2.3 From 75013fb16f8484898eaa8d0b08fed942d790f029 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 1 Mar 2017 01:23:24 +0900 Subject: kprobes/x86: Fix kernel panic when certain exception-handling addresses are probed Fix to the exception table entry check by using probed address instead of the address of copied instruction. This bug may cause unexpected kernel panic if user probe an address where an exception can happen which should be fixup by __ex_table (e.g. copy_from_user.) Unless user puts a kprobe on such address, this doesn't cause any problem. This bug has been introduced years ago, by commit: 464846888d9a ("x86/kprobes: Fix a bug which can modify kernel code permanently"). Signed-off-by: Masami Hiramatsu Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 464846888d9a ("x86/kprobes: Fix a bug which can modify kernel code permanently") Link: http://lkml.kernel.org/r/148829899399.28855.12581062400757221722.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/common.h | 2 +- arch/x86/kernel/kprobes/core.c | 6 +++--- arch/x86/kernel/kprobes/opt.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index c6ee63f927ab..d688826e5736 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -67,7 +67,7 @@ #endif /* Ensure if the instruction can be boostable */ -extern int can_boost(kprobe_opcode_t *instruction); +extern int can_boost(kprobe_opcode_t *instruction, void *addr); /* Recover instruction if given address is probed */ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 520b8dfe1640..88b3c942473d 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -166,12 +166,12 @@ NOKPROBE_SYMBOL(skip_prefixes); * Returns non-zero if opcode is boostable. * RIP relative instructions are adjusted at copying time in 64 bits mode */ -int can_boost(kprobe_opcode_t *opcodes) +int can_boost(kprobe_opcode_t *opcodes, void *addr) { kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; - if (search_exception_tables((unsigned long)opcodes)) + if (search_exception_tables((unsigned long)addr)) return 0; /* Page fault may occur on this address. */ retry: @@ -416,7 +416,7 @@ static int arch_copy_kprobe(struct kprobe *p) * __copy_instruction can modify the displacement of the instruction, * but it doesn't affect boostable check. */ - if (can_boost(p->ainsn.insn)) + if (can_boost(p->ainsn.insn, p->addr)) p->ainsn.boostable = 0; else p->ainsn.boostable = -1; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 3d1bee9d6a72..3e7c6e5a08ff 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -178,7 +178,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src) while (len < RELATIVEJUMP_SIZE) { ret = __copy_instruction(dest + len, src + len); - if (!ret || !can_boost(dest + len)) + if (!ret || !can_boost(dest + len, src + len)) return -EINVAL; len += ret; } -- cgit v1.2.3 From 10bce8410607a18eb3adf5d2739db8c8593e110d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 27 Feb 2017 23:50:58 +0100 Subject: x86/kdebugfs: Move boot params hierarchy under (debugfs)/x86/ ... since this is all x86-specific data and it makes sense to have it under x86/ logically instead in the toplevel debugfs dir. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170227225058.27289-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/kdebugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index bdb83e431d89..38b64587b31b 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -167,7 +167,7 @@ static int __init boot_params_kdebugfs_init(void) struct dentry *dbp, *version, *data; int error = -ENOMEM; - dbp = debugfs_create_dir("boot_params", NULL); + dbp = debugfs_create_dir("boot_params", arch_debugfs_dir); if (!dbp) return -ENOMEM; -- cgit v1.2.3 From e7c95effcd3a7a0c9535c809141ca499fede2c31 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 28 Feb 2017 07:05:59 +0100 Subject: s390/crypt: fix missing unlock in ctr_paes_crypt on error path The ctr mode of protected key aes uses the ctrblk page if the ctrblk_lock could be acquired. If the protected key has to be reestablished and this operation fails the unlock for the ctrblk_lock is missing. Add it. Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/paes_s390.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index d69ea495c4d7..716b17238599 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -474,8 +474,11 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, ret = blkcipher_walk_done(desc, walk, nbytes - n); } if (k < n) { - if (__ctr_paes_set_key(ctx) != 0) + if (__ctr_paes_set_key(ctx) != 0) { + if (locked) + spin_unlock(&ctrblk_lock); return blkcipher_walk_done(desc, walk, -EIO); + } } } if (locked) -- cgit v1.2.3 From d9fcf2a1cbd1ff65d0109b1b400938808007fcd5 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 28 Feb 2017 07:42:01 +0100 Subject: s390: fix in-kernel program checks A program check inside the kernel takes a slightly different path in entry.S compare to a normal user fault. A recent change moved the store of the breaking event address into the path taken for in-kernel program checks as well, but %r14 has not been setup to point to the correct location. A wild store is the consequence. Move the store of the breaking event address to the code path for user space faults. Fixes: 34525e1f7e8d ("s390: store breaking event address only for program checks") Reported-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index dff2152350a7..6a7d737d514c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -490,7 +490,7 @@ ENTRY(pgm_check_handler) jnz .Lpgm_svcper # -> single stepped svc 1: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 3f + j 4f 2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER lg %r15,__LC_KERNEL_STACK lgr %r14,%r12 @@ -499,8 +499,8 @@ ENTRY(pgm_check_handler) tm __LC_PGM_ILC+2,0x02 # check for transaction abort jz 3f mvc __THREAD_trap_tdb(256,%r14),0(%r13) -3: la %r11,STACK_FRAME_OVERHEAD(%r15) - stg %r10,__THREAD_last_break(%r14) +3: stg %r10,__THREAD_last_break(%r14) +4: la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC stmg %r8,%r9,__PT_PSW(%r11) @@ -509,14 +509,14 @@ ENTRY(pgm_check_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) stg %r10,__PT_ARGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception - jz 4f + jz 5f tmhh %r8,0x0001 # kernel per event ? jz .Lpgm_kprobe oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -4: REENABLE_IRQS +5: REENABLE_IRQS xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table llgh %r10,__PT_INT_CODE+2(%r11) -- cgit v1.2.3 From e69ca822ce0ed3ba006ce384d7d205c81d92373f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:16:03 +0100 Subject: s390/cputime: remove last traces of cputime_t The cputime_t type is a thing of the past, replace the last occurences of the type in the s390 code with a simple u64. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cputime.h | 14 ++------------ arch/s390/kernel/vtime.c | 2 +- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index d1c407ddf703..e5672d6276a6 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -8,32 +8,22 @@ #define _S390_CPUTIME_H #include -#include #define CPUTIME_PER_USEC 4096ULL #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC) /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ -typedef unsigned long long __nocast cputime_t; -typedef unsigned long long __nocast cputime64_t; - #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new) -static inline unsigned long __div(unsigned long long n, unsigned long base) -{ - return n / base; -} - /* * Convert cputime to microseconds and back. */ -static inline unsigned int cputime_to_usecs(const cputime_t cputime) +static inline u64 cputime_to_usecs(const u64 cputime) { - return (__force unsigned long long) cputime >> 12; + return cputime >> 12; } - u64 arch_cpu_idle_time(int cpu); #define arch_idle_time(cpu) arch_cpu_idle_time(cpu) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 31bd96e81167..8f5f59a151b4 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -111,7 +111,7 @@ static inline u64 scale_vtime(u64 vtime) } static void account_system_index_scaled(struct task_struct *p, - cputime_t cputime, cputime_t scaled, + u64 cputime, u64 scaled, enum cpu_usage_stat index) { p->stimescaled += cputime_to_nsecs(scaled); -- cgit v1.2.3 From 3c915bdc1775acfa214195da1ffb39dabdd1a389 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:18:34 +0100 Subject: s390/cputime: reset all accounting fields on fork copy_thread has to reset all cputime related field in the task struct, not only user_timer and system_timer. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 54281660582c..249deafaa6ee 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -121,7 +121,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, clear_tsk_thread_flag(p, TIF_SINGLE_STEP); /* Initialize per thread user and system timer values */ p->thread.user_timer = 0; + p->thread.guest_timer = 0; p->thread.system_timer = 0; + p->thread.hardirq_timer = 0; + p->thread.softirq_timer = 0; frame->sf.back_chain = 0; /* new return point is ret_from_fork */ -- cgit v1.2.3 From e53051e757d6cd66741955b93581e54415e48a70 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:21:10 +0100 Subject: s390/cputime: provide archicture specific cputime_to_nsecs The generic cputime_to_nsecs function first converts the cputime to micro-seconds and then multiplies the result with 1000. This looses some bits of accuracy, provide our own version of cputime_to_nsecs that does not loose precision. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cputime.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index e5672d6276a6..9072bf63a846 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -8,6 +8,7 @@ #define _S390_CPUTIME_H #include +#include #define CPUTIME_PER_USEC 4096ULL #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC) @@ -17,13 +18,18 @@ #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new) /* - * Convert cputime to microseconds and back. + * Convert cputime to microseconds. */ static inline u64 cputime_to_usecs(const u64 cputime) { return cputime >> 12; } +/* + * Convert cputime to nanoseconds. + */ +#define cputime_to_nsecs(cputime) tod_to_ns(cputime) + u64 arch_cpu_idle_time(int cpu); #define arch_idle_time(cpu) arch_cpu_idle_time(cpu) -- cgit v1.2.3 From d03bd0454b101adb94d0b5a9cc11396182943cb4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:47:57 +0100 Subject: s390/timex: micro optimization for tod_to_ns The conversion of a TOD value to nano-seconds currently uses a 32/32 bit split with the calculation for "nsecs = (TOD * 125) >> 9". Using a 55/9 bit split saves an instruction. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/timex.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 354344dcc198..118535123f34 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -206,20 +206,16 @@ static inline unsigned long long get_tod_clock_monotonic(void) * ns = (todval * 125) >> 9; * * In order to avoid an overflow with the multiplication we can rewrite this. - * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits) + * With a split todval == 2^9 * th + tl (th upper 55 bits, tl lower 9 bits) * we end up with * - * ns = ((2^32 * th + tl) * 125 ) >> 9; - * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9); + * ns = ((2^9 * th + tl) * 125 ) >> 9; + * -> ns = (th * 125) + ((tl * 125) >> 9); * */ static inline unsigned long long tod_to_ns(unsigned long long todval) { - unsigned long long ns; - - ns = ((todval >> 32) << 23) * 125; - ns += ((todval & 0xffffffff) * 125) >> 9; - return ns; + return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9); } #endif -- cgit v1.2.3 From b28ace12661fbcfd90959c1e84ff5a85113a82a1 Mon Sep 17 00:00:00 2001 From: Franck Demathieu Date: Thu, 23 Feb 2017 10:48:55 +0100 Subject: irqchip/crossbar: Fix incorrect type of local variables The max and entry variables are unsigned according to the dt-bindings. Fix following 3 sparse issues (-Wtypesign): drivers/irqchip/irq-crossbar.c:222:52: warning: incorrect type in argument 3 (different signedness) drivers/irqchip/irq-crossbar.c:222:52: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:222:52: got int * drivers/irqchip/irq-crossbar.c:245:56: warning: incorrect type in argument 4 (different signedness) drivers/irqchip/irq-crossbar.c:245:56: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:245:56: got int * drivers/irqchip/irq-crossbar.c:263:56: warning: incorrect type in argument 4 (different signedness) drivers/irqchip/irq-crossbar.c:263:56: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:263:56: got int * Signed-off-by: Franck Demathieu Cc: marc.zyngier@arm.com Cc: jason@lakedaemon.net Link: http://lkml.kernel.org/r/20170223094855.6546-1-fdemathieu@gmail.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-crossbar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 1eef56a89b1f..05bbf171df37 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -198,7 +198,8 @@ static const struct irq_domain_ops crossbar_domain_ops = { static int __init crossbar_of_init(struct device_node *node) { - int i, size, max = 0, reserved = 0, entry; + int i, size, reserved = 0; + u32 max = 0, entry; const __be32 *irqsr; int ret = -ENOMEM; -- cgit v1.2.3 From bb3f0a52630c84807fca9bdd76ac2f5dcec82689 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Tue, 28 Feb 2017 13:50:52 +0800 Subject: x86/apic: Fix a warning message in logical CPU IDs allocation The current warning message in allocate_logical_cpuid() is somewhat confusing: Only 1 processors supported.Processor 2/0x2 and the rest are ignored. As it might imply that there's only one CPU in the system - while what we ran into here is a kernel limitation. Fix the warning message to clarify all that: APIC: NR_CPUS/possible_cpus limit of 2 reached. Processor 2/0x2 and the rest are ignored. ( Also update the error return from -1 to -EINVAL, which is the more canonical return value. ) Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: nicstange@gmail.com Cc: wanpeng.li@hotmail.com Link: http://lkml.kernel.org/r/1488261052-25753-1-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4261b3282ad9..11088b86e5c7 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2062,10 +2062,10 @@ static int allocate_logical_cpuid(int apicid) /* Allocate a new cpuid. */ if (nr_logical_cpuids >= nr_cpu_ids) { - WARN_ONCE(1, "Only %d processors supported." + WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %i reached. " "Processor %d/0x%x and the rest are ignored.\n", - nr_cpu_ids - 1, nr_logical_cpuids, apicid); - return -1; + nr_cpu_ids, nr_logical_cpuids, apicid); + return -EINVAL; } cpuid_to_apicid[nr_logical_cpuids] = apicid; -- cgit v1.2.3 From 11277aabcbbe13916151af897d29a5e9f71ca73f Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Thu, 23 Feb 2017 17:16:41 +0800 Subject: x86/apic: Simplify enable_IR_x2apic(), remove try_to_enable_IR() The following commit: 2e63ad4bd5dd ("x86/apic: Do not init irq remapping if ioapic is disabled") ... added a check for skipped IO-APIC setup to enable_IR_x2apic(), but this check is also duplicated in try_to_enable_IR() - and it will never succeed in calling irq_remapping_enable(). Remove the whole irq_remapping_enable() complication: if the IO-APIC is disabled we cannot enable IRQ remapping. Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: nicstange@gmail.com Cc: wanpeng.li@hotmail.com Link: http://lkml.kernel.org/r/1487841401-1543-1-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 11088b86e5c7..aee7deddabd0 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1610,24 +1610,15 @@ static inline void try_to_enable_x2apic(int remap_mode) { } static inline void __x2apic_enable(void) { } #endif /* !CONFIG_X86_X2APIC */ -static int __init try_to_enable_IR(void) -{ -#ifdef CONFIG_X86_IO_APIC - if (!x2apic_enabled() && skip_ioapic_setup) { - pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); - return -1; - } -#endif - return irq_remapping_enable(); -} - void __init enable_IR_x2apic(void) { unsigned long flags; int ret, ir_stat; - if (skip_ioapic_setup) + if (skip_ioapic_setup) { + pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); return; + } ir_stat = irq_remapping_prepare(); if (ir_stat < 0 && !x2apic_supported()) @@ -1645,7 +1636,7 @@ void __init enable_IR_x2apic(void) /* If irq_remapping_prepare() succeeded, try to enable it */ if (ir_stat >= 0) - ir_stat = try_to_enable_IR(); + ir_stat = irq_remapping_enable(); /* ir_stat contains the remap mode or an error code */ try_to_enable_x2apic(ir_stat); -- cgit v1.2.3 From 2a4d0c627f5374f365a873dea4e10ae0bb437680 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 13 Feb 2017 13:13:36 +0300 Subject: x86/selftests: Add clobbers for int80 on x86_64 Kernel erases R8..R11 registers prior returning to userspace from int80: https://lkml.org/lkml/2009/10/1/164 GCC can reuse these registers and doesn't expect them to change during syscall invocation. I met this kind of bug in CRIU once GCC 6.1 and CLANG stored local variables in those registers and the kernel zerofied them during syscall: https://github.com/xemul/criu/commit/990d33f1a1cdd17bca6c2eb059ab3be2564f7fa2 By that reason I suggest to add those registers to clobbers in selftests. Also, as noted by Andy - removed unneeded clobber for flags in INT $0x80 inline asm. Signed-off-by: Dmitry Safonov Acked-by: Andy Lutomirski Cc: 0x7f454c46@gmail.com Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Shuah Khan Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Link: http://lkml.kernel.org/r/20170213101336.20486-1-dsafonov@virtuozzo.com Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/fsgsbase.c | 2 +- tools/testing/selftests/x86/ldt_gdt.c | 16 +++++++++++----- tools/testing/selftests/x86/ptrace_syscall.c | 3 ++- tools/testing/selftests/x86/single_step_syscall.c | 5 ++++- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 5b2b4b3c634c..b4967d875236 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -245,7 +245,7 @@ void do_unexpected_base(void) long ret; asm volatile ("int $0x80" : "=a" (ret) : "a" (243), "b" (low_desc) - : "flags"); + : "r8", "r9", "r10", "r11"); memcpy(&desc, low_desc, sizeof(desc)); munmap(low_desc, sizeof(desc)); diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 4af47079cf04..f6121612e769 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -45,6 +45,12 @@ #define AR_DB (1 << 22) #define AR_G (1 << 23) +#ifdef __x86_64__ +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" +#else +# define INT80_CLOBBERS +#endif + static int nerrs; /* Points to an array of 1024 ints, each holding its own index. */ @@ -588,7 +594,7 @@ static int invoke_set_thread_area(void) asm volatile ("int $0x80" : "=a" (ret), "+m" (low_user_desc) : "a" (243), "b" (low_user_desc) - : "flags"); + : INT80_CLOBBERS); return ret; } @@ -657,7 +663,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -688,7 +694,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -721,7 +727,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base); @@ -774,7 +780,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base); diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index b037ce9cf116..eaea92439708 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -58,7 +58,8 @@ static void do_full_int80(struct syscall_args32 *args) asm volatile ("int $0x80" : "+a" (args->nr), "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2), - "+S" (args->arg3), "+D" (args->arg4), "+r" (bp)); + "+S" (args->arg3), "+D" (args->arg4), "+r" (bp) + : : "r8", "r9", "r10", "r11"); args->arg5 = bp; #else sys32_helper(args, int80_and_ret); diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index 50c26358e8b7..a48da95c18fd 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -56,9 +56,11 @@ static volatile sig_atomic_t sig_traps; #ifdef __x86_64__ # define REG_IP REG_RIP # define WIDTH "q" +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" #else # define REG_IP REG_EIP # define WIDTH "l" +# define INT80_CLOBBERS #endif static unsigned long get_eflags(void) @@ -140,7 +142,8 @@ int main() printf("[RUN]\tSet TF and check int80\n"); set_eflags(get_eflags() | X86_EFLAGS_TF); - asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)); + asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid) + : INT80_CLOBBERS); check_result(); /* -- cgit v1.2.3 From 6b0b7551428e4caae1e2c023a529465a9a9ae2d4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 16 Feb 2017 17:00:50 +1100 Subject: perf/core: Rename CONFIG_[UK]PROBE_EVENT to CONFIG_[UK]PROBE_EVENTS We have uses of CONFIG_UPROBE_EVENT and CONFIG_KPROBE_EVENT as well as CONFIG_UPROBE_EVENTS and CONFIG_KPROBE_EVENTS. Consistently use the plurals. Signed-off-by: Anton Blanchard Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: alexander.shishkin@linux.intel.com Cc: davem@davemloft.net Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/20170216060050.20866-1-anton@ozlabs.org Signed-off-by: Ingo Molnar --- Documentation/trace/kprobetrace.txt | 2 +- Documentation/trace/uprobetracer.txt | 2 +- arch/powerpc/configs/85xx/kmp204x_defconfig | 2 +- arch/s390/configs/default_defconfig | 2 +- arch/s390/configs/gcov_defconfig | 2 +- arch/s390/configs/performance_defconfig | 2 +- arch/s390/defconfig | 2 +- kernel/trace/Kconfig | 6 +++--- kernel/trace/Makefile | 4 ++-- kernel/trace/trace.c | 10 +++++----- kernel/trace/trace_probe.h | 4 ++-- 11 files changed, 19 insertions(+), 19 deletions(-) diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index e4991fb1eedc..41ef9d8efe95 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -12,7 +12,7 @@ kprobes can probe (this means, all functions body except for __kprobes functions). Unlike the Tracepoint based event, this can be added and removed dynamically, on the fly. -To enable this feature, build your kernel with CONFIG_KPROBE_EVENT=y. +To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y. Similar to the events tracer, this doesn't need to be activated via current_tracer. Instead of that, add probe points via diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt index fa7b680ee8a0..bf526a7c5559 100644 --- a/Documentation/trace/uprobetracer.txt +++ b/Documentation/trace/uprobetracer.txt @@ -7,7 +7,7 @@ Overview -------- Uprobe based trace events are similar to kprobe based trace events. -To enable this feature, build your kernel with CONFIG_UPROBE_EVENT=y. +To enable this feature, build your kernel with CONFIG_UPROBE_EVENTS=y. Similar to the kprobe-event tracer, this doesn't need to be activated via current_tracer. Instead of that, add probe points via diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig index aaaaa609cd24..34a4da23f000 100644 --- a/arch/powerpc/configs/85xx/kmp204x_defconfig +++ b/arch/powerpc/configs/85xx/kmp204x_defconfig @@ -210,7 +210,7 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_SCHEDSTATS=y CONFIG_RCU_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=y diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 143b1e00b818..4b176fe83da4 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -609,7 +609,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index f05d2d6e1087..0de46cc397f6 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -560,7 +560,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 2358bf33c5ef..e167557b434c 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -558,7 +558,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 68bfd09f1b02..97189dbaf34b 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -179,7 +179,7 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_KPROBES_SANITY_TEST=y diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d5038005eb5d..d4a06e714645 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -429,7 +429,7 @@ config BLK_DEV_IO_TRACE If unsure, say N. -config KPROBE_EVENT +config KPROBE_EVENTS depends on KPROBES depends on HAVE_REGS_AND_STACK_ACCESS_API bool "Enable kprobes-based dynamic events" @@ -447,7 +447,7 @@ config KPROBE_EVENT This option is also required by perf-probe subcommand of perf tools. If you want to use perf tools, this option is strongly recommended. -config UPROBE_EVENT +config UPROBE_EVENTS bool "Enable uprobes-based dynamic events" depends on ARCH_SUPPORTS_UPROBES depends on MMU @@ -466,7 +466,7 @@ config UPROBE_EVENT config BPF_EVENTS depends on BPF_SYSCALL - depends on (KPROBE_EVENT || UPROBE_EVENT) && PERF_EVENTS + depends on (KPROBE_EVENTS || UPROBE_EVENTS) && PERF_EVENTS bool default y help diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index e57980845549..90f2701d92a7 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o -obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o +obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o obj-$(CONFIG_TRACEPOINTS) += power-traces.o ifeq ($(CONFIG_PM),y) obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o @@ -66,7 +66,7 @@ ifeq ($(CONFIG_TRACING),y) obj-$(CONFIG_KGDB_KDB) += trace_kdb.o endif obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o -obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o +obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 707445ceb7ef..f35109514a01 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4341,22 +4341,22 @@ static const char readme_msg[] = "\t\t\t traces\n" #endif #endif /* CONFIG_STACK_TRACER */ -#ifdef CONFIG_KPROBE_EVENT +#ifdef CONFIG_KPROBE_EVENTS " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif -#ifdef CONFIG_UPROBE_EVENT +#ifdef CONFIG_UPROBE_EVENTS " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif -#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT) +#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) "\t accepts: event-definitions (one definition per line)\n" "\t Format: p|r[:[/]] []\n" "\t -:[/]\n" -#ifdef CONFIG_KPROBE_EVENT +#ifdef CONFIG_KPROBE_EVENTS "\t place: [:][+]|\n" #endif -#ifdef CONFIG_UPROBE_EVENT +#ifdef CONFIG_UPROBE_EVENTS "\t place: :\n" #endif "\t args: =fetcharg[:type]\n" diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 0c0ae54d44c6..903273c93e61 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -248,7 +248,7 @@ ASSIGN_FETCH_FUNC(file_offset, ftype), \ #define FETCH_TYPE_STRING 0 #define FETCH_TYPE_STRSIZE 1 -#ifdef CONFIG_KPROBE_EVENT +#ifdef CONFIG_KPROBE_EVENTS struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); @@ -278,7 +278,7 @@ alloc_symbol_cache(const char *sym, long offset) { return NULL; } -#endif /* CONFIG_KPROBE_EVENT */ +#endif /* CONFIG_KPROBE_EVENTS */ struct probe_arg { struct fetch_param fetch; -- cgit v1.2.3 From 1b17c6df852851b40c3c27c66b8fa2fd99cf25d8 Mon Sep 17 00:00:00 2001 From: Andrew Banman Date: Fri, 17 Feb 2017 11:07:49 -0600 Subject: x86/platform/uv/BAU: Fix HUB errors by remove initial write to sw-ack register Writing to the software acknowledge clear register when there are no pending messages causes a HUB error to assert. The original intent of this write was to clear the pending bits before start of operation, but this is an incorrect method and has been determined to be unnecessary. Signed-off-by: Andrew Banman Acked-by: Mike Travis Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: rja@hpe.com Cc: sivanich@hpe.com Link: http://lkml.kernel.org/r/1487351269-181133-1-git-send-email-abanman@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/tlb_uv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 766d4d3529a1..f25982cdff90 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1847,7 +1847,6 @@ static void pq_init(int node, int pnode) ops.write_payload_first(pnode, first); ops.write_payload_last(pnode, last); - ops.write_g_sw_ack(pnode, 0xffffUL); /* in effect, all msg_type's are set to MSG_NOOP */ memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); -- cgit v1.2.3 From 90b28ded88dda8bea82b4a86923e73ba0746d884 Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Sun, 19 Feb 2017 19:32:48 +0100 Subject: x86/reboot/quirks: Add ASUS EeeBook X205TA reboot quirk Without the parameter reboot=a, ASUS EeeBook X205TA will hang when it should reboot. This adds the appropriate quirk, thus fixing the problem. Signed-off-by: Matjaz Hegedic Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e244c19a2451..01c9cda3e1b7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -223,6 +223,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "P4S800"), }, }, + { /* Handle problems with rebooting on ASUS EeeBook X205TA */ + .callback = set_acpi_reboot, + .ident = "ASUS EeeBook X205TA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + }, + }, /* Certec */ { /* Handle problems with rebooting on Certec BPC600 */ -- cgit v1.2.3 From 73667e31a153a66da97feb1584726222504924f8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Feb 2017 22:17:17 +0100 Subject: x86/hyperv: Hide unused label This new 32-bit warning just showed up: arch/x86/hyperv/hv_init.c: In function 'hyperv_init': arch/x86/hyperv/hv_init.c:167:1: error: label 'register_msr_cs' defined but not used [-Werror=unused-label] The easiest solution is to move the label up into the existing #ifdef that has the goto. Fixes: dee863b571b0 ("hv: export current Hyper-V clocksource") Signed-off-by: Arnd Bergmann Acked-by: Stephen Hemminger Cc: Greg Kroah-Hartman Cc: Haiyang Zhang Cc: devel@linuxdriverproject.org Cc: Vitaly Kuznetsov Cc: "K. Y. Srinivasan" Link: http://lkml.kernel.org/r/20170214211736.2641241-1-arnd@arndb.de Signed-off-by: Thomas Gleixner --- arch/x86/hyperv/hv_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index db64baf0e500..8bef70e7f3cc 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -158,13 +158,13 @@ void hyperv_init(void) clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); return; } +register_msr_cs: #endif /* * For 32 bit guests just use the MSR based mechanism for reading * the partition counter. */ -register_msr_cs: hyperv_cs = &hyperv_cs_msr; if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); -- cgit v1.2.3 From aa5ec3f715d576353c7d8f4f8085634bd845b73f Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 27 Feb 2017 21:29:22 +0900 Subject: x86/vmware: Remove duplicate inclusion of asm/timer.h Signed-off-by: Masanari Iida Cc: akataria@vmware.com Cc: virtualization@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20170227122922.26230-1-standby24x7@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/vmware.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 891f4dad7b2c..22403a28caf5 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -30,7 +30,6 @@ #include #include #include -#include #undef pr_fmt #define pr_fmt(fmt) "vmware: " fmt -- cgit v1.2.3 From e86a2d2d34e20289ae7d46692e16d43c3a785db9 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 27 Feb 2017 22:07:03 +0900 Subject: x86/intel_rdt: Remove duplicate inclusion of linux/cpu.h Signed-off-by: Masanari Iida Cc: fenghua.yu@intel.com Link: http://lkml.kernel.org/r/20170227130703.26968-1-standby24x7@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 8af04afdfcb9..759577d9d166 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 153654dbe595a68845ba14d5b0bfe299fa6a7e99 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Tue, 28 Feb 2017 21:34:28 +0800 Subject: x86/PCI: Implement pcibios_release_device to release IRQ from IOAPIC The revert of 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()") causes a problem for IOAPIC hotplug. The problem is that IRQs are allocated and freed in pci_enable_device() and pci_disable_device(). But there are some drivers which don't call pci_disable_device(), and they have good reasons not calling it, so if they're using IOAPIC their IRQs won't have a chance to be released from the IOAPIC. When this happens IOAPIC hot-removal fails with a kernel stack dump and an error message like this: [149335.697989] pin16 on IOAPIC2 is still in use. It turns out that we can fix it in a different way without moving IRQ allocation into pcibios_alloc_irq(), thus avoiding the regression of 991de2e59090. We can keep the allocation and freeing of IRQs as is within pci_enable_device()/pci_disable_device(), without breaking any previous assumption of the rest of the system, keeping compatibility with both the legacy and the modern drivers. We can accomplish this by implementing the existing __weak hook of pcibios_release_device() thus when a pci device is about to be deleted we get notified in the hook and take the chance to release its IRQ, if any, from the IOAPIC. Implement pcibios_release_device() for x86 to release any IRQ not released by the driver. Signed-off-by: Rui Wang Cc: tony.luck@intel.com Cc: linux-pci@vger.kernel.org Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: fengguang.wu@intel.com Cc: helgaas@kernel.org Cc: kbuild-all@01.org Cc: bhelgaas@google.com Link: http://lkml.kernel.org/r/1488288869-31290-2-git-send-email-rui.y.wang@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/pci/common.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 0cb52ae0a8f0..190e718694b1 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -735,6 +735,15 @@ void pcibios_disable_device (struct pci_dev *dev) pcibios_disable_irq(dev); } +#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC +void pcibios_release_device(struct pci_dev *dev) +{ + if (atomic_dec_return(&dev->enable_cnt) >= 0) + pcibios_disable_device(dev); + +} +#endif + int pci_ext_cfg_avail(void) { if (raw_pci_ext_ops) -- cgit v1.2.3 From f2ae5da726172fcf82f7be801489dd585f6a38eb Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Tue, 28 Feb 2017 21:34:29 +0800 Subject: x86/ioapic: Split IOAPIC hot-removal into two steps The hot removal of IOAPIC is handling PCI and ACPI removal in one go. That only works when the PCI drivers released the interrupt resources, but breaks when a IOAPIC interrupt is still associated to a PCI device. The new pcibios_release_device() callback allows to solve that problem by splitting the removal into two steps: 1) PCI removal: Release all PCI resources including eventually not yet released IOAPIC interrupts via the new pcibios_release_device() callback. 2) ACPI removal: After release of all PCI resources the ACPI resources can be released without issue. [ tglx: Rewrote changelog ] Signed-off-by: Rui Wang Cc: tony.luck@intel.com Cc: linux-pci@vger.kernel.org Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: fengguang.wu@intel.com Cc: helgaas@kernel.org Cc: kbuild-all@01.org Cc: bhelgaas@google.com Link: http://lkml.kernel.org/r/1488288869-31290-3-git-send-email-rui.y.wang@intel.com Signed-off-by: Thomas Gleixner --- drivers/acpi/internal.h | 2 ++ drivers/acpi/ioapic.c | 22 ++++++++++++++++------ drivers/acpi/pci_root.c | 4 ++-- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 219b90bc0922..f15900132912 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -41,8 +41,10 @@ void acpi_gpe_apply_masked_gpes(void); void acpi_container_init(void); void acpi_memory_hotplug_init(void); #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC +void pci_ioapic_remove(struct acpi_pci_root *root); int acpi_ioapic_remove(struct acpi_pci_root *root); #else +static inline void pci_ioapic_remove(struct acpi_pci_root *root) { return; } static inline int acpi_ioapic_remove(struct acpi_pci_root *root) { return 0; } #endif #ifdef CONFIG_ACPI_DOCK diff --git a/drivers/acpi/ioapic.c b/drivers/acpi/ioapic.c index 6d7ce6e12aaa..1120dfd625b8 100644 --- a/drivers/acpi/ioapic.c +++ b/drivers/acpi/ioapic.c @@ -206,24 +206,34 @@ int acpi_ioapic_add(acpi_handle root_handle) return ACPI_SUCCESS(status) && ACPI_SUCCESS(retval) ? 0 : -ENODEV; } -int acpi_ioapic_remove(struct acpi_pci_root *root) +void pci_ioapic_remove(struct acpi_pci_root *root) { - int retval = 0; struct acpi_pci_ioapic *ioapic, *tmp; mutex_lock(&ioapic_list_lock); list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) { if (root->device->handle != ioapic->root_handle) continue; - - if (acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base)) - retval = -EBUSY; - if (ioapic->pdev) { pci_release_region(ioapic->pdev, 0); pci_disable_device(ioapic->pdev); pci_dev_put(ioapic->pdev); } + } + mutex_unlock(&ioapic_list_lock); +} + +int acpi_ioapic_remove(struct acpi_pci_root *root) +{ + int retval = 0; + struct acpi_pci_ioapic *ioapic, *tmp; + + mutex_lock(&ioapic_list_lock); + list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) { + if (root->device->handle != ioapic->root_handle) + continue; + if (acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base)) + retval = -EBUSY; if (ioapic->res.flags && ioapic->res.parent) release_resource(&ioapic->res); list_del(&ioapic->list); diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index bf601d4df8cf..919be0aa2578 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -648,12 +648,12 @@ static void acpi_pci_root_remove(struct acpi_device *device) pci_stop_root_bus(root->bus); - WARN_ON(acpi_ioapic_remove(root)); - + pci_ioapic_remove(root); device_set_run_wake(root->bus->bridge, false); pci_acpi_remove_bus_pm_notifier(device); pci_remove_root_bus(root->bus); + WARN_ON(acpi_ioapic_remove(root)); dmar_device_remove(device->handle); -- cgit v1.2.3 From 58ab9a088ddac4efe823471275859d64f735577e Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 23 Feb 2017 14:26:03 -0800 Subject: x86/pkeys: Check against max pkey to avoid overflows Kirill reported a warning from UBSAN about undefined behavior when using protection keys. He is running on hardware that actually has support for it, which is not widely available. The warning triggers because of very large shifts of integers when doing a pkey_free() of a large, invalid value. This happens because we never check that the pkey "fits" into the mm_pkey_allocation_map(). I do not believe there is any danger here of anything bad happening other than some aliasing issues where somebody could do: pkey_free(35); and the kernel would effectively execute: pkey_free(8); While this might be confusing to an app that was doing something stupid, it has to do something stupid and the effects are limited to the app shooting itself in the foot. Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Cc: kirill.shutemov@linux.intel.com Link: http://lkml.kernel.org/r/20170223222603.A022ED65@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/pkeys.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 34684adb6899..b3b09b98896d 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -46,6 +46,15 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { + /* + * "Allocated" pkeys are those that have been returned + * from pkey_alloc(). pkey 0 is special, and never + * returned from pkey_alloc(). + */ + if (pkey <= 0) + return false; + if (pkey >= arch_max_pkey()) + return false; return mm_pkey_allocation_map(mm) & (1U << pkey); } @@ -82,12 +91,6 @@ int mm_pkey_alloc(struct mm_struct *mm) static inline int mm_pkey_free(struct mm_struct *mm, int pkey) { - /* - * pkey 0 is special, always allocated and can never - * be freed. - */ - if (!pkey) - return -EINVAL; if (!mm_pkey_is_allocated(mm, pkey)) return -EINVAL; -- cgit v1.2.3 From 940b2f2fd963c043418ce8af64605783b2b19140 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Feb 2017 12:31:40 +0100 Subject: x86/events: Remove last remnants of old filenames Update to the new file paths, remove them from introductory comments. Signed-off-by: Borislav Petkov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170218113140.8051-1-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/events/amd/core.c | 2 +- arch/x86/events/intel/cstate.c | 2 +- arch/x86/events/intel/rapl.c | 2 +- arch/x86/events/intel/uncore.h | 6 +++--- tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index afb222b63cae..c84584bb9402 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -604,7 +604,7 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, return &amd_f15_PMC20; } case AMD_EVENT_NB: - /* moved to perf_event_amd_uncore.c */ + /* moved to uncore.c */ return &emptyconstraint; default: return &emptyconstraint; diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index aff4b5b69d40..238ae3248ba5 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -1,5 +1,5 @@ /* - * perf_event_intel_cstate.c: support cstate residency counters + * Support cstate residency counters * * Copyright (C) 2015, Intel Corp. * Author: Kan Liang (kan.liang@intel.com) diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 22054ca49026..9d05c7e67f60 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -1,5 +1,5 @@ /* - * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters + * Support Intel RAPL energy consumption counters * Copyright (C) 2013 Google, Inc., Stephane Eranian * * Intel RAPL interface is specified in the IA-32 Manual Vol3b diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index ad986c1e29bc..df5989f27b1b 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -360,7 +360,7 @@ extern struct list_head pci2phy_map_head; extern struct pci_extra_dev *uncore_extra_pci_dev; extern struct event_constraint uncore_constraint_empty; -/* perf_event_intel_uncore_snb.c */ +/* uncore_snb.c */ int snb_uncore_pci_init(void); int ivb_uncore_pci_init(void); int hsw_uncore_pci_init(void); @@ -371,7 +371,7 @@ void nhm_uncore_cpu_init(void); void skl_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); -/* perf_event_intel_uncore_snbep.c */ +/* uncore_snbep.c */ int snbep_uncore_pci_init(void); void snbep_uncore_cpu_init(void); int ivbep_uncore_pci_init(void); @@ -385,5 +385,5 @@ void knl_uncore_cpu_init(void); int skx_uncore_pci_init(void); void skx_uncore_cpu_init(void); -/* perf_event_intel_uncore_nhmex.c */ +/* uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 7913363bde5c..4f3c758d875d 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -31,7 +31,7 @@ #error Instruction buffer size too small #endif -/* Based on branch_type() from perf_event_intel_lbr.c */ +/* Based on branch_type() from arch/x86/events/intel/lbr.c */ static void intel_pt_insn_decoder(struct insn *insn, struct intel_pt_insn *intel_pt_insn) { -- cgit v1.2.3 From 72042a8c7b01048a36ece216aaf206b7d60ca661 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Mon, 20 Feb 2017 10:12:35 +1100 Subject: x86/purgatory: Make functions and variables static Sparse emits several 'symbol not declared' warnings for various functions and variables. Add static keyword to functions and variables which have file scope only. Signed-off-by: Tobin C. Harding Link: http://lkml.kernel.org/r/1487545956-2547-2-git-send-email-me@tobin.cc Signed-off-by: Thomas Gleixner --- arch/x86/purgatory/purgatory.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 25e068ba3382..2a5f4373c797 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -18,11 +18,11 @@ struct sha_region { unsigned long len; }; -unsigned long backup_dest = 0; -unsigned long backup_src = 0; -unsigned long backup_sz = 0; +static unsigned long backup_dest; +static unsigned long backup_src; +static unsigned long backup_sz; -u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; +static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; struct sha_region sha_regions[16] = {}; @@ -39,7 +39,7 @@ static int copy_backup_region(void) return 0; } -int verify_sha256_digest(void) +static int verify_sha256_digest(void) { struct sha_region *ptr, *end; u8 digest[SHA256_DIGEST_SIZE]; -- cgit v1.2.3 From e98fe5127b9cc8ab33d1094b81c19deb1f9082bf Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Mon, 20 Feb 2017 10:12:36 +1100 Subject: x86/purgatory: Fix sparse warning, symbol not declared Sparse emits warning, 'symbol not declared' for a function that has neither file scope nor a forward declaration. The functions only call site is an ASM file. Add a header file with the function declaration. Include the header file in the C source file defining the function in order to fix the sparse warning. Include the header file in ASM file containing the call site to document the usage. Signed-off-by: Tobin C. Harding Link: http://lkml.kernel.org/r/1487545956-2547-3-git-send-email-me@tobin.cc Signed-off-by: Thomas Gleixner --- arch/x86/purgatory/purgatory.c | 1 + arch/x86/purgatory/purgatory.h | 8 ++++++++ arch/x86/purgatory/setup-x86_64.S | 1 + 3 files changed, 10 insertions(+) create mode 100644 arch/x86/purgatory/purgatory.h diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 2a5f4373c797..b6d5c8946e66 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -11,6 +11,7 @@ */ #include "sha256.h" +#include "purgatory.h" #include "../boot/string.h" struct sha_region { diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h new file mode 100644 index 000000000000..e2e365a6c192 --- /dev/null +++ b/arch/x86/purgatory/purgatory.h @@ -0,0 +1,8 @@ +#ifndef PURGATORY_H +#define PURGATORY_H + +#ifndef __ASSEMBLY__ +extern void purgatory(void); +#endif /* __ASSEMBLY__ */ + +#endif /* PURGATORY_H */ diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S index fe3c91ba1bd0..f90e9dfa90bb 100644 --- a/arch/x86/purgatory/setup-x86_64.S +++ b/arch/x86/purgatory/setup-x86_64.S @@ -9,6 +9,7 @@ * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. */ +#include "purgatory.h" .text .globl purgatory_start -- cgit v1.2.3 From 8392f16d38bb5222c03073a3906b7fd272386faf Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 21 Feb 2017 19:36:39 +0100 Subject: x86/boot: Correct setup_header.start_sys name It is called start_sys_seg elsewhere so rename it to that. It is an obsolete field so we could just as well directly call it __u16 __pad... No functional change. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170221183639.16554-1-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/uapi/asm/bootparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 5138dacf8bb8..07244ea16765 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -58,7 +58,7 @@ struct setup_header { __u32 header; __u16 version; __u32 realmode_swtch; - __u16 start_sys; + __u16 start_sys_seg; __u16 kernel_version; __u8 type_of_loader; __u8 loadflags; -- cgit v1.2.3 From 25b68a8f0ab13a98de02650208ec927796659898 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 17 Feb 2017 10:13:59 -0500 Subject: timerfd: Only check CAP_WAKE_ALARM when it is needed timerfd_create() and do_timerfd_settime() evaluate capable(CAP_WAKE_ALARM) unconditionally although CAP_WAKE_ALARM is only required for CLOCK_REALTIME_ALARM and CLOCK_BOOTTIME_ALARM. This can cause extraneous audit messages when using a LSM such as SELinux, incorrectly causes PF_SUPERPRIV to be set even when no privilege was exercised, and is inefficient. Flip the order of the tests in both functions so that we only call capable() if the capability is truly required for the operation. Signed-off-by: Stephen Smalley Cc: linux-security-module@vger.kernel.org Cc: selinux@tycho.nsa.gov Link: http://lkml.kernel.org/r/1487344439-22293-1-git-send-email-sds@tycho.nsa.gov Signed-off-by: Thomas Gleixner --- fs/timerfd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/timerfd.c b/fs/timerfd.c index 384fa759a563..c543cdb5f8ed 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -400,9 +400,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) clockid != CLOCK_BOOTTIME_ALARM)) return -EINVAL; - if (!capable(CAP_WAKE_ALARM) && - (clockid == CLOCK_REALTIME_ALARM || - clockid == CLOCK_BOOTTIME_ALARM)) + if ((clockid == CLOCK_REALTIME_ALARM || + clockid == CLOCK_BOOTTIME_ALARM) && + !capable(CAP_WAKE_ALARM)) return -EPERM; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -449,7 +449,7 @@ static int do_timerfd_settime(int ufd, int flags, return ret; ctx = f.file->private_data; - if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) { + if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) { fdput(f); return -EPERM; } -- cgit v1.2.3 From 52e51f16407b7b34e26affb500a21e250d9fce0b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 1 Mar 2017 19:04:35 +0000 Subject: efi/libstub: Treat missing SecureBoot variable as Secure Boot disabled The newly refactored code that infers the firmware's Secure Boot state prints the following error when the EFI variable 'SecureBoot' does not exist: EFI stub: ERROR: Could not determine UEFI Secure Boot status. However, this variable is only guaranteed to be defined on a system that is Secure Boot capable to begin with, and so it is not an error if it is missing. So report Secure Boot as being disabled in this case, without printing any error messages. Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1488395076-29712-2-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/secureboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c index 6def402bf569..5da36e56b36a 100644 --- a/drivers/firmware/efi/libstub/secureboot.c +++ b/drivers/firmware/efi/libstub/secureboot.c @@ -45,6 +45,8 @@ enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg) size = sizeof(secboot); status = get_efi_var(efi_SecureBoot_name, &efi_variable_guid, NULL, &size, &secboot); + if (status == EFI_NOT_FOUND) + return efi_secureboot_mode_disabled; if (status != EFI_SUCCESS) goto out_efi_err; @@ -78,7 +80,5 @@ secure_boot_enabled: out_efi_err: pr_efi_err(sys_table_arg, "Could not determine UEFI Secure Boot status.\n"); - if (status == EFI_NOT_FOUND) - return efi_secureboot_mode_disabled; return efi_secureboot_mode_unknown; } -- cgit v1.2.3 From d1eb98143c56f24fef125f5bbed49ae0b52fb7d6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 1 Mar 2017 19:05:54 +0000 Subject: efi/arm: Fix boot crash with CONFIG_CPUMASK_OFFSTACK=y On ARM and arm64, we use a dedicated mm_struct to map the UEFI Runtime Services regions, which allows us to map those regions on demand, and in a way that is guaranteed to be compatible with incoming kernels across kexec. As it turns out, we don't fully initialize the mm_struct in the same way as process mm_structs are initialized on fork(), which results in the following crash on ARM if CONFIG_CPUMASK_OFFSTACK=y is enabled: ... EFI Variables Facility v0.08 2004-May-17 Unable to handle kernel NULL pointer dereference at virtual address 00000000 [...] Process swapper/0 (pid: 1) ... __memzero() check_and_switch_context() virt_efi_get_next_variable() efivar_init() efivars_sysfs_init() do_one_initcall() ... This is due to a missing call to mm_init_cpumask(), so add it. Signed-off-by: Ard Biesheuvel Cc: # v4.5+ Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1488395154-29786-1-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/arm-runtime.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 349dc3e1e52e..974c5a31a005 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -65,6 +65,7 @@ static bool __init efi_virtmap_init(void) bool systab_found; efi_mm.pgd = pgd_alloc(&efi_mm); + mm_init_cpumask(&efi_mm); init_new_context(NULL, &efi_mm); systab_found = false; -- cgit v1.2.3 From 4ec3dd89052a437304e1451733c989b8cec681af Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Thu, 2 Mar 2017 15:12:47 +0800 Subject: drm/i915/gvt: fix an error for F_RO flag the ro_mask is not stored into each mmio entry Fixes: 12d14cc43b34 ("drm/i915/gvt: Introduce a framework for tracking HW registers.") Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 548aedfbd402..8e43395c748a 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -121,6 +121,7 @@ static int new_mmio_info(struct intel_gvt *gvt, info->size = size; info->length = (i + 4) < end ? 4 : (end - i); info->addr_mask = addr_mask; + info->ro_mask = ro_mask; info->device = device; info->read = read ? read : intel_vgpu_default_mmio_read; info->write = write ? write : intel_vgpu_default_mmio_write; -- cgit v1.2.3 From 4c77b18cf8b7ab37c7d5737b4609010d2ceec5f0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Mar 2017 11:24:35 +0100 Subject: sched/fair: Make select_idle_cpu() more aggressive Kitsunyan reported desktop latency issues on his Celeron 887 because of commit: 1b568f0aabf2 ("sched/core: Optimize SCHED_SMT") ... even though his CPU doesn't do SMT. The effect of running the SMT code on a !SMT part is basically a more aggressive select_idle_cpu(). Removing the avg condition fixed things for him. I also know FB likes this test gone, even though other workloads like having it. For now, take it out by default, until we get a better idea. Reported-by: kitsunyan Signed-off-by: Peter Zijlstra (Intel) Cc: Chris Mason Cc: Linus Torvalds Cc: Mike Galbraith Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 2 +- kernel/sched/features.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 274c747a01ce..b3ee10dd3e85 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5797,7 +5797,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t * Due to large variance we need a large fuzz factor; hackbench in * particularly is sensitive here. */ - if ((avg_idle / 512) < avg_cost) + if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost) return -1; time = local_clock(); diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 69631fa46c2f..1b3c8189b286 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -51,6 +51,11 @@ SCHED_FEAT(NONTASK_CAPACITY, true) */ SCHED_FEAT(TTWU_QUEUE, true) +/* + * When doing wakeups, attempt to limit superfluous scans of the LLC domain. + */ +SCHED_FEAT(SIS_AVG_CPU, false) + #ifdef HAVE_RT_PUSH_IPI /* * In order to avoid a thundering herd attack of CPUs that are -- cgit v1.2.3 From 0ba87bb27d66b78e278167ac7e20c66520b8a612 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Mar 2017 10:51:47 +0100 Subject: sched/core: Fix pick_next_task() for RT,DL Pavan noticed that the following commit: 49ee576809d8 ("sched/core: Optimize pick_next_task() for idle_sched_class") ... broke RT,DL balancing by robbing them of the opportinty to do new-'idle' balancing when their last runnable task (on that runqueue) goes away. Reported-by: Pavan Kondeti Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: 49ee576809d8 ("sched/core: Optimize pick_next_task() for idle_sched_class") Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bbfb917a9b49..6699d43a8843 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3273,10 +3273,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) struct task_struct *p; /* - * Optimization: we know that if all tasks are in - * the fair class we can call that function directly: + * Optimization: we know that if all tasks are in the fair class we can + * call that function directly, but only if the @prev task wasn't of a + * higher scheduling class, because otherwise those loose the + * opportunity to pull in more work from other CPUs. */ - if (likely(rq->nr_running == rq->cfs.h_nr_running)) { + if (likely((prev->sched_class == &idle_sched_class || + prev->sched_class == &fair_sched_class) && + rq->nr_running == rq->cfs.h_nr_running)) { + p = fair_sched_class.pick_next_task(rq, prev, rf); if (unlikely(p == RETRY_TASK)) goto again; -- cgit v1.2.3 From f94c8d116997597fc00f0812b0ab9256e7b0c58f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Mar 2017 15:53:38 +0100 Subject: sched/clock, x86/tsc: Rework the x86 'unstable' sched_clock() interface Wanpeng Li reported that since the following commit: acb04058de49 ("sched/clock: Fix hotplug crash") ... KVM always runs with unstable sched-clock even though KVM's kvm_clock _is_ stable. The problem is that we've tied clear_sched_clock_stable() to the TSC state, and overlooked that sched_clock() is a paravirt function. Solve this by doing two things: - tie the sched_clock() stable state more clearly to the TSC stable state for the normal (!paravirt) case. - only call clear_sched_clock_stable() when we mark TSC unstable when we use native_sched_clock(). The first means we can actually run with stable sched_clock in more situations then before, which is good. And since commit: 12907fbb1a69 ("sched/clock, clocksource: Add optional cs::mark_unstable() method") ... this should be reliable. Since any detection of TSC fail now results in marking the TSC unstable. Reported-by: Wanpeng Li Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: acb04058de49 ("sched/clock: Fix hotplug crash") Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 4 ---- arch/x86/kernel/cpu/centaur.c | 2 -- arch/x86/kernel/cpu/common.c | 3 --- arch/x86/kernel/cpu/cyrix.c | 1 - arch/x86/kernel/cpu/intel.c | 4 ---- arch/x86/kernel/cpu/transmeta.c | 2 -- arch/x86/kernel/tsc.c | 35 +++++++++++++++++++++++------------ 7 files changed, 23 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4e95b2e0d95f..30d924ae5c34 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -555,10 +555,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 2c234a6d94c4..bad8ff078a21 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -104,8 +104,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #endif - - clear_sched_clock_stable(); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c64ca5929cb5..9d98e2e15d54 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -86,7 +86,6 @@ static void default_init(struct cpuinfo_x86 *c) strcpy(c->x86_model_id, "386"); } #endif - clear_sched_clock_stable(); } static const struct cpu_dev default_cpu = { @@ -1075,8 +1074,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) */ if (this_cpu->c_init) this_cpu->c_init(c); - else - clear_sched_clock_stable(); /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 47416f959a48..31e679238e8d 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -184,7 +184,6 @@ static void early_init_cyrix(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); break; } - clear_sched_clock_stable(); } static void init_cyrix(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 017ecd3bb553..2388bafe5c37 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -161,10 +161,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index c1ea5b999839..6a9ad73a2c54 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -15,8 +15,6 @@ static void early_init_transmeta(struct cpuinfo_x86 *c) if (xlvl >= 0x80860001) c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001); } - - clear_sched_clock_stable(); } static void init_transmeta(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2724dc82f992..911129fda2f9 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -326,9 +326,16 @@ unsigned long long sched_clock(void) { return paravirt_sched_clock(); } + +static inline bool using_native_sched_clock(void) +{ + return pv_time_ops.sched_clock == native_sched_clock; +} #else unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); + +static inline bool using_native_sched_clock(void) { return true; } #endif int check_tsc_unstable(void) @@ -1111,8 +1118,10 @@ static void tsc_cs_mark_unstable(struct clocksource *cs) { if (tsc_unstable) return; + tsc_unstable = 1; - clear_sched_clock_stable(); + if (using_native_sched_clock()) + clear_sched_clock_stable(); disable_sched_clock_irqtime(); pr_info("Marking TSC unstable due to clocksource watchdog\n"); } @@ -1134,18 +1143,20 @@ static struct clocksource clocksource_tsc = { void mark_tsc_unstable(char *reason) { - if (!tsc_unstable) { - tsc_unstable = 1; + if (tsc_unstable) + return; + + tsc_unstable = 1; + if (using_native_sched_clock()) clear_sched_clock_stable(); - disable_sched_clock_irqtime(); - pr_info("Marking TSC unstable due to %s\n", reason); - /* Change only the rating, when not registered */ - if (clocksource_tsc.mult) - clocksource_mark_unstable(&clocksource_tsc); - else { - clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; - clocksource_tsc.rating = 0; - } + disable_sched_clock_irqtime(); + pr_info("Marking TSC unstable due to %s\n", reason); + /* Change only the rating, when not registered */ + if (clocksource_tsc.mult) { + clocksource_mark_unstable(&clocksource_tsc); + } else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; + clocksource_tsc.rating = 0; } } -- cgit v1.2.3 From 2b232e0c3b3a09f3e33750aa20e314f1b80e5361 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Feb 2017 09:40:11 +0000 Subject: locking/ww_mutex: Replace cpu_relax() with cond_resched() for tests When busy-spinning on a ww_mutex_trylock(), we depend upon the other thread advancing and releasing the lock. This can not happen on a single CPU unless we relinquish it: [ ] NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [kworker/0:1:18] ... [ ] Call Trace: [ ] mutex_trylock() [ ] test_mutex_work+0x31/0x56 [ ] process_one_work+0x1b4/0x2f9 [ ] worker_thread+0x1b0/0x27c [ ] kthread+0xd1/0xd3 [ ] ret_from_fork+0x19/0x30 Reported-by: Fengguang Wu Signed-off-by: Chris Wilson Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: f2a5fec17395 ("locking/ww_mutex: Begin kselftests for ww_mutex") Link: http://lkml.kernel.org/r/20170228094011.2595-1-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar --- kernel/locking/test-ww_mutex.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index da6c9a34f62f..3eb39c588397 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -50,7 +50,7 @@ static void test_mutex_work(struct work_struct *work) if (mtx->flags & TEST_MTX_TRY) { while (!ww_mutex_trylock(&mtx->mutex)) - cpu_relax(); + cond_resched(); } else { ww_mutex_lock(&mtx->mutex, NULL); } @@ -88,7 +88,7 @@ static int __test_mutex(unsigned int flags) ret = -EINVAL; break; } - cpu_relax(); + cond_resched(); } while (time_before(jiffies, timeout)); } else { ret = wait_for_completion_timeout(&mtx.done, TIMEOUT); -- cgit v1.2.3 From 7fb4a2cea6b18dab56d609530d077f168169ed6b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Mar 2017 16:23:30 +0100 Subject: locking/lockdep: Add nest_lock integrity test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Boqun reported that hlock->references can overflow. Add a debug test for that to generate a clear error when this happens. Without this, lockdep is likely to report a mysterious failure on unlock. Reported-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Linus Torvalds Cc: Nicolai Hähnle Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 9812e5dd409e..c0ee8607c11e 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3260,10 +3260,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (depth) { hlock = curr->held_locks + depth - 1; if (hlock->class_idx == class_idx && nest_lock) { - if (hlock->references) + if (hlock->references) { + /* + * Check: unsigned int references:12, overflow. + */ + if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1)) + return 0; + hlock->references++; - else + } else { hlock->references = 2; + } return 1; } -- cgit v1.2.3 From 857811a37129f5d2ba162d7be3986eff44724014 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 1 Mar 2017 23:01:38 +0800 Subject: locking/ww_mutex: Adjust the lock number for stress test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because there are only 12 bits in held_lock::references, so we only support 4095 nested lock held in the same time, adjust the lock number for ww_mutex stress test to kill one lockdep splat: [ ] [ BUG: bad unlock balance detected! ] [ ] kworker/u2:0/5 is trying to release lock (ww_class_mutex) at: [ ] ww_mutex_unlock() [ ] but there are no more locks to release! ... Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Fengguang Wu Cc: Linus Torvalds Cc: Nicolai Hähnle Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170301150138.hdixnmafzfsox7nn@tardis.cn.ibm.com Signed-off-by: Ingo Molnar --- kernel/locking/test-ww_mutex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 3eb39c588397..6b7abb334ca6 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -627,7 +627,7 @@ static int __init test_ww_mutex_init(void) if (ret) return ret; - ret = stress(4096, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL); + ret = stress(4095, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL); if (ret) return ret; -- cgit v1.2.3 From bb1a2c26165640ba2cbcfe06c81e9f9d6db4e643 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 1 Mar 2017 21:10:17 +0100 Subject: x86/hpet: Prevent might sleep splat on resume Sergey reported a might sleep warning triggered from the hpet resume path. It's caused by the call to disable_irq() from interrupt disabled context. The problem with the low level resume code is that it is not accounted as a special system_state like we do during the boot process. Calling the same code during system boot would not trigger the warning. That's inconsistent at best. In this particular case it's trivial to replace the disable_irq() with disable_hardirq() because this particular code path is solely used from system resume and the involved hpet interrupts can never be force threaded. Reported-and-tested-by: Sergey Senozhatsky Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Sergey Senozhatsky Cc: Borislav Petkov Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703012108460.3684@nanos Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index dc6ba5bda9fc..89ff7af2de50 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -354,7 +354,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer) irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_irq(hdev->irq); + disable_hardirq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } -- cgit v1.2.3 From 7afbeb6df2aa5f9e3a0fc228817a85c16dea0faa Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Feb 2017 12:56:12 +0100 Subject: s390/ipl: always use load normal for CCW-type re-IPL commit 14890678687c ("s390/ipl: use load normal for LPAR re-ipl") missed to convert one code path to use load normal semantics for re-IPL. Convert the missing code path as well. Fixes: 14890678687c ("s390/ipl: use load normal for LPAR re-ipl") Reported-by: Michael Holzheu Acked-by: Michael Holzheu Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ipl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index b67dafb7b7cf..e545ffe5155a 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -564,6 +564,8 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { + if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW) + diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); diag308(DIAG308_LOAD_CLEAR, NULL); if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); -- cgit v1.2.3 From 2e4d88009f57057df7672fa69a32b5224af54d37 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 2 Mar 2017 15:23:42 +0100 Subject: KVM: s390: Fix guest migration for huge guests resulting in panic While we can technically not run huge page guests right now, we can setup a guest with huge pages. Trying to migrate it will trigger a VM_BUG_ON and, if the kernel is not configured to panic on a BUG, it will happily try to work on non-existing page table entries. With this patch, we always return "dirty" if we encounter a large page when migrating. This at least fixes the immediate problem until we have proper handling for both kind of pages. Fixes: 15f36eb ("KVM: s390: Add proper dirty bitmap support to S390 kvm.") Cc: # 3.16+ Signed-off-by: Janosch Frank Acked-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index b48dc5f1900b..463e5ef02304 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -608,12 +608,29 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) { spinlock_t *ptl; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; pgste_t pgste; pte_t *ptep; pte_t pte; bool dirty; - ptep = get_locked_pte(mm, addr, &ptl); + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (!pud) + return false; + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return false; + /* We can't run guests backed by huge pages, but userspace can + * still set them up and then try to migrate them without any + * migration support. + */ + if (pmd_large(*pmd)) + return true; + + ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (unlikely(!ptep)) return false; -- cgit v1.2.3 From e148bd17f48bd17fca2f4f089ec879fa6e47e34c Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Feb 2017 14:46:42 +0530 Subject: powerpc: Emulation support for load/store instructions on LE emulate_step() uses a number of underlying kernel functions that were initially not enabled for LE. This has been rectified since. So, fix emulate_step() for LE for the corresponding instructions. Cc: stable@vger.kernel.org # v3.18+ Reported-by: Anton Blanchard Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman --- arch/powerpc/lib/sstep.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 846dba2c6360..9c542ec70c5b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1799,8 +1799,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case LARX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1823,8 +1821,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case STCX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1849,8 +1845,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case LOAD: - if (regs->msr & MSR_LE) - return 0; err = read_mem(®s->gpr[op.reg], op.ea, size, regs); if (!err) { if (op.type & SIGNEXT) @@ -1862,8 +1856,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case LOAD_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); else @@ -1872,15 +1864,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case LOAD_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); goto ldst_done; #endif @@ -1903,8 +1891,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case STORE: - if (regs->msr & MSR_LE) - return 0; if ((op.type & UPDATE) && size == sizeof(long) && op.reg == 1 && op.update_reg == 1 && !(regs->msr & MSR_PR) && @@ -1917,8 +1903,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case STORE_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); else @@ -1927,15 +1911,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case STORE_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); goto ldst_done; #endif -- cgit v1.2.3 From 4ceae137bdab2232e57b2e6fd5631a22a0160938 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Feb 2017 14:46:43 +0530 Subject: powerpc: emulate_step() tests for load/store instructions Add new selftest that test emulate_step for Normal, Floating Point, Vector and Vector Scalar - load/store instructions. Test should run at boot time if CONFIG_KPROBES_SANITY_TEST and CONFIG_PPC64 is set. Sample log: emulate_step_test: ld : PASS emulate_step_test: lwz : PASS emulate_step_test: lwzx : PASS emulate_step_test: std : PASS emulate_step_test: ldarx / stdcx. : PASS emulate_step_test: lfsx : PASS emulate_step_test: stfsx : PASS emulate_step_test: lfdx : PASS emulate_step_test: stfdx : PASS emulate_step_test: lvx : PASS emulate_step_test: stvx : PASS emulate_step_test: lxvd2x : PASS emulate_step_test: stxvd2x : PASS Signed-off-by: Ravi Bangoria [mpe: Drop start/complete lines, make it all __init] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 7 + arch/powerpc/lib/Makefile | 1 + arch/powerpc/lib/test_emulate_step.c | 434 ++++++++++++++++++++++++++++++++++ 3 files changed, 442 insertions(+) create mode 100644 arch/powerpc/lib/test_emulate_step.c diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index d99bd442aacb..e7d6d86563ee 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -284,6 +284,13 @@ #define PPC_INST_BRANCH_COND 0x40800000 #define PPC_INST_LBZCIX 0x7c0006aa #define PPC_INST_STBCIX 0x7c0007aa +#define PPC_INST_LWZX 0x7c00002e +#define PPC_INST_LFSX 0x7c00042e +#define PPC_INST_STFSX 0x7c00052e +#define PPC_INST_LFDX 0x7c0004ae +#define PPC_INST_STFDX 0x7c0005ae +#define PPC_INST_LVX 0x7c0000ce +#define PPC_INST_STVX 0x7c0001ce /* macros to insert fields into opcodes */ #define ___PPC_RA(a) (((a) & 0x1f) << 16) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 0e649d72fe8d..2b5e09020cfe 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -20,6 +20,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \ obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o +obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o obj-y += checksum_$(BITS).o checksum_wrappers.o diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c new file mode 100644 index 000000000000..2534c1447554 --- /dev/null +++ b/arch/powerpc/lib/test_emulate_step.c @@ -0,0 +1,434 @@ +/* + * Simple sanity test for emulate_step load/store instructions. + * + * Copyright IBM Corp. 2016 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#define pr_fmt(fmt) "emulate_step_test: " fmt + +#include +#include +#include + +#define IMM_L(i) ((uintptr_t)(i) & 0xffff) + +/* + * Defined with TEST_ prefix so it does not conflict with other + * definitions. + */ +#define TEST_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZ(r, base, i) (PPC_INST_LWZ | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | \ + ___PPC_RA(base) | ((i) & 0xfffc)) +#define TEST_LDARX(t, a, b, eh) (PPC_INST_LDARX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b) | \ + __PPC_EH(eh)) +#define TEST_STDCX(s, a, b) (PPC_INST_STDCX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFSX(t, a, b) (PPC_INST_LFSX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFSX(s, a, b) (PPC_INST_STFSX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFDX(t, a, b) (PPC_INST_LFDX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFDX(s, a, b) (PPC_INST_STFDX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LVX(t, a, b) (PPC_INST_LVX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STVX(s, a, b) (PPC_INST_STVX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LXVD2X(s, a, b) (PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b)) +#define TEST_STXVD2X(s, a, b) (PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b)) + + +static void __init init_pt_regs(struct pt_regs *regs) +{ + static unsigned long msr; + static bool msr_cached; + + memset(regs, 0, sizeof(struct pt_regs)); + + if (likely(msr_cached)) { + regs->msr = msr; + return; + } + + asm volatile("mfmsr %0" : "=r"(regs->msr)); + + regs->msr |= MSR_FP; + regs->msr |= MSR_VEC; + regs->msr |= MSR_VSX; + + msr = regs->msr; + msr_cached = true; +} + +static void __init show_result(char *ins, char *result) +{ + pr_info("%-14s : %s\n", ins, result); +} + +static void __init test_ld(void) +{ + struct pt_regs regs; + unsigned long a = 0x23; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* ld r5, 0(r3) */ + stepped = emulate_step(®s, TEST_LD(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("ld", "PASS"); + else + show_result("ld", "FAIL"); +} + +static void __init test_lwz(void) +{ + struct pt_regs regs; + unsigned int a = 0x4545; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* lwz r5, 0(r3) */ + stepped = emulate_step(®s, TEST_LWZ(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("lwz", "PASS"); + else + show_result("lwz", "FAIL"); +} + +static void __init test_lwzx(void) +{ + struct pt_regs regs; + unsigned int a[3] = {0x0, 0x0, 0x1234}; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) a; + regs.gpr[4] = 8; + regs.gpr[5] = 0x8765; + + /* lwzx r5, r3, r4 */ + stepped = emulate_step(®s, TEST_LWZX(5, 3, 4)); + if (stepped == 1 && regs.gpr[5] == a[2]) + show_result("lwzx", "PASS"); + else + show_result("lwzx", "FAIL"); +} + +static void __init test_std(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + regs.gpr[5] = 0x5678; + + /* std r5, 0(r3) */ + stepped = emulate_step(®s, TEST_STD(5, 3, 0)); + if (stepped == 1 || regs.gpr[5] == a) + show_result("std", "PASS"); + else + show_result("std", "FAIL"); +} + +static void __init test_ldarx_stdcx(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + unsigned long cr0_eq = 0x1 << 29; /* eq bit of CR0 */ + + init_pt_regs(®s); + asm volatile("mfcr %0" : "=r"(regs.ccr)); + + + /*** ldarx ***/ + + regs.gpr[3] = (unsigned long) &a; + regs.gpr[4] = 0; + regs.gpr[5] = 0x5678; + + /* ldarx r5, r3, r4, 0 */ + stepped = emulate_step(®s, TEST_LDARX(5, 3, 4, 0)); + + /* + * Don't touch 'a' here. Touching 'a' can do Load/store + * of 'a' which result in failure of subsequent stdcx. + * Instead, use hardcoded value for comparison. + */ + if (stepped <= 0 || regs.gpr[5] != 0x1234) { + show_result("ldarx / stdcx.", "FAIL (ldarx)"); + return; + } + + + /*** stdcx. ***/ + + regs.gpr[5] = 0x9ABC; + + /* stdcx. r5, r3, r4 */ + stepped = emulate_step(®s, TEST_STDCX(5, 3, 4)); + + /* + * Two possible scenarios that indicates successful emulation + * of stdcx. : + * 1. Reservation is active and store is performed. In this + * case cr0.eq bit will be set to 1. + * 2. Reservation is not active and store is not performed. + * In this case cr0.eq bit will be set to 0. + */ + if (stepped == 1 && ((regs.gpr[5] == a && (regs.ccr & cr0_eq)) + || (regs.gpr[5] != a && !(regs.ccr & cr0_eq)))) + show_result("ldarx / stdcx.", "PASS"); + else + show_result("ldarx / stdcx.", "FAIL (stdcx.)"); +} + +#ifdef CONFIG_PPC_FPU +static void __init test_lfsx_stfsx(void) +{ + struct pt_regs regs; + union { + float a; + int b; + } c; + int cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfsx ***/ + + c.a = 123.45; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfsx frt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LFSX(10, 3, 4)); + + if (stepped == 1) + show_result("lfsx", "PASS"); + else + show_result("lfsx", "FAIL"); + + + /*** stfsx ***/ + + c.a = 678.91; + + /* stfsx frs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STFSX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfsx", "PASS"); + else + show_result("stfsx", "FAIL"); +} + +static void __init test_lfdx_stfdx(void) +{ + struct pt_regs regs; + union { + double a; + long b; + } c; + long cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfdx ***/ + + c.a = 123456.78; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfdx frt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LFDX(10, 3, 4)); + + if (stepped == 1) + show_result("lfdx", "PASS"); + else + show_result("lfdx", "FAIL"); + + + /*** stfdx ***/ + + c.a = 987654.32; + + /* stfdx frs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STFDX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfdx", "PASS"); + else + show_result("stfdx", "FAIL"); +} +#else +static void __init test_lfsx_stfsx(void) +{ + show_result("lfsx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)"); +} + +static void __init test_lfdx_stfdx(void) +{ + show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)"); +} +#endif /* CONFIG_PPC_FPU */ + +#ifdef CONFIG_ALTIVEC +static void __init test_lvx_stvx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lvx ***/ + + cached_b[0] = c.b[0] = 923745; + cached_b[1] = c.b[1] = 2139478; + cached_b[2] = c.b[2] = 9012; + cached_b[3] = c.b[3] = 982134; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lvx vrt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LVX(10, 3, 4)); + + if (stepped == 1) + show_result("lvx", "PASS"); + else + show_result("lvx", "FAIL"); + + + /*** stvx ***/ + + c.b[0] = 4987513; + c.b[1] = 84313948; + c.b[2] = 71; + c.b[3] = 498532; + + /* stvx vrs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STVX(10, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stvx", "PASS"); + else + show_result("stvx", "FAIL"); +} +#else +static void __init test_lvx_stvx(void) +{ + show_result("lvx", "SKIP (CONFIG_ALTIVEC is not set)"); + show_result("stvx", "SKIP (CONFIG_ALTIVEC is not set)"); +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_VSX +static void __init test_lxvd2x_stxvd2x(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lxvd2x ***/ + + cached_b[0] = c.b[0] = 18233; + cached_b[1] = c.b[1] = 34863571; + cached_b[2] = c.b[2] = 834; + cached_b[3] = c.b[3] = 6138911; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, TEST_LXVD2X(39, 3, 4)); + + if (stepped == 1) + show_result("lxvd2x", "PASS"); + else + show_result("lxvd2x", "FAIL"); + + + /*** stxvd2x ***/ + + c.b[0] = 21379463; + c.b[1] = 87; + c.b[2] = 374234; + c.b[3] = 4; + + /* stxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, TEST_STXVD2X(39, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stxvd2x", "PASS"); + else + show_result("stxvd2x", "FAIL"); +} +#else +static void __init test_lxvd2x_stxvd2x(void) +{ + show_result("lxvd2x", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvd2x", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +static int __init test_emulate_step(void) +{ + test_ld(); + test_lwz(); + test_lwzx(); + test_std(); + test_ldarx_stdcx(); + test_lfsx_stfsx(); + test_lfdx_stfdx(); + test_lvx_stvx(); + test_lxvd2x_stxvd2x(); + + return 0; +} +late_initcall(test_emulate_step); -- cgit v1.2.3 From 7a70d7288c926ae88e0c773fbb506aa374e99c2d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 27 Feb 2017 14:32:41 +1100 Subject: powerpc/64: Invalidate process table caching after setting process table The POWER9 MMU reads and caches entries from the process table. When we kexec from one kernel to another, the second kernel sets its process table pointer but doesn't currently do anything to make the CPU invalidate any cached entries from the old process table. This adds a tlbie (TLB invalidate entry) instruction with parameters to invalidate caching of the process table after the new process table is installed. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pgtable-radix.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index feeda90cd06d..01c94f141164 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -186,6 +186,10 @@ static void __init radix_init_pgtable(void) */ register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } static void __init radix_init_partition_table(void) -- cgit v1.2.3 From 424f8acd328a111319ae30bf384e5dfb9bc8f8cb Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Mon, 27 Feb 2017 11:10:07 +0530 Subject: powerpc/powernv: Fix bug due to labeling ambiguity in power_enter_stop Commit 09206b600c76 ("powernv: Pass PSSCR value and mask to power9_idle_stop") added additional code in power_enter_stop() to distinguish between stop requests whose PSSCR had ESL=EC=1 from those which did not. When ESL=EC=1, we do a forward-jump to a location labelled by "1", which had the code to handle the ESL=EC=1 case. Unfortunately just a couple of instructions before this label, is the macro IDLE_STATE_ENTER_SEQ() which also has a label "1" in its expansion. As a result, the current code can result in directly executing stop instruction for deep stop requests with PSSCR ESL=EC=1, without saving the hypervisor state. Fix this BUG by labeling the location that handles ESL=EC=1 case with a more descriptive label ".Lhandle_esl_ec_set" (local label suggestion a la .Lxx from Anton Blanchard). While at it, rename the label "2" labelling the location of the code handling entry into deep stop states with ".Lhandle_deep_stop". For a good measure, change the label in IDLE_STATE_ENTER_SEQ() macro to an not-so commonly used value in order to avoid similar mishaps in the future. Fixes: 09206b600c76 ("powernv: Pass PSSCR value and mask to power9_idle_stop") Signed-off-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 4 ++-- arch/powerpc/kernel/idle_book3s.S | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index fd321eb423cb..155731557c9b 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -70,8 +70,8 @@ static inline void report_invalid_psscr_val(u64 psscr_val, int err) std r0,0(r1); \ ptesync; \ ld r0,0(r1); \ -1: cmpd cr0,r0,r0; \ - bne 1b; \ +236: cmpd cr0,r0,r0; \ + bne 236b; \ IDLE_INST; \ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 5f61cc0349c0..995728736677 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -276,19 +276,21 @@ power_enter_stop: */ andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */ - bne 1f + bne .Lhandle_esl_ec_set IDLE_STATE_ENTER_SEQ(PPC_STOP) li r3,0 /* Since we didn't lose state, return 0 */ b pnv_wakeup_noloss + +.Lhandle_esl_ec_set: /* * Check if the requested state is a deep idle state. */ -1: LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) cmpd r3,r4 - bge 2f + bge .Lhandle_deep_stop IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) -2: +.Lhandle_deep_stop: /* * Entering deep idle state. * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to -- cgit v1.2.3 From a6d8a21596df041f36f4c2ccc260c459e3e851f1 Mon Sep 17 00:00:00 2001 From: Sachin Sant Date: Sun, 26 Feb 2017 11:38:39 +0530 Subject: selftest/powerpc: Fix false failures for skipped tests Tests under alignment subdirectory are skipped when executed on previous generation hardware, but harness still marks them as failed. test: test_copy_unaligned tags: git_version:unknown [SKIP] Test skipped on line 26 skip: test_copy_unaligned selftests: copy_unaligned [FAIL] The MAGIC_SKIP_RETURN_VALUE value assigned to rc variable is retained till the program exit which causes the test to be marked as failed. This patch resets the value before returning to the main() routine. With this patch the test o/p is as follows: test: test_copy_unaligned tags: git_version:unknown [SKIP] Test skipped on line 26 skip: test_copy_unaligned selftests: copy_unaligned [PASS] Signed-off-by: Sachin Sant Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/harness.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 248a820048df..66d31de60b9a 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -114,9 +114,11 @@ int test_harness(int (test_function)(void), char *name) rc = run_test(test_function, name); - if (rc == MAGIC_SKIP_RETURN_VALUE) + if (rc == MAGIC_SKIP_RETURN_VALUE) { test_skip(name); - else + /* so that skipped test is not marked as failed */ + rc = 0; + } else test_finish(name, rc); return rc; -- cgit v1.2.3 From 4dc831aa88132f835cefe876aa0206977c4d7710 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2016 13:46:20 +1100 Subject: powerpc: Fix compiling a BE kernel with a powerpc64le toolchain GCC can compile with either endian, but the default ABI version is set based on the default endianness of the toolchain. Alan Modra says: you need both -mbig and -mabi=elfv1 to make a powerpc64le gcc generate powerpc64 code The opposite is true for powerpc64 when generating -mlittle it requires -mabi=elfv2 to generate v2 ABI, which we were already doing. This change adds ABI annotations together with endianness for all cases, LE and BE. This fixes the case of building a BE kernel with a toolchain that is LE by default. Signed-off-by: Nicholas Piggin Tested-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 31286fa7873c..19b0d1a81959 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -72,8 +72,15 @@ GNUTARGET := powerpc MULTIPLEWORD := -mmultiple endif -cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +ifdef CONFIG_PPC64 +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc) +aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 +endif + cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) ifneq ($(cc-name),clang) cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align endif @@ -113,7 +120,9 @@ ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) else +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) +AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) -- cgit v1.2.3 From 3fb66a70a4ae886445743354e4b60e54058bb3ff Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Thu, 16 Feb 2017 09:11:29 -0600 Subject: powerpc/booke: Fix boot crash due to null hugepd On 32-bit book-e machines, hugepd_ok() no longer takes into account null hugepd values, causing this crash at boot: Unable to handle kernel paging request for data at address 0x80000000 ... NIP [c0018378] follow_huge_addr+0x38/0xf0 LR [c001836c] follow_huge_addr+0x2c/0xf0 Call Trace: follow_huge_addr+0x2c/0xf0 (unreliable) follow_page_mask+0x40/0x3e0 __get_user_pages+0xc8/0x450 get_user_pages_remote+0x8c/0x250 copy_strings+0x110/0x390 copy_strings_kernel+0x2c/0x50 do_execveat_common+0x478/0x630 do_execve+0x2c/0x40 try_to_run_init_process+0x18/0x60 kernel_init+0xbc/0x110 ret_from_kernel_thread+0x5c/0x64 This impacts all nxp (ex-freescale) 32-bit booke platforms. This was caused by the change of hugepd_t.pd from signed to unsigned, and the update to the nohash version of hugepd_ok(). Previously hugepd_ok() could exclude all non-huge and NULL pgds using > 0, whereas now we need to explicitly check that the value is not zero and also that PD_HUGE is *clear*. This isn't protected by the pgd_none() check in __find_linux_pte_or_hugepte() because on 32-bit we use pgtable-nopud.h, which causes the pgd_none() check to be always false. Fixes: 20717e1ff526 ("powerpc/mm: Fix little-endian 4K hugetlb") Cc: stable@vger.kernel.org # v4.7+ Reported-by: Madalin-Cristian Bucur Signed-off-by: Laurentiu Tudor [mpe: Flesh out change log details.] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 0cd8a3852763..e5805ad78e12 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -230,7 +230,7 @@ static inline int hugepd_ok(hugepd_t hpd) return ((hpd_val(hpd) & 0x4) != 0); #else /* We clear the top bit to indicate hugepd */ - return ((hpd_val(hpd) & PD_HUGE) == 0); + return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0); #endif } -- cgit v1.2.3 From 0265634eb9e04a16ae99941c320718c38eb865e0 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sat, 25 Feb 2017 08:28:16 -0300 Subject: [media] serial_ir: ensure we're ready to receive interrupts When the interrupt requested with devm_request_irq(), serial_ir.rcdev is still null so will cause null deference if the irq handler is called early on. Also ensure that timeout_timer is setup. Link: http://lkml.kernel.org/r/CA+55aFxsh2uF8gi5sN_guY3Z+tiLv7LpJYKBw+y8vqLzp+TsnQ@mail.gmail.com [mchehab@s-opensource.com: moved serial_ir_probe() back to its original place] Cc: # 4.10 Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/serial_ir.c | 123 ++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 61 deletions(-) diff --git a/drivers/media/rc/serial_ir.c b/drivers/media/rc/serial_ir.c index 923fb2299553..41b54e40176c 100644 --- a/drivers/media/rc/serial_ir.c +++ b/drivers/media/rc/serial_ir.c @@ -487,10 +487,69 @@ static void serial_ir_timeout(unsigned long arg) ir_raw_event_handle(serial_ir.rcdev); } +/* Needed by serial_ir_probe() */ +static int serial_ir_tx(struct rc_dev *dev, unsigned int *txbuf, + unsigned int count); +static int serial_ir_tx_duty_cycle(struct rc_dev *dev, u32 cycle); +static int serial_ir_tx_carrier(struct rc_dev *dev, u32 carrier); +static int serial_ir_open(struct rc_dev *rcdev); +static void serial_ir_close(struct rc_dev *rcdev); + static int serial_ir_probe(struct platform_device *dev) { + struct rc_dev *rcdev; int i, nlow, nhigh, result; + rcdev = devm_rc_allocate_device(&dev->dev, RC_DRIVER_IR_RAW); + if (!rcdev) + return -ENOMEM; + + if (hardware[type].send_pulse && hardware[type].send_space) + rcdev->tx_ir = serial_ir_tx; + if (hardware[type].set_send_carrier) + rcdev->s_tx_carrier = serial_ir_tx_carrier; + if (hardware[type].set_duty_cycle) + rcdev->s_tx_duty_cycle = serial_ir_tx_duty_cycle; + + switch (type) { + case IR_HOMEBREW: + rcdev->input_name = "Serial IR type home-brew"; + break; + case IR_IRDEO: + rcdev->input_name = "Serial IR type IRdeo"; + break; + case IR_IRDEO_REMOTE: + rcdev->input_name = "Serial IR type IRdeo remote"; + break; + case IR_ANIMAX: + rcdev->input_name = "Serial IR type AnimaX"; + break; + case IR_IGOR: + rcdev->input_name = "Serial IR type IgorPlug"; + break; + } + + rcdev->input_phys = KBUILD_MODNAME "/input0"; + rcdev->input_id.bustype = BUS_HOST; + rcdev->input_id.vendor = 0x0001; + rcdev->input_id.product = 0x0001; + rcdev->input_id.version = 0x0100; + rcdev->open = serial_ir_open; + rcdev->close = serial_ir_close; + rcdev->dev.parent = &serial_ir.pdev->dev; + rcdev->allowed_protocols = RC_BIT_ALL_IR_DECODER; + rcdev->driver_name = KBUILD_MODNAME; + rcdev->map_name = RC_MAP_RC6_MCE; + rcdev->min_timeout = 1; + rcdev->timeout = IR_DEFAULT_TIMEOUT; + rcdev->max_timeout = 10 * IR_DEFAULT_TIMEOUT; + rcdev->rx_resolution = 250000; + + serial_ir.rcdev = rcdev; + + setup_timer(&serial_ir.timeout_timer, serial_ir_timeout, + (unsigned long)&serial_ir); + result = devm_request_irq(&dev->dev, irq, serial_ir_irq_handler, share_irq ? IRQF_SHARED : 0, KBUILD_MODNAME, &hardware); @@ -516,9 +575,6 @@ static int serial_ir_probe(struct platform_device *dev) return -EBUSY; } - setup_timer(&serial_ir.timeout_timer, serial_ir_timeout, - (unsigned long)&serial_ir); - result = hardware_init_port(); if (result < 0) return result; @@ -552,7 +608,8 @@ static int serial_ir_probe(struct platform_device *dev) sense ? "low" : "high"); dev_dbg(&dev->dev, "Interrupt %d, port %04x obtained\n", irq, io); - return 0; + + return devm_rc_register_device(&dev->dev, rcdev); } static int serial_ir_open(struct rc_dev *rcdev) @@ -723,7 +780,6 @@ static void serial_ir_exit(void) static int __init serial_ir_init_module(void) { - struct rc_dev *rcdev; int result; switch (type) { @@ -754,63 +810,9 @@ static int __init serial_ir_init_module(void) sense = !!sense; result = serial_ir_init(); - if (result) - return result; - - rcdev = devm_rc_allocate_device(&serial_ir.pdev->dev, RC_DRIVER_IR_RAW); - if (!rcdev) { - result = -ENOMEM; - goto serial_cleanup; - } - - if (hardware[type].send_pulse && hardware[type].send_space) - rcdev->tx_ir = serial_ir_tx; - if (hardware[type].set_send_carrier) - rcdev->s_tx_carrier = serial_ir_tx_carrier; - if (hardware[type].set_duty_cycle) - rcdev->s_tx_duty_cycle = serial_ir_tx_duty_cycle; - - switch (type) { - case IR_HOMEBREW: - rcdev->input_name = "Serial IR type home-brew"; - break; - case IR_IRDEO: - rcdev->input_name = "Serial IR type IRdeo"; - break; - case IR_IRDEO_REMOTE: - rcdev->input_name = "Serial IR type IRdeo remote"; - break; - case IR_ANIMAX: - rcdev->input_name = "Serial IR type AnimaX"; - break; - case IR_IGOR: - rcdev->input_name = "Serial IR type IgorPlug"; - break; - } - - rcdev->input_phys = KBUILD_MODNAME "/input0"; - rcdev->input_id.bustype = BUS_HOST; - rcdev->input_id.vendor = 0x0001; - rcdev->input_id.product = 0x0001; - rcdev->input_id.version = 0x0100; - rcdev->open = serial_ir_open; - rcdev->close = serial_ir_close; - rcdev->dev.parent = &serial_ir.pdev->dev; - rcdev->allowed_protocols = RC_BIT_ALL_IR_DECODER; - rcdev->driver_name = KBUILD_MODNAME; - rcdev->map_name = RC_MAP_RC6_MCE; - rcdev->min_timeout = 1; - rcdev->timeout = IR_DEFAULT_TIMEOUT; - rcdev->max_timeout = 10 * IR_DEFAULT_TIMEOUT; - rcdev->rx_resolution = 250000; - - serial_ir.rcdev = rcdev; - - result = rc_register_device(rcdev); - if (!result) return 0; -serial_cleanup: + serial_ir_exit(); return result; } @@ -818,7 +820,6 @@ serial_cleanup: static void __exit serial_ir_exit_module(void) { del_timer_sync(&serial_ir.timeout_timer); - rc_unregister_device(serial_ir.rcdev); serial_ir_exit(); } -- cgit v1.2.3 From 2e1e4949f9dfb053122785cd73540bb1e61f768b Mon Sep 17 00:00:00 2001 From: Qi Hou Date: Fri, 3 Mar 2017 15:57:11 +0800 Subject: i2c: add missing of_node_put in i2c_mux_del_adapters Refcount of of_node is increased with of_node_get() in i2c_mux_add_adapter(). It must be decreased with of_node_put() in i2c_mux_del_adapters(). Cc: stable@vger.kernel.org Signed-off-by: Qi Hou Reviewed-by: Zhang Xiao Signed-off-by: Peter Rosin --- drivers/i2c/i2c-mux.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 83768e85a919..2178266bca79 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -429,6 +429,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) while (muxc->num_adapters) { struct i2c_adapter *adap = muxc->adapter[--muxc->num_adapters]; struct i2c_mux_priv *priv = adap->algo_data; + struct device_node *np = adap->dev.of_node; muxc->adapter[muxc->num_adapters] = NULL; @@ -438,6 +439,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) sysfs_remove_link(&priv->adap.dev.kobj, "mux_device"); i2c_del_adapter(adap); + of_node_put(np); kfree(priv); } } -- cgit v1.2.3 From db5b15b74ed9a5c04bb808d18ffa2c773f5c18c0 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Mon, 13 Feb 2017 10:35:44 -0200 Subject: [media] lirc: fix dead lock between open and wakeup_filter The locking in lirc needs improvement, but for now just fix this potential deadlock. ====================================================== [ INFO: possible circular locking dependency detected ] 4.10.0-rc1+ #1 Not tainted ------------------------------------------------------- bash/2502 is trying to acquire lock: (ir_raw_handler_lock){+.+.+.}, at: [] ir_raw_encode_scancode+0x3e/0xb0 [rc_core] but task is already holding lock: (&dev->lock){+.+.+.}, at: [] store_filter+0x9f/0x240 [rc_core] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&dev->lock){+.+.+.}: [] lock_acquire+0xfd/0x200 [] mutex_lock_nested+0x77/0x6d0 [] rc_open+0x2a/0x80 [rc_core] [] lirc_dev_fop_open+0xda/0x1e0 [lirc_dev] [] chrdev_open+0xb0/0x210 [] do_dentry_open+0x20a/0x2f0 [] vfs_open+0x4c/0x80 [] path_openat+0x5bc/0xc00 [] do_filp_open+0x91/0x100 [] do_sys_open+0x130/0x220 [] SyS_open+0x1e/0x20 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 -> #1 (lirc_dev_lock){+.+.+.}: [] lock_acquire+0xfd/0x200 [] mutex_lock_nested+0x77/0x6d0 [] lirc_register_driver+0x67/0x59b [lirc_dev] [] ir_lirc_register+0x1f4/0x260 [ir_lirc_codec] [] ir_raw_handler_register+0x7c/0xb0 [rc_core] [] 0xffffffffc0398010 [] do_one_initcall+0x52/0x1b0 [] do_init_module+0x5f/0x1fa [] load_module+0x2675/0x2b00 [] SYSC_finit_module+0xdf/0x110 [] SyS_finit_module+0xe/0x10 [] do_syscall_64+0x6c/0x1f0 [] return_from_SYSCALL_64+0x0/0x7a -> #0 (ir_raw_handler_lock){+.+.+.}: [] __lock_acquire+0x10f7/0x1290 [] lock_acquire+0xfd/0x200 [] mutex_lock_nested+0x77/0x6d0 [] ir_raw_encode_scancode+0x3e/0xb0 [rc_core] [] loop_set_wakeup_filter+0x62/0xbd [rc_loopback] [] store_filter+0x1aa/0x240 [rc_core] [] dev_attr_store+0x18/0x30 [] sysfs_kf_write+0x45/0x60 [] kernfs_fop_write+0x155/0x1e0 [] __vfs_write+0x37/0x160 [] vfs_write+0xc8/0x1e0 [] SyS_write+0x58/0xc0 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 other info that might help us debug this: Chain exists of: ir_raw_handler_lock --> lirc_dev_lock --> &dev->lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&dev->lock); lock(lirc_dev_lock); lock(&dev->lock); lock(ir_raw_handler_lock); *** DEADLOCK *** 4 locks held by bash/2502: #0: (sb_writers#4){.+.+.+}, at: [] vfs_write+0x195/0x1e0 #1: (&of->mutex){+.+.+.}, at: [] kernfs_fop_write+0x11f/0x1e0 #2: (s_active#215){.+.+.+}, at: [] kernfs_fop_write+0x128/0x1e0 #3: (&dev->lock){+.+.+.}, at: [] store_filter+0x9f/0x240 [rc_core] stack backtrace: CPU: 3 PID: 2502 Comm: bash Not tainted 4.10.0-rc1+ #1 Hardware name: /DG45ID, BIOS IDG4510H.86A.0135.2011.0225.1100 02/25/2011 Call Trace: dump_stack+0x86/0xc3 print_circular_bug+0x1be/0x210 __lock_acquire+0x10f7/0x1290 lock_acquire+0xfd/0x200 ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core] ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core] mutex_lock_nested+0x77/0x6d0 ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core] ? loop_set_wakeup_filter+0x44/0xbd [rc_loopback] ir_raw_encode_scancode+0x3e/0xb0 [rc_core] loop_set_wakeup_filter+0x62/0xbd [rc_loopback] ? loop_set_tx_duty_cycle+0x70/0x70 [rc_loopback] store_filter+0x1aa/0x240 [rc_core] dev_attr_store+0x18/0x30 sysfs_kf_write+0x45/0x60 kernfs_fop_write+0x155/0x1e0 __vfs_write+0x37/0x160 ? rcu_read_lock_sched_held+0x4a/0x80 ? rcu_sync_lockdep_assert+0x2f/0x60 ? __sb_start_write+0x10c/0x220 ? vfs_write+0x195/0x1e0 ? security_file_permission+0x3b/0xc0 vfs_write+0xc8/0x1e0 SyS_write+0x58/0xc0 entry_SYSCALL_64_fastpath+0x1f/0xc2 Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/lirc_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index a54ca531d8ef..4630f3e67538 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -436,6 +436,8 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file) return -ERESTARTSYS; ir = irctls[iminor(inode)]; + mutex_unlock(&lirc_dev_lock); + if (!ir) { retval = -ENODEV; goto error; @@ -476,8 +478,6 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file) } error: - mutex_unlock(&lirc_dev_lock); - nonseekable_open(inode, file); return retval; -- cgit v1.2.3 From c1305a40722f3e02f3d971acc22f5991aff239a6 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 12 Feb 2017 15:01:22 -0200 Subject: [media] rc: nuvoton: fix deadlock in nvt_write_wakeup_codes nvt_write_wakeup_codes acquires the same lock as the ISR but doesn't disable interrupts on the local CPU. This caused the following deadlock. Fix this by using spin_lock_irqsave. [ 432.362008] ================================ [ 432.362074] WARNING: inconsistent lock state [ 432.362144] 4.10.0-rc7-next-20170210 #1 Not tainted [ 432.362219] -------------------------------- [ 432.362286] inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. [ 432.362379] swapper/0/0 [HC1[1]:SC0[0]:HE0:SE1] takes: [ 432.362457] (&(&nvt->lock)->rlock){?.+...}, at: [] nvt_cir_isr+0x2d/0x520 [nuvoton_cir] [ 432.362611] {HARDIRQ-ON-W} state was registered at: [ 432.362686] [ 432.362698] [] __lock_acquire+0x5dc/0x1260 [ 432.362812] [ 432.362817] [] lock_acquire+0xe9/0x1d0 [ 432.362927] [ 432.362934] [] _raw_spin_lock+0x33/0x50 [ 432.363045] [ 432.363051] [] nvt_write_wakeup_codes.isra.12+0x22/0xe0 [nuvoton_cir] [ 432.363193] [ 432.363199] [] wakeup_data_store+0xdf/0xf0 [nuvoton_cir] [ 432.363327] [ 432.363333] [] dev_attr_store+0x13/0x20 [ 432.363441] [ 432.363449] [] sysfs_kf_write+0x40/0x50 [ 432.363558] [ 432.363564] [] kernfs_fop_write+0x150/0x1e0 [ 432.363676] [ 432.363685] [] __vfs_write+0x23/0x120 [ 432.363791] [ 432.363798] [] vfs_write+0xc3/0x1e0 [ 432.363902] [ 432.363909] [] SyS_write+0x44/0xa0 [ 432.364012] [ 432.364021] [] do_syscall_64+0x57/0x140 [ 432.364129] [ 432.364135] [] return_from_SYSCALL_64+0x0/0x7a [ 432.364252] irq event stamp: 415118 [ 432.364313] hardirqs last enabled at (415115): [] cpuidle_enter_state+0x11b/0x370 [ 432.364445] hardirqs last disabled at (415116): [] common_interrupt+0x8b/0x90 [ 432.364573] softirqs last enabled at (415118): [] _local_bh_enable+0x1c/0x50 [ 432.364699] softirqs last disabled at (415117): [] irq_enter+0x43/0x60 [ 432.364814] other info that might help us debug this: [ 432.364909] Possible unsafe locking scenario: [ 432.367821] CPU0 [ 432.370645] ---- [ 432.373432] lock(&(&nvt->lock)->rlock); [ 432.376228] [ 432.378982] lock(&(&nvt->lock)->rlock); [ 432.381757] *** DEADLOCK *** [ 432.389888] no locks held by swapper/0/0. [ 432.392574] stack backtrace: [ 432.397774] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.0-rc7-next-20170210 #1 [ 432.400375] Hardware name: ZOTAC ZBOX-CI321NANO/ZBOX-CI321NANO, BIOS B246P105 06/01/2015 [ 432.403023] Call Trace: [ 432.405636] [ 432.408208] dump_stack+0x68/0x93 [ 432.410775] print_usage_bug+0x1dd/0x1f0 [ 432.413334] mark_lock+0x559/0x5c0 [ 432.415871] ? print_shortest_lock_dependencies+0x1a0/0x1a0 [ 432.418431] __lock_acquire+0x6b1/0x1260 [ 432.420941] lock_acquire+0xe9/0x1d0 [ 432.423396] ? nvt_cir_isr+0x2d/0x520 [nuvoton_cir] [ 432.425844] _raw_spin_lock+0x33/0x50 [ 432.428252] ? nvt_cir_isr+0x2d/0x520 [nuvoton_cir] [ 432.430670] nvt_cir_isr+0x2d/0x520 [nuvoton_cir] [ 432.433085] __handle_irq_event_percpu+0x43/0x330 [ 432.435493] handle_irq_event_percpu+0x1e/0x50 [ 432.437884] handle_irq_event+0x34/0x60 [ 432.440236] handle_edge_irq+0x6a/0x150 [ 432.442561] handle_irq+0x15/0x20 [ 432.444854] do_IRQ+0x57/0x110 [ 432.447115] common_interrupt+0x90/0x90 [ 432.449380] RIP: 0010:cpuidle_enter_state+0x120/0x370 [ 432.451653] RSP: 0018:ffffffff81c03dd8 EFLAGS: 00000206 ORIG_RAX: ffffffffffffffcc [ 432.453994] RAX: ffffffff81c14500 RBX: 0000000000000008 RCX: 00000064aac6f2d2 [ 432.456349] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffffffff81c14500 [ 432.458704] RBP: ffffffff81c03e18 R08: cccccccccccccccd R09: 0000000000000018 [ 432.461072] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880100a21260 [ 432.463450] R13: ffffffff81c7e6f8 R14: 0000000000000008 R15: ffffffff81c7e6e0 [ 432.465819] [ 432.468104] ? cpuidle_enter_state+0x11b/0x370 [ 432.470413] cpuidle_enter+0x12/0x20 [ 432.472698] call_cpuidle+0x1e/0x40 [ 432.474967] do_idle+0xe3/0x1c0 [ 432.477172] cpu_startup_entry+0x18/0x20 [ 432.479376] rest_init+0x130/0x140 [ 432.481565] start_kernel+0x3cc/0x3d9 [ 432.483750] x86_64_start_reservations+0x2a/0x2c [ 432.485980] x86_64_start_kernel+0x178/0x18b [ 432.488222] start_cpu+0x14/0x14 [ 432.490453] ? start_cpu+0x14/0x14 Fixes: 97c129747af5 "[media] rc: nuvoton-cir: Add support wakeup via sysfs filter callback" Signed-off-by: Heiner Kallweit Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/nuvoton-cir.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/nuvoton-cir.c b/drivers/media/rc/nuvoton-cir.c index b109f8246b96..ec4b25bd2ec2 100644 --- a/drivers/media/rc/nuvoton-cir.c +++ b/drivers/media/rc/nuvoton-cir.c @@ -176,12 +176,13 @@ static void nvt_write_wakeup_codes(struct rc_dev *dev, { u8 tolerance, config; struct nvt_dev *nvt = dev->priv; + unsigned long flags; int i; /* hardcode the tolerance to 10% */ tolerance = DIV_ROUND_UP(count, 10); - spin_lock(&nvt->lock); + spin_lock_irqsave(&nvt->lock, flags); nvt_clear_cir_wake_fifo(nvt); nvt_cir_wake_reg_write(nvt, count, CIR_WAKE_FIFO_CMP_DEEP); @@ -203,7 +204,7 @@ static void nvt_write_wakeup_codes(struct rc_dev *dev, nvt_cir_wake_reg_write(nvt, config, CIR_WAKE_IRCON); - spin_unlock(&nvt->lock); + spin_unlock_irqrestore(&nvt->lock, flags); } static ssize_t wakeup_data_show(struct device *dev, -- cgit v1.2.3 From 413808685dd7c9b54bbc5af79da2eaddd0fc3cb2 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Wed, 22 Feb 2017 18:48:01 -0300 Subject: [media] rc: raw decoder for keymap protocol is not loaded on register When the protocol is set via the sysfs protocols attribute, the decoder is loaded. However, when it is not when a device is first plugged in or registered. Fixes: acc1c3c ("[media] media: rc: load decoder modules on-demand") Signed-off-by: Sean Young Cc: # v4.5+ Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/rc-main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 2424946740e6..a158b321b40a 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -1663,6 +1663,7 @@ static int rc_setup_rx_device(struct rc_dev *dev) { int rc; struct rc_map *rc_map; + u64 rc_type; if (!dev->map_name) return -EINVAL; @@ -1677,15 +1678,18 @@ static int rc_setup_rx_device(struct rc_dev *dev) if (rc) return rc; - if (dev->change_protocol) { - u64 rc_type = (1ll << rc_map->rc_type); + rc_type = BIT_ULL(rc_map->rc_type); + if (dev->change_protocol) { rc = dev->change_protocol(dev, &rc_type); if (rc < 0) goto out_table; dev->enabled_protocols = rc_type; } + if (dev->driver_type == RC_DRIVER_IR_RAW) + ir_raw_load_modules(&rc_type); + set_bit(EV_KEY, dev->input_dev->evbit); set_bit(EV_REP, dev->input_dev->evbit); set_bit(EV_MSC, dev->input_dev->evbit); -- cgit v1.2.3 From 5df62771c556e4ec9d7ecea1f070ff6da4bce151 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 23 Feb 2017 06:11:21 -0300 Subject: [media] rc: protocol is not set on register for raw IR devices ir_raw_event_register() sets up change_protocol(), and without that set, rc_setup_rx_device() does not set the protocol for the device on register. The standard udev rules run ir-keytable, which writes to the protocols file again, which hides this problem. Fixes: 7ff2c2b ("[media] rc-main: split setup and unregister functions") Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/rc-main.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index a158b321b40a..d84533699668 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -1781,12 +1781,6 @@ int rc_register_device(struct rc_dev *dev) dev->input_name ?: "Unspecified device", path ?: "N/A"); kfree(path); - if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { - rc = rc_setup_rx_device(dev); - if (rc) - goto out_dev; - } - if (dev->driver_type == RC_DRIVER_IR_RAW || dev->driver_type == RC_DRIVER_IR_RAW_TX) { if (!raw_init) { @@ -1795,7 +1789,13 @@ int rc_register_device(struct rc_dev *dev) } rc = ir_raw_event_register(dev); if (rc < 0) - goto out_rx; + goto out_dev; + } + + if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { + rc = rc_setup_rx_device(dev); + if (rc) + goto out_raw; } /* Allow the RC sysfs nodes to be accessible */ @@ -1807,8 +1807,8 @@ int rc_register_device(struct rc_dev *dev) return 0; -out_rx: - rc_free_rx_device(dev); +out_raw: + ir_raw_event_unregister(dev); out_dev: device_del(&dev->dev); out_unlock: -- cgit v1.2.3 From 92ad18ec26611e8a47639e600bd9dc42fbe2edcf Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 2 Mar 2017 12:53:26 -0500 Subject: ftrace/graph: Do not modify the EMPTY_HASH for the function_graph filter On boot up, if the kernel command line sets a graph funtion with the kernel command line options "ftrace_graph_filter" or "ftrace_graph_notrace" then it updates the corresponding function graph hash, ftrace_graph_hash or ftrace_graph_notrace_hash respectively. Unfortunately, at boot up, these variables are pointers to the "EMPTY_HASH" which is a constant used as a placeholder when a hash has no entities. The problem was that the comand line version to set the hashes updated the actual EMPTY_HASH instead of creating a new hash for the function graph. This broke the EMPTY_HASH because not only did it modify a constant (not sure how that was allowed to happen, except maybe because it was done at early boot, const variables were still mutable), but it made the filters have functions listed in them when they were actually empty. The kernel command line function needs to allocate a new hash for the function graph filters and assign the necessary variables to that new hash instead. Link: http://lkml.kernel.org/r/1488420091.7212.17.camel@linux.intel.com Cc: Namhyung Kim Fixes: b9b0c831bed2 ("ftrace: Convert graph filter to use hash tables") Reported-by: Todd Brandt Tested-by: Todd Brandt Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fd84f2e30b6d..44122e7a6418 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4421,10 +4421,9 @@ static void __init set_ftrace_early_graph(char *buf, int enable) char *func; struct ftrace_hash *hash; - if (enable) - hash = ftrace_graph_hash; - else - hash = ftrace_graph_notrace_hash; + hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); + if (WARN_ON(!hash)) + return; while (buf) { func = strsep(&buf, ","); @@ -4434,6 +4433,11 @@ static void __init set_ftrace_early_graph(char *buf, int enable) printk(KERN_DEBUG "ftrace: function %s not " "traceable\n", func); } + + if (enable) + ftrace_graph_hash = hash; + else + ftrace_graph_notrace_hash = hash; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.3 From ab42632156becd35d3884ee5c14da2bedbf3149a Mon Sep 17 00:00:00 2001 From: David Daney Date: Wed, 1 Mar 2017 14:04:53 -0800 Subject: module: set __jump_table alignment to 8 For powerpc the __jump_table section in modules is not aligned, this causes a WARN_ON() splat when loading a module containing a __jump_table. Strict alignment became necessary with commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key"), currently in linux-next, which uses the two least significant bits of pointers to __jump_table elements. Fix by forcing __jump_table to 8, which is the same alignment used for this section in the kernel proper. Link: http://lkml.kernel.org/r/20170301220453.4756-1-david.daney@cavium.com Reviewed-by: Jason Baron Acked-by: Jessica Yu Acked-by: Michael Ellerman (powerpc) Tested-by: Sachin Sant Signed-off-by: David Daney Signed-off-by: Steven Rostedt (VMware) --- scripts/module-common.lds | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/module-common.lds b/scripts/module-common.lds index 73a2c7da0e55..53234e85192a 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -19,4 +19,6 @@ SECTIONS { . = ALIGN(8); .init_array 0 : { *(SORT(.init_array.*)) *(.init_array) } + + __jump_table 0 : ALIGN(8) { KEEP(*(__jump_table)) } } -- cgit v1.2.3 From cd8d860dcce906cd477be7d0648ba6f56a52eaa6 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 28 Feb 2017 11:32:22 -0500 Subject: jump_label: Fix anonymous union initialization Pre-4.6 gcc do not allow direct static initialization of members of anonymous structs/unions. After commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key") STATIC_KEY_INIT_{TRUE|FALSE} definitions cannot be compiled with those older compilers. Placing initializers inside curved brackets works around this problem. Link: http://lkml.kernel.org/r/1488299542-30765-1-git-send-email-boris.ostrovsky@oracle.com Fixes: 3821fd35b58d ("jump_label: Reduce the size of struct static_key") Reviewed-by: Jason Baron Compiled-by: Chris Mason Signed-off-by: Boris Ostrovsky Signed-off-by: Steven Rostedt (VMware) --- include/linux/jump_label.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 680c98b2f41c..a7f90117cf7d 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -166,10 +166,10 @@ extern void static_key_disable(struct static_key *key); */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ - .entries = (void *)JUMP_TYPE_TRUE } + { .entries = (void *)JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ - .entries = (void *)JUMP_TYPE_FALSE } + { .entries = (void *)JUMP_TYPE_FALSE } } #else /* !HAVE_JUMP_LABEL */ -- cgit v1.2.3 From b17ef2ed624aa7c1f68ed11acd16ecbf80fe01d7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 2 Mar 2017 17:28:45 -0500 Subject: jump_label: Add comment about initialization order for anonymous unions Commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key") broke old compilers that could not handle static initialization of anonymous unions. Boris fixed it with a patch that added brackets around the static initializer. But this creates a dependency between those initializers and the structure's order of its fields. Document this dependency in case new fields are added to struct static_key in the future. Noted-by: Boris Ostrovsky Suggested-by: Chris Mason Signed-off-by: Steven Rostedt (VMware) --- include/linux/jump_label.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index a7f90117cf7d..28e04a33535a 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -90,6 +90,13 @@ extern bool static_key_initialized; struct static_key { atomic_t enabled; /* + * Note: + * To make anonymous unions work with old compilers, the static + * initialization of them requires brackets. This creates a dependency + * on the order of the struct with the initializers. If any fields + * are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need + * to be modified. + * * bit 0 => 1 if key is initially true * 0 if initially false * bit 1 => 1 if points to struct static_key_mod -- cgit v1.2.3 From bf7165cfa23695c51998231c4efa080fe1d3548d Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 28 Sep 2016 22:55:54 -0400 Subject: tracing: Add #undef to fix compile error There are several trace include files that define TRACE_INCLUDE_FILE. Include several of them in the same .c file (as I currently have in some code I am working on), and the compile will blow up with a "warning: "TRACE_INCLUDE_FILE" redefined #define TRACE_INCLUDE_FILE syscalls" Every other include file in include/trace/events/ avoids that issue by having a #undef TRACE_INCLUDE_FILE before the #define; syscalls.h should have one, too. Link: http://lkml.kernel.org/r/20160928225554.13bd7ac6@annuminas.surriel.com Cc: stable@vger.kernel.org Fixes: b8007ef74222 ("tracing: Separate raw syscall from syscall tracer") Signed-off-by: Rik van Riel Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/syscalls.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 14e49c798135..b35533b94277 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -1,5 +1,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM raw_syscalls +#undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE syscalls #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) -- cgit v1.2.3 From 65a50c656276b0846bea09dd011c0a3d35b77f3e Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Thu, 2 Mar 2017 16:12:15 -0800 Subject: ftrace/graph: Add ftrace_graph_max_depth kernel parameter Early trace callgraphs can be extremely large on systems with several seconds of boot time. The max_depth parameter limits how deep the graph trace goes and reduces the output size. This parameter is the same as the max_graph_depth file in tracefs. Link: http://lkml.kernel.org/r/1488499935-23216-1-git-send-email-todd.e.brandt@linux.intel.com Signed-off-by: Todd Brandt [ changed comments about debugfs to tracefs ] Signed-off-by: Steven Rostedt (VMware) --- Documentation/admin-guide/kernel-parameters.txt | 6 ++++++ kernel/trace/ftrace.c | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 21e2d8863705..1c9016b27ee9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1173,6 +1173,12 @@ functions that can be changed at run time by the set_graph_notrace file in the debugfs tracing directory. + ftrace_graph_max_depth= + [FTRACE] Used with the function graph tracer. This is + the max depth it will trace into a function. This value + can be changed at run time by the max_graph_depth file + in the tracefs tracing directory. default: 0 (no limit) + gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad support via parallel port (up to 5 devices per port) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 44122e7a6418..d129ae51329a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4415,6 +4415,15 @@ static int __init set_graph_notrace_function(char *str) } __setup("ftrace_graph_notrace=", set_graph_notrace_function); +static int __init set_graph_max_depth_function(char *str) +{ + if (!str) + return 0; + fgraph_max_depth = simple_strtoul(str, NULL, 0); + return 1; +} +__setup("ftrace_graph_max_depth=", set_graph_max_depth_function); + static void __init set_ftrace_early_graph(char *buf, int enable) { int ret; -- cgit v1.2.3 From 2bc756e7dde20972300bb8e2e46dd430d6d2d5db Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 Mar 2017 23:22:29 +0100 Subject: cpufreq: intel_pstate: Do not use performance_limits in passive mode Using performance_limits in the passive mode doesn't make sense, because in that mode the global limits are applied to the frequency selected by the scaling governor. The maximum and minimum P-state limits in performance_limits are both set to 100 percent which will put all CPUs into the turbo range regardless of what governor is used and what frequencies are selected by it (that is particularly undesirable on CPUs with the generic powersave governor attached). For this reason, make intel_pstate_register_driver() always point limits to powersave_limits in the passive mode. Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9ee13f195c37..7fc1f8a0ef44 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2442,8 +2442,11 @@ static int intel_pstate_register_driver(void) intel_pstate_init_limits(&powersave_limits); intel_pstate_set_performance_limits(&performance_limits); - limits = IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) ? - &performance_limits : &powersave_limits; + if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) && + intel_pstate_driver == &intel_pstate) + limits = &performance_limits; + else + limits = &powersave_limits; ret = cpufreq_register_driver(intel_pstate_driver); if (ret) { -- cgit v1.2.3 From 7f17326fc0b69afbda7aed6c4ce8e2328b74203b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 Mar 2017 23:24:36 +0100 Subject: cpufreq: intel_pstate: Fix intel_cpufreq_verify_policy() The intel_pstate_update_perf_limits() called from intel_cpufreq_verify_policy() may cause global P-state limits to change which is generally confusing and unnecessary. In the passive mode the global limits are only applied to the frequency selected by the scaling governor (they are not taken into account by governors when making decisions anyway), so making them follow the per-policy limits serves no purpose and may go against user expectations (as it generally causes the global attributes in sysfs to change even though they have not been written to in some cases). Fix that by dropping the intel_pstate_update_perf_limits() invocation from intel_cpufreq_verify_policy() (which also reduces the code size by a few lines). This change does not affect the per-CPU limits case, because those limits allow any P-state to be set by default in the passive mode and it removes the only piece of code updating them in that mode, so the per-policy settings will be the only ones taken into account in that case as expected. Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7fc1f8a0ef44..04b2aa162276 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2306,7 +2306,6 @@ static struct cpufreq_driver intel_pstate = { static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) { struct cpudata *cpu = all_cpu_data[policy->cpu]; - struct perf_limits *perf_limits = limits; update_turbo_state(); policy->cpuinfo.max_freq = limits->turbo_disabled ? @@ -2314,15 +2313,6 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) cpufreq_verify_within_cpu_limits(policy); - if (per_cpu_limits) - perf_limits = cpu->perf_limits; - - mutex_lock(&intel_pstate_limits_lock); - - intel_pstate_update_perf_limits(policy, perf_limits); - - mutex_unlock(&intel_pstate_limits_lock); - return 0; } -- cgit v1.2.3 From 6407829901f074cf6beef566d6af9a0b0e238f0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Mar 2017 23:51:31 +0100 Subject: cpufreq: intel_pstate: Avoid triggering cpu_frequency tracepoint unnecessarily In the passive mode the cpu_frequency trace event is already triggered by the cpufreq core or by scaling governors, so intel_pstate should not trigger it once again for the same P-state updates. In addition to that, the frequency returned by intel_cpufreq_fast_switch() and passed via freqs.new from intel_cpufreq_target() to cpufreq_freq_transition_end() should reflect the P-state actually set, so make that happen. Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 04b2aa162276..5e066b332598 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1879,13 +1879,11 @@ static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate) intel_pstate_get_min_max(cpu, &min_perf, &max_perf); pstate = clamp_t(int, pstate, min_perf, max_perf); - trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); return pstate; } static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate) { - pstate = intel_pstate_prepare_request(cpu, pstate); if (pstate == cpu->pstate.current_pstate) return; @@ -1905,6 +1903,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) update_turbo_state(); + target_pstate = intel_pstate_prepare_request(cpu, target_pstate); + trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu); intel_pstate_update_pstate(cpu, target_pstate); sample = &cpu->sample; @@ -2365,6 +2365,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, target_pstate)); } + freqs.new = target_pstate * cpu->pstate.scaling; cpufreq_freq_transition_end(policy, &freqs, false); return 0; @@ -2378,8 +2379,9 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq); target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); + target_pstate = intel_pstate_prepare_request(cpu, target_pstate); intel_pstate_update_pstate(cpu, target_pstate); - return target_freq; + return target_pstate * cpu->pstate.scaling; } static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) -- cgit v1.2.3 From 2a9c4f40ab2c281f95d41577ff0f7f78668feb73 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 2 Mar 2017 17:41:24 +1100 Subject: powerpc/powernv: Fix opal tracepoints with JUMP_LABEL=n The recent commit to allow calling OPAL calls in real mode, commit ab9bad0ead9a ("powerpc/powernv: Remove separate entry for OPAL real mode calls"), introduced a bug when CONFIG_JUMP_LABEL=n. The commit moved the "mfmsr r12" prior to the call to OPAL_BRANCH, but we missed that OPAL_BRANCH clobbers r12 when jump labels are disabled. This leads to us using the tracepoint refcount as the MSR value, typically zero, and saving that into PACASAVEDMSR. When we return from OPAL we use that value as the MSR value for rfid, meaning we switch to 32-bit BE real mode - hilarity ensues. Fix it by using r11 in OPAL_BRANCH, which is not live at the time the macro is used in OPAL_CALL. Fixes: ab9bad0ead9a ("powerpc/powernv: Remove separate entry for OPAL real mode calls") Suggested-by: Paul Mackerras Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-wrappers.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 6693f75e93d1..da8a0f7a035c 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -39,8 +39,8 @@ opal_tracepoint_refcount: BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ - ld r12,opal_tracepoint_refcount@toc(r2); \ - cmpdi r12,0; \ + ld r11,opal_tracepoint_refcount@toc(r2); \ + cmpdi r11,0; \ bne- LABEL; \ 1: -- cgit v1.2.3 From 6ad966d7303b70165228dba1ee8da1a05c10eefe Mon Sep 17 00:00:00 2001 From: Shile Zhang Date: Sat, 4 Feb 2017 17:03:40 +0800 Subject: powerpc/64: Fix checksum folding in csum_add() Paul's patch to fix checksum folding, commit b492f7e4e07a ("powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold") missed a case in csum_add(). Fix it. Signed-off-by: Shile Zhang Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 4e63787dc3be..842124b199b5 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -112,7 +112,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) #ifdef __powerpc64__ res += (__force u64)addend; - return (__force __wsum)((u32)res + (res >> 32)); + return (__force __wsum) from64to32(res); #else asm("addc %0,%0,%1;" "addze %0,%0;" -- cgit v1.2.3 From 6c4f0fa643cb9e775dcc976e3db00d649468ff1d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 2 Mar 2017 14:03:20 +0530 Subject: cpufreq: schedutil: move cached_raw_freq to struct sugov_policy cached_raw_freq applies to the entire cpufreq policy and not individual CPUs. Apart from wasting per-cpu memory, it is actually wrong to keep it in struct sugov_cpu as we may end up comparing next_freq with a stale cached_raw_freq of a random CPU. Move cached_raw_freq to struct sugov_policy. Fixes: 5cbea46984d6 (cpufreq: schedutil: map raw required frequency to driver frequency) Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 8f8de3d4d6b7..3ab2aeaec6ec 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -36,6 +36,7 @@ struct sugov_policy { u64 last_freq_update_time; s64 freq_update_delay_ns; unsigned int next_freq; + unsigned int cached_raw_freq; /* The next fields are only needed if fast switch cannot be used. */ struct irq_work irq_work; @@ -52,7 +53,6 @@ struct sugov_cpu { struct update_util_data update_util; struct sugov_policy *sg_policy; - unsigned int cached_raw_freq; unsigned long iowait_boost; unsigned long iowait_boost_max; u64 last_update; @@ -146,9 +146,9 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, freq = (freq + (freq >> 2)) * util / max; - if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX) + if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX) return sg_policy->next_freq; - sg_cpu->cached_raw_freq = freq; + sg_policy->cached_raw_freq = freq; return cpufreq_driver_resolve_freq(policy, freq); } @@ -580,6 +580,7 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->next_freq = UINT_MAX; sg_policy->work_in_progress = false; sg_policy->need_freq_update = false; + sg_policy->cached_raw_freq = 0; for_each_cpu(cpu, policy->cpus) { struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); @@ -590,7 +591,6 @@ static int sugov_start(struct cpufreq_policy *policy) sg_cpu->max = 0; sg_cpu->flags = SCHED_CPUFREQ_RT; sg_cpu->last_update = 0; - sg_cpu->cached_raw_freq = 0; sg_cpu->iowait_boost = 0; sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, -- cgit v1.2.3 From 655cb1ebff4b7918fc560502c3297af2d3c7d114 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 2 Mar 2017 14:03:21 +0530 Subject: cpufreq: schedutil: Pass sg_policy to get_next_freq() get_next_freq() uses sg_cpu only to get sg_policy, which the callers of get_next_freq() already have. Pass sg_policy instead of sg_cpu to get_next_freq(), to make it more efficient. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 3ab2aeaec6ec..cd7cd489f739 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -116,7 +116,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, /** * get_next_freq - Compute a new frequency for a given cpufreq policy. - * @sg_cpu: schedutil cpu object to compute the new frequency for. + * @sg_policy: schedutil policy object to compute the new frequency for. * @util: Current CPU utilization. * @max: CPU capacity. * @@ -136,10 +136,9 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, * next_freq (as calculated above) is returned, subject to policy min/max and * cpufreq driver limitations. */ -static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, - unsigned long max) +static unsigned int get_next_freq(struct sugov_policy *sg_policy, + unsigned long util, unsigned long max) { - struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; unsigned int freq = arch_scale_freq_invariant() ? policy->cpuinfo.max_freq : policy->cur; @@ -213,7 +212,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, } else { sugov_get_util(&util, &max); sugov_iowait_boost(sg_cpu, &util, &max); - next_f = get_next_freq(sg_cpu, util, max); + next_f = get_next_freq(sg_policy, util, max); } sugov_update_commit(sg_policy, time, next_f); } @@ -267,7 +266,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, sugov_iowait_boost(j_sg_cpu, &util, &max); } - return get_next_freq(sg_cpu, util, max); + return get_next_freq(sg_policy, util, max); } static void sugov_update_shared(struct update_util_data *hook, u64 time, -- cgit v1.2.3 From d82f26925599cae83c38d17d07ae982356e81318 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 28 Feb 2017 16:44:16 -0500 Subject: cpufreq: Add the "cpufreq.off=1" cmdline option Add the "cpufreq.off=1" cmdline option. At boot-time, this allows a user to request CONFIG_CPU_FREQ=n behavior from a kernel built with CONFIG_CPU_FREQ=y. This is analogous to the existing "cpuidle.off=1" option and CONFIG_CPU_IDLE=y This capability is valuable when we need to debug end-user issues in the BIOS or in Linux. It is also convenient for enabling comparisons, which may otherwise require a new kernel, or help from BIOS SETUP, which may be buggy or unavailable. Signed-off-by: Len Brown Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ drivers/cpufreq/cpufreq.c | 1 + 2 files changed, 4 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index be7c0d9506b1..3988d2311f97 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -662,6 +662,9 @@ cpuidle.off=1 [CPU_IDLE] disable the cpuidle sub-system + cpufreq.off=1 [CPU_FREQ] + disable the cpufreq sub-system + cpu_init_udelay=N [X86] Delay for N microsec between assert and de-assert of APIC INIT to start processors. This delay occurs diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 80a785ad17e8..7790db2645d7 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2532,4 +2532,5 @@ static int __init cpufreq_core_init(void) return 0; } +module_param(off, int, 0444); core_initcall(cpufreq_core_init); -- cgit v1.2.3 From cd59b4bed9d11a2aefc4bb44eed9de0e6c1eea06 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 1 Mar 2017 00:07:36 +0100 Subject: cpufreq: intel_pstate: Fix global settings in active mode Commit 111b8b3fe4fa (cpufreq: intel_pstate: Always keep all limits settings in sync) changed intel_pstate to invoke cpufreq_update_policy() for every registered CPU on global sysfs attributes updates, but that led to undesirable effects in the active mode if the "performance" P-state selection algorithm is configufred for one CPU and the "powersave" one is chosen for all of the other CPUs. Namely, in that case, the following is possible: # cd /sys/devices/system/cpu/ # cat intel_pstate/max_perf_pct 100 # cat intel_pstate/min_perf_pct 26 # echo performance > cpufreq/policy0/scaling_governor # cat intel_pstate/max_perf_pct 100 # cat intel_pstate/min_perf_pct 100 # echo 94 > intel_pstate/min_perf_pct # cat intel_pstate/min_perf_pct 26 The reason why this happens is because intel_pstate attempts to maintain two sets of global limits in the active mode, one for the "performance" P-state selection algorithm and one for the "powersave" P-state selection algorithm, but the P-state selection algorithms are set per policy, so the global limits cannot reflect all of them at the same time if they are different for different policies. In the particular situation above, the attempt to change min_perf_pct to 94 caused cpufreq_update_policy() to be run for a CPU with the "powersave" P-state selection algorithm and intel_pstate_set_policy() called by it silently switched the global limits to the "powersave" set which finally was reflected by the sysfs interface. To prevent that from happening, modify intel_pstate_update_policies() to always switch back to the set of limits that was used right before it has been invoked. Fixes: 111b8b3fe4fa (cpufreq: intel_pstate: Always keep all limits settings in sync) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 5e066b332598..436a4c5ba70d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -973,11 +973,20 @@ static int intel_pstate_resume(struct cpufreq_policy *policy) } static void intel_pstate_update_policies(void) + __releases(&intel_pstate_limits_lock) + __acquires(&intel_pstate_limits_lock) { + struct perf_limits *saved_limits = limits; int cpu; + mutex_unlock(&intel_pstate_limits_lock); + for_each_possible_cpu(cpu) cpufreq_update_policy(cpu); + + mutex_lock(&intel_pstate_limits_lock); + + limits = saved_limits; } /************************** debugfs begin ************************/ @@ -1185,10 +1194,10 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, limits->no_turbo = clamp_t(int, input, 0, 1); - mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_limits_lock); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1222,10 +1231,10 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, limits->max_perf_pct); limits->max_perf = div_ext_fp(limits->max_perf_pct, 100); - mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_limits_lock); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1259,10 +1268,10 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, limits->min_perf_pct); limits->min_perf = div_ext_fp(limits->min_perf_pct, 100); - mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_limits_lock); + mutex_unlock(&intel_pstate_driver_lock); return count; -- cgit v1.2.3 From d74b19929130c9b5395a497030dcf161353cd526 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 1 Mar 2017 00:11:05 +0100 Subject: cpufreq: intel_pstate: Fix intel_pstate_verify_policy() The code added to intel_pstate_verify_policy() by commit 1443ebbacfd7 (cpufreq: intel_pstate: Fix sysfs limits enforcement for performance policy) should use perf_limits instead of limits, because otherwise setting global limits via sysfs may affect policies inconsistently. For example, in the sequence of shell commands below, the scaling_min_freq attribute for policy1 and policy2 should be affected in the same way, because scaling_governor is set in the same way for both of them: # cat cpufreq/policy1/scaling_governor powersave # cat cpufreq/policy2/scaling_governor powersave # echo performance > cpufreq/policy0/scaling_governor # echo 94 > intel_pstate/min_perf_pct # cat cpufreq/policy0/scaling_min_freq 2914000 # cat cpufreq/policy1/scaling_min_freq 2914000 # cat cpufreq/policy2/scaling_min_freq 800000 The are affected differently, because intel_pstate_verify_policy() is invoked with limits set to &performance_limits (left behind by policy0) for policy1 and with limits set to &powersave_limits (left behind by policy1) for policy2. Since perf_limits is set to the set of limits matching the policy being updated, using it instead of limits fixes the inconsistency. Fixes: 1443ebbacfd7 (cpufreq: intel_pstate: Fix sysfs limits enforcement for performance policy) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 436a4c5ba70d..3dc546601c88 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2212,9 +2212,9 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy) unsigned int max_freq, min_freq; max_freq = policy->cpuinfo.max_freq * - limits->max_sysfs_pct / 100; + perf_limits->max_sysfs_pct / 100; min_freq = policy->cpuinfo.max_freq * - limits->min_sysfs_pct / 100; + perf_limits->min_sysfs_pct / 100; cpufreq_verify_within_limits(policy, min_freq, max_freq); } -- cgit v1.2.3 From a240c4aa5d0f9c8bb0db380ab9c1ec1f68b923ad Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 Mar 2017 23:29:12 +0100 Subject: cpufreq: intel_pstate: Do not reinit performance limits in ->setpolicy If the current P-state selection algorithm is set to "performance" in intel_pstate_set_policy(), the limits may be initialized from scratch, but only if no_turbo is not set and the maximum frequency allowed for the given CPU (i.e. the policy object representing it) is at least equal to the max frequency supported by the CPU. In all of the other cases, the limits will not be updated. For example, the following can happen: # cat intel_pstate/status active # echo performance > cpufreq/policy0/scaling_governor # cat intel_pstate/min_perf_pct 100 # echo 94 > intel_pstate/min_perf_pct # cat intel_pstate/min_perf_pct 100 # cat cpufreq/policy0/scaling_max_freq 3100000 echo 3000000 > cpufreq/policy0/scaling_max_freq # cat intel_pstate/min_perf_pct 94 # echo 95 > intel_pstate/min_perf_pct # cat intel_pstate/min_perf_pct 95 That is confusing for two reasons. First, the initial attempt to change min_perf_pct to 94 seems to have no effect, even though setting the global limits should always work. Second, after changing scaling_max_freq for policy0 the global min_perf_pct attribute shows 94, even though it should have not been affected by that operation in principle. Moreover, the final attempt to change min_perf_pct to 95 worked as expected, because scaling_max_freq for the only policy with scaling_governor equal to "performance" was different from the maximum at that time. To make all that confusion go away, modify intel_pstate_set_policy() so that it doesn't reinitialize the limits at all. At the same time, change intel_pstate_set_performance_limits() to set min_sysfs_pct to 100 in the "performance" limits set so that switching the P-state selection algorithm to "performance" causes intel_pstate/min_perf_pct in sysfs to go to 100 (or whatever value min_sysfs_pct in the "performance" limits is set to later). That requires per-CPU limits to be initialized explicitly rather than by copying the global limits to avoid setting min_sysfs_pct in the per-CPU limits to 100. Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3dc546601c88..f1f8fbe0b4c4 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -382,6 +382,7 @@ static void intel_pstate_set_performance_limits(struct perf_limits *limits) intel_pstate_init_limits(limits); limits->min_perf_pct = 100; limits->min_perf = int_ext_tofp(1); + limits->min_sysfs_pct = 100; } static DEFINE_MUTEX(intel_pstate_driver_lock); @@ -2146,16 +2147,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) mutex_lock(&intel_pstate_limits_lock); if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + pr_debug("set performance\n"); if (!perf_limits) { limits = &performance_limits; perf_limits = limits; } - if (policy->max >= policy->cpuinfo.max_freq && - !limits->no_turbo) { - pr_debug("set performance\n"); - intel_pstate_set_performance_limits(perf_limits); - goto out; - } } else { pr_debug("set powersave\n"); if (!perf_limits) { @@ -2166,7 +2162,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) } intel_pstate_update_perf_limits(policy, perf_limits); - out: + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { /* * NOHZ_FULL CPUs need this as the governor callback may not @@ -2257,13 +2253,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) cpu = all_cpu_data[policy->cpu]; - /* - * We need sane value in the cpu->perf_limits, so inherit from global - * perf_limits limits, which are seeded with values based on the - * CONFIG_CPU_FREQ_DEFAULT_GOV_*, during boot up. - */ if (per_cpu_limits) - memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits)); + intel_pstate_init_limits(cpu->perf_limits); policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; -- cgit v1.2.3 From 36fc579761b50784b63dafd0f2e796b659e0f5ee Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Mon, 20 Feb 2017 16:25:45 +0100 Subject: drm/edid: Add EDID_QUIRK_FORCE_8BPC quirk for Rotel RSX-1058 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rotel RSX-1058 is a receiver with 4 HDMI inputs and a HDMI output, all 1.1. When a sink that supports deep color is connected to the output, the receiver will send EDIDs that advertise this capability, even if it isn't possible with HDMI versions earlier than 1.3. Currently the kernel is assuming that deep color is possible and the sink displays an error. This quirk will make sure that deep color isn't used with this particular receiver. Fixes: 7a0baa623446 ("Revert "drm/i915: Disable 12bpc hdmi for now"") Signed-off-by: Tomeu Vizoso Link: http://patchwork.freedesktop.org/patch/msgid/20170220152545.13153-1-tomeu.vizoso@collabora.com Cc: stable@vger.kernel.org Cc: Matt Horan Tested-by: Matt Horan Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99869 Reviewed-by: Ville Syrjälä Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index c8baab9bee0d..ba58f1b11d1e 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -148,6 +148,9 @@ static const struct edid_quirk { /* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */ { "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC }, + + /* Rotel RSX-1058 forwards sink's EDID but only does HDMI 1.1*/ + { "ETR", 13896, EDID_QUIRK_FORCE_8BPC }, }; /* -- cgit v1.2.3 From 7369090a9fb57c3fc705ce355d2e4523a5a24716 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 31 Jan 2017 13:24:54 +0200 Subject: usb: dwc3: gadget: make Set Endpoint Configuration macros safe Some gadget drivers are bad, bad boys. We notice that ADB was passing bad Burst Size which caused top bits of param0 to be overwritten which confused DWC3 when running this command. In order to avoid future issues, we're going to make sure values passed by macros are always safe for the controller. Note that ADB still needs a fix to *not* pass bad values. Cc: # v3.2+ Reported-by: Mohamed Abbas Sugested-by: Adam Andruszak Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h index 3129bcf74d7d..265e223ab645 100644 --- a/drivers/usb/dwc3/gadget.h +++ b/drivers/usb/dwc3/gadget.h @@ -28,23 +28,23 @@ struct dwc3; #define gadget_to_dwc(g) (container_of(g, struct dwc3, gadget)) /* DEPCFG parameter 1 */ -#define DWC3_DEPCFG_INT_NUM(n) ((n) << 0) +#define DWC3_DEPCFG_INT_NUM(n) (((n) & 0x1f) << 0) #define DWC3_DEPCFG_XFER_COMPLETE_EN (1 << 8) #define DWC3_DEPCFG_XFER_IN_PROGRESS_EN (1 << 9) #define DWC3_DEPCFG_XFER_NOT_READY_EN (1 << 10) #define DWC3_DEPCFG_FIFO_ERROR_EN (1 << 11) #define DWC3_DEPCFG_STREAM_EVENT_EN (1 << 13) -#define DWC3_DEPCFG_BINTERVAL_M1(n) ((n) << 16) +#define DWC3_DEPCFG_BINTERVAL_M1(n) (((n) & 0xff) << 16) #define DWC3_DEPCFG_STREAM_CAPABLE (1 << 24) -#define DWC3_DEPCFG_EP_NUMBER(n) ((n) << 25) +#define DWC3_DEPCFG_EP_NUMBER(n) (((n) & 0x1f) << 25) #define DWC3_DEPCFG_BULK_BASED (1 << 30) #define DWC3_DEPCFG_FIFO_BASED (1 << 31) /* DEPCFG parameter 0 */ -#define DWC3_DEPCFG_EP_TYPE(n) ((n) << 1) -#define DWC3_DEPCFG_MAX_PACKET_SIZE(n) ((n) << 3) -#define DWC3_DEPCFG_FIFO_NUMBER(n) ((n) << 17) -#define DWC3_DEPCFG_BURST_SIZE(n) ((n) << 22) +#define DWC3_DEPCFG_EP_TYPE(n) (((n) & 0x3) << 1) +#define DWC3_DEPCFG_MAX_PACKET_SIZE(n) (((n) & 0x7ff) << 3) +#define DWC3_DEPCFG_FIFO_NUMBER(n) (((n) & 0x1f) << 17) +#define DWC3_DEPCFG_BURST_SIZE(n) (((n) & 0xf) << 22) #define DWC3_DEPCFG_DATA_SEQ_NUM(n) ((n) << 26) /* This applies for core versions earlier than 1.94a */ #define DWC3_DEPCFG_IGN_SEQ_NUM (1 << 31) -- cgit v1.2.3 From 2bfa0719ac2a9b2f3c91345873d3cdebd0296ba9 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 31 Jan 2017 14:54:45 +0200 Subject: usb: gadget: function: f_fs: pass companion descriptor along If we're dealing with SuperSpeed endpoints, we need to make sure to pass along the companion descriptor and initialize fields needed by the Gadget API. Eventually, f_fs.c should be converted to use config_ep_by_speed() like all other functions, though. Cc: Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index a5b7cd615698..7e976f00cb02 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1834,11 +1834,14 @@ static int ffs_func_eps_enable(struct ffs_function *func) spin_lock_irqsave(&func->ffs->eps_lock, flags); while(count--) { struct usb_endpoint_descriptor *ds; + struct usb_ss_ep_comp_descriptor *comp_desc = NULL; + int needs_comp_desc = false; int desc_idx; - if (ffs->gadget->speed == USB_SPEED_SUPER) + if (ffs->gadget->speed == USB_SPEED_SUPER) { desc_idx = 2; - else if (ffs->gadget->speed == USB_SPEED_HIGH) + needs_comp_desc = true; + } else if (ffs->gadget->speed == USB_SPEED_HIGH) desc_idx = 1; else desc_idx = 0; @@ -1855,6 +1858,14 @@ static int ffs_func_eps_enable(struct ffs_function *func) ep->ep->driver_data = ep; ep->ep->desc = ds; + + comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds + + USB_DT_ENDPOINT_SIZE); + ep->ep->maxburst = comp_desc->bMaxBurst + 1; + + if (needs_comp_desc) + ep->ep->comp_desc = comp_desc; + ret = usb_ep_enable(ep->ep); if (likely(!ret)) { epfile->ep = ep; -- cgit v1.2.3 From cf3113d893d4427b166ec8695460efa7aa660923 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 17 Feb 2017 11:12:44 +0200 Subject: usb: dwc3: gadget: properly increment dequeue pointer on ep_dequeue If request was already started, this means we had to stop the transfer. With that we also need to ignore all TRBs used by the request, however TRBs can only be modified after completion of END_TRANSFER command. So what we have to do here is wait for END_TRANSFER completion and only after that jump over TRBs by clearing HWO and incrementing dequeue pointer. Note that we have 2 possible types of transfers here: i) Linear buffer request ii) SG-list based request SG-list based requests will have r->num_pending_sgs set to a valid number (> 0). Linear requests, normally use a single TRB. For each of these two cases, if r->unaligned flag is set, one extra TRB has been used to align transfer size to wMaxPacketSize. All of these cases need to be taken into consideration so we don't mess up our TRB ring pointers. Tested-by: Janusz Dziedzic Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 4db97ecae885..f875b3f4b0ec 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1342,6 +1342,68 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, if (r == req) { /* wait until it is processed */ dwc3_stop_active_transfer(dwc, dep->number, true); + + /* + * If request was already started, this means we had to + * stop the transfer. With that we also need to ignore + * all TRBs used by the request, however TRBs can only + * be modified after completion of END_TRANSFER + * command. So what we do here is that we wait for + * END_TRANSFER completion and only after that, we jump + * over TRBs by clearing HWO and incrementing dequeue + * pointer. + * + * Note that we have 2 possible types of transfers here: + * + * i) Linear buffer request + * ii) SG-list based request + * + * SG-list based requests will have r->num_pending_sgs + * set to a valid number (> 0). Linear requests, + * normally use a single TRB. + * + * For each of these two cases, if r->unaligned flag is + * set, one extra TRB has been used to align transfer + * size to wMaxPacketSize. + * + * All of these cases need to be taken into + * consideration so we don't mess up our TRB ring + * pointers. + */ + wait_event_lock_irq(dep->wait_end_transfer, + !(dep->flags & DWC3_EP_END_TRANSFER_PENDING), + dwc->lock); + + if (!r->trb) + goto out1; + + if (r->num_pending_sgs) { + struct dwc3_trb *trb; + int i = 0; + + for (i = 0; i < r->num_pending_sgs; i++) { + trb = r->trb + i; + trb->ctrl &= ~DWC3_TRB_CTRL_HWO; + dwc3_ep_inc_deq(dep); + } + + if (r->unaligned) { + trb = r->trb + r->num_pending_sgs + 1; + trb->ctrl &= ~DWC3_TRB_CTRL_HWO; + dwc3_ep_inc_deq(dep); + } + } else { + struct dwc3_trb *trb = r->trb; + + trb->ctrl &= ~DWC3_TRB_CTRL_HWO; + dwc3_ep_inc_deq(dep); + + if (r->unaligned) { + trb = r->trb + 1; + trb->ctrl &= ~DWC3_TRB_CTRL_HWO; + dwc3_ep_inc_deq(dep); + } + } goto out1; } dev_err(dwc->dev, "request %p was not queued to %s\n", @@ -1352,6 +1414,7 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, out1: /* giveback the request */ + dep->queued_requests--; dwc3_gadget_giveback(dep, req, -ECONNRESET); out0: -- cgit v1.2.3 From 2e46565cf622dd0534a9d8bffe152a577b48d7aa Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 24 Feb 2017 19:11:28 +0100 Subject: USB: serial: digi_acceleport: fix OOB-event processing A recent change claimed to fix an off-by-one error in the OOB-port completion handler, but instead introduced such an error. This could specifically led to modem-status changes going unnoticed, effectively breaking TIOCMGET. Note that the offending commit fixes a loop-condition underflow and is marked for stable, but should not be backported without this fix. Reported-by: Ben Hutchings Fixes: 2d380889215f ("USB: serial: digi_acceleport: fix OOB data sanity check") Cc: stable # v2.6.30: 2d380889215f Signed-off-by: Johan Hovold --- drivers/usb/serial/digi_acceleport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index ab78111e0968..6537d3ca2797 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -1500,7 +1500,7 @@ static int digi_read_oob_callback(struct urb *urb) return -1; /* handle each oob command */ - for (i = 0; i < urb->actual_length - 4; i += 4) { + for (i = 0; i < urb->actual_length - 3; i += 4) { opcode = buf[i]; line = buf[i + 1]; status = buf[i + 2]; -- cgit v1.2.3 From 8f1117abb408808af9cc4c948925c726bec4755a Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Mon, 6 Mar 2017 13:05:24 +0800 Subject: drm/i915/gvt: handle workload lifecycle properly Currently i915 has a request replay mechanism which can make sure the request can be replayed after a GPU reset. With this mechanism, gvt should wait until the GVT request seqno passed before complete the current workload. So that there should be a context switch interrupt come before gvt free the workload. In this way, workload lifecylce matches with the i915 request lifecycle. The workload can only be freed after the request is completed. v2: use gvt_dbg_sched instead of gvt_err to print when wait again Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 49 ++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index e355a82ccabd..d3a56c949025 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -151,6 +151,15 @@ static int shadow_context_status_change(struct notifier_block *nb, case INTEL_CONTEXT_SCHEDULE_OUT: intel_gvt_restore_render_mmio(workload->vgpu, workload->ring_id); + /* If the status is -EINPROGRESS means this workload + * doesn't meet any issue during dispatching so when + * get the SCHEDULE_OUT set the status to be zero for + * good. If the status is NOT -EINPROGRESS means there + * is something wrong happened during dispatching and + * the status should not be set to zero + */ + if (workload->status == -EINPROGRESS) + workload->status = 0; atomic_set(&workload->shadow_ctx_active, 0); break; default: @@ -362,15 +371,23 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload = scheduler->current_workload[ring_id]; vgpu = workload->vgpu; - if (!workload->status && !vgpu->resetting) { + /* For the workload w/ request, needs to wait for the context + * switch to make sure request is completed. + * For the workload w/o request, directly complete the workload. + */ + if (workload->req) { wait_event(workload->shadow_ctx_status_wq, !atomic_read(&workload->shadow_ctx_active)); - update_guest_context(workload); + i915_gem_request_put(fetch_and_zero(&workload->req)); - for_each_set_bit(event, workload->pending_events, - INTEL_GVT_EVENT_MAX) - intel_vgpu_trigger_virtual_event(vgpu, event); + if (!workload->status && !vgpu->resetting) { + update_guest_context(workload); + + for_each_set_bit(event, workload->pending_events, + INTEL_GVT_EVENT_MAX) + intel_vgpu_trigger_virtual_event(vgpu, event); + } } gvt_dbg_sched("ring id %d complete workload %p status %d\n", @@ -400,7 +417,6 @@ static int workload_thread(void *priv) int ring_id = p->ring_id; struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct intel_vgpu_workload *workload = NULL; - long lret; int ret; bool need_force_wake = IS_SKYLAKE(gvt->dev_priv); DEFINE_WAIT_FUNC(wait, woken_wake_function); @@ -449,23 +465,24 @@ static int workload_thread(void *priv) gvt_dbg_sched("ring id %d wait workload %p\n", workload->ring_id, workload); - - lret = i915_wait_request(workload->req, +retry: + i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT); - if (lret < 0) { - workload->status = lret; - gvt_err("fail to wait workload, skip\n"); - } else { - workload->status = 0; + /* I915 has replay mechanism and a request will be replayed + * if there is i915 reset. So the seqno will be updated anyway. + * If the seqno is not updated yet after waiting, which means + * the replay may still be in progress and we can wait again. + */ + if (!i915_gem_request_completed(workload->req)) { + gvt_dbg_sched("workload %p not completed, wait again\n", + workload); + goto retry; } complete: gvt_dbg_sched("will complete workload %p, status: %d\n", workload, workload->status); - if (workload->req) - i915_gem_request_put(fetch_and_zero(&workload->req)); - complete_current_workload(gvt, ring_id); if (need_force_wake) -- cgit v1.2.3 From c2e04fdab33181b53b5a2f9662b7b607b720f79f Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 6 Mar 2017 17:08:30 +0800 Subject: drm/i915/gvt: protect RO and Rsvd bits of virtual vgpu configuration space Per PCI specification, Configuration Register has different types (RO, RW, RW1C, Rsvd). For RO Register bits are read-only and cannot be altered by software. For RW1C Register bits indicate status when read. A Set bit indicates a status event which is Cleared by writing a 1b. Writing a 0b to RW1C bits has no effect. Reserved Register is for future implementations, and they are read-only and must return zero when read. Current vGPU configuration write emulation just copy the value as it is. So we haven't emulated RO, RW1C and Rsvd Registers correctly. This patch is following the Spec to correct emulation logic. We add a function vgpu_cfg_mem_write to wrap the access to vGPU configuration memory. The write function uses a RW Register bitmap to avoid RO bits be overwritten, and emulate RW1C behavior for the particular status Register. v2: new = src[i] --> new = src[i] & mask (zhenyu) Signed-off-by: Changbin Du Cc: Xiaoguang Chen Cc: Zhiyuan Lv Cc: Min He Reviewed-by: Zhenyu Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cfg_space.c | 54 ++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index a77e050b85a3..b7d7721e72fa 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -41,6 +41,54 @@ enum { INTEL_GVT_PCI_BAR_MAX, }; +/* bitmap for writable bits (RW or RW1C bits, but cannot co-exist in one + * byte) byte by byte in standard pci configuration space. (not the full + * 256 bytes.) + */ +static const u8 pci_cfg_space_rw_bmp[PCI_INTERRUPT_LINE + 4] = { + [PCI_COMMAND] = 0xff, 0x07, + [PCI_STATUS] = 0x00, 0xf9, /* the only one RW1C byte */ + [PCI_CACHE_LINE_SIZE] = 0xff, + [PCI_BASE_ADDRESS_0 ... PCI_CARDBUS_CIS - 1] = 0xff, + [PCI_ROM_ADDRESS] = 0x01, 0xf8, 0xff, 0xff, + [PCI_INTERRUPT_LINE] = 0xff, +}; + +/** + * vgpu_pci_cfg_mem_write - write virtual cfg space memory + * + * Use this function to write virtual cfg space memory. + * For standard cfg space, only RW bits can be changed, + * and we emulates the RW1C behavior of PCI_STATUS register. + */ +static void vgpu_pci_cfg_mem_write(struct intel_vgpu *vgpu, unsigned int off, + u8 *src, unsigned int bytes) +{ + u8 *cfg_base = vgpu_cfg_space(vgpu); + u8 mask, new, old; + int i = 0; + + for (; i < bytes && (off + i < sizeof(pci_cfg_space_rw_bmp)); i++) { + mask = pci_cfg_space_rw_bmp[off + i]; + old = cfg_base[off + i]; + new = src[i] & mask; + + /** + * The PCI_STATUS high byte has RW1C bits, here + * emulates clear by writing 1 for these bits. + * Writing a 0b to RW1C bits has no effect. + */ + if (off + i == PCI_STATUS + 1) + new = (~new & old) & mask; + + cfg_base[off + i] = (old & ~mask) | new; + } + + /* For other configuration space directly copy as it is. */ + if (i < bytes) + memcpy(cfg_base + off + i, src + i, bytes - i); +} + /** * intel_vgpu_emulate_cfg_read - emulate vGPU configuration space read * @@ -123,7 +171,7 @@ static int emulate_pci_command_write(struct intel_vgpu *vgpu, u8 changed = old ^ new; int ret; - memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes); + vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes); if (!(changed & PCI_COMMAND_MEMORY)) return 0; @@ -277,10 +325,10 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, if (ret) return ret; - memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes); + vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes); break; default: - memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes); + vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes); break; } return 0; -- cgit v1.2.3 From 68925176296a8b995e503349200e256674bfe5ac Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Feb 2017 14:32:18 +0000 Subject: arm64: KVM: VHE: Clear HCR_TGE when invalidating guest TLBs When invalidating guest TLBs, special care must be taken to actually shoot the guest TLBs and not the host ones if we're running on a VHE system. This is controlled by the HCR_EL2.TGE bit, which we forget to clear before invalidating TLBs. Address the issue by introducing two wrappers (__tlb_switch_to_guest and __tlb_switch_to_host) that take care of both the VTTBR_EL2 and HCR_EL2.TGE switching. Reported-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Reviewed-by: Christoffer Dall Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/tlb.c | 64 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index e8e7ba2bc11f..9e1d2b75eecd 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -18,14 +18,62 @@ #include #include +static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm) +{ + u64 val; + + /* + * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and + * most TLB operations target EL2/EL0. In order to affect the + * guest TLBs (EL1/EL0), we need to change one of these two + * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so + * let's flip TGE before executing the TLB operation. + */ + write_sysreg(kvm->arch.vttbr, vttbr_el2); + val = read_sysreg(hcr_el2); + val &= ~HCR_TGE; + write_sysreg(val, hcr_el2); + isb(); +} + +static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm) +{ + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); +} + +static hyp_alternate_select(__tlb_switch_to_guest, + __tlb_switch_to_guest_nvhe, + __tlb_switch_to_guest_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + +static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm) +{ + /* + * We're done with the TLB operation, let's restore the host's + * view of HCR_EL2. + */ + write_sysreg(0, vttbr_el2); + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); +} + +static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm) +{ + write_sysreg(0, vttbr_el2); +} + +static hyp_alternate_select(__tlb_switch_to_host, + __tlb_switch_to_host_nvhe, + __tlb_switch_to_host_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { dsb(ishst); /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); /* * We could do so much better if we had the VA as well. @@ -46,7 +94,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) dsb(ish); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) @@ -55,14 +103,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); __tlbi(vmalls12e1is); dsb(ish); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) @@ -70,14 +117,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); /* Switch to requested VMID */ - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); __tlbi(vmalle1); dsb(nsh); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_flush_vm_context(void) -- cgit v1.2.3 From 4dfc050571523ac2bc02cbf948dd47621f7dd83f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 21 Feb 2017 11:32:47 +0000 Subject: KVM: arm/arm64: vgic-v3: Don't pretend to support IRQ/FIQ bypass Our GICv3 emulation always presents ICC_SRE_EL1 with DIB/DFB set to zero, which implies that there is a way to bypass the GIC and inject raw IRQ/FIQ by driving the CPU pins. Of course, we don't allow that when the GIC is configured, but we fail to indicate that to the guest. The obvious fix is to set these bits (and never let them being changed again). Reported-by: Peter Maydell Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 ++ virt/kvm/arm/vgic/vgic-v3.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 672cfef72fc8..97cbca19430d 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -373,6 +373,8 @@ #define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) #define ICC_IGRPEN1_EL1_SHIFT 0 #define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) +#define ICC_SRE_EL1_DIB (1U << 2) +#define ICC_SRE_EL1_DFB (1U << 1) #define ICC_SRE_EL1_SRE (1U << 0) /* diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index edc6ee2dc852..be0f4c3e0142 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -229,10 +229,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) /* * If we are emulating a GICv3, we do it in an non-GICv2-compatible * way, so we force SRE to 1 to demonstrate this to the guest. + * Also, we don't support any form of IRQ/FIQ bypass. * This goes with the spec allowing the value to be RAO/WI. */ if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - vgic_v3->vgic_sre = ICC_SRE_EL1_SRE; + vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB | + ICC_SRE_EL1_DFB | + ICC_SRE_EL1_SRE); vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE; } else { vgic_v3->vgic_sre = 0; -- cgit v1.2.3 From a69e2fb70350a66f91175cd2625f1e8215c5b6e9 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 3 Mar 2017 11:58:44 +1100 Subject: powerpc/xics: Work around limitations of OPAL XICS priority handling The CPPR (Current Processor Priority Register) of a XICS interrupt presentation controller contains a value N, such that only interrupts with a priority "more favoured" than N will be received by the CPU, where "more favoured" means "less than". So if the CPPR has the value 5 then only interrupts with a priority of 0-4 inclusive will be received. In theory the CPPR can support a value of 0 to 255 inclusive. In practice Linux only uses values of 0, 4, 5 and 0xff. Setting the CPPR to 0 rejects all interrupts, setting it to 0xff allows all interrupts. The values 4 and 5 are used to differentiate IPIs from external interrupts. Setting the CPPR to 5 allows IPIs to be received but not external interrupts. The CPPR emulation in the OPAL XICS implementation only directly supports priorities 0 and 0xff. All other priorities are considered equivalent, and mapped to a single priority value internally. This means when using icp-opal we can not allow IPIs but not externals. This breaks Linux's use of priority values when a CPU is hot unplugged. After migrating IRQs away from the CPU that is being offlined, we set the priority to 5, meaning we still want the offline CPU to receive IPIs. But the effect of the OPAL XICS emulation's use of a single priority value is that all interrupts are rejected by the CPU. With the CPU offline, and not receiving IPIs, we may not be able to wake it up to bring it back online. The first part of the fix is in icp_opal_set_cpu_priority(). CPPR values of 0 to 4 inclusive will correctly cause all interrupts to be rejected, so we pass those CPPR values through to OPAL. However if we are called with a CPPR of 5 or greater, the caller is expecting to be able to allow IPIs but not external interrupts. We know this doesn't work, so instead of rejecting all interrupts we choose the opposite which is to allow all interrupts. This is still not correct behaviour, but we know for the only existing caller (xics_migrate_irqs_away()), that it is the better option. The other part of the fix is in xics_migrate_irqs_away(). Instead of setting priority (CPPR) to 0, and then back to 5 before migrating IRQs, we migrate the IRQs before setting the priority back to 5. This should have no effect on an ICP backend with a working set_priority(), and on icp-opal it means we will keep all interrupts blocked until after we've finished doing the IRQ migration. Additionally we wait for 5ms after doing the migration to make sure there are no IRQs in flight. Fixes: d74361881f0d ("powerpc/xics: Add ICP OPAL backend") Cc: stable@vger.kernel.org # v4.8+ Suggested-by: Michael Ellerman Reported-by: Vaidyanathan Srinivasan Tested-by: Vaidyanathan Srinivasan Signed-off-by: Balbir Singh [mpe: Rewrote comments and change log, change delay to 5ms] Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/xics/icp-opal.c | 10 ++++++++++ arch/powerpc/sysdev/xics/xics-common.c | 17 ++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index f9670eabfcfa..b53f80f0b4d8 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -91,6 +91,16 @@ static unsigned int icp_opal_get_irq(void) static void icp_opal_set_cpu_priority(unsigned char cppr) { + /* + * Here be dragons. The caller has asked to allow only IPI's and not + * external interrupts. But OPAL XIVE doesn't support that. So instead + * of allowing no interrupts allow all. That's still not right, but + * currently the only caller who does this is xics_migrate_irqs_away() + * and it works in that case. + */ + if (cppr >= DEFAULT_PRIORITY) + cppr = LOWEST_PRIORITY; + xics_set_base_cppr(cppr); opal_int_set_cppr(cppr); iosync(); diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 69d858e51ac7..23efe4e42172 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -198,9 +199,6 @@ void xics_migrate_irqs_away(void) /* Remove ourselves from the global interrupt queue */ xics_set_cpu_giq(xics_default_distrib_server, 0); - /* Allow IPIs again... */ - icp_ops->set_priority(DEFAULT_PRIORITY); - for_each_irq_desc(virq, desc) { struct irq_chip *chip; long server; @@ -255,6 +253,19 @@ void xics_migrate_irqs_away(void) unlock: raw_spin_unlock_irqrestore(&desc->lock, flags); } + + /* Allow "sufficient" time to drop any inflight IRQ's */ + mdelay(5); + + /* + * Allow IPIs again. This is done at the very end, after migrating all + * interrupts, the expectation is that we'll only get woken up by an IPI + * interrupt beyond this point, but leave externals masked just to be + * safe. If we're using icp-opal this may actually allow all + * interrupts anyway, but that should be OK. + */ + icp_ops->set_priority(DEFAULT_PRIORITY); + } #endif /* CONFIG_HOTPLUG_CPU */ -- cgit v1.2.3 From 12cc9fd6b2d8ee307a735b3b9faed0d17b719463 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 28 Feb 2017 17:03:47 +1100 Subject: powerpc: Parse the command line before calling CAS On POWER9 the hypervisor requires the guest to decide whether it would like to use a hash or radix mmu model at the time it calls ibm,client-architecture-support (CAS) based on what the hypervisor has said it's allowed to do. It is possible to disable radix by passing "disable_radix" on the command line. The next patch will add support for the new CAS format, thus we need to parse the command line before calling CAS so we can correctly select which mmu we would like to use. Signed-off-by: Suraj Jitindar Singh Reviewed-by: Paul Mackerras Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a3944540fe0d..bf3966d12565 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2993,6 +2993,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_check_initrd(r3, r4); + /* + * Do early parsing of command line + */ + early_cmdline_parse(); + #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* * On pSeries, inform the firmware about our capabilities @@ -3008,11 +3013,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, if (of_platform != PLATFORM_POWERMAC) copy_and_flush(0, kbase, 0x100, 0); - /* - * Do early parsing of command line - */ - early_cmdline_parse(); - /* * Initialize memory management within prom_init */ -- cgit v1.2.3 From 014d02cbf16b3106dc8e93281d2a9c189751ed5e Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 28 Feb 2017 17:03:48 +1100 Subject: powerpc: Update to new option-vector-5 format for CAS On POWER9 the ibm,client-architecture-support (CAS) negotiation process has been updated to change how the host to guest negotiation is done for the new hash/radix mmu as well as the nest mmu, process tables and guest translation shootdown (GTSE). This is documented in the unreleased PAPR ACR "CAS option vector additions for P9". The host tells the guest which options it supports in ibm,arch-vec-5-platform-support. The guest then chooses a subset of these to request in the CAS call and these are agreed to in the ibm,architecture-vec-5 property of the chosen node. Thus we read ibm,arch-vec-5-platform-support and make our selection before calling CAS. We then parse the ibm,architecture-vec-5 property of the chosen node to check whether we should run as hash or radix. ibm,arch-vec-5-platform-support format: index value pairs: ... index: Option vector 5 byte number val: Some representation of supported values Signed-off-by: Suraj Jitindar Singh Acked-by: Paul Mackerras [mpe: Don't print about unknown options, be consistent with OV5_FEAT] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/prom.h | 18 ++++--- arch/powerpc/kernel/prom_init.c | 110 +++++++++++++++++++++++++++++++++++++++- arch/powerpc/mm/init_64.c | 36 ++++++++++--- 3 files changed, 150 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 4a90634e8322..35c00d7a0cf8 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -160,12 +160,18 @@ struct of_drconf_cell { #define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */ #define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */ #define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */ -#define OV5_MMU_RADIX_300 0x1880 /* ISA v3.00 radix MMU supported */ -#define OV5_MMU_HASH_300 0x1840 /* ISA v3.00 hash MMU supported */ -#define OV5_MMU_SEGM_RADIX 0x1820 /* radix mode (no segmentation) */ -#define OV5_MMU_PROC_TBL 0x1810 /* hcall selects SLB or proc table */ -#define OV5_MMU_SLB 0x1800 /* always use SLB */ -#define OV5_MMU_GTSE 0x1808 /* Guest translation shootdown */ +/* MMU Base Architecture */ +#define OV5_MMU_SUPPORT 0x18C0 /* MMU Mode Support Mask */ +#define OV5_MMU_HASH 0x1800 /* Hash MMU Only */ +#define OV5_MMU_RADIX 0x1840 /* Radix MMU Only */ +#define OV5_MMU_EITHER 0x1880 /* Hash or Radix Supported */ +#define OV5_MMU_DYNAMIC 0x18C0 /* Hash or Radix Can Switch Later */ +#define OV5_NMMU 0x1820 /* Nest MMU Available */ +/* Hash Table Extensions */ +#define OV5_HASH_SEG_TBL 0x1980 /* In Memory Segment Tables Available */ +#define OV5_HASH_GTSE 0x1940 /* Guest Translation Shoot Down Avail */ +/* Radix Table Extensions */ +#define OV5_RADIX_GTSE 0x1A40 /* Guest Translation Shoot Down Avail */ /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index bf3966d12565..1c1b44ec7642 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -168,6 +168,14 @@ static unsigned long __initdata prom_tce_alloc_start; static unsigned long __initdata prom_tce_alloc_end; #endif +static bool __initdata prom_radix_disable; + +struct platform_support { + bool hash_mmu; + bool radix_mmu; + bool radix_gtse; +}; + /* Platforms codes are now obsolete in the kernel. Now only used within this * file and ultimately gone too. Feel free to change them if you need, they * are not shared with anything outside of this file anymore @@ -626,6 +634,12 @@ static void __init early_cmdline_parse(void) prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); #endif } + + opt = strstr(prom_cmd_line, "disable_radix"); + if (opt) { + prom_debug("Radix disabled from cmdline\n"); + prom_radix_disable = true; + } } #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) @@ -695,6 +709,8 @@ struct option_vector5 { u8 byte22; u8 intarch; u8 mmu; + u8 hash_ext; + u8 radix_ext; } __packed; struct option_vector6 { @@ -850,8 +866,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .reserved3 = 0, .subprocessors = 1, .intarch = 0, - .mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) | - OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE), + .mmu = 0, + .hash_ext = 0, + .radix_ext = 0, }, /* option vector 6: IBM PAPR hints */ @@ -990,6 +1007,92 @@ static int __init prom_count_smt_threads(void) } +static void __init prom_parse_mmu_model(u8 val, + struct platform_support *support) +{ + switch (val) { + case OV5_FEAT(OV5_MMU_DYNAMIC): + case OV5_FEAT(OV5_MMU_EITHER): /* Either Available */ + prom_debug("MMU - either supported\n"); + support->radix_mmu = !prom_radix_disable; + support->hash_mmu = true; + break; + case OV5_FEAT(OV5_MMU_RADIX): /* Only Radix */ + prom_debug("MMU - radix only\n"); + if (prom_radix_disable) { + /* + * If we __have__ to do radix, we're better off ignoring + * the command line rather than not booting. + */ + prom_printf("WARNING: Ignoring cmdline option disable_radix\n"); + } + support->radix_mmu = true; + break; + case OV5_FEAT(OV5_MMU_HASH): + prom_debug("MMU - hash only\n"); + support->hash_mmu = true; + break; + default: + prom_debug("Unknown mmu support option: 0x%x\n", val); + break; + } +} + +static void __init prom_parse_platform_support(u8 index, u8 val, + struct platform_support *support) +{ + switch (index) { + case OV5_INDX(OV5_MMU_SUPPORT): /* MMU Model */ + prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support); + break; + case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */ + if (val & OV5_FEAT(OV5_RADIX_GTSE)) { + prom_debug("Radix - GTSE supported\n"); + support->radix_gtse = true; + } + break; + } +} + +static void __init prom_check_platform_support(void) +{ + struct platform_support supported = { + .hash_mmu = false, + .radix_mmu = false, + .radix_gtse = false + }; + int prop_len = prom_getproplen(prom.chosen, + "ibm,arch-vec-5-platform-support"); + if (prop_len > 1) { + int i; + u8 vec[prop_len]; + prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n", + prop_len); + prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", + &vec, sizeof(vec)); + for (i = 0; i < prop_len; i += 2) { + prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 + , vec[i] + , vec[i + 1]); + prom_parse_platform_support(vec[i], vec[i + 1], + &supported); + } + } + + if (supported.radix_mmu && supported.radix_gtse) { + /* Radix preferred - but we require GTSE for now */ + prom_debug("Asking for radix with GTSE\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); + ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE); + } else if (supported.hash_mmu) { + /* Default to hash mmu (if we can) */ + prom_debug("Asking for hash\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_HASH); + } else { + /* We're probably on a legacy hypervisor */ + prom_debug("Assuming legacy hash support\n"); + } +} static void __init prom_send_capabilities(void) { @@ -997,6 +1100,9 @@ static void __init prom_send_capabilities(void) prom_arg_t ret; u32 cores; + /* Check ibm,arch-vec-5-platform-support and fixup vec5 if required */ + prom_check_platform_support(); + root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { /* We need to tell the FW about the number of cores we support. diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 6aa3b76aa0d6..9be992083d2a 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -356,18 +356,42 @@ static void early_check_vec5(void) unsigned long root, chosen; int size; const u8 *vec5; + u8 mmu_supported; root = of_get_flat_dt_root(); chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); - if (chosen == -FDT_ERR_NOTFOUND) + if (chosen == -FDT_ERR_NOTFOUND) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; return; + } vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); - if (!vec5) + if (!vec5) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; return; - if (size <= OV5_INDX(OV5_MMU_RADIX_300) || - !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300))) - /* Hypervisor doesn't support radix */ + } + if (size <= OV5_INDX(OV5_MMU_SUPPORT)) { cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + return; + } + + /* Check for supported configuration */ + mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] & + OV5_FEAT(OV5_MMU_SUPPORT); + if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) { + /* Hypervisor only supports radix - check enabled && GTSE */ + if (!early_radix_enabled()) { + pr_warn("WARNING: Ignoring cmdline option disable_radix\n"); + } + if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] & + OV5_FEAT(OV5_RADIX_GTSE))) { + pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n"); + } + /* Do radix anyway - the hypervisor said we had to */ + cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; + } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) { + /* Hypervisor only supports hash - disable radix */ + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + } } void __init mmu_early_init_devtree(void) @@ -383,7 +407,7 @@ void __init mmu_early_init_devtree(void) * even though the ibm,architecture-vec-5 property created by * skiboot doesn't have the necessary bits set. */ - if (early_radix_enabled() && !(mfmsr() & MSR_HV)) + if (!(mfmsr() & MSR_HV)) early_check_vec5(); if (early_radix_enabled()) -- cgit v1.2.3 From 6ba422c75facb1b1e0e206c464ee121b8073f7e0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 5 Mar 2017 10:54:34 +1100 Subject: powerpc/64: Avoid panic during boot due to divide by zero in init_cache_info() I see a panic in early boot when building with a recent gcc toolchain. The issue is a divide by zero, which is undefined. Older toolchains let us get away with it: int foo(int a) { return a / 0; } foo: li 9,0 divw 3,3,9 extsw 3,3 blr But newer ones catch it: foo: trap Add a check to avoid the divide by zero. Fixes: e2827fe5c156 ("powerpc/64: Clean up ppc64_caches using a struct per cache") Signed-off-by: Anton Blanchard Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index adf2084f214b..9cfaa8b69b5f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -408,7 +408,10 @@ static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, info->line_size = lsize; info->block_size = bsize; info->log_block_size = __ilog2(bsize); - info->blocks_per_page = PAGE_SIZE / bsize; + if (bsize) + info->blocks_per_page = PAGE_SIZE / bsize; + else + info->blocks_per_page = 0; if (sets == 0) info->assoc = 0xffff; -- cgit v1.2.3 From 3b3e78552d3077ec70d2640e629e07e3ab416a6a Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Sun, 5 Mar 2017 19:16:44 +0100 Subject: x86/reboot/quirks: Add ASUS EeeBook X205TA/W reboot quirk Without the parameter reboot=a, ASUS EeeBook X205TA/W will hang when it should reboot. This adds the appropriate quirk, thus fixing the problem. Signed-off-by: Matjaz Hegedic Link: http://lkml.kernel.org/r/1488737804-20681-1-git-send-email-matjaz.hegedic@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 01c9cda3e1b7..4194d6f9bb29 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -231,6 +231,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), }, }, + { /* Handle problems with rebooting on ASUS EeeBook X205TAW */ + .callback = set_acpi_reboot, + .ident = "ASUS EeeBook X205TAW", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + }, + }, /* Certec */ { /* Handle problems with rebooting on Certec BPC600 */ -- cgit v1.2.3 From f2853308b6409b97799b0beceacd9da43a82fe43 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 6 Mar 2017 10:57:48 +0200 Subject: x86/build/x86_64_defconfig: Enable CONFIG_R8169 Very common PCIe ethernet card. Already enabled in i386_defconfig. Signed-off-by: Andy Shevchenko Cc: Peter Zijlstra Cc: Konstantin Khlebnikov Link: http://lkml.kernel.org/r/20170306085748.85957-1-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/configs/x86_64_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 7ef4a099defc..6205d3b81e6d 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -176,6 +176,7 @@ CONFIG_E1000E=y CONFIG_SKY2=y CONFIG_FORCEDETH=y CONFIG_8139TOO=y +CONFIG_R8169=y CONFIG_FDDI=y CONFIG_INPUT_POLLDEV=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set -- cgit v1.2.3 From 9c7a00868c3a77c86ab07a2c51f3bb4897bd8550 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 6 Mar 2017 21:51:32 +1100 Subject: powerpc/64: Fix L1D cache shape vector reporting L1I values It seems we didn't pay quite enough attention when testing the new cache shape vectors, which means we didn't notice the bug where the vector for the L1D was using the L1I values. Fix it, resulting in eg: L1I cache size: 0x8000 32768B 32K L1I line size: 0x80 8-way associative L1D cache size: 0x10000 65536B 64K L1D line size: 0x80 8-way associative Fixes: 98a5f361b862 ("powerpc: Add new cache geometry aux vectors") Cut-and-paste-bug-by: Benjamin Herrenschmidt Badly-reviewed-by: Michael Ellerman Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/elf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 93b9b84568e8..09bde6e34f5d 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -144,8 +144,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #define ARCH_DLINFO_CACHE_GEOMETRY \ NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size); \ NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i)); \ - NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1i.size); \ - NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1i)); \ + NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1d.size); \ + NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1d)); \ NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size); \ NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2)); \ NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size); \ -- cgit v1.2.3 From a7d2475af7aedcb9b5c6343989a8bfadbf84429b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 6 Mar 2017 22:53:59 +1100 Subject: powerpc: Sort the selects under CONFIG_PPC We have a big list of selects under CONFIG_PPC, and currently they're completely unsorted. This means people tend to add new selects at the bottom of the list, and so two commits which both add a new select will often conflict. Instead sort it alphabetically. This is nicer in and of itself, but also means two commits that add a new select will have a greater chance of not conflicting. Add a note at the top and bottom asking people to keep it sorted. And while we're here pad out the 'if' expressions to make them stand out. Suggested-by: Stephen Rothwell Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 138 +++++++++++++++++++++++++++------------------------ 1 file changed, 72 insertions(+), 66 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 494091762bd7..97a8bc8a095c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -80,93 +80,99 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK config PPC bool default y - select BUILDTIME_EXTABLE_SORT + # + # Please keep this list sorted alphabetically. + # + select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_SET_COHERENT_MASK + select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE + select ARCH_HAS_SG_CHAIN + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_WANT_IPC_PARSE_VERSION select BINFMT_ELF - select ARCH_HAS_ELF_RANDOMIZE - select OF - select OF_EARLY_FLATTREE - select OF_RESERVED_MEM - select HAVE_FTRACE_MCOUNT_RECORD + select BUILDTIME_EXTABLE_SORT + select CLONE_BACKWARDS + select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select EDAC_ATOMIC_SCRUB + select EDAC_SUPPORT + select GENERIC_ATOMIC64 if PPC32 + select GENERIC_CLOCKEVENTS + select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_CMOS_UPDATE + select GENERIC_CPU_AUTOPROBE + select GENERIC_IRQ_SHOW + select GENERIC_IRQ_SHOW_LEVEL + select GENERIC_SMP_IDLE_THREAD + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER + select GENERIC_TIME_VSYSCALL_OLD + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HARDENED_USERCOPY + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK + select HAVE_CBPF_JIT if !PPC64 + select HAVE_CONTEXT_TRACKING if PPC64 + select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_STACKOVERFLOW + select HAVE_DMA_API_DEBUG select HAVE_DYNAMIC_FTRACE - select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL - select HAVE_FUNCTION_TRACER + select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL + select HAVE_EBPF_JIT if PPC64 + select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS - select SYSCTL_EXCEPTION_TRACE - select VIRT_TO_BUS if !PPC64 + select HAVE_GENERIC_RCU_GUP + select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IDE select HAVE_IOREMAP_PROT - select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_IRQ_EXIT_ON_IRQ_STACK + select HAVE_KERNEL_GZIP select HAVE_KPROBES - select HAVE_OPTPROBES if PPC64 - select HAVE_ARCH_KGDB select HAVE_KRETPROBES - select HAVE_ARCH_TRACEHOOK + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP - select HAVE_DMA_API_DEBUG + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI if PERF_EVENTS select HAVE_OPROFILE - select HAVE_DEBUG_KMEMLEAK - select ARCH_HAS_SG_CHAIN - select GENERIC_ATOMIC64 if PPC32 + select HAVE_OPTPROBES if PPC64 select HAVE_PERF_EVENTS + select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_RCU_TABLE_FREE if SMP select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) - select ARCH_WANT_IPC_PARSE_VERSION - select SPARSE_IRQ + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_VIRT_CPU_ACCOUNTING select IRQ_DOMAIN - select GENERIC_IRQ_SHOW - select GENERIC_IRQ_SHOW_LEVEL select IRQ_FORCED_THREADING - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_SYSCALL_TRACEPOINTS - select HAVE_CBPF_JIT if !PPC64 - select HAVE_EBPF_JIT if PPC64 - select HAVE_ARCH_JUMP_LABEL - select ARCH_HAVE_NMI_SAFE_CMPXCHG - select ARCH_HAS_GCOV_PROFILE_ALL - select GENERIC_SMP_IDLE_THREAD - select GENERIC_CMOS_UPDATE - select GENERIC_TIME_VSYSCALL_OLD - select GENERIC_CLOCKEVENTS - select GENERIC_CLOCKEVENTS_BROADCAST if SMP - select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST - select GENERIC_STRNCPY_FROM_USER - select GENERIC_STRNLEN_USER - select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA - select CLONE_BACKWARDS - select ARCH_USE_BUILTIN_BSWAP - select OLD_SIGSUSPEND - select OLD_SIGACTION if PPC32 - select HAVE_DEBUG_STACKOVERFLOW - select HAVE_IRQ_EXIT_ON_IRQ_STACK - select ARCH_USE_CMPXCHG_LOCKREF if PPC64 - select HAVE_ARCH_AUDITSYSCALL - select ARCH_SUPPORTS_ATOMIC_RMW - select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select NO_BOOTMEM - select HAVE_GENERIC_RCU_GUP - select HAVE_PERF_EVENTS_NMI if PPC64 - select HAVE_NMI if PERF_EVENTS - select EDAC_SUPPORT - select EDAC_ATOMIC_SCRUB - select ARCH_HAS_DMA_SET_COHERENT_MASK - select ARCH_HAS_DEVMEM_IS_ALLOWED - select HAVE_ARCH_SECCOMP_FILTER - select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT - select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS - select GENERIC_CPU_AUTOPROBE - select HAVE_VIRT_CPU_ACCOUNTING - select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE - select HAVE_ARCH_HARDENED_USERCOPY - select HAVE_KERNEL_GZIP - select HAVE_CONTEXT_TRACKING if PPC64 + select OF + select OF_EARLY_FLATTREE + select OF_RESERVED_MEM + select OLD_SIGACTION if PPC32 + select OLD_SIGSUSPEND + select SPARSE_IRQ + select SYSCTL_EXCEPTION_TRACE + select VIRT_TO_BUS if !PPC64 + # + # Please keep this list sorted alphabetically. + # config GENERIC_CSUM def_bool n -- cgit v1.2.3 From 606142af57dad981b78707234cfbd15f9f7b7125 Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Wed, 15 Feb 2017 18:29:15 -0200 Subject: [media] dw2102: don't do DMA on stack On Kernel 4.9, WARNINGs about doing DMA on stack are hit at the dw2102 driver: one in su3000_power_ctrl() and the other in tt_s2_4600_frontend_attach(). Both were due to the use of buffers on the stack as parameters to dvb_usb_generic_rw() and the resulting attempt to do DMA with them. The device was non-functional as a result. So, switch this driver over to use a buffer within the device state structure, as has been done with other DVB-USB drivers. Tested with TechnoTrend TT-connect S2-4600. [mchehab@osg.samsung.com: fixed a warning at su3000_i2c_transfer() that state var were dereferenced before check 'd'] Signed-off-by: Jonathan McDowell Cc: Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dw2102.c | 244 ++++++++++++++++++++++--------------- 1 file changed, 145 insertions(+), 99 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c index 6ca502d834b4..4f42d57f81d9 100644 --- a/drivers/media/usb/dvb-usb/dw2102.c +++ b/drivers/media/usb/dvb-usb/dw2102.c @@ -68,6 +68,7 @@ struct dw2102_state { u8 initialized; u8 last_lock; + u8 data[MAX_XFER_SIZE + 4]; struct i2c_client *i2c_client_demod; struct i2c_client *i2c_client_tuner; @@ -661,62 +662,72 @@ static int su3000_i2c_transfer(struct i2c_adapter *adap, struct i2c_msg msg[], int num) { struct dvb_usb_device *d = i2c_get_adapdata(adap); - u8 obuf[0x40], ibuf[0x40]; + struct dw2102_state *state; if (!d) return -ENODEV; + + state = d->priv; + if (mutex_lock_interruptible(&d->i2c_mutex) < 0) return -EAGAIN; + if (mutex_lock_interruptible(&d->data_mutex) < 0) { + mutex_unlock(&d->i2c_mutex); + return -EAGAIN; + } switch (num) { case 1: switch (msg[0].addr) { case SU3000_STREAM_CTRL: - obuf[0] = msg[0].buf[0] + 0x36; - obuf[1] = 3; - obuf[2] = 0; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 0, 0) < 0) + state->data[0] = msg[0].buf[0] + 0x36; + state->data[1] = 3; + state->data[2] = 0; + if (dvb_usb_generic_rw(d, state->data, 3, + state->data, 0, 0) < 0) err("i2c transfer failed."); break; case DW2102_RC_QUERY: - obuf[0] = 0x10; - if (dvb_usb_generic_rw(d, obuf, 1, ibuf, 2, 0) < 0) + state->data[0] = 0x10; + if (dvb_usb_generic_rw(d, state->data, 1, + state->data, 2, 0) < 0) err("i2c transfer failed."); - msg[0].buf[1] = ibuf[0]; - msg[0].buf[0] = ibuf[1]; + msg[0].buf[1] = state->data[0]; + msg[0].buf[0] = state->data[1]; break; default: /* always i2c write*/ - obuf[0] = 0x08; - obuf[1] = msg[0].addr; - obuf[2] = msg[0].len; + state->data[0] = 0x08; + state->data[1] = msg[0].addr; + state->data[2] = msg[0].len; - memcpy(&obuf[3], msg[0].buf, msg[0].len); + memcpy(&state->data[3], msg[0].buf, msg[0].len); - if (dvb_usb_generic_rw(d, obuf, msg[0].len + 3, - ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, msg[0].len + 3, + state->data, 1, 0) < 0) err("i2c transfer failed."); } break; case 2: /* always i2c read */ - obuf[0] = 0x09; - obuf[1] = msg[0].len; - obuf[2] = msg[1].len; - obuf[3] = msg[0].addr; - memcpy(&obuf[4], msg[0].buf, msg[0].len); - - if (dvb_usb_generic_rw(d, obuf, msg[0].len + 4, - ibuf, msg[1].len + 1, 0) < 0) + state->data[0] = 0x09; + state->data[1] = msg[0].len; + state->data[2] = msg[1].len; + state->data[3] = msg[0].addr; + memcpy(&state->data[4], msg[0].buf, msg[0].len); + + if (dvb_usb_generic_rw(d, state->data, msg[0].len + 4, + state->data, msg[1].len + 1, 0) < 0) err("i2c transfer failed."); - memcpy(msg[1].buf, &ibuf[1], msg[1].len); + memcpy(msg[1].buf, &state->data[1], msg[1].len); break; default: warn("more than 2 i2c messages at a time is not handled yet."); break; } + mutex_unlock(&d->data_mutex); mutex_unlock(&d->i2c_mutex); return num; } @@ -844,17 +855,23 @@ static int su3000_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) static int su3000_power_ctrl(struct dvb_usb_device *d, int i) { struct dw2102_state *state = (struct dw2102_state *)d->priv; - u8 obuf[] = {0xde, 0}; + int ret = 0; info("%s: %d, initialized %d", __func__, i, state->initialized); if (i && !state->initialized) { + mutex_lock(&d->data_mutex); + + state->data[0] = 0xde; + state->data[1] = 0; + state->initialized = 1; /* reset board */ - return dvb_usb_generic_rw(d, obuf, 2, NULL, 0, 0); + ret = dvb_usb_generic_rw(d, state->data, 2, NULL, 0, 0); + mutex_unlock(&d->data_mutex); } - return 0; + return ret; } static int su3000_read_mac_address(struct dvb_usb_device *d, u8 mac[6]) @@ -1309,49 +1326,57 @@ static int prof_7500_frontend_attach(struct dvb_usb_adapter *d) return 0; } -static int su3000_frontend_attach(struct dvb_usb_adapter *d) +static int su3000_frontend_attach(struct dvb_usb_adapter *adap) { - u8 obuf[3] = { 0xe, 0x80, 0 }; - u8 ibuf[] = { 0 }; + struct dvb_usb_device *d = adap->dev; + struct dw2102_state *state = d->priv; + + mutex_lock(&d->data_mutex); + + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 0; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x02; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x02; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); msleep(300); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 0; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 0; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0x51; + state->data[0] = 0x51; - if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); - d->fe_adap[0].fe = dvb_attach(ds3000_attach, &su3000_ds3000_config, - &d->dev->i2c_adap); - if (d->fe_adap[0].fe == NULL) + mutex_unlock(&d->data_mutex); + + adap->fe_adap[0].fe = dvb_attach(ds3000_attach, &su3000_ds3000_config, + &d->i2c_adap); + if (adap->fe_adap[0].fe == NULL) return -EIO; - if (dvb_attach(ts2020_attach, d->fe_adap[0].fe, + if (dvb_attach(ts2020_attach, adap->fe_adap[0].fe, &dw2104_ts2020_config, - &d->dev->i2c_adap)) { + &d->i2c_adap)) { info("Attached DS3000/TS2020!"); return 0; } @@ -1360,47 +1385,55 @@ static int su3000_frontend_attach(struct dvb_usb_adapter *d) return -EIO; } -static int t220_frontend_attach(struct dvb_usb_adapter *d) +static int t220_frontend_attach(struct dvb_usb_adapter *adap) { - u8 obuf[3] = { 0xe, 0x87, 0 }; - u8 ibuf[] = { 0 }; + struct dvb_usb_device *d = adap->dev; + struct dw2102_state *state = d->priv; + + mutex_lock(&d->data_mutex); - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + state->data[0] = 0xe; + state->data[1] = 0x87; + state->data[2] = 0x0; + + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x86; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x86; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x80; - obuf[2] = 0; + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 0; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); msleep(50); - obuf[0] = 0xe; - obuf[1] = 0x80; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0x51; + state->data[0] = 0x51; - if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); - d->fe_adap[0].fe = dvb_attach(cxd2820r_attach, &cxd2820r_config, - &d->dev->i2c_adap, NULL); - if (d->fe_adap[0].fe != NULL) { - if (dvb_attach(tda18271_attach, d->fe_adap[0].fe, 0x60, - &d->dev->i2c_adap, &tda18271_config)) { + mutex_unlock(&d->data_mutex); + + adap->fe_adap[0].fe = dvb_attach(cxd2820r_attach, &cxd2820r_config, + &d->i2c_adap, NULL); + if (adap->fe_adap[0].fe != NULL) { + if (dvb_attach(tda18271_attach, adap->fe_adap[0].fe, 0x60, + &d->i2c_adap, &tda18271_config)) { info("Attached TDA18271HD/CXD2820R!"); return 0; } @@ -1410,23 +1443,30 @@ static int t220_frontend_attach(struct dvb_usb_adapter *d) return -EIO; } -static int m88rs2000_frontend_attach(struct dvb_usb_adapter *d) +static int m88rs2000_frontend_attach(struct dvb_usb_adapter *adap) { - u8 obuf[] = { 0x51 }; - u8 ibuf[] = { 0 }; + struct dvb_usb_device *d = adap->dev; + struct dw2102_state *state = d->priv; + + mutex_lock(&d->data_mutex); - if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0) + state->data[0] = 0x51; + + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); - d->fe_adap[0].fe = dvb_attach(m88rs2000_attach, &s421_m88rs2000_config, - &d->dev->i2c_adap); + mutex_unlock(&d->data_mutex); - if (d->fe_adap[0].fe == NULL) + adap->fe_adap[0].fe = dvb_attach(m88rs2000_attach, + &s421_m88rs2000_config, + &d->i2c_adap); + + if (adap->fe_adap[0].fe == NULL) return -EIO; - if (dvb_attach(ts2020_attach, d->fe_adap[0].fe, + if (dvb_attach(ts2020_attach, adap->fe_adap[0].fe, &dw2104_ts2020_config, - &d->dev->i2c_adap)) { + &d->i2c_adap)) { info("Attached RS2000/TS2020!"); return 0; } @@ -1439,44 +1479,50 @@ static int tt_s2_4600_frontend_attach(struct dvb_usb_adapter *adap) { struct dvb_usb_device *d = adap->dev; struct dw2102_state *state = d->priv; - u8 obuf[3] = { 0xe, 0x80, 0 }; - u8 ibuf[] = { 0 }; struct i2c_adapter *i2c_adapter; struct i2c_client *client; struct i2c_board_info board_info; struct m88ds3103_platform_data m88ds3103_pdata = {}; struct ts2020_config ts2020_config = {}; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + mutex_lock(&d->data_mutex); + + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 0x0; + + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x02; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x02; + state->data[2] = 1; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); msleep(300); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 0; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 0; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 1; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0x51; + state->data[0] = 0x51; - if (dvb_usb_generic_rw(d, obuf, 1, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); + mutex_unlock(&d->data_mutex); + /* attach demod */ m88ds3103_pdata.clk = 27000000; m88ds3103_pdata.i2c_wr_max = 33; -- cgit v1.2.3 From 8e51533780ba223a3562ff4382c6b6f350c7e9a4 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Fri, 10 Feb 2017 17:21:00 -0600 Subject: pinctrl: qcom: add get_direction function The get_direction callback function allows gpiolib to know the current direction (input vs output) for a given GPIO. This is particularly useful on ACPI systems, where the GPIOs are configured only by firmware (typically UEFI), so the only way to know the initial values to query the hardware directly. Without this function, gpiolib thinks that all GPIOs are configured for input. Signed-off-by: Timur Tabi Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-msm.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index f8e9e1c2b2f6..c978be5eb9eb 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -422,6 +422,20 @@ static int msm_gpio_direction_output(struct gpio_chip *chip, unsigned offset, in return 0; } +static int msm_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) +{ + struct msm_pinctrl *pctrl = gpiochip_get_data(chip); + const struct msm_pingroup *g; + u32 val; + + g = &pctrl->soc->groups[offset]; + + val = readl(pctrl->regs + g->ctl_reg); + + /* 0 = output, 1 = input */ + return val & BIT(g->oe_bit) ? 0 : 1; +} + static int msm_gpio_get(struct gpio_chip *chip, unsigned offset) { const struct msm_pingroup *g; @@ -510,6 +524,7 @@ static void msm_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) static struct gpio_chip msm_gpio_template = { .direction_input = msm_gpio_direction_input, .direction_output = msm_gpio_direction_output, + .get_direction = msm_gpio_get_direction, .get = msm_gpio_get, .set = msm_gpio_set, .request = gpiochip_generic_request, -- cgit v1.2.3 From 96e1ce8fcd66d850196758c038bd9d6f7857c518 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Mon, 20 Feb 2017 21:00:42 +0900 Subject: pinctrl: uniphier: change pin names of aio/xirq for LD11 This patch changes pin names of AIO and XIRQ according to updated specification. Signed-off-by: Kunihiko Hayashi Acked-by: Masahiro Yamada Signed-off-by: Linus Walleij --- drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c index 77a0236ee781..83f8864fa76a 100644 --- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c +++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c @@ -390,22 +390,22 @@ static const struct pinctrl_pin_desc uniphier_ld11_pins[] = { UNIPHIER_PINCTRL_PIN(140, "AO1D0", 140, 140, UNIPHIER_PIN_DRV_1BIT, 140, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(141, "TCON0", 141, + UNIPHIER_PINCTRL_PIN(141, "AO1D1", 141, 141, UNIPHIER_PIN_DRV_1BIT, 141, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(142, "TCON1", 142, + UNIPHIER_PINCTRL_PIN(142, "AO1D2", 142, 142, UNIPHIER_PIN_DRV_1BIT, 142, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(143, "TCON2", 143, + UNIPHIER_PINCTRL_PIN(143, "XIRQ9", 143, 143, UNIPHIER_PIN_DRV_1BIT, 143, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(144, "TCON3", 144, + UNIPHIER_PINCTRL_PIN(144, "XIRQ10", 144, 144, UNIPHIER_PIN_DRV_1BIT, 144, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(145, "TCON4", 145, + UNIPHIER_PINCTRL_PIN(145, "XIRQ11", 145, 145, UNIPHIER_PIN_DRV_1BIT, 145, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(146, "TCON5", 146, + UNIPHIER_PINCTRL_PIN(146, "XIRQ13", 146, 146, UNIPHIER_PIN_DRV_1BIT, 146, UNIPHIER_PIN_PULL_DOWN), UNIPHIER_PINCTRL_PIN(147, "PWMA", 147, -- cgit v1.2.3 From b3bd0f2849f4480fc4f40bb954d27e58f4f0786b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Mar 2017 23:50:19 +0100 Subject: staging/vc04_services: add CONFIG_OF dependency After several hours of debugging this obviously bogus but elaborate gcc-7.0.1 warning, drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c: In function 'vchiq_complete_bulk': drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c:603:4: error: argument 2 null where non-null expected [-Werror=nonnull] memcpy((char *)page_address(pages[0]) + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pagelist->offset, ~~~~~~~~~~~~~~~~~ fragments, ~~~~~~~~~~ head_bytes); ~~~~~~~~~~~ In file included from include/linux/string.h:18:0, from include/linux/bitmap.h:8, from include/linux/cpumask.h:11, from include/linux/interrupt.h:9, from drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c:37: arch/arm/include/asm/string.h:16:15: note: in a call to function 'memcpy' declared here extern void * memcpy(void *, const void *, __kernel_size_t) __nocapture(2); ^~~~~~ I have concluded that gcc was technically right in the first place: vchiq_complete_bulk is an externally visible function that calls free_pagelist(), which in turn derives a pointer from the global g_fragments_base variable. g_fragments_base is initialized in vchiq_platform_init(), but we only get there if of_property_read_u32() successfully reads the cache line size. When CONFIG_OF is disabled, this always fails, and g_fragments_base is guaranteed to be NULL when vchiq_complete_bulk() gets called. This adds a CONFIG_OF Kconfig dependency, which is also technically correct but nonobvious, and thus seems like a good fit for the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/vc04_services/Kconfig b/drivers/staging/vc04_services/Kconfig index e61e4ca064a8..74094fff4367 100644 --- a/drivers/staging/vc04_services/Kconfig +++ b/drivers/staging/vc04_services/Kconfig @@ -1,6 +1,7 @@ config BCM2835_VCHIQ tristate "Videocore VCHIQ" depends on HAS_DMA + depends on OF depends on RASPBERRYPI_FIRMWARE || (COMPILE_TEST && !RASPBERRYPI_FIRMWARE) default y help -- cgit v1.2.3 From eb38d913c27f32f4df173791051fecf6aca34173 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 2 Mar 2017 10:44:58 +0200 Subject: Revert "usb: gadget: uvc: Add missing call for additional setup data" This reverts commit 4fbac5206afd01b717d4bdc58793d471f3391b4b. This commit breaks g_webcam when used with uvc-gadget [1]. The user space application (e.g. uvc-gadget) is responsible for sending response to UVC class specific requests on control endpoint in uvc_send_response() in uvc_v4l2.c. The bad commit was causing a duplicate response to be sent with incorrect response data thus causing UVC probe to fail at the host and broken control transfer endpoint at the gadget. [1] - git://git.ideasonboard.org/uvc-gadget.git Cc: # v4.9+ Acked-by: Laurent Pinchart Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_uvc.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 27ed51b5082f..29b41b5dee04 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -258,13 +258,6 @@ uvc_function_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl) memcpy(&uvc_event->req, ctrl, sizeof(uvc_event->req)); v4l2_event_queue(&uvc->vdev, &v4l2_event); - /* Pass additional setup data to userspace */ - if (uvc->event_setup_out && uvc->event_length) { - uvc->control_req->length = uvc->event_length; - return usb_ep_queue(uvc->func.config->cdev->gadget->ep0, - uvc->control_req, GFP_ATOMIC); - } - return 0; } -- cgit v1.2.3 From 5bbc852676ae08e818241cf66a3ffe4be44225c4 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 28 Feb 2017 14:25:45 +0800 Subject: usb: gadget: dummy_hcd: clear usb_gadget region before registration When the user does device unbind and rebind test, the kernel will show below dump due to usb_gadget memory region is dirty after unbind. Clear usb_gadget region for every new probe. root@imx6qdlsolo:/sys/bus/platform/drivers/dummy_udc# echo dummy_udc.0 > bind [ 102.523312] kobject (eddd78b0): tried to init an initialized object, something is seriously wrong. [ 102.532447] CPU: 0 PID: 734 Comm: sh Not tainted 4.10.0-rc7-00872-g1b2b8e9 #1298 [ 102.539866] Hardware name: Freescale i.MX6 SoloX (Device Tree) [ 102.545717] Backtrace: [ 102.548225] [] (dump_backtrace) from [] (show_stack+0x18/0x1c) [ 102.555822] r7:ede34000 r6:60010013 r5:00000000 r4:c0f29418 [ 102.561512] [] (show_stack) from [] (dump_stack+0xb4/0xe8) [ 102.568764] [] (dump_stack) from [] (kobject_init+0x80/0x9c) [ 102.576187] r10:0000001f r9:eddd7000 r8:eeaf8c10 r7:eddd78a8 r6:c177891c r5:c0f3b060 [ 102.584036] r4:eddd78b0 r3:00000000 [ 102.587641] [] (kobject_init) from [] (device_initialize+0x28/0xf8) [ 102.595665] r5:eebc4800 r4:eddd78a8 [ 102.599268] [] (device_initialize) from [] (device_register+0x14/0x20) [ 102.607556] r7:eddd78a8 r6:00000000 r5:eebc4800 r4:eddd78a8 [ 102.613256] [] (device_register) from [] (usb_add_gadget_udc_release+0x8c/0x1ec) [ 102.622410] r5:eebc4800 r4:eddd7860 [ 102.626015] [] (usb_add_gadget_udc_release) from [] (usb_add_gadget_udc+0x14/0x18) [ 102.635351] r10:0000001f r9:eddd7000 r8:eddd788c r7:bf003770 r6:eddd77f8 r5:eddd7818 [ 102.643198] r4:eddd785c r3:eddd7b24 [ 102.646834] [] (usb_add_gadget_udc) from [] (dummy_udc_probe+0x170/0x1c4 [dummy_hcd]) [ 102.656458] [] (dummy_udc_probe [dummy_hcd]) from [] (platform_drv_probe+0x54/0xb8) [ 102.665881] r10:00000008 r9:c1778960 r8:bf004128 r7:fffffdfb r6:bf004128 r5:eeaf8c10 [ 102.673727] r4:eeaf8c10 [ 102.676293] [] (platform_drv_probe) from [] (driver_probe_device+0x264/0x474) [ 102.685186] r7:00000000 r6:00000000 r5:c1778960 r4:eeaf8c10 [ 102.690876] [] (driver_probe_device) from [] (bind_store+0xb8/0x14c) [ 102.698994] r10:eeb3bb4c r9:ede34000 r8:0000000c r7:eeaf8c44 r6:bf004128 r5:c0f3b668 [ 102.706840] r4:eeaf8c10 [ 102.709402] [] (bind_store) from [] (drv_attr_store+0x28/0x34) [ 102.716998] r9:ede34000 r8:00000000 r7:ee3863c0 r6:ee3863c0 r5:c0538c80 r4:c053970c [ 102.724776] [] (drv_attr_store) from [] (sysfs_kf_write+0x50/0x54) [ 102.732711] r5:c0538c80 r4:0000000c [ 102.736313] [] (sysfs_kf_write) from [] (kernfs_fop_write+0x100/0x214) [ 102.744599] r7:ee3863c0 r6:eeb3bb40 r5:00000000 r4:00000000 [ 102.750287] [] (kernfs_fop_write) from [] (__vfs_write+0x34/0x120) [ 102.758231] r10:00000000 r9:ede34000 r8:c0108bc4 r7:0000000c r6:ede35f80 r5:c029bd84 [ 102.766077] r4:ee223780 [ 102.768638] [] (__vfs_write) from [] (vfs_write+0xa8/0x170) [ 102.775974] r9:ede34000 r8:c0108bc4 r7:ede35f80 r6:01861cb0 r5:ee223780 r4:0000000c [ 102.783743] [] (vfs_write) from [] (SyS_write+0x4c/0xa8) [ 102.790818] r9:ede34000 r8:c0108bc4 r7:0000000c r6:01861cb0 r5:ee223780 r4:ee223780 [ 102.798595] [] (SyS_write) from [] (ret_fast_syscall+0x0/0x1c) [ 102.806188] r7:00000004 r6:b6e83d58 r5:01861cb0 r4:0000000c Fixes: 90fccb529d24 ("usb: gadget: Gadget directory cleanup - group UDC drivers") Cc: stable Acked-by: Alan Stern Signed-off-by: Peter Chen Tested-by: Xiaolong Ye Reported-by: Fengguang Wu Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/dummy_hcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index c60abe3a68f9..8cabc5944d5f 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -1031,6 +1031,8 @@ static int dummy_udc_probe(struct platform_device *pdev) int rc; dum = *((void **)dev_get_platdata(&pdev->dev)); + /* Clear usb_gadget region for new registration to udc-core */ + memzero_explicit(&dum->gadget, sizeof(struct usb_gadget)); dum->gadget.name = gadget_name; dum->gadget.ops = &dummy_ops; dum->gadget.max_speed = USB_SPEED_SUPER; -- cgit v1.2.3 From 077dbaee9df53c597df532a08d721d03f4570f3d Mon Sep 17 00:00:00 2001 From: Franck Demathieu Date: Thu, 23 Feb 2017 10:48:55 +0100 Subject: irqchip/crossbar: Fix incorrect type of local variables The max and entry variables are unsigned according to the dt-bindings. Fix following 3 sparse issues (-Wtypesign): drivers/irqchip/irq-crossbar.c:222:52: warning: incorrect type in argument 3 (different signedness) drivers/irqchip/irq-crossbar.c:222:52: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:222:52: got int * drivers/irqchip/irq-crossbar.c:245:56: warning: incorrect type in argument 4 (different signedness) drivers/irqchip/irq-crossbar.c:245:56: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:245:56: got int * drivers/irqchip/irq-crossbar.c:263:56: warning: incorrect type in argument 4 (different signedness) drivers/irqchip/irq-crossbar.c:263:56: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:263:56: got int * Signed-off-by: Franck Demathieu Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-crossbar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 1eef56a89b1f..05bbf171df37 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -198,7 +198,8 @@ static const struct irq_domain_ops crossbar_domain_ops = { static int __init crossbar_of_init(struct device_node *node) { - int i, size, max = 0, reserved = 0, entry; + int i, size, reserved = 0; + u32 max = 0, entry; const __be32 *irqsr; int ret = -ENOMEM; -- cgit v1.2.3 From b3e228473e6cec7cf83b4025b4570c8066ab2dd8 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Thu, 2 Mar 2017 16:11:47 +0100 Subject: irqdomain: Add empty irq_domain_check_msi_remap Fix following build error for s390: drivers/vfio/vfio_iommu_type1.c: In function 'vfio_iommu_type1_attach_group': drivers/vfio/vfio_iommu_type1.c:1290:25: error: implicit declaration of function 'irq_domain_check_msi_remap' Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Marc Zyngier --- include/linux/irqdomain.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 188eced6813e..9f3616085423 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -524,6 +524,10 @@ static inline struct irq_domain *irq_find_matching_fwnode( { return NULL; } +static inline bool irq_domain_check_msi_remap(void) +{ + return false; +} #endif /* !CONFIG_IRQ_DOMAIN */ #endif /* _LINUX_IRQDOMAIN_H */ -- cgit v1.2.3 From 38355b2a44776c25b0f2ad466e8c51bb805b3032 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 28 Feb 2017 10:55:30 +0000 Subject: usb: gadget: configs: plug memory leak When binding a gadget to a device, "name" is stored in gi->udc_name, but this does not happen when unregistering and the string is leaked. Signed-off-by: John Keeping Signed-off-by: Felipe Balbi --- drivers/usb/gadget/configfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 78c44979dde3..cbff3b02840d 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -269,6 +269,7 @@ static ssize_t gadget_dev_desc_UDC_store(struct config_item *item, ret = unregister_gadget(gi); if (ret) goto err; + kfree(name); } else { if (gi->composite.gadget_driver.udc_name) { ret = -EBUSY; -- cgit v1.2.3 From 73561128eb72ca04c3be6e240f162a861bf68a86 Mon Sep 17 00:00:00 2001 From: Franck Demathieu Date: Mon, 27 Feb 2017 11:52:46 +0100 Subject: usb: dwc3: Fix incorrect type for utmi mode The utmi mode is unsigned according the dt-bindings. Fix sparse issue (-Wtypesign): drivers/usb/dwc3/dwc3-omap.c:391:50: warning: incorrect type in argument 3 (different signedness) drivers/usb/dwc3/dwc3-omap.c:391:50: expected unsigned int [usertype] *out_value drivers/usb/dwc3/dwc3-omap.c:391:50: got int * Signed-off-by: Franck Demathieu Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 2092e46b1380..4a595777969e 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -392,7 +392,7 @@ static void dwc3_omap_set_utmi_mode(struct dwc3_omap *omap) { u32 reg; struct device_node *node = omap->dev->of_node; - int utmi_mode = 0; + u32 utmi_mode = 0; reg = dwc3_omap_read_utmi_ctrl(omap); -- cgit v1.2.3 From 4242820277b5378c9e4064e79306f326d731472f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Feb 2017 11:33:27 +0100 Subject: usb: gadget: udc: atmel: fix debug output The debug output now contains the wrong variable, as seen from the compiler warning: drivers/usb/gadget/udc/atmel_usba_udc.c: In function 'usba_ep_enable': drivers/usb/gadget/udc/atmel_usba_udc.c:632:550: error: 'ept_cfg' may be used uninitialized in this function [-Werror=maybe-uninitialized] DBG(DBG_ERR, "%s: EPT_CFG = 0x%lx (maxpacket = %lu)\n", This changes the debug output the same way as the other code. Fixes: 741d2558bf0a ("usb: gadget: udc: atmel: Update endpoint allocation scheme") Signed-off-by: Arnd Bergmann Acked-by: Alexandre Belloni Acked-by: Nicolas Ferre Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/atmel_usba_udc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index 11bbce28bc23..2035906b8ced 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -610,7 +610,7 @@ usba_ep_enable(struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) { struct usba_ep *ep = to_usba_ep(_ep); struct usba_udc *udc = ep->udc; - unsigned long flags, ept_cfg, maxpacket; + unsigned long flags, maxpacket; unsigned int nr_trans; DBG(DBG_GADGET, "%s: ep_enable: desc=%p\n", ep->ep.name, desc); @@ -630,7 +630,7 @@ usba_ep_enable(struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) ep->is_in = 0; DBG(DBG_ERR, "%s: EPT_CFG = 0x%lx (maxpacket = %lu)\n", - ep->ep.name, ept_cfg, maxpacket); + ep->ep.name, ep->ept_cfg, maxpacket); if (usb_endpoint_dir_in(desc)) { ep->is_in = 1; -- cgit v1.2.3 From b6e7aeeaf235901c42ec35de4633c7c69501d303 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 21 Feb 2017 22:33:11 +0100 Subject: USB: gadgetfs: Fix a potential memory leak in 'dev_config()' 'kbuf' is allocated just a few lines above using 'memdup_user()'. If the 'if (dev->buf)' test fails, this memory is never released. Signed-off-by: Christophe JAILLET Signed-off-by: Felipe Balbi --- drivers/usb/gadget/legacy/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index a2615d64d07c..0513dfa008e6 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1782,8 +1782,10 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) spin_lock_irq (&dev->lock); value = -EINVAL; - if (dev->buf) + if (dev->buf) { + kfree(kbuf); goto fail; + } dev->buf = kbuf; /* full or low speed config */ -- cgit v1.2.3 From 3ba534df815f233535b5a3dd3de41055666bbd21 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Thu, 16 Feb 2017 14:54:12 +0100 Subject: Revert "usb: gadget: f_fs: Fix ExtCompat descriptor validation" This reverts commit ac670a3a650b899fc020b81f63e810d06015b865. This introduce bug we already fixed in commit 53642399aa71 ("usb: gadget: f_fs: Fix wrong check on reserved1 wof OS_DESC_EXT_COMPAT") Next FFS (adb) SS enumeration fail with Windows OS. Signed-off-by: Janusz Dziedzic Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 7e976f00cb02..a0085571824d 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2264,7 +2264,7 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, if (len < sizeof(*d) || d->bFirstInterfaceNumber >= ffs->interfaces_count || - d->Reserved1) + !d->Reserved1) return -EINVAL; for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i) if (d->Reserved2[i]) -- cgit v1.2.3 From 1551e35ea4189c1f7199fe278395fc94196715f2 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 15 Feb 2017 14:16:26 +0200 Subject: usb: dwc3: gadget: Fix system suspend/resume on TI platforms On TI platforms (dra7, am437x), the DWC3_DSTS_DEVCTRLHLT bit is not set after the device controller is stopped via DWC3_DCTL_RUN_STOP. If we don't disconnect and stop the gadget, it stops working after a system resume with the trace below. There is no point in preventing gadget disconnect and gadget stop during system suspend/resume as we're going to suspend in any case, whether DEVCTRLHLT timed out or not. [ 141.727480] ------------[ cut here ]------------ [ 141.732349] WARNING: CPU: 1 PID: 2135 at drivers/usb/dwc3/gadget.c:2384 dwc3_stop_active_transfer.constprop.4+0xc4/0xe4 [dwc3] [ 141.744299] Modules linked in: usb_f_ss_lb g_zero libcomposite xhci_plat_hcd xhci_hcd usbcore dwc3 evdev udc_core m25p80 usb_common spi_nor snd_soc_davinci_mcasp snd_soc_simple_card snd_soc_edma snd_soc_tlv3e [ 141.792163] CPU: 1 PID: 2135 Comm: irq/456-dwc3 Not tainted 4.10.0-rc8 #1138 [ 141.799547] Hardware name: Generic DRA74X (Flattened Device Tree) [ 141.805940] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 141.814066] [] (show_stack) from [] (dump_stack+0xac/0xe0) [ 141.821648] [] (dump_stack) from [] (__warn+0xd8/0x104) [ 141.828955] [] (__warn) from [] (warn_slowpath_null+0x20/0x28) [ 141.836902] [] (warn_slowpath_null) from [] (dwc3_stop_active_transfer.constprop.4+0xc4/0xe4 [dwc3]) [ 141.848329] [] (dwc3_stop_active_transfer.constprop.4 [dwc3]) from [] (__dwc3_gadget_ep_disable+0x64/0x528 [dwc3]) [ 141.861034] [] (__dwc3_gadget_ep_disable [dwc3]) from [] (dwc3_gadget_ep_disable+0x3c/0xc8 [dwc3]) [ 141.872280] [] (dwc3_gadget_ep_disable [dwc3]) from [] (usb_ep_disable+0x11c/0x18c [udc_core]) [ 141.883160] [] (usb_ep_disable [udc_core]) from [] (disable_ep+0x18/0x54 [usb_f_ss_lb]) [ 141.893408] [] (disable_ep [usb_f_ss_lb]) from [] (disable_endpoints+0x18/0x50 [usb_f_ss_lb]) [ 141.904168] [] (disable_endpoints [usb_f_ss_lb]) from [] (disable_source_sink+0x2c/0x34 [usb_f_ss_lb]) [ 141.915771] [] (disable_source_sink [usb_f_ss_lb]) from [] (reset_config+0x48/0x7c [libcomposite]) [ 141.927012] [] (reset_config [libcomposite]) from [] (composite_disconnect+0x2c/0x54 [libcomposite]) [ 141.938444] [] (composite_disconnect [libcomposite]) from [] (usb_gadget_udc_reset+0x10/0x34 [udc_core]) [ 141.950237] [] (usb_gadget_udc_reset [udc_core]) from [] (dwc3_gadget_reset_interrupt+0x64/0x698 [dwc3]) [ 141.962022] [] (dwc3_gadget_reset_interrupt [dwc3]) from [] (dwc3_thread_interrupt+0x618/0x1a3c [dwc3]) [ 141.973723] [] (dwc3_thread_interrupt [dwc3]) from [] (irq_thread_fn+0x1c/0x54) [ 141.983215] [] (irq_thread_fn) from [] (irq_thread+0x120/0x1f0) [ 141.991247] [] (irq_thread) from [] (kthread+0xf8/0x138) [ 141.998641] [] (kthread) from [] (ret_from_fork+0x14/0x24) [ 142.006213] ---[ end trace b4ecfe9f175b9a9c ]--- Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index f875b3f4b0ec..3db5eeae2bfb 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3291,15 +3291,10 @@ void dwc3_gadget_exit(struct dwc3 *dwc) int dwc3_gadget_suspend(struct dwc3 *dwc) { - int ret; - if (!dwc->gadget_driver) return 0; - ret = dwc3_gadget_run_stop(dwc, false, false); - if (ret < 0) - return ret; - + dwc3_gadget_run_stop(dwc, false, false); dwc3_disconnect_gadget(dwc); __dwc3_gadget_stop(dwc); -- cgit v1.2.3 From 0913750f9fb6f26bcd00c8f9dd9a8d1b8d031246 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 15 Feb 2017 13:38:22 +0200 Subject: usb: dwc3-omap: Fix missing break in dwc3_omap_set_mailbox() We need to break from all cases if we want to treat each one of them separately. Reported-by: Gustavo A. R. Silva Fixes: d2728fb3e01f ("usb: dwc3: omap: Pass VBUS and ID events transparently") Cc: #v4.8+ Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-omap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 4a595777969e..f8d0747810e7 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -250,6 +250,7 @@ static void dwc3_omap_set_mailbox(struct dwc3_omap *omap, val = dwc3_omap_read_utmi_ctrl(omap); val |= USBOTGSS_UTMI_OTG_CTRL_IDDIG; dwc3_omap_write_utmi_ctrl(omap, val); + break; case OMAP_DWC3_VBUS_OFF: val = dwc3_omap_read_utmi_ctrl(omap); -- cgit v1.2.3 From df7545719a14fa7b481896fb8689e23d0a00f682 Mon Sep 17 00:00:00 2001 From: Petr Cvek Date: Fri, 24 Feb 2017 02:54:56 +0100 Subject: usb: gadget: pxa27x: Test for a valid argument pointer A call usb_put_phy(udc->transceiver) must be tested for a valid pointer. Use an already existing test for usb_unregister_notifier call. Acked-by: Robert Jarzmik Reported-by: Robert Jarzmik Signed-off-by: Petr Cvek Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/pxa27x_udc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c index e1335ad5bce9..832c4fdbe985 100644 --- a/drivers/usb/gadget/udc/pxa27x_udc.c +++ b/drivers/usb/gadget/udc/pxa27x_udc.c @@ -2534,9 +2534,10 @@ static int pxa_udc_remove(struct platform_device *_dev) usb_del_gadget_udc(&udc->gadget); pxa_cleanup_debugfs(udc); - if (!IS_ERR_OR_NULL(udc->transceiver)) + if (!IS_ERR_OR_NULL(udc->transceiver)) { usb_unregister_notifier(udc->transceiver, &pxa27x_udc_phy); - usb_put_phy(udc->transceiver); + usb_put_phy(udc->transceiver); + } udc->transceiver = NULL; the_controller = NULL; -- cgit v1.2.3 From ef5e2fa9f65befa12f1113c734602d2c1964d2a5 Mon Sep 17 00:00:00 2001 From: Raz Manor Date: Thu, 9 Feb 2017 09:41:08 +0200 Subject: usb: gadget: udc: net2280: Fix tmp reusage in net2280 driver In the function scan_dma_completions() there is a reusage of tmp variable. That coused a wrong value being used in some case when reading a short packet terminated transaction from an endpoint, in 2 concecutive reads. This was my logic for the patch: The req->td->dmadesc equals to 0 iff: -- There was a transaction ending with a short packet, and -- The read() to read it was shorter than the transaction length, and -- The read() to complete it is longer than the residue. I believe this is true from the printouts of various cases, but I can't be positive it is correct. Entering this if, there should be no more data in the endpoint (a short packet terminated the transaction). If there is, the transaction wasn't really done and we should exit and wait for it to finish entirely. That is the inner if. That inner if should never happen, but it is there to be on the safe side. That is why it is marked with the comment /* paranoia */. The size of the data available in the endpoint is ep->dma->dmacount and it is read to tmp. This entire clause is based on my own educated guesses. If we passed that inner if without breaking in the original code, than tmp & DMA_BYTE_MASK_COUNT== 0. That means we will always pass dma bytes count of 0 to dma_done(), meaning all the requested bytes were read. dma_done() reports back to the upper layer that the request (read()) was done and how many bytes were read. In the original code that would always be the request size, regardless of the actual size of the data. That did not make sense to me at all. However, the original value of tmp is req->td->dmacount, which is the dmacount value when the request's dma transaction was finished. And that is a much more reasonable value to report back to the caller. To recreate the problem: Read from a bulk out endpoint in a loop, 1024 * n bytes in each iteration. Connect the PLX to a host you can control. Send to that endpoint 1024 * n + x bytes, such that 0 < x < 1024 * n and (x % 1024) != 0 You would expect the first read() to return 1024 * n and the second read() to return x. But you will get the first read to return 1024 * n and the second one to return 1024 * n. That is true for every positive integer n. Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Signed-off-by: Raz Manor Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/net2280.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index 85504419ab31..3828c2ec8623 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -1146,15 +1146,15 @@ static int scan_dma_completions(struct net2280_ep *ep) */ while (!list_empty(&ep->queue)) { struct net2280_request *req; - u32 tmp; + u32 req_dma_count; req = list_entry(ep->queue.next, struct net2280_request, queue); if (!req->valid) break; rmb(); - tmp = le32_to_cpup(&req->td->dmacount); - if ((tmp & BIT(VALID_BIT)) != 0) + req_dma_count = le32_to_cpup(&req->td->dmacount); + if ((req_dma_count & BIT(VALID_BIT)) != 0) break; /* SHORT_PACKET_TRANSFERRED_INTERRUPT handles "usb-short" @@ -1163,40 +1163,41 @@ static int scan_dma_completions(struct net2280_ep *ep) */ if (unlikely(req->td->dmadesc == 0)) { /* paranoia */ - tmp = readl(&ep->dma->dmacount); - if (tmp & DMA_BYTE_COUNT_MASK) + u32 const ep_dmacount = readl(&ep->dma->dmacount); + + if (ep_dmacount & DMA_BYTE_COUNT_MASK) break; /* single transfer mode */ - dma_done(ep, req, tmp, 0); + dma_done(ep, req, req_dma_count, 0); num_completed++; break; } else if (!ep->is_in && (req->req.length % ep->ep.maxpacket) && !(ep->dev->quirks & PLX_PCIE)) { - tmp = readl(&ep->regs->ep_stat); + u32 const ep_stat = readl(&ep->regs->ep_stat); /* AVOID TROUBLE HERE by not issuing short reads from * your gadget driver. That helps avoids errata 0121, * 0122, and 0124; not all cases trigger the warning. */ - if ((tmp & BIT(NAK_OUT_PACKETS)) == 0) { + if ((ep_stat & BIT(NAK_OUT_PACKETS)) == 0) { ep_warn(ep->dev, "%s lost packet sync!\n", ep->ep.name); req->req.status = -EOVERFLOW; } else { - tmp = readl(&ep->regs->ep_avail); - if (tmp) { + u32 const ep_avail = readl(&ep->regs->ep_avail); + if (ep_avail) { /* fifo gets flushed later */ ep->out_overflow = 1; ep_dbg(ep->dev, "%s dma, discard %d len %d\n", - ep->ep.name, tmp, + ep->ep.name, ep_avail, req->req.length); req->req.status = -EOVERFLOW; } } } - dma_done(ep, req, tmp, 0); + dma_done(ep, req, req_dma_count, 0); num_completed++; } -- cgit v1.2.3 From 587d7e72aedca91cee80c0a56811649c3efab765 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 2 Mar 2017 12:41:48 -0800 Subject: kvm: nVMX: VMCLEAR should not cause the vCPU to shut down MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VMCLEAR should silently ignore a failure to clear the launch state of the VMCS referenced by the operand. Signed-off-by: Jim Mattson [Changed "kvm_write_guest(vcpu->kvm" to "kvm_vcpu_write_guest(vcpu".] Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 283aa8601833..3b626d6dc3ac 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7258,9 +7258,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) static int handle_vmclear(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 zero = 0; gpa_t vmptr; - struct vmcs12 *vmcs12; - struct page *page; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -7271,22 +7270,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx); - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { - /* - * For accurate processor emulation, VMCLEAR beyond available - * physical memory should do nothing at all. However, it is - * possible that a nested vmx bug, not a guest hypervisor bug, - * resulted in this case, so let's shut down before doing any - * more damage: - */ - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); - return 1; - } - vmcs12 = kmap(page); - vmcs12->launch_state = 0; - kunmap(page); - nested_release_page(page); + kvm_vcpu_write_guest(vcpu, + vmptr + offsetof(struct vmcs12, launch_state), + &zero, sizeof(zero)); nested_free_vmcs02(vmx, vmptr); -- cgit v1.2.3 From c771c14baa3319c85512e575671bf0bb18824c11 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Thu, 2 Mar 2017 15:02:06 -0800 Subject: iomap: invalidate page caches should be after iomap_dio_complete() in direct write After XFS switching to iomap based DIO (commit acdda3aae146 ("xfs: use iomap_dio_rw")), I started to notice dio29/dio30 tests failures from LTP run on ppc64 hosts, and they can be reproduced on x86_64 hosts with 512B/1k block size XFS too. dio29 diotest3 -b 65536 -n 100 -i 1000 -o 1024000 dio30 diotest6 -b 65536 -n 100 -i 1000 -o 1024000 The failure message is like: bufcmp: offset 0: Expected: 0x62, got 0x0 diotest03 1 TPASS : Read with Direct IO, Write without diotest03 2 TFAIL : diotest3.c:142: comparsion failed; child=98 offset=1425408 diotest03 3 TFAIL : diotest3.c:194: Write Direct-child 98 failed Direct write wrote 0x62 but buffer read got zero. This is because, when doing direct write to a hole or preallocated file, we invalidate the page caches before converting the extent from unwritten state to normal state, which is done by iomap_dio_complete(), thus leave a window for other buffer reader to cache the unwritten state extent. Consider this case, with sub-page blocksize XFS, two processes are direct writing to different blocksize-aligned regions (say 512B) of the same preallocated file, and reading the region back via buffered I/O to compare contents. process A, region [0,512] process B, region [512,1024] xfs_file_write_iter xfs_file_aio_dio_write iomap_dio_rw iomap_apply invalidate_inode_pages2_range xfs_file_write_iter xfs_file_aio_dio_write iomap_dio_rw iomap_apply invalidate_inode_pages2_range iomap_dio_complete xfs_file_read_iter xfs_file_buffered_aio_read generic_file_read_iter do_generic_file_read iomap_dio_complete xfs_file_read_iter Process A first invalidates page caches, at this point the underlying extent is still in unwritten state (iomap_dio_complete not called yet), and process B finishs direct write and populates page caches via readahead, which caches zeros in page for region A, then process A reads zeros from page cache, instead of the actual data. Fix it by invalidating page caches after converting unwritten extent to make sure we read content from disk after extent state changed, as what we did before switching to iomap based dio. Also introduce a new 'start' variable to save the original write offset (iomap_dio_complete() updates iocb->ki_pos), and a 'err' variable for invalidating caches result, cause we can't reuse 'ret' anymore. Signed-off-by: Eryu Guan Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/iomap.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index 3ca1a8e44135..141c3cd55a8b 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -846,7 +846,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = file_inode(iocb->ki_filp); size_t count = iov_iter_count(iter); - loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0; + loff_t pos = iocb->ki_pos, start = pos; + loff_t end = iocb->ki_pos + count - 1, ret = 0; unsigned int flags = IOMAP_DIRECT; struct blk_plug plug; struct iomap_dio *dio; @@ -887,12 +888,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, } if (mapping->nrpages) { - ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); + ret = filemap_write_and_wait_range(mapping, start, end); if (ret) goto out_free_dio; ret = invalidate_inode_pages2_range(mapping, - iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); + start >> PAGE_SHIFT, end >> PAGE_SHIFT); WARN_ON_ONCE(ret); ret = 0; } @@ -941,6 +942,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, __set_current_state(TASK_RUNNING); } + ret = iomap_dio_complete(dio); + /* * Try again to invalidate clean pages which might have been cached by * non-direct readahead, or faulted in by get_user_pages() if the source @@ -949,12 +952,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, * this invalidation fails, tough, the write still worked... */ if (iov_iter_rw(iter) == WRITE && mapping->nrpages) { - ret = invalidate_inode_pages2_range(mapping, - iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); - WARN_ON_ONCE(ret); + int err = invalidate_inode_pages2_range(mapping, + start >> PAGE_SHIFT, end >> PAGE_SHIFT); + WARN_ON_ONCE(err); } - return iomap_dio_complete(dio); + return ret; out_free_dio: kfree(dio); -- cgit v1.2.3 From 040757f738e13caaa9c5078bca79aa97e11dde88 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 5 Mar 2017 15:03:22 -0600 Subject: ucount: Remove the atomicity from ucount->count Always increment/decrement ucount->count under the ucounts_lock. The increments are there already and moving the decrements there means the locking logic of the code is simpler. This simplification in the locking logic fixes a race between put_ucounts and get_ucounts that could result in a use-after-free because the count could go zero then be found by get_ucounts and then be freed by put_ucounts. A bug presumably this one was found by a combination of syzkaller and KASAN. JongWhan Kim reported the syzkaller failure and Dmitry Vyukov spotted the race in the code. Cc: stable@vger.kernel.org Fixes: f6b2db1a3e8d ("userns: Make the count of user namespaces per user") Reported-by: JongHwan Kim Reported-by: Dmitry Vyukov Reviewed-by: Andrei Vagin Signed-off-by: "Eric W. Biederman" --- include/linux/user_namespace.h | 2 +- kernel/ucount.c | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index be765234c0a2..32354b4b4b2b 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -72,7 +72,7 @@ struct ucounts { struct hlist_node node; struct user_namespace *ns; kuid_t uid; - atomic_t count; + int count; atomic_t ucount[UCOUNT_COUNTS]; }; diff --git a/kernel/ucount.c b/kernel/ucount.c index 62630a40ab3a..b4eeee03934f 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -144,7 +144,7 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) new->ns = ns; new->uid = uid; - atomic_set(&new->count, 0); + new->count = 0; spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); @@ -155,8 +155,10 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) ucounts = new; } } - if (!atomic_add_unless(&ucounts->count, 1, INT_MAX)) + if (ucounts->count == INT_MAX) ucounts = NULL; + else + ucounts->count += 1; spin_unlock_irq(&ucounts_lock); return ucounts; } @@ -165,13 +167,15 @@ static void put_ucounts(struct ucounts *ucounts) { unsigned long flags; - if (atomic_dec_and_test(&ucounts->count)) { - spin_lock_irqsave(&ucounts_lock, flags); + spin_lock_irqsave(&ucounts_lock, flags); + ucounts->count -= 1; + if (!ucounts->count) hlist_del_init(&ucounts->node); - spin_unlock_irqrestore(&ucounts_lock, flags); + else + ucounts = NULL; + spin_unlock_irqrestore(&ucounts_lock, flags); - kfree(ucounts); - } + kfree(ucounts); } static inline bool atomic_inc_below(atomic_t *v, int u) -- cgit v1.2.3 From 1fbdbcea80056acfc8c8506594744f5ec52208c1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 5 Mar 2017 17:05:57 -0800 Subject: avr32: Fix build error caused by include file reshuffling Various avr32 builds fail: arch/avr32/oprofile/backtrace.c:58: error: dereferencing pointer to incomplete type arch/avr32/oprofile/backtrace.c:60: error: implicit declaration of function 'user_mode' Signed-off-by: Guenter Roeck Acked-by: Hans-Christian Noren Egtvedt Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Robert Richter Cc: Thomas Gleixner Cc: oprofile-list@lists.sf.net Fixes: f780d89a0e82 ("sched/headers: Remove from ...") Link: http://lkml.kernel.org/r/1488762357-4500-1-git-send-email-linux@roeck-us.net Signed-off-by: Ingo Molnar --- arch/avr32/oprofile/backtrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/avr32/oprofile/backtrace.c b/arch/avr32/oprofile/backtrace.c index 75d9ad6f99cf..29cf2f191bfd 100644 --- a/arch/avr32/oprofile/backtrace.c +++ b/arch/avr32/oprofile/backtrace.c @@ -14,7 +14,7 @@ */ #include -#include +#include #include /* The first two words of each frame on the stack look like this if we have -- cgit v1.2.3 From 80aa1a54f054a1c71cc88e0cad6cfa0d20a10f23 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 5 Mar 2017 10:27:14 -0800 Subject: h8300: Fix build breakage caused by header file changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following h8300 build failures: arch/h8300/kernel/ptrace_h.c: In function ‘trace_trap’: arch/h8300/kernel/ptrace_h.c:253:3: error: implicit declaration of function ‘force_sig’ Signed-off-by: Guenter Roeck Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: uclinux-h8-devel@lists.sourceforge.jp Fixes: c3edc4010e9d ("sched/headers: Move task_struct::signal and ...") Link: http://lkml.kernel.org/r/1488738434-3504-1-git-send-email-linux@roeck-us.net Signed-off-by: Ingo Molnar --- arch/h8300/kernel/ptrace_h.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/h8300/kernel/ptrace_h.c b/arch/h8300/kernel/ptrace_h.c index fe3b5673baba..f5ff3b794c85 100644 --- a/arch/h8300/kernel/ptrace_h.c +++ b/arch/h8300/kernel/ptrace_h.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #define BREAKINST 0x5730 /* trapa #3 */ -- cgit v1.2.3 From bb35e4515411396219431fa235bf21bf9c2794e9 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 5 Mar 2017 17:13:31 -0800 Subject: drivers/char/nwbutton: Fix build breakage caused by include file reshuffling Fix: drivers/char/nwbutton.c: In function 'button_sequence_finished': drivers/char/nwbutton.c:134:3: error: implicit declaration of function 'kill_cad_pid' The declaration has been moved from one include file to another. Signed-off-by: Guenter Roeck Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: c3edc4010e9d102 ("sched/headers: Move task_struct::signal and ...") Link: http://lkml.kernel.org/r/1488762811-9022-1-git-send-email-linux@roeck-us.net Signed-off-by: Ingo Molnar --- drivers/char/nwbutton.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c index a5b1eb276c0b..e6d0d271c58c 100644 --- a/drivers/char/nwbutton.c +++ b/drivers/char/nwbutton.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include -- cgit v1.2.3 From 5c51f4ae84df0f9df33ac08aa5be50061a8b4242 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 2 Mar 2017 16:57:23 -0600 Subject: objtool: Fix another GCC jump table detection issue Arnd Bergmann reported a (false positive) objtool warning: drivers/infiniband/sw/rxe/rxe_resp.o: warning: objtool: rxe_responder()+0xfe: sibling call from callable instruction with changed frame pointer The issue is in find_switch_table(). It tries to find a switch statement's jump table by walking backwards from an indirect jump instruction, looking for a relocation to the .rodata section. In this case it stopped walking prematurely: the first .rodata relocation it encountered was for a variable (resp_state_name) instead of a jump table, so it just assumed there wasn't a jump table. The fix is to ignore any .rodata relocation which refers to an ELF object symbol. This works because the jump tables are anonymous and have no symbols associated with them. Reported-by: Arnd Bergmann Tested-by: Arnd Bergmann Signed-off-by: Josh Poimboeuf Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 3732710ff6f2 ("objtool: Improve rare switch jump table pattern detection") Link: http://lkml.kernel.org/r/20170302225723.3ndbsnl4hkqbne7a@treble Signed-off-by: Ingo Molnar --- tools/objtool/builtin-check.c | 15 ++++++++++++--- tools/objtool/elf.c | 12 ++++++++++++ tools/objtool/elf.h | 1 + 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 4cfdbb5b6967..066086dd59a8 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -805,11 +805,20 @@ static struct rela *find_switch_table(struct objtool_file *file, insn->jump_dest->offset > orig_insn->offset)) break; + /* look for a relocation which references .rodata */ text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); - if (text_rela && text_rela->sym == file->rodata->sym) - return find_rela_by_dest(file->rodata, - text_rela->addend); + if (!text_rela || text_rela->sym != file->rodata->sym) + continue; + + /* + * Make sure the .rodata address isn't associated with a + * symbol. gcc jump tables are anonymous data. + */ + if (find_symbol_containing(file->rodata, text_rela->addend)) + continue; + + return find_rela_by_dest(file->rodata, text_rela->addend); } return NULL; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 0d7983ac63ef..d897702ce742 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -85,6 +85,18 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) return NULL; } +struct symbol *find_symbol_containing(struct section *sec, unsigned long offset) +{ + struct symbol *sym; + + list_for_each_entry(sym, &sec->symbol_list, list) + if (sym->type != STT_SECTION && + offset >= sym->offset && offset < sym->offset + sym->len) + return sym; + + return NULL; +} + struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, unsigned int len) { diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index aa1ff6596684..731973e1a3f5 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -79,6 +79,7 @@ struct elf { struct elf *elf_open(const char *name); struct section *find_section_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); +struct symbol *find_symbol_containing(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, unsigned int len); -- cgit v1.2.3 From fa3aa7a54fe6d3abf128f13cd4bbd40eaa48fed2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 7 Mar 2017 10:55:34 +0100 Subject: jiffies: Revert bogus conversion of NSEC_PER_SEC to TICK_NSEC commit 93825f2ec736 converted NSEC_PER_SEC to TICK_NSEC because the author confused NSEC_PER_JIFFY with NSEC_PER_SEC. As a result, the calculation of refined jiffies got broken, triggering lockups. Fixes: 93825f2ec736 ("jiffies: Reuse TICK_NSEC instead of NSEC_PER_JIFFY") Reported-and-tested-by: Meelis Roos Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1488880534-3777-1-git-send-email-fweisbec@gmail.com Signed-off-by: Thomas Gleixner --- kernel/time/jiffies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 7906b3f0c41a..497719127bf9 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -125,7 +125,7 @@ int register_refined_jiffies(long cycles_per_second) shift_hz += cycles_per_tick/2; do_div(shift_hz, cycles_per_tick); /* Calculate nsec_per_tick using shift_hz */ - nsec_per_tick = (u64)TICK_NSEC << 8; + nsec_per_tick = (u64)NSEC_PER_SEC << 8; nsec_per_tick += (u32)shift_hz/2; do_div(nsec_per_tick, (u32)shift_hz); -- cgit v1.2.3 From 9afd30dbc82a9dbea4101aba57beb2a2a7e1b8d5 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 28 Feb 2017 18:53:53 +0100 Subject: libceph: fix crush_decode() for older maps Older (shorter) CRUSH maps too need to be finalized. Fixes: 66a0e2d579db ("crush: remove mutable part of CRUSH map") Signed-off-by: Ilya Dryomov --- net/ceph/osdmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 6824c0ec8373..cc22dd282a3e 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -390,9 +390,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end) dout("crush decode tunable chooseleaf_stable = %d\n", c->chooseleaf_stable); - crush_finalize(c); - done: + crush_finalize(c); dout("crush_decode success\n"); return c; -- cgit v1.2.3 From b581a5854eee4b7851dedb0f8c2ceb54fb902c06 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 1 Mar 2017 17:33:27 +0100 Subject: libceph: don't set weight to IN when OSD is destroyed Since ceph.git commit 4e28f9e63644 ("osd/OSDMap: clear osd_info, osd_xinfo on osd deletion"), weight is set to IN when OSD is deleted. This changes the result of applying an incremental for clients, not just OSDs. Because CRUSH computations are obviously affected, pre-4e28f9e63644 servers disagree with post-4e28f9e63644 clients on object placement, resulting in misdirected requests. Mirrors ceph.git commit a6009d1039a55e2c77f431662b3d6cc5a8e8e63f. Fixes: 930c53286977 ("libceph: apply new_state before new_up_client on incrementals") Link: http://tracker.ceph.com/issues/19122 Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/osdmap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index cc22dd282a3e..ffe9e904d4d1 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1379,7 +1379,6 @@ static int decode_new_up_state_weight(void **p, void *end, if ((map->osd_state[osd] & CEPH_OSD_EXISTS) && (xorstate & CEPH_OSD_EXISTS)) { pr_info("osd%d does not exist\n", osd); - map->osd_weight[osd] = CEPH_OSD_IN; ret = set_primary_affinity(map, osd, CEPH_OSD_DEFAULT_PRIMARY_AFFINITY); if (ret) -- cgit v1.2.3 From 8767b293a4ab6632f9288f34bcf2ab9ba20dca3a Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 2 Mar 2017 19:56:57 +0100 Subject: rbd: supported_features bus attribute ... so that userspace can generate meaningful error messages and spell out unsupported features that need to be disabled. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- drivers/block/rbd.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4d6807723798..517838b65964 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -120,10 +120,11 @@ static int atomic_dec_return_safe(atomic_t *v) /* Feature bits */ -#define RBD_FEATURE_LAYERING (1<<0) -#define RBD_FEATURE_STRIPINGV2 (1<<1) -#define RBD_FEATURE_EXCLUSIVE_LOCK (1<<2) -#define RBD_FEATURE_DATA_POOL (1<<7) +#define RBD_FEATURE_LAYERING (1ULL<<0) +#define RBD_FEATURE_STRIPINGV2 (1ULL<<1) +#define RBD_FEATURE_EXCLUSIVE_LOCK (1ULL<<2) +#define RBD_FEATURE_DATA_POOL (1ULL<<7) + #define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \ RBD_FEATURE_STRIPINGV2 | \ RBD_FEATURE_EXCLUSIVE_LOCK | \ @@ -499,16 +500,23 @@ static bool rbd_is_lock_owner(struct rbd_device *rbd_dev) return is_lock_owner; } +static ssize_t rbd_supported_features_show(struct bus_type *bus, char *buf) +{ + return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED); +} + static BUS_ATTR(add, S_IWUSR, NULL, rbd_add); static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove); static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major); static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major); +static BUS_ATTR(supported_features, S_IRUGO, rbd_supported_features_show, NULL); static struct attribute *rbd_bus_attrs[] = { &bus_attr_add.attr, &bus_attr_remove.attr, &bus_attr_add_single_major.attr, &bus_attr_remove_single_major.attr, + &bus_attr_supported_features.attr, NULL, }; -- cgit v1.2.3 From 7cc5e38f2f0b0b58a22a4c18a56348dd99a71270 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sun, 12 Feb 2017 17:11:07 +0100 Subject: libceph: osd_request_timeout option osd_request_timeout specifies how many seconds to wait for a response from OSDs before returning -ETIMEDOUT from an OSD request. 0 (default) means no limit. osd_request_timeout is osdkeepalive-precise -- in-flight requests are swept through every osdkeepalive seconds. With ack vs commit behaviour gone, abort_request() is really simple. This is based on a patch from Artur Molchanov . Tested-by: Artur Molchanov Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/libceph.h | 2 ++ include/linux/ceph/osd_client.h | 1 + net/ceph/ceph_common.c | 15 +++++++++++++++ net/ceph/osd_client.c | 36 +++++++++++++++++++++++++++++++++++- 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 1816c5e26581..88cd5dc8e238 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -48,6 +48,7 @@ struct ceph_options { unsigned long mount_timeout; /* jiffies */ unsigned long osd_idle_ttl; /* jiffies */ unsigned long osd_keepalive_timeout; /* jiffies */ + unsigned long osd_request_timeout; /* jiffies */ /* * any type that can't be simply compared or doesn't need need @@ -68,6 +69,7 @@ struct ceph_options { #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) +#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0 /* no timeout */ #define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000) #define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000) diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 2ea0c282f3dc..c125b5d9e13c 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -189,6 +189,7 @@ struct ceph_osd_request { /* internal */ unsigned long r_stamp; /* jiffies, send or check time */ + unsigned long r_start_stamp; /* jiffies */ int r_attempts; struct ceph_eversion r_replay_version; /* aka reassert_version */ u32 r_last_force_resend; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 464e88599b9d..108533859a53 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -230,6 +230,7 @@ enum { Opt_osdkeepalivetimeout, Opt_mount_timeout, Opt_osd_idle_ttl, + Opt_osd_request_timeout, Opt_last_int, /* int args above */ Opt_fsid, @@ -256,6 +257,7 @@ static match_table_t opt_tokens = { {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, {Opt_mount_timeout, "mount_timeout=%d"}, {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, + {Opt_osd_request_timeout, "osd_request_timeout=%d"}, /* int args above */ {Opt_fsid, "fsid=%s"}, {Opt_name, "name=%s"}, @@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name, opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; + opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; /* get mon ip(s) */ /* ip1[:port1][,ip2[:port2]...] */ @@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name, } opt->mount_timeout = msecs_to_jiffies(intval * 1000); break; + case Opt_osd_request_timeout: + /* 0 is "wait forever" (i.e. infinite timeout) */ + if (intval < 0 || intval > INT_MAX / 1000) { + pr_err("osd_request_timeout out of range\n"); + err = -EINVAL; + goto out; + } + opt->osd_request_timeout = msecs_to_jiffies(intval * 1000); + break; case Opt_share: opt->flags &= ~CEPH_OPT_NOSHARE; @@ -557,6 +569,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) seq_printf(m, "osdkeepalivetimeout=%d,", jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000); + if (opt->osd_request_timeout != CEPH_OSD_REQUEST_TIMEOUT_DEFAULT) + seq_printf(m, "osd_request_timeout=%d,", + jiffies_to_msecs(opt->osd_request_timeout) / 1000); /* drop redundant comma */ if (m->count != pos) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b65bbf9f45eb..e15ea9e4c495 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1709,6 +1709,8 @@ static void account_request(struct ceph_osd_request *req) req->r_flags |= CEPH_OSD_FLAG_ONDISK; atomic_inc(&req->r_osdc->num_requests); + + req->r_start_stamp = jiffies; } static void submit_request(struct ceph_osd_request *req, bool wrlocked) @@ -1789,6 +1791,14 @@ static void cancel_request(struct ceph_osd_request *req) ceph_osdc_put_request(req); } +static void abort_request(struct ceph_osd_request *req, int err) +{ + dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err); + + cancel_map_check(req); + complete_request(req, err); +} + static void check_pool_dne(struct ceph_osd_request *req) { struct ceph_osd_client *osdc = req->r_osdc; @@ -2487,6 +2497,7 @@ static void handle_timeout(struct work_struct *work) container_of(work, struct ceph_osd_client, timeout_work.work); struct ceph_options *opts = osdc->client->options; unsigned long cutoff = jiffies - opts->osd_keepalive_timeout; + unsigned long expiry_cutoff = jiffies - opts->osd_request_timeout; LIST_HEAD(slow_osds); struct rb_node *n, *p; @@ -2502,15 +2513,23 @@ static void handle_timeout(struct work_struct *work) struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); bool found = false; - for (p = rb_first(&osd->o_requests); p; p = rb_next(p)) { + for (p = rb_first(&osd->o_requests); p; ) { struct ceph_osd_request *req = rb_entry(p, struct ceph_osd_request, r_node); + p = rb_next(p); /* abort_request() */ + if (time_before(req->r_stamp, cutoff)) { dout(" req %p tid %llu on osd%d is laggy\n", req, req->r_tid, osd->o_osd); found = true; } + if (opts->osd_request_timeout && + time_before(req->r_start_stamp, expiry_cutoff)) { + pr_err_ratelimited("tid %llu on osd%d timeout\n", + req->r_tid, osd->o_osd); + abort_request(req, -ETIMEDOUT); + } } for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) { struct ceph_osd_linger_request *lreq = @@ -2530,6 +2549,21 @@ static void handle_timeout(struct work_struct *work) list_move_tail(&osd->o_keepalive_item, &slow_osds); } + if (opts->osd_request_timeout) { + for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) { + struct ceph_osd_request *req = + rb_entry(p, struct ceph_osd_request, r_node); + + p = rb_next(p); /* abort_request() */ + + if (time_before(req->r_start_stamp, expiry_cutoff)) { + pr_err_ratelimited("tid %llu on osd%d timeout\n", + req->r_tid, osdc->homeless_osd.o_osd); + abort_request(req, -ETIMEDOUT); + } + } + } + if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds)) maybe_request_map(osdc); -- cgit v1.2.3 From 90922a2d03d84de36bf8a9979d62580102f31a92 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Tue, 7 Mar 2017 08:20:38 -0600 Subject: irqchip/gicv3-its: Add workaround for QDF2400 ITS erratum 0065 On Qualcomm Datacenter Technologies QDF2400 SoCs, the ITS hardware implementation uses 16Bytes for Interrupt Translation Entry (ITE), but reports an incorrect value of 8Bytes in GITS_TYPER.ITTE_size. It might cause kernel memory corruption depending on the number of MSI(x) that are configured and the amount of memory that has been allocated for ITEs in its_create_device(). This patch fixes the potential memory corruption by setting the correct ITE size to 16Bytes. Cc: stable@vger.kernel.org Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier --- Documentation/arm64/silicon-errata.txt | 1 + arch/arm64/Kconfig | 10 ++++++++++ drivers/irqchip/irq-gic-v3-its.c | 16 ++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index a71b8095dbd8..2f66683500b8 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -68,3 +68,4 @@ stable kernels. | | | | | | Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | | Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | +| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a39029b5414e..8c7c244247b6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -508,6 +508,16 @@ config QCOM_FALKOR_ERRATUM_1009 If unsure, say Y. +config QCOM_QDF2400_ERRATUM_0065 + bool "QDF2400 E0065: Incorrect GITS_TYPER.ITT_Entry_size" + default y + help + On Qualcomm Datacenter Technologies QDF2400 SoC, ITS hardware reports + ITE size incorrectly. The GITS_TYPER.ITT_Entry_size field should have + been indicated as 16Bytes (0xf), not 8Bytes (0x7). + + If unsure, say Y. + endmenu diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 23201004fd7a..f77f840d2b5f 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1601,6 +1601,14 @@ static void __maybe_unused its_enable_quirk_cavium_23144(void *data) its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_23144; } +static void __maybe_unused its_enable_quirk_qdf2400_e0065(void *data) +{ + struct its_node *its = data; + + /* On QDF2400, the size of the ITE is 16Bytes */ + its->ite_size = 16; +} + static const struct gic_quirk its_quirks[] = { #ifdef CONFIG_CAVIUM_ERRATUM_22375 { @@ -1617,6 +1625,14 @@ static const struct gic_quirk its_quirks[] = { .mask = 0xffff0fff, .init = its_enable_quirk_cavium_23144, }, +#endif +#ifdef CONFIG_QCOM_QDF2400_ERRATUM_0065 + { + .desc = "ITS: QDF2400 erratum 0065", + .iidr = 0x00001070, /* QDF2400 ITS rev 1.x */ + .mask = 0xffffffff, + .init = its_enable_quirk_qdf2400_e0065, + }, #endif { } -- cgit v1.2.3 From 4b9de5da7e120c7f02395da729f0ec77ce7a6044 Mon Sep 17 00:00:00 2001 From: Franck Demathieu Date: Mon, 6 Mar 2017 14:41:06 +0100 Subject: irqchip/crossbar: Fix incorrect type of register size The 'size' variable is unsigned according to the dt-bindings. As this variable is used as integer in other places, create a new variable that allows to fix the following sparse issue (-Wtypesign): drivers/irqchip/irq-crossbar.c:279:52: warning: incorrect type in argument 3 (different signedness) drivers/irqchip/irq-crossbar.c:279:52: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:279:52: got int * Signed-off-by: Franck Demathieu Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-crossbar.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 05bbf171df37..1070b7b959f2 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -199,7 +199,7 @@ static const struct irq_domain_ops crossbar_domain_ops = { static int __init crossbar_of_init(struct device_node *node) { int i, size, reserved = 0; - u32 max = 0, entry; + u32 max = 0, entry, reg_size; const __be32 *irqsr; int ret = -ENOMEM; @@ -276,9 +276,9 @@ static int __init crossbar_of_init(struct device_node *node) if (!cb->register_offsets) goto err_irq_map; - of_property_read_u32(node, "ti,reg-size", &size); + of_property_read_u32(node, "ti,reg-size", ®_size); - switch (size) { + switch (reg_size) { case 1: cb->write = crossbar_writeb; break; @@ -304,7 +304,7 @@ static int __init crossbar_of_init(struct device_node *node) continue; cb->register_offsets[i] = reserved; - reserved += size; + reserved += reg_size; } of_property_read_u32(node, "ti,irqs-safe-map", &cb->safe_map); -- cgit v1.2.3 From 2f707d97982286b307ef2a9b034e19aabc1abb56 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 6 Mar 2017 04:03:28 -0800 Subject: KVM: nVMX: reset nested_run_pending if the vCPU is going to be reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: WARNING: CPU: 1 PID: 27742 at arch/x86/kvm/vmx.c:11029 nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029 CPU: 1 PID: 27742 Comm: a.out Not tainted 4.10.0+ #229 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 panic+0x1fb/0x412 kernel/panic.c:179 __warn+0x1c4/0x1e0 kernel/panic.c:540 warn_slowpath_null+0x2c/0x40 kernel/panic.c:583 nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029 vmx_leave_nested arch/x86/kvm/vmx.c:11136 [inline] vmx_set_msr+0x1565/0x1910 arch/x86/kvm/vmx.c:3324 kvm_set_msr+0xd4/0x170 arch/x86/kvm/x86.c:1099 do_set_msr+0x11e/0x190 arch/x86/kvm/x86.c:1128 __msr_io arch/x86/kvm/x86.c:2577 [inline] msr_io+0x24b/0x450 arch/x86/kvm/x86.c:2614 kvm_arch_vcpu_ioctl+0x35b/0x46a0 arch/x86/kvm/x86.c:3497 kvm_vcpu_ioctl+0x232/0x1120 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2721 vfs_ioctl fs/ioctl.c:43 [inline] do_vfs_ioctl+0x1bf/0x1790 fs/ioctl.c:683 SYSC_ioctl fs/ioctl.c:698 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:689 entry_SYSCALL_64_fastpath+0x1f/0xc2 The syzkaller folks reported a nested_run_pending warning during userspace clear VMX capability which is exposed to L1 before. The warning gets thrown while doing (*(uint32_t*)0x20aecfe8 = (uint32_t)0x1); (*(uint32_t*)0x20aecfec = (uint32_t)0x0); (*(uint32_t*)0x20aecff0 = (uint32_t)0x3a); (*(uint32_t*)0x20aecff4 = (uint32_t)0x0); (*(uint64_t*)0x20aecff8 = (uint64_t)0x0); r[29] = syscall(__NR_ioctl, r[4], 0x4008ae89ul, 0x20aecfe8ul, 0, 0, 0, 0, 0, 0); i.e. KVM_SET_MSR ioctl with struct kvm_msrs { .nmsrs = 1, .pad = 0, .entries = { {.index = MSR_IA32_FEATURE_CONTROL, .reserved = 0, .data = 0} } } The VMLANCH/VMRESUME emulation should be stopped since the CPU is going to reset here. This patch resets the nested_run_pending since the CPU is going to be reset hence there should be nothing pending. Reported-by: Dmitry Vyukov Suggested-by: Radim Krčmář Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Cc: David Hildenbrand Signed-off-by: Wanpeng Li Reviewed-by: David Hildenbrand Reviewed-by: Jim Mattson Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3b626d6dc3ac..ab338581b3ec 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11107,8 +11107,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, */ static void vmx_leave_nested(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu)) + if (is_guest_mode(vcpu)) { + to_vmx(vcpu)->nested.nested_run_pending = 0; nested_vmx_vmexit(vcpu, -1, 0, 0); + } free_nested(to_vmx(vcpu)); } -- cgit v1.2.3 From 370a0ec1819990f8e2a93df7cc9c0146980ed45f Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Mon, 6 Mar 2017 05:42:37 -0800 Subject: KVM: arm/arm64: Let vcpu thread modify its own active state Currently, if a vcpu thread tries to change the active state of an interrupt which is already on the same vcpu's AP list, it will loop forever. Since the VGIC mmio handler is called after a vcpu has already synced back the LR state to the struct vgic_irq, we can just let it proceed safely. Cc: stable@vger.kernel.org Reviewed-by: Marc Zyngier Signed-off-by: Jintack Lim Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 3654b4c835ef..2a5db1352722 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -180,21 +180,37 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool new_active_state) { + struct kvm_vcpu *requester_vcpu; spin_lock(&irq->irq_lock); + + /* + * The vcpu parameter here can mean multiple things depending on how + * this function is called; when handling a trap from the kernel it + * depends on the GIC version, and these functions are also called as + * part of save/restore from userspace. + * + * Therefore, we have to figure out the requester in a reliable way. + * + * When accessing VGIC state from user space, the requester_vcpu is + * NULL, which is fine, because we guarantee that no VCPUs are running + * when accessing VGIC state from user space so irq->vcpu->cpu is + * always -1. + */ + requester_vcpu = kvm_arm_get_running_vcpu(); + /* * If this virtual IRQ was written into a list register, we * have to make sure the CPU that runs the VCPU thread has - * synced back LR state to the struct vgic_irq. We can only - * know this for sure, when either this irq is not assigned to - * anyone's AP list anymore, or the VCPU thread is not - * running on any CPUs. + * synced back the LR state to the struct vgic_irq. * - * In the opposite case, we know the VCPU thread may be on its - * way back from the guest and still has to sync back this - * IRQ, so we release and re-acquire the spin_lock to let the - * other thread sync back the IRQ. + * As long as the conditions below are true, we know the VCPU thread + * may be on its way back from the guest (we kicked the VCPU thread in + * vgic_change_active_prepare) and still has to sync back this IRQ, + * so we release and re-acquire the spin_lock to let the other thread + * sync back the IRQ. */ while (irq->vcpu && /* IRQ may have state in an LR somewhere */ + irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */ irq->vcpu->cpu != -1) /* VCPU thread is running */ cond_resched_lock(&irq->irq_lock); -- cgit v1.2.3 From f050fe7a9164945dd1c28be05bf00e8cfb082ccf Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 20 Feb 2017 12:30:11 +0000 Subject: arm: KVM: Survive unknown traps from guests Currently we BUG() if we see a HSR.EC value we don't recognise. As configurable disables/enables are added to the architecture (controlled by RES1/RES0 bits respectively), with associated synchronous exceptions, it may be possible for a guest to trigger exceptions with classes that we don't recognise. While we can't service these exceptions in a manner useful to the guest, we can avoid bringing down the host. Per ARM DDI 0406C.c, all currently unallocated HSR EC encodings are reserved, and per ARM DDI 0487A.k_iss10775, page G6-4395, EC values within the range 0x00 - 0x2c are reserved for future use with synchronous exceptions, and EC values within the range 0x2d - 0x3f may be used for either synchronous or asynchronous exceptions. The patch makes KVM handle any unknown EC by injecting an UNDEFINED exception into the guest, with a corresponding (ratelimited) warning in the host dmesg. We could later improve on this with with a new (opt-in) exit to the host userspace. Cc: Dave Martin Cc: Suzuki K Poulose Reviewed-by: Christoffer Dall Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_arm.h | 1 + arch/arm/kvm/handle_exit.c | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index e22089fb44dc..a3f0b3d50089 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -209,6 +209,7 @@ #define HSR_EC_IABT_HYP (0x21) #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) +#define HSR_EC_MAX (0x3f) #define HSR_WFI_IS_WFE (_AC(1, UL) << 0) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 4e40d1955e35..96af65a30d78 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -79,7 +79,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n", + hsr); + + kvm_inject_undefined(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { + [0 ... HSR_EC_MAX] = kvm_handle_unknown_ec, [HSR_EC_WFI] = kvm_handle_wfx, [HSR_EC_CP15_32] = kvm_handle_cp15_32, [HSR_EC_CP15_64] = kvm_handle_cp15_64, @@ -98,13 +110,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); - if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || - !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x\n", - (unsigned int)kvm_vcpu_get_hsr(vcpu)); - BUG(); - } - return arm_exit_handlers[hsr_ec]; } -- cgit v1.2.3 From ba4dd156eabdca93501d92a980ba27fa5f4bbd27 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 20 Feb 2017 12:30:12 +0000 Subject: arm64: KVM: Survive unknown traps from guests Currently we BUG() if we see an ESR_EL2.EC value we don't recognise. As configurable disables/enables are added to the architecture (controlled by RES1/RES0 bits respectively), with associated synchronous exceptions, it may be possible for a guest to trigger exceptions with classes that we don't recognise. While we can't service these exceptions in a manner useful to the guest, we can avoid bringing down the host. Per ARM DDI 0487A.k_iss10775, page D7-1937, EC values within the range 0x00 - 0x2c are reserved for future use with synchronous exceptions, and EC values within the range 0x2d - 0x3f may be used for either synchronous or asynchronous exceptions. The patch makes KVM handle any unknown EC by injecting an UNDEFINED exception into the guest, with a corresponding (ratelimited) warning in the host dmesg. We could later improve on this with with a new (opt-in) exit to the host userspace. Cc: Dave Martin Cc: Suzuki K Poulose Reviewed-by: Christoffer Dall Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/kvm/handle_exit.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 1bfe30dfbfe7..fa1b18e364fc 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -135,7 +135,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) return ret; } +static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n", + hsr, esr_get_class_string(hsr)); + + kvm_inject_undefined(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec, [ESR_ELx_EC_WFx] = kvm_handle_wfx, [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32, [ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64, @@ -162,13 +174,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) u32 hsr = kvm_vcpu_get_hsr(vcpu); u8 hsr_ec = ESR_ELx_EC(hsr); - if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || - !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x -- %s\n", - hsr, esr_get_class_string(hsr)); - BUG(); - } - return arm_exit_handlers[hsr_ec]; } -- cgit v1.2.3 From a5e1e6ca94a8cec51571fd62e3eaec269717969c Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 16 Feb 2017 10:41:20 +0000 Subject: KVM: arm/arm64: VGIC: Fix command handling while ITS being disabled The ITS spec says that ITS commands are only processed when the ITS is enabled (section 8.19.4, Enabled, bit[0]). Our emulation was not taking this into account. Fix this by checking the enabled state before handling CWRITER writes. On the other hand that means that CWRITER could advance while the ITS is disabled, and enabling it would need those commands to be processed. Fix this case as well by refactoring actual command processing and calling this from both the GITS_CWRITER and GITS_CTLR handlers. Reviewed-by: Eric Auger Reviewed-by: Christoffer Dall Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-its.c | 109 ++++++++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 44 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 571b64a01c50..8d1da1af4b09 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -360,29 +360,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) return ret; } -static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, - struct vgic_its *its, - gpa_t addr, unsigned int len) -{ - u32 reg = 0; - - mutex_lock(&its->cmd_lock); - if (its->creadr == its->cwriter) - reg |= GITS_CTLR_QUIESCENT; - if (its->enabled) - reg |= GITS_CTLR_ENABLE; - mutex_unlock(&its->cmd_lock); - - return reg; -} - -static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) -{ - its->enabled = !!(val & GITS_CTLR_ENABLE); -} - static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm, struct vgic_its *its, gpa_t addr, unsigned int len) @@ -1161,33 +1138,16 @@ static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its, #define ITS_CMD_SIZE 32 #define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5)) -/* - * By writing to CWRITER the guest announces new commands to be processed. - * To avoid any races in the first place, we take the its_cmd lock, which - * protects our ring buffer variables, so that there is only one user - * per ITS handling commands at a given time. - */ -static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) +/* Must be called with the cmd_lock held. */ +static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) { gpa_t cbaser; u64 cmd_buf[4]; - u32 reg; - if (!its) - return; - - mutex_lock(&its->cmd_lock); - - reg = update_64bit_reg(its->cwriter, addr & 7, len, val); - reg = ITS_CMD_OFFSET(reg); - if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { - mutex_unlock(&its->cmd_lock); + /* Commands are only processed when the ITS is enabled. */ + if (!its->enabled) return; - } - its->cwriter = reg; cbaser = CBASER_ADDRESS(its->cbaser); while (its->cwriter != its->creadr) { @@ -1207,6 +1167,34 @@ static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser)) its->creadr = 0; } +} + +/* + * By writing to CWRITER the guest announces new commands to be processed. + * To avoid any races in the first place, we take the its_cmd lock, which + * protects our ring buffer variables, so that there is only one user + * per ITS handling commands at a given time. + */ +static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u64 reg; + + if (!its) + return; + + mutex_lock(&its->cmd_lock); + + reg = update_64bit_reg(its->cwriter, addr & 7, len, val); + reg = ITS_CMD_OFFSET(reg); + if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { + mutex_unlock(&its->cmd_lock); + return; + } + its->cwriter = reg; + + vgic_its_process_commands(kvm, its); mutex_unlock(&its->cmd_lock); } @@ -1287,6 +1275,39 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm, *regptr = reg; } +static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u32 reg = 0; + + mutex_lock(&its->cmd_lock); + if (its->creadr == its->cwriter) + reg |= GITS_CTLR_QUIESCENT; + if (its->enabled) + reg |= GITS_CTLR_ENABLE; + mutex_unlock(&its->cmd_lock); + + return reg; +} + +static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + mutex_lock(&its->cmd_lock); + + its->enabled = !!(val & GITS_CTLR_ENABLE); + + /* + * Try to process any pending commands. This function bails out early + * if the ITS is disabled or no commands have been queued. + */ + vgic_its_process_commands(kvm, its); + + mutex_unlock(&its->cmd_lock); +} + #define REGISTER_ITS_DESC(off, rd, wr, length, acc) \ { \ .reg_offset = off, \ -- cgit v1.2.3 From 8c71fff434e5ecf5ff27bd61db1bc9ac4c2b2a1b Mon Sep 17 00:00:00 2001 From: Kieran Bingham Date: Fri, 3 Mar 2017 06:31:48 -0300 Subject: [media] v4l: vsp1: Adapt vsp1_du_setup_lif() interface to use a structure The interface to configure the LIF in the VSP1 requires adapting the function prototype for any changes. This makes extending the interface difficult. Change the function prototype to pass a structure which can be easily extended. This changes the means of disabling the pipeline, by now passing a NULL configuration rather than passing either a 0 width or height. [Fixed kerneldoc, made vsp1_du_setup_lif() cfg argument const] Signed-off-by: Kieran Bingham Signed-off-by: Laurent Pinchart Acked-by: Dave Airlie Signed-off-by: Mauro Carvalho Chehab --- drivers/gpu/drm/rcar-du/rcar_du_vsp.c | 8 ++++++-- drivers/media/platform/vsp1/vsp1_drm.c | 33 ++++++++++++++++----------------- include/media/vsp1.h | 13 +++++++++++-- 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c index 83ebd162f3ef..22da2d671297 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c @@ -32,6 +32,10 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc) { const struct drm_display_mode *mode = &crtc->crtc.state->adjusted_mode; struct rcar_du_device *rcdu = crtc->group->dev; + struct vsp1_du_lif_config cfg = { + .width = mode->hdisplay, + .height = mode->vdisplay, + }; struct rcar_du_plane_state state = { .state = { .crtc = &crtc->crtc, @@ -66,12 +70,12 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc) */ crtc->group->need_restart = true; - vsp1_du_setup_lif(crtc->vsp->vsp, mode->hdisplay, mode->vdisplay); + vsp1_du_setup_lif(crtc->vsp->vsp, &cfg); } void rcar_du_vsp_disable(struct rcar_du_crtc *crtc) { - vsp1_du_setup_lif(crtc->vsp->vsp, 0, 0); + vsp1_du_setup_lif(crtc->vsp->vsp, NULL); } void rcar_du_vsp_atomic_begin(struct rcar_du_crtc *crtc) diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c index b4b583f7137a..b4c0f10fc3b0 100644 --- a/drivers/media/platform/vsp1/vsp1_drm.c +++ b/drivers/media/platform/vsp1/vsp1_drm.c @@ -54,12 +54,11 @@ EXPORT_SYMBOL_GPL(vsp1_du_init); /** * vsp1_du_setup_lif - Setup the output part of the VSP pipeline * @dev: the VSP device - * @width: output frame width in pixels - * @height: output frame height in pixels + * @cfg: the LIF configuration * - * Configure the output part of VSP DRM pipeline for the given frame @width and - * @height. This sets up formats on the BRU source pad, the WPF0 sink and source - * pads, and the LIF sink pad. + * Configure the output part of VSP DRM pipeline for the given frame @cfg.width + * and @cfg.height. This sets up formats on the BRU source pad, the WPF0 sink + * and source pads, and the LIF sink pad. * * As the media bus code on the BRU source pad is conditioned by the * configuration of the BRU sink 0 pad, we also set up the formats on all BRU @@ -69,8 +68,7 @@ EXPORT_SYMBOL_GPL(vsp1_du_init); * * Return 0 on success or a negative error code on failure. */ -int vsp1_du_setup_lif(struct device *dev, unsigned int width, - unsigned int height) +int vsp1_du_setup_lif(struct device *dev, const struct vsp1_du_lif_config *cfg) { struct vsp1_device *vsp1 = dev_get_drvdata(dev); struct vsp1_pipeline *pipe = &vsp1->drm->pipe; @@ -79,11 +77,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width, unsigned int i; int ret; - dev_dbg(vsp1->dev, "%s: configuring LIF with format %ux%u\n", - __func__, width, height); - - if (width == 0 || height == 0) { - /* Zero width or height means the CRTC is being disabled, stop + if (!cfg) { + /* NULL configuration means the CRTC is being disabled, stop * the pipeline and turn the light off. */ ret = vsp1_pipeline_stop(pipe); @@ -108,6 +103,9 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width, return 0; } + dev_dbg(vsp1->dev, "%s: configuring LIF with format %ux%u\n", + __func__, cfg->width, cfg->height); + /* Configure the format at the BRU sinks and propagate it through the * pipeline. */ @@ -117,8 +115,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width, for (i = 0; i < bru->entity.source_pad; ++i) { format.pad = i; - format.format.width = width; - format.format.height = height; + format.format.width = cfg->width; + format.format.height = cfg->height; format.format.code = MEDIA_BUS_FMT_ARGB8888_1X32; format.format.field = V4L2_FIELD_NONE; @@ -133,8 +131,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width, } format.pad = bru->entity.source_pad; - format.format.width = width; - format.format.height = height; + format.format.width = cfg->width; + format.format.height = cfg->height; format.format.code = MEDIA_BUS_FMT_ARGB8888_1X32; format.format.field = V4L2_FIELD_NONE; @@ -180,7 +178,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width, /* Verify that the format at the output of the pipeline matches the * requested frame size and media bus code. */ - if (format.format.width != width || format.format.height != height || + if (format.format.width != cfg->width || + format.format.height != cfg->height || format.format.code != MEDIA_BUS_FMT_ARGB8888_1X32) { dev_dbg(vsp1->dev, "%s: format mismatch\n", __func__); return -EPIPE; diff --git a/include/media/vsp1.h b/include/media/vsp1.h index 458b400373d4..38aac554dbba 100644 --- a/include/media/vsp1.h +++ b/include/media/vsp1.h @@ -20,8 +20,17 @@ struct device; int vsp1_du_init(struct device *dev); -int vsp1_du_setup_lif(struct device *dev, unsigned int width, - unsigned int height); +/** + * struct vsp1_du_lif_config - VSP LIF configuration + * @width: output frame width + * @height: output frame height + */ +struct vsp1_du_lif_config { + unsigned int width; + unsigned int height; +}; + +int vsp1_du_setup_lif(struct device *dev, const struct vsp1_du_lif_config *cfg); struct vsp1_du_atomic_config { u32 pixelformat; -- cgit v1.2.3 From 82f2341c94d270421f383641b7cd670e474db56b Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Tue, 28 Feb 2017 19:54:40 +0300 Subject: tty: n_hdlc: get rid of racy n_hdlc.tbuf Currently N_HDLC line discipline uses a self-made singly linked list for data buffers and has n_hdlc.tbuf pointer for buffer retransmitting after an error. The commit be10eb7589337e5defbe214dae038a53dd21add8 ("tty: n_hdlc add buffer flushing") introduced racy access to n_hdlc.tbuf. After tx error concurrent flush_tx_queue() and n_hdlc_send_frames() can put one data buffer to tx_free_buf_list twice. That causes double free in n_hdlc_release(). Let's use standard kernel linked list and get rid of n_hdlc.tbuf: in case of tx error put current data buffer after the head of tx_buf_list. Signed-off-by: Alexander Popov Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_hdlc.c | 132 +++++++++++++++++++++++++++------------------------ 1 file changed, 69 insertions(+), 63 deletions(-) diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 1bacbc3b19a0..e94aea8c0d05 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -114,7 +114,7 @@ #define DEFAULT_TX_BUF_COUNT 3 struct n_hdlc_buf { - struct n_hdlc_buf *link; + struct list_head list_item; int count; char buf[1]; }; @@ -122,8 +122,7 @@ struct n_hdlc_buf { #define N_HDLC_BUF_SIZE (sizeof(struct n_hdlc_buf) + maxframe) struct n_hdlc_buf_list { - struct n_hdlc_buf *head; - struct n_hdlc_buf *tail; + struct list_head list; int count; spinlock_t spinlock; }; @@ -136,7 +135,6 @@ struct n_hdlc_buf_list { * @backup_tty - TTY to use if tty gets closed * @tbusy - reentrancy flag for tx wakeup code * @woke_up - FIXME: describe this field - * @tbuf - currently transmitting tx buffer * @tx_buf_list - list of pending transmit frame buffers * @rx_buf_list - list of received frame buffers * @tx_free_buf_list - list unused transmit frame buffers @@ -149,7 +147,6 @@ struct n_hdlc { struct tty_struct *backup_tty; int tbusy; int woke_up; - struct n_hdlc_buf *tbuf; struct n_hdlc_buf_list tx_buf_list; struct n_hdlc_buf_list rx_buf_list; struct n_hdlc_buf_list tx_free_buf_list; @@ -159,6 +156,8 @@ struct n_hdlc { /* * HDLC buffer list manipulation functions */ +static void n_hdlc_buf_return(struct n_hdlc_buf_list *buf_list, + struct n_hdlc_buf *buf); static void n_hdlc_buf_put(struct n_hdlc_buf_list *list, struct n_hdlc_buf *buf); static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list); @@ -208,16 +207,9 @@ static void flush_tx_queue(struct tty_struct *tty) { struct n_hdlc *n_hdlc = tty2n_hdlc(tty); struct n_hdlc_buf *buf; - unsigned long flags; while ((buf = n_hdlc_buf_get(&n_hdlc->tx_buf_list))) n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, buf); - spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock, flags); - if (n_hdlc->tbuf) { - n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, n_hdlc->tbuf); - n_hdlc->tbuf = NULL; - } - spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); } static struct tty_ldisc_ops n_hdlc_ldisc = { @@ -283,7 +275,6 @@ static void n_hdlc_release(struct n_hdlc *n_hdlc) } else break; } - kfree(n_hdlc->tbuf); kfree(n_hdlc); } /* end of n_hdlc_release() */ @@ -402,13 +393,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) n_hdlc->woke_up = 0; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); - /* get current transmit buffer or get new transmit */ - /* buffer from list of pending transmit buffers */ - - tbuf = n_hdlc->tbuf; - if (!tbuf) - tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list); - + tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list); while (tbuf) { if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)sending frame %p, count=%d\n", @@ -420,7 +405,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) /* rollback was possible and has been done */ if (actual == -ERESTARTSYS) { - n_hdlc->tbuf = tbuf; + n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf); break; } /* if transmit error, throw frame away by */ @@ -435,10 +420,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) /* free current transmit buffer */ n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, tbuf); - - /* this tx buffer is done */ - n_hdlc->tbuf = NULL; - + /* wait up sleeping writers */ wake_up_interruptible(&tty->write_wait); @@ -448,10 +430,12 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)frame %p pending\n", __FILE__,__LINE__,tbuf); - - /* buffer not accepted by driver */ - /* set this buffer as pending buffer */ - n_hdlc->tbuf = tbuf; + + /* + * the buffer was not accepted by driver, + * return it back into tx queue + */ + n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf); break; } } @@ -749,7 +733,8 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, int error = 0; int count; unsigned long flags; - + struct n_hdlc_buf *buf = NULL; + if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)n_hdlc_tty_ioctl() called %d\n", __FILE__,__LINE__,cmd); @@ -763,8 +748,10 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, /* report count of read data available */ /* in next available frame (if any) */ spin_lock_irqsave(&n_hdlc->rx_buf_list.spinlock,flags); - if (n_hdlc->rx_buf_list.head) - count = n_hdlc->rx_buf_list.head->count; + buf = list_first_entry_or_null(&n_hdlc->rx_buf_list.list, + struct n_hdlc_buf, list_item); + if (buf) + count = buf->count; else count = 0; spin_unlock_irqrestore(&n_hdlc->rx_buf_list.spinlock,flags); @@ -776,8 +763,10 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, count = tty_chars_in_buffer(tty); /* add size of next output frame in queue */ spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock,flags); - if (n_hdlc->tx_buf_list.head) - count += n_hdlc->tx_buf_list.head->count; + buf = list_first_entry_or_null(&n_hdlc->tx_buf_list.list, + struct n_hdlc_buf, list_item); + if (buf) + count += buf->count; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock,flags); error = put_user(count, (int __user *)arg); break; @@ -825,14 +814,14 @@ static unsigned int n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp, poll_wait(filp, &tty->write_wait, wait); /* set bits for operations that won't block */ - if (n_hdlc->rx_buf_list.head) + if (!list_empty(&n_hdlc->rx_buf_list.list)) mask |= POLLIN | POLLRDNORM; /* readable */ if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) mask |= POLLHUP; if (tty_hung_up_p(filp)) mask |= POLLHUP; if (!tty_is_writelocked(tty) && - n_hdlc->tx_free_buf_list.head) + !list_empty(&n_hdlc->tx_free_buf_list.list)) mask |= POLLOUT | POLLWRNORM; /* writable */ } return mask; @@ -856,7 +845,12 @@ static struct n_hdlc *n_hdlc_alloc(void) spin_lock_init(&n_hdlc->tx_free_buf_list.spinlock); spin_lock_init(&n_hdlc->rx_buf_list.spinlock); spin_lock_init(&n_hdlc->tx_buf_list.spinlock); - + + INIT_LIST_HEAD(&n_hdlc->rx_free_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->tx_free_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->rx_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->tx_buf_list.list); + /* allocate free rx buffer list */ for(i=0;ispinlock, flags); + + list_add(&buf->list_item, &buf_list->list); + buf_list->count++; + + spin_unlock_irqrestore(&buf_list->spinlock, flags); +} + /** * n_hdlc_buf_put - add specified HDLC buffer to tail of specified list - * @list - pointer to buffer list + * @buf_list - pointer to buffer list * @buf - pointer to buffer */ -static void n_hdlc_buf_put(struct n_hdlc_buf_list *list, +static void n_hdlc_buf_put(struct n_hdlc_buf_list *buf_list, struct n_hdlc_buf *buf) { unsigned long flags; - spin_lock_irqsave(&list->spinlock,flags); - - buf->link=NULL; - if (list->tail) - list->tail->link = buf; - else - list->head = buf; - list->tail = buf; - (list->count)++; - - spin_unlock_irqrestore(&list->spinlock,flags); - + + spin_lock_irqsave(&buf_list->spinlock, flags); + + list_add_tail(&buf->list_item, &buf_list->list); + buf_list->count++; + + spin_unlock_irqrestore(&buf_list->spinlock, flags); } /* end of n_hdlc_buf_put() */ /** * n_hdlc_buf_get - remove and return an HDLC buffer from list - * @list - pointer to HDLC buffer list + * @buf_list - pointer to HDLC buffer list * * Remove and return an HDLC buffer from the head of the specified HDLC buffer * list. * Returns a pointer to HDLC buffer if available, otherwise %NULL. */ -static struct n_hdlc_buf* n_hdlc_buf_get(struct n_hdlc_buf_list *list) +static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *buf_list) { unsigned long flags; struct n_hdlc_buf *buf; - spin_lock_irqsave(&list->spinlock,flags); - - buf = list->head; + + spin_lock_irqsave(&buf_list->spinlock, flags); + + buf = list_first_entry_or_null(&buf_list->list, + struct n_hdlc_buf, list_item); if (buf) { - list->head = buf->link; - (list->count)--; + list_del(&buf->list_item); + buf_list->count--; } - if (!list->head) - list->tail = NULL; - - spin_unlock_irqrestore(&list->spinlock,flags); + + spin_unlock_irqrestore(&buf_list->spinlock, flags); return buf; - } /* end of n_hdlc_buf_get() */ static char hdlc_banner[] __initdata = -- cgit v1.2.3 From 2eacc79c27eb683c4a3ded80c2629387ee0d4e04 Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Sat, 18 Feb 2017 07:31:00 -0500 Subject: radix tree test suite: Add test for idr_get_next() Assert that idr_get_next() returns the next populated entry in the tree with an ID greater than or equal to the value pointed to by @nextid argument. Signed-off-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/idr-test.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index a26098c6123d..f20690ac3a97 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -153,6 +153,30 @@ void idr_nowait_test(void) idr_destroy(&idr); } +void idr_get_next_test(void) +{ + unsigned long i; + int nextid; + DEFINE_IDR(idr); + + int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0}; + + for(i = 0; indices[i]; i++) { + struct item *item = item_create(indices[i], 0); + assert(idr_alloc(&idr, item, indices[i], indices[i+1], + GFP_KERNEL) == indices[i]); + } + + for(i = 0, nextid = 0; indices[i]; i++) { + idr_get_next(&idr, &nextid); + assert(nextid == indices[i]); + nextid++; + } + + idr_for_each(&idr, item_idr_free, &idr); + idr_destroy(&idr); +} + void idr_checks(void) { unsigned long i; @@ -202,6 +226,7 @@ void idr_checks(void) idr_alloc_test(); idr_null_test(); idr_nowait_test(); + idr_get_next_test(); } /* -- cgit v1.2.3 From 166bb1f532fd9fe1b81c6b411ad5d5c9dd21a685 Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Mon, 20 Feb 2017 06:40:00 -0500 Subject: radix tree test suite: Add tests for ida_simple_get() and ida_simple_remove() Assert that ida_simple_get() allocates an id in the passed range or returns error on failure, and ida_simple_remove() releases an allocated id. Signed-off-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/idr-test.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index f20690ac3a97..86de901fa5c6 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -387,6 +387,24 @@ void ida_check_random(void) goto repeat; } +void ida_simple_get_remove_test(void) +{ + DEFINE_IDA(ida); + unsigned long i; + + for (i = 0; i < 10000; i++) { + assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i); + } + assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0); + + for (i = 0; i < 10000; i++) { + ida_simple_remove(&ida, i); + } + assert(ida_is_empty(&ida)); + + ida_destroy(&ida); +} + void ida_checks(void) { DEFINE_IDA(ida); @@ -453,6 +471,7 @@ void ida_checks(void) ida_check_max(); ida_check_conv(); ida_check_random(); + ida_simple_get_remove_test(); radix_tree_cpu_dead(1); } -- cgit v1.2.3 From c629a344accd15022dd6d87fa28c8e22f978fbda Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Sun, 26 Feb 2017 06:33:00 -0500 Subject: radix tree test suite: Add test for radix_tree_clear_tags() Assert that radix_tree_clear_tags() clears the tags on the passed node and slot. Assert that the case where the radix tree has only one entry at index zero and the node is NULL, is also handled. Signed-off-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/tag_check.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index d4ff00989245..36dcf7d6945d 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -330,6 +330,34 @@ static void single_check(void) item_kill_tree(&tree); } +void radix_tree_clear_tags_test(void) +{ + unsigned long index; + struct radix_tree_node *node; + struct radix_tree_iter iter; + void **slot; + + RADIX_TREE(tree, GFP_KERNEL); + + item_insert(&tree, 0); + item_tag_set(&tree, 0, 0); + __radix_tree_lookup(&tree, 0, &node, &slot); + radix_tree_clear_tags(&tree, node, slot); + assert(item_tag_get(&tree, 0, 0) == 0); + + for (index = 0; index < 1000; index++) { + item_insert(&tree, index); + item_tag_set(&tree, index, 0); + } + + radix_tree_for_each_slot(slot, &tree, &iter, 0) { + radix_tree_clear_tags(&tree, iter.node, slot); + assert(item_tag_get(&tree, iter.index, 0) == 0); + } + + item_kill_tree(&tree); +} + void tag_check(void) { single_check(); @@ -347,4 +375,5 @@ void tag_check(void) thrash_tags(); rcu_barrier(); printv(2, "after thrash_tags: %d allocated\n", nr_allocated); + radix_tree_clear_tags_test(); } -- cgit v1.2.3 From 0d4a41c1a0335dc515c6e7fabd447263b57f6457 Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Sun, 26 Feb 2017 16:17:00 -0500 Subject: radix tree test suite: Add performance benchmarks Add performance benchmarks for radix tree insertion, tagging and deletion. Signed-off-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/benchmark.c | 82 +++++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 7 deletions(-) diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c index 9b09ddfe462f..35741b9c2a4a 100644 --- a/tools/testing/radix-tree/benchmark.c +++ b/tools/testing/radix-tree/benchmark.c @@ -17,6 +17,9 @@ #include #include "test.h" +#define for_each_index(i, base, order) \ + for (i = base; i < base + (1 << order); i++) + #define NSEC_PER_SEC 1000000000L static long long benchmark_iter(struct radix_tree_root *root, bool tagged) @@ -57,22 +60,87 @@ again: return nsec; } +static void benchmark_insert(struct radix_tree_root *root, + unsigned long size, unsigned long step, int order) +{ + struct timespec start, finish; + unsigned long index; + long long nsec; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (index = 0 ; index < size ; index += step) + item_insert_order(root, index, order); + + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + printv(2, "Size: %8ld, step: %8ld, order: %d, insertion: %15lld ns\n", + size, step, order, nsec); +} + +static void benchmark_tagging(struct radix_tree_root *root, + unsigned long size, unsigned long step, int order) +{ + struct timespec start, finish; + unsigned long index; + long long nsec; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (index = 0 ; index < size ; index += step) + radix_tree_tag_set(root, index, 0); + + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + printv(2, "Size: %8ld, step: %8ld, order: %d, tagging: %17lld ns\n", + size, step, order, nsec); +} + +static void benchmark_delete(struct radix_tree_root *root, + unsigned long size, unsigned long step, int order) +{ + struct timespec start, finish; + unsigned long index, i; + long long nsec; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (index = 0 ; index < size ; index += step) + for_each_index(i, index, order) + item_delete(root, i); + + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + printv(2, "Size: %8ld, step: %8ld, order: %d, deletion: %16lld ns\n", + size, step, order, nsec); +} + static void benchmark_size(unsigned long size, unsigned long step, int order) { RADIX_TREE(tree, GFP_KERNEL); long long normal, tagged; - unsigned long index; - for (index = 0 ; index < size ; index += step) { - item_insert_order(&tree, index, order); - radix_tree_tag_set(&tree, index, 0); - } + benchmark_insert(&tree, size, step, order); + benchmark_tagging(&tree, size, step, order); tagged = benchmark_iter(&tree, true); normal = benchmark_iter(&tree, false); - printv(2, "Size %ld, step %6ld, order %d tagged %10lld ns, normal %10lld ns\n", - size, step, order, tagged, normal); + printv(2, "Size: %8ld, step: %8ld, order: %d, tagged iteration: %8lld ns\n", + size, step, order, tagged); + printv(2, "Size: %8ld, step: %8ld, order: %d, normal iteration: %8lld ns\n", + size, step, order, normal); + + benchmark_delete(&tree, size, step, order); item_kill_tree(&tree); rcu_barrier(); -- cgit v1.2.3 From 6478581c85cd3091a6188a2433a8093f335f8f2a Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Mon, 27 Feb 2017 07:53:00 -0500 Subject: radix tree test suite: Add performance test for radix_tree_split() Signed-off-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/benchmark.c | 44 ++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c index 35741b9c2a4a..b03c3f30c740 100644 --- a/tools/testing/radix-tree/benchmark.c +++ b/tools/testing/radix-tree/benchmark.c @@ -146,6 +146,46 @@ static void benchmark_size(unsigned long size, unsigned long step, int order) rcu_barrier(); } +static long long __benchmark_split(unsigned long index, + int old_order, int new_order) +{ + struct timespec start, finish; + long long nsec; + RADIX_TREE(tree, GFP_ATOMIC); + + item_insert_order(&tree, index, old_order); + + clock_gettime(CLOCK_MONOTONIC, &start); + radix_tree_split(&tree, index, new_order); + clock_gettime(CLOCK_MONOTONIC, &finish); + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + item_kill_tree(&tree); + + return nsec; + +} + +static void benchmark_split(unsigned long size, unsigned long step) +{ + int i, j, idx; + long long nsec = 0; + + + for (idx = 0; idx < size; idx += step) { + for (i = 3; i < 11; i++) { + for (j = 0; j < i; j++) { + nsec += __benchmark_split(idx, i, j); + } + } + } + + printv(2, "Size %8ld, step %8ld, split time %10lld ns\n", + size, step, nsec); + +} + void benchmark(void) { unsigned long size[] = {1 << 10, 1 << 20, 0}; @@ -163,4 +203,8 @@ void benchmark(void) for (c = 0; size[c]; c++) for (s = 0; step[s]; s++) benchmark_size(size[c], step[s] << 9, 9); + + for (c = 0; size[c]; c++) + for (s = 0; step[s]; s++) + benchmark_split(size[c], step[s]); } -- cgit v1.2.3 From 54f4d3341c8fe31e20915e2c1fb322ff8a069832 Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Mon, 27 Feb 2017 08:11:00 -0500 Subject: radix tree test suite: Add performance test for radix_tree_join() Signed-off-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/benchmark.c | 47 ++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c index b03c3f30c740..99c40f3ed133 100644 --- a/tools/testing/radix-tree/benchmark.c +++ b/tools/testing/radix-tree/benchmark.c @@ -186,6 +186,50 @@ static void benchmark_split(unsigned long size, unsigned long step) } +static long long __benchmark_join(unsigned long index, + unsigned order1, unsigned order2) +{ + unsigned long loc; + struct timespec start, finish; + long long nsec; + void *item, *item2 = item_create(index + 1, order1); + RADIX_TREE(tree, GFP_KERNEL); + + item_insert_order(&tree, index, order2); + item = radix_tree_lookup(&tree, index); + + clock_gettime(CLOCK_MONOTONIC, &start); + radix_tree_join(&tree, index + 1, order1, item2); + clock_gettime(CLOCK_MONOTONIC, &finish); + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + loc = find_item(&tree, item); + if (loc == -1) + free(item); + + item_kill_tree(&tree); + + return nsec; +} + +static void benchmark_join(unsigned long step) +{ + int i, j, idx; + long long nsec = 0; + + for (idx = 0; idx < 1 << 10; idx += step) { + for (i = 1; i < 15; i++) { + for (j = 0; j < i; j++) { + nsec += __benchmark_join(idx, i, j); + } + } + } + + printv(2, "Size %8d, step %8ld, join time %10lld ns\n", + 1 << 10, step, nsec); +} + void benchmark(void) { unsigned long size[] = {1 << 10, 1 << 20, 0}; @@ -207,4 +251,7 @@ void benchmark(void) for (c = 0; size[c]; c++) for (s = 0; step[s]; s++) benchmark_split(size[c], step[s]); + + for (s = 0; step[s]; s++) + benchmark_join(step[s]); } -- cgit v1.2.3 From c4634b08d9eb9c13be2296230bf33c79390dbf9c Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Mon, 27 Feb 2017 08:49:00 -0500 Subject: radix tree test suite: Build 32 bit binaries Add option 'make BUILD=32' for building 32-bit binaries. Signed-off-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index f11315bedefc..b59c2a9ef778 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -10,6 +10,10 @@ ifndef SHIFT SHIFT=3 endif +ifeq ($(BUILD), 32) + CFLAGS += -m32 +endif + targets: mapshift $(TARGETS) main: $(OFILES) -- cgit v1.2.3 From 284d96a494d705b9c5330c900e976fceda885b9f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 2 Mar 2017 04:29:00 -0500 Subject: radix tree test suite: Fix build with --as-needed Currently the radix tree test suite doesn't build with toolchains that use --as-needed by default, for example Ubuntu's: cc -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address -lpthread -lurcu main.o ... -o main /usr/bin/ld: regression1.o: undefined reference to symbol 'pthread_join@@GLIBC_2.17' /lib/powerpc64le-linux-gnu/libpthread.so.0: error adding symbols: DSO missing from command line collect2: error: ld returned 1 exit status This is caused by the custom makefile rules placing LDFLAGS before the .o files that need the libraries. We could fix it by using --no-as-needed, or rewriting the custom rules. But we can also just drop the custom rules and move the libraries to LDLIBS, and then the default rules work correctly - with the one caveat that we need to add -fsanitize=address to LDFLAGS because that must be passed to the linker as well as the compiler. Signed-off-by: Michael Ellerman Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index b59c2a9ef778..022488f50fc6 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -1,6 +1,7 @@ CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address -LDFLAGS += -lpthread -lurcu +LDFLAGS += -fsanitize=address +LDLIBS+= -lpthread -lurcu TARGETS = main idr-test multiorder CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ @@ -17,13 +18,10 @@ endif targets: mapshift $(TARGETS) main: $(OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $^ -o main idr-test: idr-test.o $(CORE_OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $^ -o idr-test multiorder: multiorder.o $(CORE_OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $^ -o multiorder clean: $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h -- cgit v1.2.3 From 3f1b6f9d49ba5a209d745fa2448657d8b66ed0c0 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 2 Mar 2017 12:24:28 -0500 Subject: radix tree test suite: Depend on Makefile and quieten grep Changing the CFLAGS in the Makefile didn't always lead to a recompilation because the OFILES didn't depend on the Makefile. Also, after doing make clean, grep would still complain about a missing map-shift.h; we need -s as well as -q. Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 022488f50fc6..4c6289c5d415 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -28,7 +28,7 @@ clean: vpath %.c ../../lib -$(OFILES): *.h */*.h generated/map-shift.h \ +$(OFILES): Makefile *.h */*.h generated/map-shift.h \ ../../include/linux/*.h \ ../../include/asm/*.h \ ../../../include/linux/radix-tree.h \ @@ -43,7 +43,7 @@ idr.c: ../../../lib/idr.c .PHONY: mapshift mapshift: - @if ! grep -qw $(SHIFT) generated/map-shift.h; then \ + @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ generated/map-shift.h; \ fi -- cgit v1.2.3 From 4ecd9542dbc3e07f3bd3870aac12839f72b47db4 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Mar 2017 12:16:10 -0500 Subject: ida: Free correct IDA bitmap There's a relatively rare race where we look at the per-cpu preallocated IDA bitmap, see it's NULL, allocate a new one, and atomically update it. If the kmalloc() happened to sleep and we were rescheduled to a different CPU, or an interrupt came in at the exact right time, another task might have successfully allocated a bitmap and already deposited it. I forgot what the semantics of cmpxchg() were and ended up freeing the wrong bitmap leading to KASAN reporting a use-after-free. Dmitry found the bug with syzkaller & wrote the patch. I wrote the test case that will reproduce the bug without his patch being applied. Reported-by: Dmitry Vyukov Signed-off-by: Matthew Wilcox --- lib/radix-tree.c | 4 ++-- tools/testing/radix-tree/idr-test.c | 34 +++++++++++++++++++++++++++++++--- tools/testing/radix-tree/main.c | 1 + tools/testing/radix-tree/test.h | 1 + 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 5ed506d648c4..691a9ad48497 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2129,8 +2129,8 @@ int ida_pre_get(struct ida *ida, gfp_t gfp) struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp); if (!bitmap) return 0; - bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap); - kfree(bitmap); + if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap)) + kfree(bitmap); } return 1; diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 86de901fa5c6..30cd0b296f1a 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -363,7 +363,7 @@ void ida_check_random(void) { DEFINE_IDA(ida); DECLARE_BITMAP(bitmap, 2048); - int id; + int id, err; unsigned int i; time_t s = time(NULL); @@ -377,8 +377,11 @@ void ida_check_random(void) ida_remove(&ida, bit); } else { __set_bit(bit, bitmap); - ida_pre_get(&ida, GFP_KERNEL); - assert(!ida_get_new_above(&ida, bit, &id)); + do { + ida_pre_get(&ida, GFP_KERNEL); + err = ida_get_new_above(&ida, bit, &id); + } while (err == -ENOMEM); + assert(!err); assert(id == bit); } } @@ -476,11 +479,36 @@ void ida_checks(void) radix_tree_cpu_dead(1); } +static void *ida_random_fn(void *arg) +{ + rcu_register_thread(); + ida_check_random(); + rcu_unregister_thread(); + return NULL; +} + +void ida_thread_tests(void) +{ + pthread_t threads[10]; + int i; + + for (i = 0; i < ARRAY_SIZE(threads); i++) + if (pthread_create(&threads[i], NULL, ida_random_fn, NULL)) { + perror("creating ida thread"); + exit(1); + } + + while (i--) + pthread_join(threads[i], NULL); +} + int __weak main(void) { radix_tree_init(); idr_checks(); ida_checks(); + ida_thread_tests(); + radix_tree_cpu_dead(1); rcu_barrier(); if (nr_allocated) printf("nr_allocated = %d\n", nr_allocated); diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index b829127d5670..bc9a78449572 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -368,6 +368,7 @@ int main(int argc, char **argv) iteration_test(0, 10 + 90 * long_run); iteration_test(7, 10 + 90 * long_run); single_thread_tests(long_run); + ida_thread_tests(); /* Free any remaining preallocated nodes */ radix_tree_cpu_dead(0); diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index b30e11d9d271..0f8220cc6166 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -36,6 +36,7 @@ void iteration_test(unsigned order, unsigned duration); void benchmark(void); void idr_checks(void); void ida_checks(void); +void ida_thread_tests(void); struct item * item_tag_set(struct radix_tree_root *root, unsigned long index, int tag); -- cgit v1.2.3 From f0f3f2d0a3e0f7c48163fd8b45f5909d46e7f371 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Mar 2017 12:28:37 -0500 Subject: radix tree test suite: Specify -m32 in LDFLAGS too Michael's patch to use the default make rule for linking and the patch from Rehas to use -m32 if building a 32-bit test-suite on a 64-bit platform don't work well together. Reported-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 4c6289c5d415..6a9480c03cbd 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -13,6 +13,7 @@ endif ifeq ($(BUILD), 32) CFLAGS += -m32 + LDFLAGS += -m32 endif targets: mapshift $(TARGETS) -- cgit v1.2.3 From 544714d8e17c33822319d5a1a00e5ddc4db502b6 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Tue, 7 Mar 2017 19:54:05 +0900 Subject: PCI: exynos: Initialize elbi_base even when using PHY framework Even when using the PHY framework, we need the elbi_base. Before this patch, we didn't initialize elbi_base, which caused NULL pointer dereferences later. Fixes: e7cd7ef58e1f ("PCI: exynos: Support the PHY generic framework") Signed-off-by: Jaehoon Chung Signed-off-by: Bjorn Helgaas --- drivers/pci/dwc/pci-exynos.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/dwc/pci-exynos.c b/drivers/pci/dwc/pci-exynos.c index 993b650ef275..44f774c12fb2 100644 --- a/drivers/pci/dwc/pci-exynos.c +++ b/drivers/pci/dwc/pci-exynos.c @@ -132,10 +132,6 @@ static int exynos5440_pcie_get_mem_resources(struct platform_device *pdev, struct device *dev = pci->dev; struct resource *res; - /* If using the PHY framework, doesn't need to get other resource */ - if (ep->using_phy) - return 0; - ep->mem_res = devm_kzalloc(dev, sizeof(*ep->mem_res), GFP_KERNEL); if (!ep->mem_res) return -ENOMEM; @@ -145,6 +141,10 @@ static int exynos5440_pcie_get_mem_resources(struct platform_device *pdev, if (IS_ERR(ep->mem_res->elbi_base)) return PTR_ERR(ep->mem_res->elbi_base); + /* If using the PHY framework, doesn't need to get other resource */ + if (ep->using_phy) + return 0; + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); ep->mem_res->phy_base = devm_ioremap_resource(dev, res); if (IS_ERR(ep->mem_res->phy_base)) -- cgit v1.2.3 From f98c7bce570bdbe344b74ff5daa7dfeef3f22929 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 25 Feb 2017 18:36:44 +0200 Subject: serial: samsung: Continue to work if DMA request fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If DMA is not available (even when configured in DeviceTree), the driver will fail the startup procedure thus making serial console not available. For example this causes boot failure on QEMU ARMv7 (Exynos4210, SMDKC210): [    1.302575] OF: amba_device_add() failed (-19) for /amba/pdma@12680000 ... [   11.435732] samsung-uart 13800000.serial: DMA request failed [   72.963893] samsung-uart 13800000.serial: DMA request failed [   73.143361] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000000 DMA is not necessary for serial to work, so continue with UART startup after emitting a warning. Fixes: 62c37eedb74c ("serial: samsung: add dma reqest/release functions") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index b4f86c219db1..7a17aedbf902 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -1031,8 +1031,10 @@ static int s3c64xx_serial_startup(struct uart_port *port) if (ourport->dma) { ret = s3c24xx_serial_request_dma(ourport); if (ret < 0) { - dev_warn(port->dev, "DMA request failed\n"); - return ret; + dev_warn(port->dev, + "DMA request failed, DMA will not be used\n"); + devm_kfree(port->dev, ourport->dma); + ourport->dma = NULL; } } -- cgit v1.2.3 From 0d5370d1d85251e5893ab7c90a429464de2e140b Mon Sep 17 00:00:00 2001 From: Ethan Zhao Date: Mon, 27 Feb 2017 17:08:44 +0900 Subject: PCI: Prevent VPD access for QLogic ISP2722 QLogic ISP2722-based 16/32Gb Fibre Channel to PCIe Adapter has the VPD access issue too, while read the common pci-sysfs access interface shown as /sys/devices/pci0000:00/0000:00:03.2/0000:0b:00.0/vpd with simple 'cat' could cause system hang and panic: Kernel panic - not syncing: An NMI occurred. Depending on your system the reason for the NMI is logged in any one of the following resources: 1. Integrated Management Log (IML) 2. OA Syslog 3. OA Forward Progress Log 4. iLO Event Log CPU: 0 PID: 15070 Comm: udevadm Not tainted 4.1.12 Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 12/27/2015 0000000000000086 000000007f0cdf51 ffff880c4fa05d58 ffffffff817193de ffffffffa00b42d8 0000000000000075 ffff880c4fa05dd8 ffffffff81714072 0000000000000008 ffff880c4fa05de8 ffff880c4fa05d88 000000007f0cdf51 Call Trace: [] dump_stack+0x63/0x81 [] panic+0xd0/0x20e [] hpwdt_pretimeout+0xdd/0xe0 [hpwdt] [] ? sched_clock+0x9/0x10 [] nmi_handle+0x91/0x170 [] ? nmi_handle+0x9c/0x170 [] io_check_error+0x1e/0xa0 [] default_do_nmi+0x99/0x140 [] do_nmi+0xf4/0x170 [] end_repeat_nmi+0x1a/0x1e [] ? pci_conf1_read+0xeb/0x120 [] ? pci_conf1_read+0xeb/0x120 [] ? pci_conf1_read+0xeb/0x120 <> [] raw_pci_read+0x23/0x40 [] pci_read+0x2c/0x30 [] pci_user_read_config_word+0x72/0x110 [] pci_vpd_pci22_wait+0x96/0x130 [] pci_vpd_pci22_read+0xdb/0x1a0 [] pci_read_vpd+0x20/0x30 [] read_vpd_attr+0x30/0x40 [] sysfs_kf_bin_read+0x47/0x70 [] kernfs_fop_read+0xae/0x180 [] __vfs_read+0x37/0x100 [] ? security_file_permission+0x84/0xa0 [] ? rw_verify_area+0x56/0xe0 [] vfs_read+0x86/0x140 [] SyS_read+0x55/0xd0 [] system_call_fastpath+0x12/0x71 Shutting down cpus with NMI Kernel Offset: disabled drm_kms_helper: panic occurred, switching back to text console So blacklist the access to its VPD. Signed-off-by: Ethan Zhao Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v4.6+ --- drivers/pci/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index f754453fe754..673683660b5c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2174,6 +2174,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, quirk_blacklist_vpd); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, quirk_blacklist_vpd); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, PCI_ANY_ID, quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_QLOGIC, 0x2261, quirk_blacklist_vpd); /* * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the -- cgit v1.2.3 From 3bd7db63a841e8c5297bb18ad801df67d5e38ad2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 1 Mar 2017 00:25:40 -0800 Subject: PCI/ASPM: Always set link->downstream to avoid NULL dereference on remove We call pcie_aspm_exit_link_state() when we remove a device. If the device is the last PCIe function to be removed below a bridge and the bridge has an ASPM link_state struct, we disable ASPM on the link. Disabling ASPM requires link->downstream (used in pcie_config_aspm_link()). We previously set link->downstream in pcie_aspm_cap_init(), but only if the device was not blacklisted. Removing the blacklisted device caused a NULL pointer dereference in the pcie_aspm_exit_link_state() -> pcie_config_aspm_link() path: # echo 1 > /sys/bus/pci/devices/0000\:0b\:00.0/remove ... BUG: unable to handle kernel NULL pointer dereference at 0000000000000080 IP: pcie_config_aspm_link+0x5d/0x2b0 Call Trace: pcie_aspm_exit_link_state+0x75/0x130 pci_stop_bus_device+0xa4/0xb0 pci_stop_and_remove_bus_device_locked+0x1a/0x30 remove_store+0x50/0x70 dev_attr_store+0x18/0x30 sysfs_kf_write+0x44/0x60 kernfs_fop_write+0x10e/0x190 __vfs_write+0x28/0x110 ? rcu_read_lock_sched_held+0x5d/0x80 ? rcu_sync_lockdep_assert+0x2c/0x60 ? __sb_start_write+0x173/0x1a0 ? vfs_write+0xb3/0x180 vfs_write+0xc4/0x180 SyS_write+0x49/0xa0 do_syscall_64+0xa6/0x1c0 entry_SYSCALL64_slow_path+0x25/0x25 ---[ end trace bd187ee0267df5d9 ]--- To avoid this, set link->downstream in alloc_pcie_link_state(), so every pcie_link_state structure has a valid link->downstream pointer. [bhelgaas: changelog] Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Acked-by: Rajat Jain CC: stable@vger.kernel.org --- drivers/pci/pcie/aspm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 973472c23d89..1dfa10cc566b 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -478,7 +478,7 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link, static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) { - struct pci_dev *child, *parent = link->pdev; + struct pci_dev *child = link->downstream, *parent = link->pdev; struct pci_bus *linkbus = parent->subordinate; struct aspm_register_info upreg, dwreg; @@ -491,9 +491,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) /* Get upstream/downstream components' register state */ pcie_get_aspm_reg(parent, &upreg); - child = pci_function_0(linkbus); pcie_get_aspm_reg(child, &dwreg); - link->downstream = child; /* * If ASPM not supported, don't mess with the clocks and link, @@ -800,6 +798,7 @@ static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev) INIT_LIST_HEAD(&link->children); INIT_LIST_HEAD(&link->link); link->pdev = pdev; + link->downstream = pci_function_0(pdev->subordinate); /* * Root Ports and PCI/PCI-X to PCIe Bridges are roots of PCIe -- cgit v1.2.3 From 3802a345321a08093ba2ddb1849e736f84e8d450 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Mar 2017 16:45:58 -0800 Subject: xfs: only reclaim unwritten COW extents periodically We only want to reclaim preallocations from our periodic work item. Currently this is archived by looking for a dirty inode, but that check is rather fragile. Instead add a flag to xfs_reflink_cancel_cow_* so that the caller can ask for just cancelling unwritten extents in the COW fork. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong [darrick: fix typos in commit message] Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_aops.c | 2 +- fs/xfs/xfs_icache.c | 2 +- fs/xfs/xfs_inode.c | 2 +- fs/xfs/xfs_reflink.c | 23 ++++++++++++++++------- fs/xfs/xfs_reflink.h | 4 ++-- fs/xfs/xfs_super.c | 2 +- 6 files changed, 22 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index bf65a9ea8642..aa8a6f0d09c3 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -293,7 +293,7 @@ xfs_end_io( goto done; if (ioend->io_bio->bi_error) { error = xfs_reflink_cancel_cow_range(ip, - ioend->io_offset, ioend->io_size); + ioend->io_offset, ioend->io_size, true); goto done; } error = xfs_reflink_end_cow(ip, ioend->io_offset, diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 7234b9748c36..3531f8f72fa5 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1608,7 +1608,7 @@ xfs_inode_free_cowblocks( xfs_ilock(ip, XFS_IOLOCK_EXCL); xfs_ilock(ip, XFS_MMAPLOCK_EXCL); - ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); + ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index edfa6a55b064..7eaf1ef74e3c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1615,7 +1615,7 @@ xfs_itruncate_extents( /* Remove all pending CoW reservations. */ error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block, - last_block); + last_block, true); if (error) goto out; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index da6d08fb359c..4a84c5ea266d 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -548,14 +548,18 @@ xfs_reflink_trim_irec_to_next_cow( } /* - * Cancel all pending CoW reservations for some block range of an inode. + * Cancel CoW reservations for some block range of an inode. + * + * If cancel_real is true this function cancels all COW fork extents for the + * inode; if cancel_real is false, real extents are not cleared. */ int xfs_reflink_cancel_cow_blocks( struct xfs_inode *ip, struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, - xfs_fileoff_t end_fsb) + xfs_fileoff_t end_fsb, + bool cancel_real) { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); struct xfs_bmbt_irec got, del; @@ -579,7 +583,7 @@ xfs_reflink_cancel_cow_blocks( &idx, &got, &del); if (error) break; - } else { + } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { xfs_trans_ijoin(*tpp, ip, 0); xfs_defer_init(&dfops, &firstfsb); @@ -621,13 +625,17 @@ xfs_reflink_cancel_cow_blocks( } /* - * Cancel all pending CoW reservations for some byte range of an inode. + * Cancel CoW reservations for some byte range of an inode. + * + * If cancel_real is true this function cancels all COW fork extents for the + * inode; if cancel_real is false, real extents are not cleared. */ int xfs_reflink_cancel_cow_range( struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t count) + xfs_off_t count, + bool cancel_real) { struct xfs_trans *tp; xfs_fileoff_t offset_fsb; @@ -653,7 +661,8 @@ xfs_reflink_cancel_cow_range( xfs_trans_ijoin(tp, ip, 0); /* Scrape out the old CoW reservations */ - error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb); + error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb, + cancel_real); if (error) goto out_cancel; @@ -1450,7 +1459,7 @@ next: * We didn't find any shared blocks so turn off the reflink flag. * First, get rid of any leftover CoW mappings. */ - error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF); + error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true); if (error) return error; diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 33ac9b8db683..d29a7967f029 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -39,9 +39,9 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip, struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, - xfs_fileoff_t end_fsb); + xfs_fileoff_t end_fsb, bool cancel_real); extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t count); + xfs_off_t count, bool cancel_real); extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern int xfs_reflink_recover_cow(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 890862f2447c..685c042a120f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -953,7 +953,7 @@ xfs_fs_destroy_inode( XFS_STATS_INC(ip->i_mount, vn_remove); if (xfs_is_reflink_inode(ip)) { - error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); + error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) xfs_warn(ip->i_mount, "Error %d while evicting CoW blocks for inode %llu.", -- cgit v1.2.3 From 627c845c0907894a1e5cd2d90ff4fc86c9e4458e Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Tue, 7 Mar 2017 04:08:34 -0500 Subject: drm/i915/gvt: change some gvt_err to gvt_dbg_cmd gvt_err should be used for dumping error message. This patch changes some gvt_err to gvt_dbg_cmd, as they are only debugging message, not errors. Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 7bb11a555b76..f53cab604d49 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -669,7 +669,7 @@ static inline void print_opcode(u32 cmd, int ring_id) if (d_info == NULL) return; - gvt_err("opcode=0x%x %s sub_ops:", + gvt_dbg_cmd("opcode=0x%x %s sub_ops:", cmd >> (32 - d_info->op_len), d_info->name); for (i = 0; i < d_info->nr_sub_op; i++) @@ -694,23 +694,23 @@ static void parser_exec_state_dump(struct parser_exec_state *s) int cnt = 0; int i; - gvt_err(" vgpu%d RING%d: ring_start(%08lx) ring_end(%08lx)" + gvt_dbg_cmd(" vgpu%d RING%d: ring_start(%08lx) ring_end(%08lx)" " ring_head(%08lx) ring_tail(%08lx)\n", s->vgpu->id, s->ring_id, s->ring_start, s->ring_start + s->ring_size, s->ring_head, s->ring_tail); - gvt_err(" %s %s ip_gma(%08lx) ", + gvt_dbg_cmd(" %s %s ip_gma(%08lx) ", s->buf_type == RING_BUFFER_INSTRUCTION ? "RING_BUFFER" : "BATCH_BUFFER", s->buf_addr_type == GTT_BUFFER ? "GTT" : "PPGTT", s->ip_gma); if (s->ip_va == NULL) { - gvt_err(" ip_va(NULL)"); + gvt_dbg_cmd(" ip_va(NULL)"); return; } - gvt_err(" ip_va=%p: %08x %08x %08x %08x\n", + gvt_dbg_cmd(" ip_va=%p: %08x %08x %08x %08x\n", s->ip_va, cmd_val(s, 0), cmd_val(s, 1), cmd_val(s, 2), cmd_val(s, 3)); -- cgit v1.2.3 From 787eb485509f9d58962bd8b4dbc6a5ac6e2034fe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Mar 2017 15:02:51 -0800 Subject: xfs: fix and streamline error handling in xfs_end_io There are two different cases of buffered I/O errors: - first we can have an already shutdown fs. In that case we should skip any on-disk operations and just clean up the appen transaction if present and destroy the ioend - a real I/O error. In that case we should cleanup any lingering COW blocks. This gets skipped in the current code and is fixed by this patch. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_aops.c | 59 +++++++++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index aa8a6f0d09c3..61494295d92f 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -274,54 +274,49 @@ xfs_end_io( struct xfs_ioend *ioend = container_of(work, struct xfs_ioend, io_work); struct xfs_inode *ip = XFS_I(ioend->io_inode); + xfs_off_t offset = ioend->io_offset; + size_t size = ioend->io_size; int error = ioend->io_bio->bi_error; /* - * Set an error if the mount has shut down and proceed with end I/O - * processing so it can perform whatever cleanups are necessary. + * Just clean up the in-memory strutures if the fs has been shut down. */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { error = -EIO; + goto done; + } /* - * For a CoW extent, we need to move the mapping from the CoW fork - * to the data fork. If instead an error happened, just dump the - * new blocks. + * Clean up any COW blocks on an I/O error. */ - if (ioend->io_type == XFS_IO_COW) { - if (error) - goto done; - if (ioend->io_bio->bi_error) { - error = xfs_reflink_cancel_cow_range(ip, - ioend->io_offset, ioend->io_size, true); - goto done; + if (unlikely(error)) { + switch (ioend->io_type) { + case XFS_IO_COW: + xfs_reflink_cancel_cow_range(ip, offset, size, true); + break; } - error = xfs_reflink_end_cow(ip, ioend->io_offset, - ioend->io_size); - if (error) - goto done; + + goto done; } /* - * For unwritten extents we need to issue transactions to convert a - * range to normal written extens after the data I/O has finished. - * Detecting and handling completion IO errors is done individually - * for each case as different cleanup operations need to be performed - * on error. + * Success: commit the COW or unwritten blocks if needed. */ - if (ioend->io_type == XFS_IO_UNWRITTEN) { - if (error) - goto done; - error = xfs_iomap_write_unwritten(ip, ioend->io_offset, - ioend->io_size); - } else if (ioend->io_append_trans) { - error = xfs_setfilesize_ioend(ioend, error); - } else { - ASSERT(!xfs_ioend_is_append(ioend) || - ioend->io_type == XFS_IO_COW); + switch (ioend->io_type) { + case XFS_IO_COW: + error = xfs_reflink_end_cow(ip, offset, size); + break; + case XFS_IO_UNWRITTEN: + error = xfs_iomap_write_unwritten(ip, offset, size); + break; + default: + ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); + break; } done: + if (ioend->io_append_trans) + error = xfs_setfilesize_ioend(ioend, error); xfs_destroy_ioend(ioend, error); } -- cgit v1.2.3 From d5825712ee98d68a2c17bc89dad2c30276894cba Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Thu, 2 Mar 2017 15:06:33 -0800 Subject: xfs: Use xfs_icluster_size_fsb() to calculate inode alignment mask When block size is larger than inode cluster size, the call to XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size) returns 0. Also, mkfs.xfs would have set xfs_sb->sb_inoalignmt to 0. Hence in xfs_set_inoalignment(), xfs_mount->m_inoalign_mask gets initialized to -1 instead of 0. However, xfs_mount->m_sinoalign would get correctly intialized to 0 because for every positive value of xfs_mount->m_dalign, the condition "!(mp->m_dalign & mp->m_inoalign_mask)" would evaluate to false. Also, xfs_imap() worked fine even with xfs_mount->m_inoalign_mask having -1 as the value because blks_per_cluster variable would have the value 1 and hence we would never have a need to use xfs_mount->m_inoalign_mask to compute the inode chunk's agbno and offset within the chunk. Signed-off-by: Chandan Rajendra Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_mount.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 450bde68bb75..688ebff1f663 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -513,8 +513,7 @@ STATIC void xfs_set_inoalignment(xfs_mount_t *mp) { if (xfs_sb_version_hasalign(&mp->m_sb) && - mp->m_sb.sb_inoalignmt >= - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) + mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; else mp->m_inoalign_mask = 0; -- cgit v1.2.3 From 08b005f1333154ae5b404ca28766e0ffb9f1c150 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 6 Mar 2017 11:58:20 -0800 Subject: xfs: remove kmem_zalloc_greedy The sole remaining caller of kmem_zalloc_greedy is bulkstat, which uses it to grab 1-4 pages for staging of inobt records. The infinite loop in the greedy allocation function is causing hangs[1] in generic/269, so just get rid of the greedy allocator in favor of kmem_zalloc_large. This makes bulkstat somewhat more likely to ENOMEM if there's really no pages to spare, but eliminates a source of hangs. [1] http://lkml.kernel.org/r/20170301044634.rgidgdqqiiwsmfpj%40XZHOUW.usersys.redhat.com Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- v2: remove single-page fallback --- fs/xfs/kmem.c | 18 ------------------ fs/xfs/kmem.h | 2 -- fs/xfs/xfs_itable.c | 6 ++---- 3 files changed, 2 insertions(+), 24 deletions(-) diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 2dfdc62f795e..70a5b55e0870 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -25,24 +25,6 @@ #include "kmem.h" #include "xfs_message.h" -/* - * Greedy allocation. May fail and may return vmalloced memory. - */ -void * -kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) -{ - void *ptr; - size_t kmsize = maxsize; - - while (!(ptr = vzalloc(kmsize))) { - if ((kmsize >>= 1) <= minsize) - kmsize = minsize; - } - if (ptr) - *size = kmsize; - return ptr; -} - void * kmem_alloc(size_t size, xfs_km_flags_t flags) { diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 689f746224e7..f0fc84fcaac2 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -69,8 +69,6 @@ static inline void kmem_free(const void *ptr) } -extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); - static inline void * kmem_zalloc(size_t size, xfs_km_flags_t flags) { diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 66e881790c17..2a6d9b1558e0 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -361,7 +361,6 @@ xfs_bulkstat( xfs_agino_t agino; /* inode # in allocation group */ xfs_agnumber_t agno; /* allocation group number */ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ - size_t irbsize; /* size of irec buffer in bytes */ xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ int nirbuf; /* size of irbuf */ int ubcount; /* size of user's buffer */ @@ -388,11 +387,10 @@ xfs_bulkstat( *ubcountp = 0; *done = 0; - irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); + irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP); if (!irbuf) return -ENOMEM; - - nirbuf = irbsize / sizeof(*irbuf); + nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf); /* * Loop over the allocation groups, starting from the last -- cgit v1.2.3 From 69eb1596b4df8ca834ba6f9a3df40f78943f6dca Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 17 Feb 2017 15:32:23 +0100 Subject: staging: octeon: remove unused variable A cleanup patch left one local variable without a reference: drivers/staging/octeon/ethernet-rx.c:339:28: warning: unused variable 'priv' [-Wunused-variable] This removes the declaration too. Fixes: 66812da3a689 ("staging: octeon: Use net_device_stats from struct net_device") Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon/ethernet-rx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/octeon/ethernet-rx.c b/drivers/staging/octeon/ethernet-rx.c index 7f8cf875157c..65a285631994 100644 --- a/drivers/staging/octeon/ethernet-rx.c +++ b/drivers/staging/octeon/ethernet-rx.c @@ -336,7 +336,6 @@ static int cvm_oct_poll(struct oct_rx_group *rx_group, int budget) if (likely((port < TOTAL_NUMBER_OF_PORTS) && cvm_oct_device[port])) { struct net_device *dev = cvm_oct_device[port]; - struct octeon_ethernet *priv = netdev_priv(dev); /* * Only accept packets for devices that are -- cgit v1.2.3 From fc69910f329d61821897871e0e957eda39beb3d8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Mar 2017 08:29:31 +0100 Subject: MIPS: Add missing include files After the split of linux/sched.h, several platforms in arch/mips stopped building. Add the respective additional #include statements to fix the problem I first tried adding these into asm/processor.h, but ran into circular header dependencies with that which I could not figure out. The commit I listed as causing the problem is the branch merge, as there is likely a combination of multiple patches in that branch. Signed-off-by: Arnd Bergmann Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: ralf@linux-mips.org Fixes: 1827adb11ad2 ("Merge branch 'WIP.sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip") Link: http://lkml.kernel.org/r/20170308072931.3836696-1-arnd@arndb.de Signed-off-by: Ingo Molnar --- arch/mips/cavium-octeon/cpu.c | 2 ++ arch/mips/cavium-octeon/crypto/octeon-crypto.c | 1 + arch/mips/cavium-octeon/smp.c | 1 + arch/mips/include/asm/fpu.h | 1 + arch/mips/kernel/smp-bmips.c | 1 + arch/mips/kernel/smp-mt.c | 1 + arch/mips/loongson64/loongson-3/cop2-ex.c | 1 + arch/mips/netlogic/common/smp.c | 1 + arch/mips/netlogic/xlp/cop2-ex.c | 3 +++ arch/mips/sgi-ip22/ip28-berr.c | 1 + arch/mips/sgi-ip27/ip27-berr.c | 2 ++ arch/mips/sgi-ip27/ip27-smp.c | 3 +++ arch/mips/sgi-ip32/ip32-berr.c | 1 + arch/mips/sgi-ip32/ip32-reset.c | 1 + 14 files changed, 20 insertions(+) diff --git a/arch/mips/cavium-octeon/cpu.c b/arch/mips/cavium-octeon/cpu.c index a5b427909b5c..036d56cc4591 100644 --- a/arch/mips/cavium-octeon/cpu.c +++ b/arch/mips/cavium-octeon/cpu.c @@ -10,7 +10,9 @@ #include #include #include +#include #include +#include #include #include diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c index 4d22365844af..cfb4a146cf17 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "octeon-crypto.h" diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 4b94b7fbafa3..3de786545ded 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 321752bcbab6..f94455f964ec 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -12,6 +12,7 @@ #include #include +#include #include #include diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 3daa2cae50b0..1b070a76fcdd 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index e077ea3e11fb..e398cbc3d776 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/loongson64/loongson-3/cop2-ex.c b/arch/mips/loongson64/loongson-3/cop2-ex.c index ea13764d0a03..621d6af5f6eb 100644 --- a/arch/mips/loongson64/loongson-3/cop2-ex.c +++ b/arch/mips/loongson64/loongson-3/cop2-ex.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c index 10d86d54880a..bddf1ef553a4 100644 --- a/arch/mips/netlogic/common/smp.c +++ b/arch/mips/netlogic/common/smp.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/netlogic/xlp/cop2-ex.c b/arch/mips/netlogic/xlp/cop2-ex.c index 52bc5de42005..21e439b3db70 100644 --- a/arch/mips/netlogic/xlp/cop2-ex.c +++ b/arch/mips/netlogic/xlp/cop2-ex.c @@ -9,11 +9,14 @@ * Copyright (C) 2009 Wind River Systems, * written by Ralf Baechle */ +#include #include #include #include #include +#include #include +#include #include #include diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c index 1f2a5bc4779e..75460e1e106b 100644 --- a/arch/mips/sgi-ip22/ip28-berr.c +++ b/arch/mips/sgi-ip22/ip28-berr.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index d12879eb2b1f..83efe03d5c60 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -12,7 +12,9 @@ #include /* for SIGBUS */ #include /* schow_regs(), force_sig() */ #include +#include +#include #include #include #include diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index f5ed45e8f442..4cd47d23d81a 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -8,10 +8,13 @@ */ #include #include +#include #include #include + #include #include +#include #include #include #include diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c index 57d8c7486fe6..c1f12a9cf305 100644 --- a/arch/mips/sgi-ip32/ip32-berr.c +++ b/arch/mips/sgi-ip32/ip32-berr.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c index 8bd415c8729f..b3b442def423 100644 --- a/arch/mips/sgi-ip32/ip32-reset.c +++ b/arch/mips/sgi-ip32/ip32-reset.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 35b2719e72d375f3e32c819858165668d948d5f2 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 8 Mar 2017 13:56:37 +0200 Subject: usb: dwc3: gadget: make to increment req->remaining in all cases Sometimes, we might get a completion for a TRB which is left with HWO bit. Even in these cases, we should increment req->remaining to properly report total transferred size. I noticed this while debuggin a separate problem seen with MSC tests from USBCV. Sometimes we would erroneously report a completion for a 512-byte transfer when, in reality, we transferred 0 bytes. Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 3db5eeae2bfb..0d75158e43fe 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2189,12 +2189,12 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, return 1; } - if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN) - return 1; - count = trb->size & DWC3_TRB_SIZE_MASK; req->remaining += count; + if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN) + return 1; + if (dep->direction) { if (count) { trb_status = DWC3_TRB_SIZE_TRBSTS(trb->size); -- cgit v1.2.3 From 0b1d250afb8eb9d65afb568bac9b9f9253a82b49 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:37 +0100 Subject: USB: serial: io_ti: fix NULL-deref in interrupt callback Fix a NULL-pointer dereference in the interrupt callback should a malicious device send data containing a bad port number by adding the missing sanity check. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/io_ti.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index ceaeebaa6f90..4561dd4cde8b 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1674,6 +1674,12 @@ static void edge_interrupt_callback(struct urb *urb) function = TIUMP_GET_FUNC_FROM_CODE(data[0]); dev_dbg(dev, "%s - port_number %d, function %d, info 0x%x\n", __func__, port_number, function, data[1]); + + if (port_number >= edge_serial->serial->num_ports) { + dev_err(dev, "bad port number %d\n", port_number); + goto exit; + } + port = edge_serial->serial->port[port_number]; edge_port = usb_get_serial_port_data(port); if (!edge_port) { -- cgit v1.2.3 From 30572418b445d85fcfe6c8fe84c947d2606767d8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:38 +0100 Subject: USB: serial: omninet: fix reference leaks at open This driver needlessly took another reference to the tty on open, a reference which was then never released on close. This lead to not just a leak of the tty, but also a driver reference leak that prevented the driver from being unloaded after a port had once been opened. Fixes: 4a90f09b20f4 ("tty: usb-serial krefs") Cc: stable # 2.6.28 Signed-off-by: Johan Hovold --- drivers/usb/serial/omninet.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index a180b17d2432..76564b3bebb9 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -142,12 +142,6 @@ static int omninet_port_remove(struct usb_serial_port *port) static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port) { - struct usb_serial *serial = port->serial; - struct usb_serial_port *wport; - - wport = serial->port[1]; - tty_port_tty_set(&wport->port, tty); - return usb_serial_generic_open(tty, port); } -- cgit v1.2.3 From 367ec1706745912702c187722065285cd4d2aee7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:39 +0100 Subject: USB: serial: omninet: drop open callback Remove the now redundant open callback and let core call the generic handler for us instead. Signed-off-by: Johan Hovold --- drivers/usb/serial/omninet.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index 76564b3bebb9..dd706953b466 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -31,7 +31,6 @@ #define BT_IGNITIONPRO_ID 0x2000 /* function prototypes */ -static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port); static void omninet_process_read_urb(struct urb *urb); static void omninet_write_bulk_callback(struct urb *urb); static int omninet_write(struct tty_struct *tty, struct usb_serial_port *port, @@ -60,7 +59,6 @@ static struct usb_serial_driver zyxel_omninet_device = { .attach = omninet_attach, .port_probe = omninet_port_probe, .port_remove = omninet_port_remove, - .open = omninet_open, .write = omninet_write, .write_room = omninet_write_room, .write_bulk_callback = omninet_write_bulk_callback, @@ -140,11 +138,6 @@ static int omninet_port_remove(struct usb_serial_port *port) return 0; } -static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port) -{ - return usb_serial_generic_open(tty, port); -} - #define OMNINET_HEADERLEN 4 #define OMNINET_BULKOUTSIZE 64 #define OMNINET_PAYLOADSIZE (OMNINET_BULKOUTSIZE - OMNINET_HEADERLEN) -- cgit v1.2.3 From 654b404f2a222f918af9b0cd18ad469d0c941a8e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:40 +0100 Subject: USB: serial: io_ti: fix information leak in completion handler Add missing sanity check to the bulk-in completion handler to avoid an integer underflow that can be triggered by a malicious device. This avoids leaking 128 kB of memory content from after the URB transfer buffer to user space. Fixes: 8c209e6782ca ("USB: make actual_length in struct urb field u32") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable # 2.6.30 Signed-off-by: Johan Hovold --- drivers/usb/serial/io_ti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index 4561dd4cde8b..a76b95d32157 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1761,7 +1761,7 @@ static void edge_bulk_in_callback(struct urb *urb) port_number = edge_port->port->port_number; - if (edge_port->lsr_event) { + if (urb->actual_length > 0 && edge_port->lsr_event) { edge_port->lsr_event = 0; dev_dbg(dev, "%s ===== Port %u LSR Status = %02x, Data = %02x ======\n", __func__, port_number, edge_port->lsr_mask, *data); -- cgit v1.2.3 From 8c76d7cd520ebffc1ea9ea0850d87a224a50c7f2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:41 +0100 Subject: USB: serial: safe_serial: fix information leak in completion handler Add missing sanity check to the bulk-in completion handler to avoid an integer underflow that could be triggered by a malicious device. This avoids leaking up to 56 bytes from after the URB transfer buffer to user space. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/safe_serial.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c index 93c6c9b08daa..8a069aa154ed 100644 --- a/drivers/usb/serial/safe_serial.c +++ b/drivers/usb/serial/safe_serial.c @@ -200,6 +200,11 @@ static void safe_process_read_urb(struct urb *urb) if (!safe) goto out; + if (length < 2) { + dev_err(&port->dev, "malformed packet\n"); + return; + } + fcs = fcs_compute10(data, length, CRC10_INITFCS); if (fcs) { dev_err(&port->dev, "%s - bad CRC %x\n", __func__, fcs); -- cgit v1.2.3 From 99c014a87979c9244806685f3c3fc7767f79f645 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 8 Mar 2017 10:16:17 -0500 Subject: ktest: Fix while loop in wait_for_input The run_command function was changed to use the wait_for_input function to allow having a timeout if the command to run takes too much time. There was a bug in the wait_for_input where it could end up going into an infinite loop. There's two issues here. One is that the return value of the sysread wasn't used for the write (to write a proper size), and that it should continue processing the passed in file descriptor too even if there was input. There was no check for error, if for some reason STDIN returned an error, the function would go into an infinite loop and never exit. Reported-by: Greg Kroah-Hartman Tested-by: Greg Kroah-Hartman Fixes: 6e98d1b4415f ("ktest: Add timeout to ssh command") Signed-off-by: Steven Rostedt (VMware) --- tools/testing/ktest/ktest.pl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 29470b554711..0c006a2f165d 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1904,11 +1904,12 @@ sub wait_for_input # copy data from stdin to the console if (vec($rout, fileno(\*STDIN), 1) == 1) { - sysread(\*STDIN, $buf, 1000); - syswrite($fp, $buf, 1000); - next; + $nr = sysread(\*STDIN, $buf, 1000); + syswrite($fp, $buf, $nr) if ($nr > 0); } + next if (vec($rout, fileno($fp), 1) != 1); + $line = ""; # try to read one char at a time -- cgit v1.2.3 From f7c6401ff84ab8ffffc281a29aa0a787f7eb346e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 8 Mar 2017 10:36:59 -0500 Subject: ktest: Make sure wait_for_input does honor the timeout The function wait_for_input takes in a timeout, and even has a default timeout. But if for some reason the STDIN descriptor keeps sending in data, the function will never time out. The timout is to wait for the data from the passed in file descriptor, not for STDIN. Adding a test in the case where there's no data from the passed in file descriptor that checks to see if the timeout passed, will ensure that it will timeout properly even if there's input in STDIN. Signed-off-by: Steven Rostedt (VMware) --- tools/testing/ktest/ktest.pl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 0c006a2f165d..49f7c8b9d9c4 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1880,6 +1880,7 @@ sub get_grub_index { sub wait_for_input { my ($fp, $time) = @_; + my $start_time; my $rin; my $rout; my $nr; @@ -1895,12 +1896,12 @@ sub wait_for_input vec($rin, fileno($fp), 1) = 1; vec($rin, fileno(\*STDIN), 1) = 1; + $start_time = time; + while (1) { $nr = select($rout=$rin, undef, undef, $time); - if ($nr <= 0) { - return undef; - } + last if ($nr <= 0); # copy data from stdin to the console if (vec($rout, fileno(\*STDIN), 1) == 1) { @@ -1908,7 +1909,11 @@ sub wait_for_input syswrite($fp, $buf, $nr) if ($nr > 0); } - next if (vec($rout, fileno($fp), 1) != 1); + # The timeout is based on time waiting for the fp data + if (vec($rout, fileno($fp), 1) != 1) { + last if (defined($time) && (time - $start_time > $time)); + next; + } $line = ""; @@ -1918,12 +1923,11 @@ sub wait_for_input last if ($ch eq "\n"); } - if (!length($line)) { - return undef; - } + last if (!length($line)); return $line; } + return undef; } sub reboot_to { -- cgit v1.2.3 From 2501c1bb054290679baad0ff7f4f07c714251f4c Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Mon, 6 Mar 2017 15:20:51 -0500 Subject: i2c: riic: fix restart condition While modifying the driver to use the STOP interrupt, the completion of the intermediate transfers need to wake the driver back up in order to initiate the next transfer (restart condition). Otherwise you get never ending interrupts and only the first transfer sent. Fixes: 71ccea095ea1 ("i2c: riic: correctly finish transfers") Reported-by: Simon Horman Signed-off-by: Chris Brandt Tested-by: Simon Horman Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-riic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c index 8f11d347b3ec..c811af4c8d81 100644 --- a/drivers/i2c/busses/i2c-riic.c +++ b/drivers/i2c/busses/i2c-riic.c @@ -218,8 +218,12 @@ static irqreturn_t riic_tend_isr(int irq, void *data) } if (riic->is_last || riic->err) { - riic_clear_set_bit(riic, 0, ICIER_SPIE, RIIC_ICIER); + riic_clear_set_bit(riic, ICIER_TEIE, ICIER_SPIE, RIIC_ICIER); writeb(ICCR2_SP, riic->base + RIIC_ICCR2); + } else { + /* Transfer is complete, but do not send STOP */ + riic_clear_set_bit(riic, ICIER_TEIE, 0, RIIC_ICIER); + complete(&riic->msg_done); } return IRQ_HANDLED; -- cgit v1.2.3 From 737f98cfe7de8df7433a4d846850aa8efa44bd48 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Feb 2017 18:13:59 +0800 Subject: blk-mq: initialize mq kobjects in blk_mq_init_allocated_queue() Both q->mq_kobj and sw queues' kobjects should have been initialized once, instead of doing that each add_disk context. Also this patch removes clearing of ctx in blk_mq_init_cpu_queues() because percpu allocator fills zero to allocated variable. This patch fixes one issue[1] reported from Omar. [1] kernel wearning when doing unbind/bind on one scsi-mq device [ 19.347924] kobject (ffff8800791ea0b8): tried to init an initialized object, something is seriously wrong. [ 19.349781] CPU: 1 PID: 84 Comm: kworker/u8:1 Not tainted 4.10.0-rc7-00210-g53f39eeaa263 #34 [ 19.350686] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-20161122_114906-anatol 04/01/2014 [ 19.350920] Workqueue: events_unbound async_run_entry_fn [ 19.350920] Call Trace: [ 19.350920] dump_stack+0x63/0x83 [ 19.350920] kobject_init+0x77/0x90 [ 19.350920] blk_mq_register_dev+0x40/0x130 [ 19.350920] blk_register_queue+0xb6/0x190 [ 19.350920] device_add_disk+0x1ec/0x4b0 [ 19.350920] sd_probe_async+0x10d/0x1c0 [sd_mod] [ 19.350920] async_run_entry_fn+0x48/0x150 [ 19.350920] process_one_work+0x1d0/0x480 [ 19.350920] worker_thread+0x48/0x4e0 [ 19.350920] kthread+0x101/0x140 [ 19.350920] ? process_one_work+0x480/0x480 [ 19.350920] ? kthread_create_on_node+0x60/0x60 [ 19.350920] ret_from_fork+0x2c/0x40 Cc: Omar Sandoval Signed-off-by: Ming Lei Tested-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 4 +--- block/blk-mq.c | 4 +++- block/blk-mq.h | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 295e69670c39..124305407c80 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -277,7 +277,7 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) kobject_init(&hctx->kobj, &blk_mq_hw_ktype); } -static void blk_mq_sysfs_init(struct request_queue *q) +void blk_mq_sysfs_init(struct request_queue *q) { struct blk_mq_ctx *ctx; int cpu; @@ -297,8 +297,6 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q) blk_mq_disable_hotplug(); - blk_mq_sysfs_init(q); - ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); if (ret < 0) goto out; diff --git a/block/blk-mq.c b/block/blk-mq.c index b2fd175e84d7..ed4b55176cdd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2045,7 +2045,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); struct blk_mq_hw_ctx *hctx; - memset(__ctx, 0, sizeof(*__ctx)); __ctx->cpu = i; spin_lock_init(&__ctx->lock); INIT_LIST_HEAD(&__ctx->rq_list); @@ -2352,6 +2351,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, if (!q->queue_ctx) goto err_exit; + /* init q->mq_kobj and sw queues' kobjects */ + blk_mq_sysfs_init(q); + q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)), GFP_KERNEL, set->numa_node); if (!q->queue_hw_ctx) diff --git a/block/blk-mq.h b/block/blk-mq.h index 088ced003c13..ad8bfd7473ef 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -77,6 +77,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, /* * sysfs helpers */ +extern void blk_mq_sysfs_init(struct request_queue *q); extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); -- cgit v1.2.3 From 7ea5fe31c12dd8bcf4a9c5a4a7e8e23826a9a3b8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Feb 2017 18:14:00 +0800 Subject: blk-mq: make lifetime consitent between q/ctx and its kobject Currently from kobject view, both q->mq_kobj and ctx->kobj can be released during one cycle of blk_mq_register_dev() and blk_mq_unregister_dev(). Actually, sw queue's lifetime is same with its request queue's, which is covered by request_queue->kobj. So we don't need to call kobject_put() for the two kinds of kobject in __blk_mq_unregister_dev(), instead we do that in release handler of request queue. Signed-off-by: Ming Lei Tested-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 20 +++++++++++++------- block/blk-mq.c | 7 ++++++- block/blk-mq.h | 1 + 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 124305407c80..77fb238af2be 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -242,15 +242,11 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) { struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx; - int i, j; + int i; queue_for_each_hw_ctx(q, hctx, i) { blk_mq_unregister_hctx(hctx); - hctx_for_each_ctx(hctx, ctx, j) - kobject_put(&ctx->kobj); - kobject_put(&hctx->kobj); } @@ -258,8 +254,6 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); - kobject_put(&q->mq_kobj); - kobject_put(&dev->kobj); q->mq_sysfs_init_done = false; @@ -277,6 +271,18 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) kobject_init(&hctx->kobj, &blk_mq_hw_ktype); } +void blk_mq_sysfs_deinit(struct request_queue *q) +{ + struct blk_mq_ctx *ctx; + int cpu; + + for_each_possible_cpu(cpu) { + ctx = per_cpu_ptr(q->queue_ctx, cpu); + kobject_put(&ctx->kobj); + } + kobject_put(&q->mq_kobj); +} + void blk_mq_sysfs_init(struct request_queue *q) { struct blk_mq_ctx *ctx; diff --git a/block/blk-mq.c b/block/blk-mq.c index ed4b55176cdd..b985c236f50f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2264,7 +2264,12 @@ void blk_mq_release(struct request_queue *q) kfree(q->queue_hw_ctx); - /* ctx kobj stays in queue_ctx */ + /* + * release .mq_kobj and sw queue's kobject now because + * both share lifetime with request queue. + */ + blk_mq_sysfs_deinit(q); + free_percpu(q->queue_ctx); } diff --git a/block/blk-mq.h b/block/blk-mq.h index ad8bfd7473ef..b79f9a7d8cf6 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -78,6 +78,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, * sysfs helpers */ extern void blk_mq_sysfs_init(struct request_queue *q); +extern void blk_mq_sysfs_deinit(struct request_queue *q); extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); -- cgit v1.2.3 From 6c8b232efea1ad3d263ff8b9c16b7e8767a77488 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Feb 2017 18:14:01 +0800 Subject: blk-mq: make lifetime consistent between hctx and its kobject This patch removes kobject_put() over hctx in __blk_mq_unregister_dev(), and trys to keep lifetime consistent between hctx and hctx's kobject. Now blk_mq_sysfs_register() and blk_mq_sysfs_unregister() become totally symmetrical, and kobject's refcounter drops to zero just when the hctx is freed. Signed-off-by: Ming Lei Tested-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 15 ++++++++++----- block/blk-mq.c | 5 +---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 77fb238af2be..cb19ec16a7fc 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -17,6 +17,14 @@ static void blk_mq_sysfs_release(struct kobject *kobj) { } +static void blk_mq_hw_sysfs_release(struct kobject *kobj) +{ + struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx, + kobj); + kfree(hctx->ctxs); + kfree(hctx); +} + struct blk_mq_ctx_sysfs_entry { struct attribute attr; ssize_t (*show)(struct blk_mq_ctx *, char *); @@ -200,7 +208,7 @@ static struct kobj_type blk_mq_ctx_ktype = { static struct kobj_type blk_mq_hw_ktype = { .sysfs_ops = &blk_mq_hw_sysfs_ops, .default_attrs = default_hw_ctx_attrs, - .release = blk_mq_sysfs_release, + .release = blk_mq_hw_sysfs_release, }; static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx) @@ -244,12 +252,9 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; - queue_for_each_hw_ctx(q, hctx, i) { + queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); - kobject_put(&hctx->kobj); - } - blk_mq_debugfs_unregister_hctxs(q); kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); diff --git a/block/blk-mq.c b/block/blk-mq.c index b985c236f50f..f70595e5fb86 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2256,8 +2256,7 @@ void blk_mq_release(struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) { if (!hctx) continue; - kfree(hctx->ctxs); - kfree(hctx); + kobject_put(&hctx->kobj); } q->mq_map = NULL; @@ -2336,8 +2335,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, blk_mq_exit_hctx(q, set, hctx, j); free_cpumask_var(hctx->cpumask); kobject_put(&hctx->kobj); - kfree(hctx->ctxs); - kfree(hctx); hctxs[j] = NULL; } -- cgit v1.2.3 From 01388df37627d2e89f0b835377c0eb39d81f671c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Feb 2017 18:14:02 +0800 Subject: blk-mq: free hctx->cpumask in release handler of hctx's kobject It is obviously that hctx->cpumask is per hctx, and both share same lifetime, so this patch moves freeing of hctx->cpumask into release handler of hctx's kobject. Signed-off-by: Ming Lei Tested-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 1 + block/blk-mq.c | 12 ------------ 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index cb19ec16a7fc..d745ab81033a 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -21,6 +21,7 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj) { struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj); + free_cpumask_var(hctx->cpumask); kfree(hctx->ctxs); kfree(hctx); } diff --git a/block/blk-mq.c b/block/blk-mq.c index f70595e5fb86..159187a28d66 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1955,16 +1955,6 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, } } -static void blk_mq_free_hw_queues(struct request_queue *q, - struct blk_mq_tag_set *set) -{ - struct blk_mq_hw_ctx *hctx; - unsigned int i; - - queue_for_each_hw_ctx(q, hctx, i) - free_cpumask_var(hctx->cpumask); -} - static int blk_mq_init_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) @@ -2333,7 +2323,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, if (hctx->tags) blk_mq_free_map_and_requests(set, j); blk_mq_exit_hctx(q, set, hctx, j); - free_cpumask_var(hctx->cpumask); kobject_put(&hctx->kobj); hctxs[j] = NULL; @@ -2446,7 +2435,6 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_del_queue_tag_set(q); blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); - blk_mq_free_hw_queues(q, set); } /* Basically redo blk_mq_init_queue with queue frozen */ -- cgit v1.2.3 From 0bc315381fe9ed9fb91db8b0e82171b645ac008f Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 6 Mar 2017 11:23:35 +0100 Subject: zram: set physical queue limits to avoid array out of bounds accesses zram can handle at most SECTORS_PER_PAGE sectors in a bio's bvec. When using the NVMe over Fabrics loopback target which potentially sends a huge bulk of pages attached to the bio's bvec this results in a kernel panic because of array out of bounds accesses in zram_decompress_page(). Signed-off-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Reviewed-by: Sergey Senozhatsky Signed-off-by: Jens Axboe --- drivers/block/zram/zram_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e27d89a36c34..dceb5edd1e54 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1189,6 +1189,8 @@ static int zram_add(void) blk_queue_io_min(zram->disk->queue, PAGE_SIZE); blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); zram->disk->queue->limits.discard_granularity = PAGE_SIZE; + zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE; + zram->disk->queue->limits.chunk_sectors = 0; blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); /* * zram_bio_discard() will clear all logical blocks if logical block -- cgit v1.2.3 From b0bfdfc2bf7fa85317824c6a389fc373dfcef5bc Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Mon, 6 Mar 2017 08:41:04 -0700 Subject: block/sed: Fix opal user range check and unused variables Fixes check that the opal user is within the range, and cleans up unused method variables. Signed-off-by: Jon Derrick Reviewed-by: Scott Bauer Signed-off-by: Jens Axboe --- block/sed-opal.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/block/sed-opal.c b/block/sed-opal.c index 1e18dca360fc..14035f826b5e 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -1023,7 +1023,6 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont) static int gen_key(struct opal_dev *dev, void *data) { - const u8 *method; u8 uid[OPAL_UID_LENGTH]; int err = 0; @@ -1031,7 +1030,6 @@ static int gen_key(struct opal_dev *dev, void *data) set_comid(dev, dev->comid); memcpy(uid, dev->prev_data, min(sizeof(uid), dev->prev_d_len)); - method = opalmethod[OPAL_GENKEY]; kfree(dev->prev_data); dev->prev_data = NULL; @@ -1669,7 +1667,6 @@ static int add_user_to_lr(struct opal_dev *dev, void *data) static int lock_unlock_locking_range(struct opal_dev *dev, void *data) { u8 lr_buffer[OPAL_UID_LENGTH]; - const u8 *method; struct opal_lock_unlock *lkul = data; u8 read_locked = 1, write_locked = 1; int err = 0; @@ -1677,7 +1674,6 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data) clear_opal_cmd(dev); set_comid(dev, dev->comid); - method = opalmethod[OPAL_SET]; if (build_locking_range(lr_buffer, sizeof(lr_buffer), lkul->session.opal_key.lr) < 0) return -ERANGE; @@ -1733,14 +1729,12 @@ static int lock_unlock_locking_range_sum(struct opal_dev *dev, void *data) { u8 lr_buffer[OPAL_UID_LENGTH]; u8 read_locked = 1, write_locked = 1; - const u8 *method; struct opal_lock_unlock *lkul = data; int ret; clear_opal_cmd(dev); set_comid(dev, dev->comid); - method = opalmethod[OPAL_SET]; if (build_locking_range(lr_buffer, sizeof(lr_buffer), lkul->session.opal_key.lr) < 0) return -ERANGE; @@ -2133,7 +2127,7 @@ static int opal_add_user_to_lr(struct opal_dev *dev, pr_err("Locking state was not RO or RW\n"); return -EINVAL; } - if (lk_unlk->session.who < OPAL_USER1 && + if (lk_unlk->session.who < OPAL_USER1 || lk_unlk->session.who > OPAL_USER9) { pr_err("Authority was not within the range of users: %d\n", lk_unlk->session.who); @@ -2316,7 +2310,7 @@ static int opal_activate_user(struct opal_dev *dev, int ret; /* We can't activate Admin1 it's active as manufactured */ - if (opal_session->who < OPAL_USER1 && + if (opal_session->who < OPAL_USER1 || opal_session->who > OPAL_USER9) { pr_err("Who was not a valid user: %d\n", opal_session->who); return -EINVAL; -- cgit v1.2.3 From 02dbfa5e5583523035f05636c614a0eca77f1aab Mon Sep 17 00:00:00 2001 From: Qi Hou Date: Fri, 3 Mar 2017 15:57:11 +0800 Subject: i2c: add missing of_node_put in i2c_mux_del_adapters Refcount of of_node is increased with of_node_get() in i2c_mux_add_adapter(). It must be decreased with of_node_put() in i2c_mux_del_adapters(). Signe-off-by: Qi Hou Reviewed-by: Zhang Xiao Acked-by: Peter Rosin Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-mux.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 83768e85a919..2178266bca79 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -429,6 +429,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) while (muxc->num_adapters) { struct i2c_adapter *adap = muxc->adapter[--muxc->num_adapters]; struct i2c_mux_priv *priv = adap->algo_data; + struct device_node *np = adap->dev.of_node; muxc->adapter[muxc->num_adapters] = NULL; @@ -438,6 +439,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) sysfs_remove_link(&priv->adap.dev.kobj, "mux_device"); i2c_del_adapter(adap); + of_node_put(np); kfree(priv); } } -- cgit v1.2.3 From 2de3ec4f1d4ba6ee380478055104eb918bd50cce Mon Sep 17 00:00:00 2001 From: Jaedon Shin Date: Fri, 3 Mar 2017 10:55:03 +0900 Subject: i2c: brcmstb: Fix START and STOP conditions The BSC data buffers to send and receive data are each of size 32 bytes or 8 bytes 'xfersz' depending on SoC. The problem observed for all the combined message transfer was if length of data transfer was a multiple of 'xfersz' a repeated START was being transmitted by BSC driver. Fixed this by appropriately setting START/STOP conditions for such transfers. Fixes: dd1aa2524bc5 ("i2c: brcmstb: Add Broadcom settop SoC i2c controller driver") Signed-off-by: Jaedon Shin Acked-by: Kamal Dasu Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-brcmstb.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c index 0652281662a8..78792b4d6437 100644 --- a/drivers/i2c/busses/i2c-brcmstb.c +++ b/drivers/i2c/busses/i2c-brcmstb.c @@ -465,6 +465,7 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter, u8 *tmp_buf; int len = 0; int xfersz = brcmstb_i2c_get_xfersz(dev); + u32 cond, cond_per_msg; if (dev->is_suspended) return -EBUSY; @@ -481,10 +482,11 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter, pmsg->buf ? pmsg->buf[0] : '0', pmsg->len); if (i < (num - 1) && (msgs[i + 1].flags & I2C_M_NOSTART)) - brcmstb_set_i2c_start_stop(dev, ~(COND_START_STOP)); + cond = ~COND_START_STOP; else - brcmstb_set_i2c_start_stop(dev, - COND_RESTART | COND_NOSTOP); + cond = COND_RESTART | COND_NOSTOP; + + brcmstb_set_i2c_start_stop(dev, cond); /* Send slave address */ if (!(pmsg->flags & I2C_M_NOSTART)) { @@ -497,13 +499,24 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter, } } + cond_per_msg = cond; + /* Perform data transfer */ while (len) { bytes_to_xfer = min(len, xfersz); - if (len <= xfersz && i == (num - 1)) - brcmstb_set_i2c_start_stop(dev, - ~(COND_START_STOP)); + if (len <= xfersz) { + if (i == (num - 1)) + cond_per_msg = cond_per_msg & + ~(COND_RESTART | COND_NOSTOP); + else + cond_per_msg = cond; + } else { + cond_per_msg = (cond_per_msg & ~COND_RESTART) | + COND_NOSTOP; + } + + brcmstb_set_i2c_start_stop(dev, cond_per_msg); rc = brcmstb_i2c_xfer_bsc_data(dev, tmp_buf, bytes_to_xfer, pmsg); @@ -512,6 +525,8 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter, len -= bytes_to_xfer; tmp_buf += bytes_to_xfer; + + cond_per_msg = COND_NOSTART | COND_NOSTOP; } } -- cgit v1.2.3 From 1945250d979203ed326b5a44fd705b6b2c46eec5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 27 Feb 2017 20:25:05 +0100 Subject: i2c: m65xx: drop superfluous quirk structure All length fields in Linux I2C are u16, so a HW length limitation of 16 bit lengths is not a limitation. Remove the quirk structure. Tested-by: Jun Gao Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 4a7d9bc2142b..45d61714c81b 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -172,14 +172,6 @@ static const struct i2c_adapter_quirks mt6577_i2c_quirks = { .max_comb_2nd_msg_len = 31, }; -static const struct i2c_adapter_quirks mt8173_i2c_quirks = { - .max_num_msgs = 65535, - .max_write_len = 65535, - .max_read_len = 65535, - .max_comb_1st_msg_len = 65535, - .max_comb_2nd_msg_len = 65535, -}; - static const struct mtk_i2c_compatible mt6577_compat = { .quirks = &mt6577_i2c_quirks, .pmic_i2c = 0, @@ -199,7 +191,6 @@ static const struct mtk_i2c_compatible mt6589_compat = { }; static const struct mtk_i2c_compatible mt8173_compat = { - .quirks = &mt8173_i2c_quirks, .pmic_i2c = 0, .dcm = 1, .auto_restart = 1, -- cgit v1.2.3 From b0c1e95ab44feaad8831f2c06a3473c974003b49 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 28 Feb 2017 11:10:51 -0800 Subject: i2c: copy device properties when using i2c_register_board_info() This will allow marking device property lists as __initdata, the same as board info structures themselves. Signed-off-by: Dmitry Torokhov Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-boardinfo.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/i2c/i2c-boardinfo.c b/drivers/i2c/i2c-boardinfo.c index 6e5fac6a5262..5b8f6c3a6950 100644 --- a/drivers/i2c/i2c-boardinfo.c +++ b/drivers/i2c/i2c-boardinfo.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -55,6 +56,7 @@ EXPORT_SYMBOL_GPL(__i2c_first_dynamic_bus_num); * * The board info passed can safely be __initdata, but be careful of embedded * pointers (for platform_data, functions, etc) since that won't be copied. + * Device properties are deep-copied though. */ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned len) { @@ -78,6 +80,14 @@ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsig devinfo->busnum = busnum; devinfo->board_info = *info; + + if (info->properties) { + devinfo->board_info.properties = + property_entries_dup(info->properties); + if (IS_ERR(devinfo->board_info.properties)) + return PTR_ERR(devinfo->board_info.properties); + } + list_add_tail(&devinfo->list, &__i2c_board_list); } -- cgit v1.2.3 From 3b0277f198ac928f323c42e180680d2f79aa980d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 7 Mar 2017 21:06:38 +0100 Subject: i2c: meson: fix wrong variable usage in meson_i2c_put_data Most likely a copy & paste error. Signed-off-by: Heiner Kallweit Acked-by: Jerome Brunet Signed-off-by: Wolfram Sang Fixes: 30021e3707a7 ("i2c: add support for Amlogic Meson I2C controller") --- drivers/i2c/busses/i2c-meson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c index 2aa61bbbd307..73b97c71a484 100644 --- a/drivers/i2c/busses/i2c-meson.c +++ b/drivers/i2c/busses/i2c-meson.c @@ -175,7 +175,7 @@ static void meson_i2c_put_data(struct meson_i2c *i2c, char *buf, int len) wdata1 |= *buf++ << ((i - 4) * 8); writel(wdata0, i2c->regs + REG_TOK_WDATA0); - writel(wdata0, i2c->regs + REG_TOK_WDATA1); + writel(wdata1, i2c->regs + REG_TOK_WDATA1); dev_dbg(i2c->dev, "%s: data %08x %08x len %d\n", __func__, wdata0, wdata1, len); -- cgit v1.2.3 From ab809fd81fde3d416f8656d8f814a0777fb9b65e Mon Sep 17 00:00:00 2001 From: Zhangfei Gao Date: Tue, 27 Dec 2016 22:22:40 +0800 Subject: i2c: designware: add reset interface Some platforms like hi3660 need do reset first to allow accessing registers Signed-off-by: Zhangfei Gao Reviewed-by: Andy Shevchenko Tested-by: Ramiro Oliveira Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-core.h | 1 + drivers/i2c/busses/i2c-designware-platdrv.c | 28 ++++++++++++++++++++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index c1db3a5a340f..d9aaf1790e0e 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -88,6 +88,7 @@ struct dw_i2c_dev { void __iomem *base; struct completion cmd_complete; struct clk *clk; + struct reset_control *rst; u32 (*get_clk_rate_khz) (struct dw_i2c_dev *dev); struct dw_pci_controller *controller; int cmd_err; diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 6ce431323125..79c4b4ea0539 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -199,6 +200,14 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) dev->irq = irq; platform_set_drvdata(pdev, dev); + dev->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL); + if (IS_ERR(dev->rst)) { + if (PTR_ERR(dev->rst) == -EPROBE_DEFER) + return -EPROBE_DEFER; + } else { + reset_control_deassert(dev->rst); + } + if (pdata) { dev->clk_freq = pdata->i2c_scl_freq; } else { @@ -235,12 +244,13 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) && dev->clk_freq != 1000000 && dev->clk_freq != 3400000) { dev_err(&pdev->dev, "Only 100kHz, 400kHz, 1MHz and 3.4MHz supported"); - return -EINVAL; + r = -EINVAL; + goto exit_reset; } r = i2c_dw_eval_lock_support(dev); if (r) - return r; + goto exit_reset; dev->functionality = I2C_FUNC_10BIT_ADDR | DW_IC_DEFAULT_FUNCTIONALITY; @@ -286,10 +296,18 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) } r = i2c_dw_probe(dev); - if (r && !dev->pm_runtime_disabled) - pm_runtime_disable(&pdev->dev); + if (r) + goto exit_probe; return r; + +exit_probe: + if (!dev->pm_runtime_disabled) + pm_runtime_disable(&pdev->dev); +exit_reset: + if (!IS_ERR_OR_NULL(dev->rst)) + reset_control_assert(dev->rst); + return r; } static int dw_i2c_plat_remove(struct platform_device *pdev) @@ -306,6 +324,8 @@ static int dw_i2c_plat_remove(struct platform_device *pdev) pm_runtime_put_sync(&pdev->dev); if (!dev->pm_runtime_disabled) pm_runtime_disable(&pdev->dev); + if (!IS_ERR_OR_NULL(dev->rst)) + reset_control_assert(dev->rst); return 0; } -- cgit v1.2.3 From b6f8fec4448aa52a8c36a392aa1ca2ea99acd460 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Mar 2017 17:48:31 +0100 Subject: block: Allow bdi re-registration SCSI can call device_add_disk() several times for one request queue when a device in unbound and bound, creating new gendisk each time. This will lead to bdi being repeatedly registered and unregistered. This was not a big problem until commit 165a5e22fafb "block: Move bdi_unregister() to del_gendisk()" since bdi was only registered repeatedly (bdi_register() handles repeated calls fine, only we ended up leaking reference to gendisk due to overwriting bdi->owner) but unregistered only in blk_cleanup_queue() which didn't get called repeatedly. After 165a5e22fafb we were doing correct bdi_register() - bdi_unregister() cycles however bdi_unregister() is not prepared for it. So make sure bdi_unregister() cleans up bdi in such a way that it is prepared for a possible following bdi_register() call. An easy way to provoke this behavior is to enable CONFIG_DEBUG_TEST_DRIVER_REMOVE and use scsi_debug driver to create a scsi disk which immediately hangs without this fix. Fixes: 165a5e22fafb127ecb5914e12e8c32a1f0d3f820 Signed-off-by: Jan Kara Tested-by: Omar Sandoval Signed-off-by: Jens Axboe --- mm/backing-dev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 6d861d090e9f..6ac932210f56 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -710,6 +710,11 @@ static void cgwb_bdi_destroy(struct backing_dev_info *bdi) */ atomic_dec(&bdi->usage_cnt); wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt)); + /* + * Grab back our reference so that we hold it when @bdi gets + * re-registered. + */ + atomic_inc(&bdi->usage_cnt); } /** @@ -857,6 +862,8 @@ int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner) MINOR(owner->devt)); if (rc) return rc; + /* Leaking owner reference... */ + WARN_ON(bdi->owner); bdi->owner = owner; get_device(owner); return 0; -- cgit v1.2.3 From df23de55615fa7a190a85f49a950ccecdd9102f3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Mar 2017 17:48:32 +0100 Subject: bdi: Fix use-after-free in wb_congested_put() bdi_writeback_congested structures get created for each blkcg and bdi regardless whether bdi is registered or not. When they are created in unregistered bdi and the request queue (and thus bdi) is then destroyed while blkg still holds reference to bdi_writeback_congested structure, this structure will be referencing freed bdi and last wb_congested_put() will try to remove the structure from already freed bdi. With commit 165a5e22fafb "block: Move bdi_unregister() to del_gendisk()", SCSI started to destroy bdis without calling bdi_unregister() first (previously it was calling bdi_unregister() even for unregistered bdis) and thus the code detaching bdi_writeback_congested in cgwb_bdi_destroy() was not triggered and we started hitting this use-after-free bug. It is enough to boot a KVM instance with virtio-scsi device to trigger this behavior. Fix the problem by detaching bdi_writeback_congested structures in bdi_exit() instead of bdi_unregister(). This is also more logical as they can get attached to bdi regardless whether it ever got registered or not. Fixes: 165a5e22fafb127ecb5914e12e8c32a1f0d3f820 Signed-off-by: Jan Kara Tested-by: Omar Sandoval Signed-off-by: Jens Axboe --- mm/backing-dev.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 6ac932210f56..c6f2a37028c2 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -683,30 +683,18 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { struct radix_tree_iter iter; - struct rb_node *rbn; void **slot; WARN_ON(test_bit(WB_registered, &bdi->wb.state)); spin_lock_irq(&cgwb_lock); - radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0) cgwb_kill(*slot); - - while ((rbn = rb_first(&bdi->cgwb_congested_tree))) { - struct bdi_writeback_congested *congested = - rb_entry(rbn, struct bdi_writeback_congested, rb_node); - - rb_erase(rbn, &bdi->cgwb_congested_tree); - congested->bdi = NULL; /* mark @congested unlinked */ - } - spin_unlock_irq(&cgwb_lock); /* - * All cgwb's and their congested states must be shutdown and - * released before returning. Drain the usage counter to wait for - * all cgwb's and cgwb_congested's ever created on @bdi. + * All cgwb's must be shutdown and released before returning. Drain + * the usage counter to wait for all cgwb's ever created on @bdi. */ atomic_dec(&bdi->usage_cnt); wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt)); @@ -754,6 +742,21 @@ void wb_blkcg_offline(struct blkcg *blkcg) spin_unlock_irq(&cgwb_lock); } +static void cgwb_bdi_exit(struct backing_dev_info *bdi) +{ + struct rb_node *rbn; + + spin_lock_irq(&cgwb_lock); + while ((rbn = rb_first(&bdi->cgwb_congested_tree))) { + struct bdi_writeback_congested *congested = + rb_entry(rbn, struct bdi_writeback_congested, rb_node); + + rb_erase(rbn, &bdi->cgwb_congested_tree); + congested->bdi = NULL; /* mark @congested unlinked */ + } + spin_unlock_irq(&cgwb_lock); +} + #else /* CONFIG_CGROUP_WRITEBACK */ static int cgwb_bdi_init(struct backing_dev_info *bdi) @@ -774,7 +777,9 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) return 0; } -static void cgwb_bdi_destroy(struct backing_dev_info *bdi) +static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { } + +static void cgwb_bdi_exit(struct backing_dev_info *bdi) { wb_congested_put(bdi->wb_congested); } @@ -905,6 +910,7 @@ static void bdi_exit(struct backing_dev_info *bdi) { WARN_ON_ONCE(bdi->dev); wb_exit(&bdi->wb); + cgwb_bdi_exit(bdi); } static void release_bdi(struct kref *ref) -- cgit v1.2.3 From 90f16fddcc2802726142b8386c65ccb89f044613 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Mar 2017 17:48:33 +0100 Subject: block: Make del_gendisk() safer for disks without queues Commit 165a5e22fafb "block: Move bdi_unregister() to del_gendisk()" added disk->queue dereference to del_gendisk(). Although del_gendisk() is not supposed to be called without disk->queue valid and blk_unregister_queue() warns in that case, this change will make it oops instead. Return to the old more robust behavior of just warning when del_gendisk() gets called for gendisk with disk->queue being NULL. Reported-by: Dan Carpenter Signed-off-by: Jan Kara Tested-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/genhd.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index b26a5ea115d0..94f323842b52 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -681,12 +681,16 @@ void del_gendisk(struct gendisk *disk) disk->flags &= ~GENHD_FL_UP; sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); - /* - * Unregister bdi before releasing device numbers (as they can get - * reused and we'd get clashes in sysfs). - */ - bdi_unregister(disk->queue->backing_dev_info); - blk_unregister_queue(disk); + if (disk->queue) { + /* + * Unregister bdi before releasing device numbers (as they can + * get reused and we'd get clashes in sysfs). + */ + bdi_unregister(disk->queue->backing_dev_info); + blk_unregister_queue(disk); + } else { + WARN_ON(1); + } blk_unregister_region(disk_devt(disk), disk->minors); part_stat_set_all(&disk->part0, 0); -- cgit v1.2.3 From c01228db4ba965986511a5b28c478bddd7e2726e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Mar 2017 17:48:34 +0100 Subject: Revert "scsi, block: fix duplicate bdi name registration crashes" This reverts commit 0dba1314d4f81115dce711292ec7981d17231064. It causes leaking of device numbers for SCSI when SCSI registers multiple gendisks for one request_queue in succession. It can be easily reproduced using Omar's script [1] on kernel with CONFIG_DEBUG_TEST_DRIVER_REMOVE. Furthermore the protection provided by this commit is not needed anymore as the problem it was fixing got also fixed by commit 165a5e22fafb "block: Move bdi_unregister() to del_gendisk()". [1]: http://marc.info/?l=linux-block&m=148554717109098&w=2 Signed-off-by: Jan Kara Acked-by: Dan Williams Tested-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-core.c | 2 -- block/genhd.c | 21 --------------------- drivers/scsi/sd.c | 41 ++++++++--------------------------------- include/linux/blkdev.h | 1 - include/linux/genhd.h | 8 -------- 5 files changed, 8 insertions(+), 65 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 1086dac8724c..a76895c9776d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -578,8 +578,6 @@ void blk_cleanup_queue(struct request_queue *q) q->queue_lock = &q->__queue_lock; spin_unlock_irq(lock); - put_disk_devt(q->disk_devt); - /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); } diff --git a/block/genhd.c b/block/genhd.c index 94f323842b52..a9c516a8b37d 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -572,20 +572,6 @@ exit: disk_part_iter_exit(&piter); } -void put_disk_devt(struct disk_devt *disk_devt) -{ - if (disk_devt && atomic_dec_and_test(&disk_devt->count)) - disk_devt->release(disk_devt); -} -EXPORT_SYMBOL(put_disk_devt); - -void get_disk_devt(struct disk_devt *disk_devt) -{ - if (disk_devt) - atomic_inc(&disk_devt->count); -} -EXPORT_SYMBOL(get_disk_devt); - /** * device_add_disk - add partitioning information to kernel list * @parent: parent device for the disk @@ -626,13 +612,6 @@ void device_add_disk(struct device *parent, struct gendisk *disk) disk_alloc_events(disk); - /* - * Take a reference on the devt and assign it to queue since it - * must not be reallocated while the bdi is registered - */ - disk->queue->disk_devt = disk->disk_devt; - get_disk_devt(disk->disk_devt); - /* Register BDI before referencing it from bdev */ bdi = disk->queue->backing_dev_info; bdi_register_owner(bdi, disk_to_dev(disk)); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c7839f6c35cc..d277e8620e3e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3075,23 +3075,6 @@ static void sd_probe_async(void *data, async_cookie_t cookie) put_device(&sdkp->dev); } -struct sd_devt { - int idx; - struct disk_devt disk_devt; -}; - -static void sd_devt_release(struct disk_devt *disk_devt) -{ - struct sd_devt *sd_devt = container_of(disk_devt, struct sd_devt, - disk_devt); - - spin_lock(&sd_index_lock); - ida_remove(&sd_index_ida, sd_devt->idx); - spin_unlock(&sd_index_lock); - - kfree(sd_devt); -} - /** * sd_probe - called during driver initialization and whenever a * new scsi device is attached to the system. It is called once @@ -3113,7 +3096,6 @@ static void sd_devt_release(struct disk_devt *disk_devt) static int sd_probe(struct device *dev) { struct scsi_device *sdp = to_scsi_device(dev); - struct sd_devt *sd_devt; struct scsi_disk *sdkp; struct gendisk *gd; int index; @@ -3139,13 +3121,9 @@ static int sd_probe(struct device *dev) if (!sdkp) goto out; - sd_devt = kzalloc(sizeof(*sd_devt), GFP_KERNEL); - if (!sd_devt) - goto out_free; - gd = alloc_disk(SD_MINORS); if (!gd) - goto out_free_devt; + goto out_free; do { if (!ida_pre_get(&sd_index_ida, GFP_KERNEL)) @@ -3161,11 +3139,6 @@ static int sd_probe(struct device *dev) goto out_put; } - atomic_set(&sd_devt->disk_devt.count, 1); - sd_devt->disk_devt.release = sd_devt_release; - sd_devt->idx = index; - gd->disk_devt = &sd_devt->disk_devt; - error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); if (error) { sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n"); @@ -3205,12 +3178,11 @@ static int sd_probe(struct device *dev) return 0; out_free_index: - put_disk_devt(&sd_devt->disk_devt); - sd_devt = NULL; + spin_lock(&sd_index_lock); + ida_remove(&sd_index_ida, index); + spin_unlock(&sd_index_lock); out_put: put_disk(gd); - out_free_devt: - kfree(sd_devt); out_free: kfree(sdkp); out: @@ -3271,7 +3243,10 @@ static void scsi_disk_release(struct device *dev) struct scsi_disk *sdkp = to_scsi_disk(dev); struct gendisk *disk = sdkp->disk; - put_disk_devt(disk->disk_devt); + spin_lock(&sd_index_lock); + ida_remove(&sd_index_ida, sdkp->index); + spin_unlock(&sd_index_lock); + disk->private_data = NULL; put_disk(disk); put_device(&sdkp->device->sdev_gendev); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 796016e63c1d..5a7da607ca04 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -435,7 +435,6 @@ struct request_queue { struct delayed_work delay_work; struct backing_dev_info *backing_dev_info; - struct disk_devt *disk_devt; /* * The queue owner gets to use this for whatever they like. diff --git a/include/linux/genhd.h b/include/linux/genhd.h index a999d281a2f1..76f39754e7b0 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -167,13 +167,6 @@ struct blk_integrity { }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ -struct disk_devt { - atomic_t count; - void (*release)(struct disk_devt *disk_devt); -}; - -void put_disk_devt(struct disk_devt *disk_devt); -void get_disk_devt(struct disk_devt *disk_devt); struct gendisk { /* major, first_minor and minors are input parameters only, @@ -183,7 +176,6 @@ struct gendisk { int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ - struct disk_devt *disk_devt; char disk_name[DISK_NAME_LEN]; /* name of major driver */ char *(*devnode)(struct gendisk *gd, umode_t *mode); -- cgit v1.2.3 From 79bd99596b7305ab08109a8bf44a6a4511dbf1cd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 8 Mar 2017 07:38:05 +1100 Subject: blk: improve order of bio handling in generic_make_request() To avoid recursion on the kernel stack when stacked block devices are in use, generic_make_request() will, when called recursively, queue new requests for later handling. They will be handled when the make_request_fn for the current bio completes. If any bios are submitted by a make_request_fn, these will ultimately be handled seqeuntially. If the handling of one of those generates further requests, they will be added to the end of the queue. This strict first-in-first-out behaviour can lead to deadlocks in various ways, normally because a request might need to wait for a previous request to the same device to complete. This can happen when they share a mempool, and can happen due to interdependencies particular to the device. Both md and dm have examples where this happens. These deadlocks can be erradicated by more selective ordering of bios. Specifically by handling them in depth-first order. That is: when the handling of one bio generates one or more further bios, they are handled immediately after the parent, before any siblings of the parent. That way, when generic_make_request() calls make_request_fn for some particular device, we can be certain that all previously submited requests for that device have been completely handled and are not waiting for anything in the queue of requests maintained in generic_make_request(). An easy way to achieve this would be to use a last-in-first-out stack instead of a queue. However this will change the order of consecutive bios submitted by a make_request_fn, which could have unexpected consequences. Instead we take a slightly more complex approach. A fresh queue is created for each call to a make_request_fn. After it completes, any bios for a different device are placed on the front of the main queue, followed by any bios for the same device, followed by all bios that were already on the queue before the make_request_fn was called. This provides the depth-first approach without reordering bios on the same level. This, by itself, it not enough to remove all deadlocks. It just makes it possible for drivers to take the extra step required themselves. To avoid deadlocks, drivers must never risk waiting for a request after submitting one to generic_make_request. This includes never allocing from a mempool twice in the one call to a make_request_fn. A common pattern in drivers is to call bio_split() in a loop, handling the first part and then looping around to possibly split the next part. Instead, a driver that finds it needs to split a bio should queue (with generic_make_request) the second part, handle the first part, and then return. The new code in generic_make_request will ensure the requests to underlying bios are processed first, then the second bio that was split off. If it splits again, the same process happens. In each case one bio will be completely handled before the next one is attempted. With this is place, it should be possible to disable the punt_bios_to_recover() recovery thread for many block devices, and eventually it may be possible to remove it completely. Ref: http://www.spinics.net/lists/raid/msg54680.html Tested-by: Jinpu Wang Inspired-by: Lars Ellenberg Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- block/blk-core.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index a76895c9776d..0eeb99ef654f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2015,17 +2015,34 @@ blk_qc_t generic_make_request(struct bio *bio) struct request_queue *q = bdev_get_queue(bio->bi_bdev); if (likely(blk_queue_enter(q, false) == 0)) { + struct bio_list hold; + struct bio_list lower, same; + + /* Create a fresh bio_list for all subordinate requests */ + hold = bio_list_on_stack; + bio_list_init(&bio_list_on_stack); ret = q->make_request_fn(q, bio); blk_queue_exit(q); - bio = bio_list_pop(current->bio_list); + /* sort new bios into those for a lower level + * and those for the same level + */ + bio_list_init(&lower); + bio_list_init(&same); + while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL) + if (q == bdev_get_queue(bio->bi_bdev)) + bio_list_add(&same, bio); + else + bio_list_add(&lower, bio); + /* now assemble so we handle the lowest level first */ + bio_list_merge(&bio_list_on_stack, &lower); + bio_list_merge(&bio_list_on_stack, &same); + bio_list_merge(&bio_list_on_stack, &hold); } else { - struct bio *bio_next = bio_list_pop(current->bio_list); - bio_io_error(bio); - bio = bio_next; } + bio = bio_list_pop(current->bio_list); } while (bio); current->bio_list = NULL; /* deactivate */ -- cgit v1.2.3 From 672a2c87c83649fb0167202342ce85af9a3b4f1c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Mar 2017 14:56:05 +0100 Subject: axonram: Fix gendisk handling It is invalid to call del_gendisk() when disk->queue is NULL. Fix error handling in axon_ram_probe() to avoid doing that. Also del_gendisk() does not drop a reference to gendisk allocated by alloc_disk(). That has to be done by put_disk(). Add that call where needed. Reported-by: Dan Carpenter Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- arch/powerpc/sysdev/axonram.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index ada29eaed6e2..f523ac883150 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -274,7 +274,9 @@ failed: if (bank->disk->major > 0) unregister_blkdev(bank->disk->major, bank->disk->disk_name); - del_gendisk(bank->disk); + if (bank->disk->flags & GENHD_FL_UP) + del_gendisk(bank->disk); + put_disk(bank->disk); } device->dev.platform_data = NULL; if (bank->io_addr != 0) @@ -299,6 +301,7 @@ axon_ram_remove(struct platform_device *device) device_remove_file(&device->dev, &dev_attr_ecc); free_irq(bank->irq_id, device); del_gendisk(bank->disk); + put_disk(bank->disk); iounmap((void __iomem *) bank->io_addr); kfree(bank); -- cgit v1.2.3 From f65e6fad293b3a5793b7fa2044800506490e7a2e Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 8 Mar 2017 09:58:08 -0800 Subject: xfs: use iomap new flag for newly allocated delalloc blocks Commit fa7f138 ("xfs: clear delalloc and cache on buffered write failure") fixed one regression in the iomap error handling code and exposed another. The fundamental problem is that if a buffered write is a rewrite of preexisting delalloc blocks and the write fails, the failure handling code can punch out preexisting blocks with valid file data. This was reproduced directly by sub-block writes in the LTP kernel/syscalls/write/write03 test. A first 100 byte write allocates a single block in a file. A subsequent 100 byte write fails and punches out the block, including the data successfully written by the previous write. To address this problem, update the ->iomap_begin() handler to distinguish newly allocated delalloc blocks from preexisting delalloc blocks via the IOMAP_F_NEW flag. Use this flag in the ->iomap_end() handler to decide when a failed or short write should punch out delalloc blocks. This introduces the subtle requirement that ->iomap_begin() should never combine newly allocated delalloc blocks with existing blocks in the resulting iomap descriptor. This can occur when a new delalloc reservation merges with a neighboring extent that is part of the current write, for example. Therefore, drop the post-allocation extent lookup from xfs_bmapi_reserve_delalloc() and just return the record inserted into the fork. This ensures only new blocks are returned and thus that preexisting delalloc blocks are always handled as "found" blocks and not punched out on a failed rewrite. Reported-by: Xiong Zhou Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 24 ++++++++++++++---------- fs/xfs/xfs_iomap.c | 25 ++++++++++++++++++------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index a9c66d47757a..bfa59a1a2d09 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4150,6 +4150,19 @@ xfs_bmapi_read( return 0; } +/* + * Add a delayed allocation extent to an inode. Blocks are reserved from the + * global pool and the extent inserted into the inode in-core extent tree. + * + * On entry, got refers to the first extent beyond the offset of the extent to + * allocate or eof is specified if no such extent exists. On return, got refers + * to the extent record that was inserted to the inode fork. + * + * Note that the allocated extent may have been merged with contiguous extents + * during insertion into the inode fork. Thus, got does not reflect the current + * state of the inode fork on return. If necessary, the caller can use lastx to + * look up the updated record in the inode fork. + */ int xfs_bmapi_reserve_delalloc( struct xfs_inode *ip, @@ -4236,13 +4249,8 @@ xfs_bmapi_reserve_delalloc( got->br_startblock = nullstartblock(indlen); got->br_blockcount = alen; got->br_state = XFS_EXT_NORM; - xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); - /* - * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay - * might have merged it into one of the neighbouring ones. - */ - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); + xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); /* * Tag the inode if blocks were preallocated. Note that COW fork @@ -4254,10 +4262,6 @@ xfs_bmapi_reserve_delalloc( if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) xfs_inode_set_cowblocks_tag(ip); - ASSERT(got->br_startoff <= aoff); - ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); - ASSERT(isnullstartblock(got->br_startblock)); - ASSERT(got->br_state == XFS_EXT_NORM); return 0; out_unreserve_blocks: diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 41662fb14e87..288ee5b840d7 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -630,6 +630,11 @@ retry: goto out_unlock; } + /* + * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch + * them out if the write happens to fail. + */ + iomap->flags = IOMAP_F_NEW; trace_xfs_iomap_alloc(ip, offset, count, 0, &got); done: if (isnullstartblock(got.br_startblock)) @@ -1071,16 +1076,22 @@ xfs_file_iomap_end_delalloc( struct xfs_inode *ip, loff_t offset, loff_t length, - ssize_t written) + ssize_t written, + struct iomap *iomap) { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t start_fsb; xfs_fileoff_t end_fsb; int error = 0; - /* behave as if the write failed if drop writes is enabled */ - if (xfs_mp_drop_writes(mp)) + /* + * Behave as if the write failed if drop writes is enabled. Set the NEW + * flag to force delalloc cleanup. + */ + if (xfs_mp_drop_writes(mp)) { + iomap->flags |= IOMAP_F_NEW; written = 0; + } /* * start_fsb refers to the first unused block after a short write. If @@ -1094,14 +1105,14 @@ xfs_file_iomap_end_delalloc( end_fsb = XFS_B_TO_FSB(mp, offset + length); /* - * Trim back delalloc blocks if we didn't manage to write the whole - * range reserved. + * Trim delalloc blocks if they were allocated by this write and we + * didn't manage to write the whole range. * * We don't need to care about racing delalloc as we hold i_mutex * across the reserve/allocate/unreserve calls. If there are delalloc * blocks in the range, they are ours. */ - if (start_fsb < end_fsb) { + if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) { truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), XFS_FSB_TO_B(mp, end_fsb) - 1); @@ -1131,7 +1142,7 @@ xfs_file_iomap_end( { if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, - length, written); + length, written, iomap); return 0; } -- cgit v1.2.3 From bd0f9b356d00aa241ced36fb075a07041c28d3b8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 7 Mar 2017 15:33:14 -0800 Subject: sched/headers: fix up header file dependency on The scheduler header file split and cleanups ended up exposing a few nasty header file dependencies, and in particular it showed how we in ended up depending on "signal_pending()", which now comes from . That's a very subtle and annoying dependency, which already caused a semantic merge conflict (see commit e58bc927835a "Pull overlayfs updates from Miklos Szeredi", which added that fixup in the merge commit). It turns out that we can avoid this dependency _and_ improve code generation by moving the guts of the fairly nasty helper #define __wait_event_interruptible_locked() to out-of-line code. The code that includes the signal_pending() check is all in the slow-path where we actually go to sleep waiting for the event anyway, so using a helper function is the right thing to do. Using a helper function is also what we already did for the non-locked versions, see the "__wait_event*()" macros and the "prepare_to_wait*()" set of helper functions. We might want to try to unify all these macro games, we have a _lot_ of subtly different wait-event loops. But this is the minimal patch to fix the annoying header dependency. Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/wait.h | 31 ++++++++++--------------------- kernel/sched/wait.c | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 21 deletions(-) diff --git a/include/linux/wait.h b/include/linux/wait.h index aacb1282d19a..db076ca7f11d 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -620,30 +620,19 @@ do { \ __ret; \ }) +extern int do_wait_intr(wait_queue_head_t *, wait_queue_t *); +extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *); -#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \ +#define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \ ({ \ - int __ret = 0; \ + int __ret; \ DEFINE_WAIT(__wait); \ if (exclusive) \ __wait.flags |= WQ_FLAG_EXCLUSIVE; \ do { \ - if (likely(list_empty(&__wait.task_list))) \ - __add_wait_queue_tail(&(wq), &__wait); \ - set_current_state(TASK_INTERRUPTIBLE); \ - if (signal_pending(current)) { \ - __ret = -ERESTARTSYS; \ + __ret = fn(&(wq), &__wait); \ + if (__ret) \ break; \ - } \ - if (irq) \ - spin_unlock_irq(&(wq).lock); \ - else \ - spin_unlock(&(wq).lock); \ - schedule(); \ - if (irq) \ - spin_lock_irq(&(wq).lock); \ - else \ - spin_lock(&(wq).lock); \ } while (!(condition)); \ __remove_wait_queue(&(wq), &__wait); \ __set_current_state(TASK_RUNNING); \ @@ -676,7 +665,7 @@ do { \ */ #define wait_event_interruptible_locked(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 0)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr)) /** * wait_event_interruptible_locked_irq - sleep until a condition gets true @@ -703,7 +692,7 @@ do { \ */ #define wait_event_interruptible_locked_irq(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 1)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr_irq)) /** * wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true @@ -734,7 +723,7 @@ do { \ */ #define wait_event_interruptible_exclusive_locked(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 0)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr)) /** * wait_event_interruptible_exclusive_locked_irq - sleep until a condition gets true @@ -765,7 +754,7 @@ do { \ */ #define wait_event_interruptible_exclusive_locked_irq(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr_irq)) #define __wait_event_killable(wq, condition) \ diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 4d2ea6f25568..b8c84c6dee64 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -242,6 +242,45 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state) } EXPORT_SYMBOL(prepare_to_wait_event); +/* + * Note! These two wait functions are entered with the + * wait-queue lock held (and interrupts off in the _irq + * case), so there is no race with testing the wakeup + * condition in the caller before they add the wait + * entry to the wake queue. + */ +int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait) +{ + if (likely(list_empty(&wait->task_list))) + __add_wait_queue_tail(wq, wait); + + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) + return -ERESTARTSYS; + + spin_unlock(&wq->lock); + schedule(); + spin_lock(&wq->lock); + return 0; +} +EXPORT_SYMBOL(do_wait_intr); + +int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_t *wait) +{ + if (likely(list_empty(&wait->task_list))) + __add_wait_queue_tail(wq, wait); + + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) + return -ERESTARTSYS; + + spin_unlock_irq(&wq->lock); + schedule(); + spin_lock_irq(&wq->lock); + return 0; +} +EXPORT_SYMBOL(do_wait_intr_irq); + /** * finish_wait - clean up after waiting in a queue * @q: waitqueue waited on -- cgit v1.2.3 From 2fcc319d2467a5f5b78f35f79fd6e22741a31b1e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Mar 2017 10:38:53 -0800 Subject: xfs: try any AG when allocating the first btree block when reflinking When a reflink operation causes the bmap code to allocate a btree block we're currently doing single-AG allocations due to having ->firstblock set and then try any higher AG due a little reflink quirk we've put in when adding the reflink code. But given that we do not have a minleft reservation of any kind in this AG we can still not have any space in the same or higher AG even if the file system has enough free space. To fix this use a XFS_ALLOCTYPE_FIRST_AG allocation in this fall back path instead. [And yes, we need to redo this properly instead of piling hacks over hacks. I'm working on that, but it's not going to be a small series. In the meantime this fixes the customer reported issue] Also add a warning for failing allocations to make it easier to debug. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 10 +++++++--- fs/xfs/libxfs/xfs_bmap_btree.c | 6 +++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index bfa59a1a2d09..9bd104f32908 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -763,8 +763,8 @@ xfs_bmap_extents_to_btree( args.type = XFS_ALLOCTYPE_START_BNO; args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); } else if (dfops->dop_low) { -try_another_ag: args.type = XFS_ALLOCTYPE_START_BNO; +try_another_ag: args.fsbno = *firstblock; } else { args.type = XFS_ALLOCTYPE_NEAR_BNO; @@ -790,13 +790,17 @@ try_another_ag: if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && args.fsbno == NULLFSBLOCK && args.type == XFS_ALLOCTYPE_NEAR_BNO) { - dfops->dop_low = true; + args.type = XFS_ALLOCTYPE_FIRST_AG; goto try_another_ag; } + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { + xfs_iroot_realloc(ip, -1, whichfork); + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return -ENOSPC; + } /* * Allocation can't fail, the space was reserved. */ - ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(*firstblock == NULLFSBLOCK || args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock)); *firstblock = cur->bc_private.b.firstblock = args.fsbno; diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index f93072b58a58..fd55db479385 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -447,8 +447,8 @@ xfs_bmbt_alloc_block( if (args.fsbno == NULLFSBLOCK) { args.fsbno = be64_to_cpu(start->l); -try_another_ag: args.type = XFS_ALLOCTYPE_START_BNO; +try_another_ag: /* * Make sure there is sufficient room left in the AG to * complete a full tree split for an extent insert. If @@ -488,8 +488,8 @@ try_another_ag: if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && args.fsbno == NULLFSBLOCK && args.type == XFS_ALLOCTYPE_NEAR_BNO) { - cur->bc_private.b.dfops->dop_low = true; args.fsbno = cur->bc_private.b.firstblock; + args.type = XFS_ALLOCTYPE_FIRST_AG; goto try_another_ag; } @@ -506,7 +506,7 @@ try_another_ag: goto error0; cur->bc_private.b.dfops->dop_low = true; } - if (args.fsbno == NULLFSBLOCK) { + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; -- cgit v1.2.3 From 04bb94b13c02e9dbc92d622cddf88937127bd7ed Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Mar 2017 10:42:13 -0800 Subject: overlayfs: remove now unnecessary header file include This removes the extra include header file that was added in commit e58bc927835a "Pull overlayfs updates from Miklos Szeredi" now that it is no longer needed. There are probably other such includes that got added during the scheduler header splitup series, but this is the one that annoyed me personally and I know about. Signed-off-by: Linus Torvalds --- fs/overlayfs/util.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 1953986ee6bc..6e610a205e15 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -12,7 +12,6 @@ #include #include #include -#include #include "overlayfs.h" #include "ovl_entry.h" -- cgit v1.2.3 From b4fb8f66f1ae2e167d06c12d018025a8d4d3ba7e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 8 Mar 2017 09:35:39 -0800 Subject: mm, page_alloc: Add missing check for memory holes Commit 13ad59df67f1 ("mm, page_alloc: avoid page_to_pfn() when merging buddies") moved the check for memory holes out of page_is_buddy() and had the callers do the check. But this wasn't done correctly in one place which caused ia64 to crash very early in boot. Update to fix that and make ia64 boot again. [ v2: Vlastimil pointed out we don't need to call page_to_pfn() since we already have the result of that in "buddy_pfn" ] Fixes: 13ad59df67f1 ("avoid page_to_pfn() when merging buddies") Cc: Mel Gorman Cc: Joonsoo Kim Cc: Michal Hocko Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Andrew Morton Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index eaa64d2ffdc5..6cbde310abed 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -873,7 +873,8 @@ done_merging: higher_page = page + (combined_pfn - pfn); buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1); higher_buddy = higher_page + (buddy_pfn - combined_pfn); - if (page_is_buddy(higher_page, higher_buddy, order + 1)) { + if (pfn_valid_within(buddy_pfn) && + page_is_buddy(higher_page, higher_buddy, order + 1)) { list_add_tail(&page->lru, &zone->free_area[order].free_list[migratetype]); goto out; -- cgit v1.2.3 From c085bd5119d5d0bdf3ef591a5563566be7dedced Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Wed, 1 Mar 2017 15:53:29 +0800 Subject: drm/amd/amdgpu: fix console deadlock if late init failed Signed-off-by: Jim Qu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6abb238b25c9..4120b351a8e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2094,8 +2094,11 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) } r = amdgpu_late_init(adev); - if (r) + if (r) { + if (fbcon) + console_unlock(); return r; + } /* pin cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { -- cgit v1.2.3 From a677e7046ab5edb33d051bda60cb3be0d60a48cc Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 8 Mar 2017 11:38:32 +0530 Subject: KVM: Add documentation for KVM_CAP_NR_MEMSLOTS Add documentation for KVM_CAP_NR_MEMSLOTS capability. Reviewed-by: Christoffer Dall Signed-off-by: Linu Cherian Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 069450938b79..3c248f772ae6 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -951,6 +951,10 @@ This ioctl allows the user to create or modify a guest physical memory slot. When changing an existing slot, it may be moved in the guest physical memory space, or its flags may be modified. It may not be resized. Slots may not overlap in guest physical address space. +Bits 0-15 of "slot" specifies the slot id and this value should be +less than the maximum number of user memory slots supported per VM. +The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS, +if this capability is supported by the architecture. If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot" specifies the address space which is being modified. They must be -- cgit v1.2.3 From 7af4df85796589e60a2dfc0f821eca0c4bbce4d2 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 8 Mar 2017 11:38:33 +0530 Subject: KVM: arm/arm64: Enable KVM_CAP_NR_MEMSLOTS on arm/arm64 Return KVM_USER_MEM_SLOTS for userspace capability query on NR_MEMSLOTS. Reviewed-by: Christoffer Dall Signed-off-by: Linu Cherian Signed-off-by: Marc Zyngier --- arch/arm/kvm/arm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index c9a2103faeb9..96dba7cd8be7 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -221,6 +221,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; + case KVM_CAP_NR_MEMSLOTS: + r = KVM_USER_MEM_SLOTS; + break; case KVM_CAP_MSI_DEVID: if (!kvm) r = -EINVAL; -- cgit v1.2.3 From 3e92f94a3b8e925a6dd7ec88a5794b2084b5fb65 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 8 Mar 2017 11:38:34 +0530 Subject: KVM: arm/arm64: Remove KVM_PRIVATE_MEM_SLOTS definition that are unused arm/arm64 architecture doesnt use private memslots, hence removing KVM_PRIVATE_MEM_SLOTS macro definition. Reviewed-by: Christoffer Dall Signed-off-by: Linu Cherian Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 1 - arch/arm64/include/asm/kvm_host.h | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index cc495d799c67..31ee468ce667 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -30,7 +30,6 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED #define KVM_USER_MEM_SLOTS 32 -#define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HAVE_ONE_REG #define KVM_HALT_POLL_NS_DEFAULT 500000 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f21fd3894370..6ac17ee887c9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -31,7 +31,6 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED #define KVM_USER_MEM_SLOTS 32 -#define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HALT_POLL_NS_DEFAULT 500000 -- cgit v1.2.3 From 955a3fc6d2a1c11d6d00bce4f3816100ce0530cf Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 8 Mar 2017 11:38:35 +0530 Subject: KVM: arm64: Increase number of user memslots to 512 Having only 32 memslots is a real constraint for the maximum number of PCI devices that can be assigned to a single guest. Assuming each PCI device/virtual function having two memory BAR regions, we could assign only 15 devices/virtual functions to a guest. Hence increase KVM_USER_MEM_SLOTS to 512 as done in other archs like powerpc. Reviewed-by: Christoffer Dall Signed-off-by: Linu Cherian Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 6ac17ee887c9..e7705e7bb07b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -30,7 +30,7 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED -#define KVM_USER_MEM_SLOTS 32 +#define KVM_USER_MEM_SLOTS 512 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HALT_POLL_NS_DEFAULT 500000 -- cgit v1.2.3 From 85550f9148a852ed363a386577ad31b97b95dfb8 Mon Sep 17 00:00:00 2001 From: Jelle Martijn Kok Date: Tue, 21 Feb 2017 12:48:18 +0100 Subject: usb: ohci-at91: Do not drop unhandled USB suspend control requests In patch 2e2aa1bc7eff90ecm, USB suspend and wakeup control requests are passed to SFR_OHCIICR register. If a processor does not have such a register, this hub control request will be dropped. If no such a SFR register is available, all USB suspend control requests will now be processed using ohci_hub_control() (like before patch 2e2aa1bc7eff90ecm.) Tested on an Atmel AT91SAM9G20 with an on-board TI TUSB2046B hub chip If the last USB device is unplugged from the USB hub, the hub goes into sleep and will not wakeup when an USB devices is inserted. Fixes: 2e2aa1bc7eff90ec ("usb: ohci-at91: Forcibly suspend ports while USB suspend") Signed-off-by: Jelle Martijn Kok Tested-by: Wenyou Yang Cc: Wenyou Yang Cc: Alan Stern Cc: stable Acked-by: Nicolas Ferre Reviewed-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-at91.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index 414e3c376dbb..5302f988e7e6 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -350,7 +350,7 @@ static int ohci_at91_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_SUSPEND: dev_dbg(hcd->self.controller, "SetPortFeat: SUSPEND\n"); - if (valid_port(wIndex)) { + if (valid_port(wIndex) && ohci_at91->sfr_regmap) { ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1); return 0; @@ -393,7 +393,7 @@ static int ohci_at91_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_SUSPEND: dev_dbg(hcd->self.controller, "ClearPortFeature: SUSPEND\n"); - if (valid_port(wIndex)) { + if (valid_port(wIndex) && ohci_at91->sfr_regmap) { ohci_at91_port_suspend(ohci_at91->sfr_regmap, 0); return 0; -- cgit v1.2.3 From fd567653bdb908009b650f079bfd4b63169e2ac4 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 22 Feb 2017 15:23:22 -0300 Subject: usb: phy: isp1301: Add OF device ID table The driver doesn't have a struct of_device_id table but supported devices are registered via Device Trees. This is working on the assumption that a I2C device registered via OF will always match a legacy I2C device ID and that the MODALIAS reported will always be of the form i2c:. But this could change in the future so the correct approach is to have an OF device ID table if the devices are registered via OF. Signed-off-by: Javier Martinez Canillas Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-isp1301.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/phy/phy-isp1301.c b/drivers/usb/phy/phy-isp1301.c index db68156568e6..b3b33cf7ddf6 100644 --- a/drivers/usb/phy/phy-isp1301.c +++ b/drivers/usb/phy/phy-isp1301.c @@ -33,6 +33,12 @@ static const struct i2c_device_id isp1301_id[] = { }; MODULE_DEVICE_TABLE(i2c, isp1301_id); +static const struct of_device_id isp1301_of_match[] = { + {.compatible = "nxp,isp1301" }, + { }, +}; +MODULE_DEVICE_TABLE(of, isp1301_of_match); + static struct i2c_client *isp1301_i2c_client; static int __isp1301_write(struct isp1301 *isp, u8 reg, u8 value, u8 clear) @@ -130,6 +136,7 @@ static int isp1301_remove(struct i2c_client *client) static struct i2c_driver isp1301_driver = { .driver = { .name = DRV_NAME, + .of_match_table = of_match_ptr(isp1301_of_match), }, .probe = isp1301_probe, .remove = isp1301_remove, -- cgit v1.2.3 From b7321e81fc369abe353cf094d4f0dc2fe11ab95f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Mar 2017 16:11:03 +0100 Subject: USB: iowarrior: fix NULL-deref at probe Make sure to check for the required interrupt-in endpoint to avoid dereferencing a NULL-pointer should a malicious device lack such an endpoint. Note that a fairly recent change purported to fix this issue, but added an insufficient test on the number of endpoints only, a test which can now be removed. Fixes: 4ec0ef3a8212 ("USB: iowarrior: fix oops with malicious USB descriptors") Fixes: 946b960d13c1 ("USB: add driver for iowarrior devices.") Cc: stable # 2.6.21 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 095778ff984d..3ad058cbe6ca 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -781,12 +781,6 @@ static int iowarrior_probe(struct usb_interface *interface, iface_desc = interface->cur_altsetting; dev->product_id = le16_to_cpu(udev->descriptor.idProduct); - if (iface_desc->desc.bNumEndpoints < 1) { - dev_err(&interface->dev, "Invalid number of endpoints\n"); - retval = -EINVAL; - goto error; - } - /* set up the endpoint information */ for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; @@ -797,6 +791,13 @@ static int iowarrior_probe(struct usb_interface *interface, /* this one will match for the IOWarrior56 only */ dev->int_out_endpoint = endpoint; } + + if (!dev->int_in_endpoint) { + dev_err(&interface->dev, "no interrupt-in endpoint found\n"); + retval = -ENODEV; + goto error; + } + /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && -- cgit v1.2.3 From de46e56653de7b3b54baa625bd582635008b8d05 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Mar 2017 16:11:04 +0100 Subject: USB: iowarrior: fix NULL-deref in write Make sure to verify that we have the required interrupt-out endpoint for IOWarrior56 devices to avoid dereferencing a NULL-pointer in write should a malicious device lack such an endpoint. Fixes: 946b960d13c1 ("USB: add driver for iowarrior devices.") Cc: stable # 2.6.21 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 3ad058cbe6ca..37c63cb39714 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -798,6 +798,14 @@ static int iowarrior_probe(struct usb_interface *interface, goto error; } + if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) { + if (!dev->int_out_endpoint) { + dev_err(&interface->dev, "no interrupt-out endpoint found\n"); + retval = -ENODEV; + goto error; + } + } + /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && -- cgit v1.2.3 From d595259fbb7a7afed241b1afb2c4fe4b47de47fa Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Tue, 28 Feb 2017 00:46:58 +0100 Subject: usb-storage: Add ignore-residue quirk for Initio INIC-3619 This USB-SATA bridge chip is used in a StarTech enclosure for optical drives. Without the quirk MakeMKV fails during the key exchange with an installed BluRay drive: > Error 'Scsi error - ILLEGAL REQUEST:COPY PROTECTION KEY EXCHANGE FAILURE - KEY NOT ESTABLISHED' > occurred while issuing SCSI command AD010..080002400 to device 'SG:dev_11:2' Signed-off-by: Tobias Jakobi Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 16cc18369111..9129f6cb8230 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2071,6 +2071,20 @@ UNUSUAL_DEV( 0x1370, 0x6828, 0x0110, 0x0110, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_IGNORE_RESIDUE ), +/* + * Reported by Tobias Jakobi + * The INIC-3619 bridge is used in the StarTech SLSODDU33B + * SATA-USB enclosure for slimline optical drives. + * + * The quirk enables MakeMKV to properly exchange keys with + * an installed BD drive. + */ +UNUSUAL_DEV( 0x13fd, 0x3609, 0x0209, 0x0209, + "Initio Corporation", + "INIC-3619", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_RESIDUE ), + /* Reported by Qinglin Ye */ UNUSUAL_DEV( 0x13fe, 0x3600, 0x0100, 0x0100, "Kingston", -- cgit v1.2.3 From cfa47afe77b393e2c24a57e7e9611857a0b064f1 Mon Sep 17 00:00:00 2001 From: Richard Leitner Date: Mon, 6 Mar 2017 09:24:20 +0100 Subject: usb: usb251xb: remove max_{power,current}_{sp,bp} properties Remove the max_{power,current}_{sp,bp} properties of the usb251xb driver from devicetree. This is done to simplify the dt bindings as requested by Rob Herring in https://lkml.org/lkml/2017/2/15/1283. If those properties are ever needed by somebody they can be enabled again easily. Signed-off-by: Richard Leitner Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/usb251xb.txt | 20 ------------------ drivers/usb/misc/usb251xb.c | 24 ++++------------------ 2 files changed, 4 insertions(+), 40 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/usb251xb.txt b/Documentation/devicetree/bindings/usb/usb251xb.txt index 0c065f77658f..a5efd10ace9e 100644 --- a/Documentation/devicetree/bindings/usb/usb251xb.txt +++ b/Documentation/devicetree/bindings/usb/usb251xb.txt @@ -40,26 +40,6 @@ Optional properties : device connected. - sp-disabled-ports : Specifies the ports which will be self-power disabled - bp-disabled-ports : Specifies the ports which will be bus-power disabled - - max-sp-power : Specifies the maximum current the hub consumes from an - upstream port when operating as self-powered hub including the power - consumption of a permanently attached peripheral if the hub is - configured as a compound device. The value is given in mA in a 0 - 500 - range (default is 2). - - max-bp-power : Specifies the maximum current the hub consumes from an - upstream port when operating as bus-powered hub including the power - consumption of a permanently attached peripheral if the hub is - configured as a compound device. The value is given in mA in a 0 - 500 - range (default is 100). - - max-sp-current : Specifies the maximum current the hub consumes from an - upstream port when operating as self-powered hub EXCLUDING the power - consumption of a permanently attached peripheral if the hub is - configured as a compound device. The value is given in mA in a 0 - 500 - range (default is 2). - - max-bp-current : Specifies the maximum current the hub consumes from an - upstream port when operating as bus-powered hub EXCLUDING the power - consumption of a permanently attached peripheral if the hub is - configured as a compound device. The value is given in mA in a 0 - 500 - range (default is 100). - power-on-time : Specifies the time it takes from the time the host initiates the power-on sequence to a port until the port has adequate power. The value is given in ms in a 0 - 510 range (default is 100ms). diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c index 4e18600dc9b4..3f9c3060c477 100644 --- a/drivers/usb/misc/usb251xb.c +++ b/drivers/usb/misc/usb251xb.c @@ -432,26 +432,6 @@ static int usb251xb_get_ofdata(struct usb251xb *hub, } } - hub->max_power_sp = USB251XB_DEF_MAX_POWER_SELF; - if (!of_property_read_u32(np, "max-sp-power", property_u32)) - hub->max_power_sp = min_t(u8, be32_to_cpu(*property_u32) / 2, - 250); - - hub->max_power_bp = USB251XB_DEF_MAX_POWER_BUS; - if (!of_property_read_u32(np, "max-bp-power", property_u32)) - hub->max_power_bp = min_t(u8, be32_to_cpu(*property_u32) / 2, - 250); - - hub->max_current_sp = USB251XB_DEF_MAX_CURRENT_SELF; - if (!of_property_read_u32(np, "max-sp-current", property_u32)) - hub->max_current_sp = min_t(u8, be32_to_cpu(*property_u32) / 2, - 250); - - hub->max_current_bp = USB251XB_DEF_MAX_CURRENT_BUS; - if (!of_property_read_u32(np, "max-bp-current", property_u32)) - hub->max_current_bp = min_t(u8, be32_to_cpu(*property_u32) / 2, - 250); - hub->power_on_time = USB251XB_DEF_POWER_ON_TIME; if (!of_property_read_u32(np, "power-on-time", property_u32)) hub->power_on_time = min_t(u8, be32_to_cpu(*property_u32) / 2, @@ -492,6 +472,10 @@ static int usb251xb_get_ofdata(struct usb251xb *hub, /* The following parameters are currently not exposed to devicetree, but * may be as soon as needed. */ + hub->max_power_sp = USB251XB_DEF_MAX_POWER_SELF; + hub->max_power_bp = USB251XB_DEF_MAX_POWER_BUS; + hub->max_current_sp = USB251XB_DEF_MAX_CURRENT_SELF; + hub->max_current_bp = USB251XB_DEF_MAX_CURRENT_BUS; hub->bat_charge_en = USB251XB_DEF_BATTERY_CHARGING_ENABLE; hub->boost_up = USB251XB_DEF_BOOST_UP; hub->boost_x = USB251XB_DEF_BOOST_X; -- cgit v1.2.3 From 7f7d8ba3b2140d993887a7db7a83d85c1f8db0e8 Mon Sep 17 00:00:00 2001 From: Richard Leitner Date: Mon, 6 Mar 2017 09:24:21 +0100 Subject: usb: usb251xb: dt: add unit suffix to oc-delay and power-on-time Rename oc-delay-* to oc-delay-us and make it expect a time value. Furthermore add -ms suffix to power-on-time. There changes were suggested by Rob Herring in https://lkml.org/lkml/2017/2/15/1283. Signed-off-by: Richard Leitner Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/usb251xb.txt | 10 ++++--- drivers/usb/misc/usb251xb.c | 35 ++++++++++++---------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/usb251xb.txt b/Documentation/devicetree/bindings/usb/usb251xb.txt index a5efd10ace9e..91499ae028db 100644 --- a/Documentation/devicetree/bindings/usb/usb251xb.txt +++ b/Documentation/devicetree/bindings/usb/usb251xb.txt @@ -31,7 +31,9 @@ Optional properties : (default is individual) - dynamic-power-switching : enable auto-switching from self- to bus-powered operation if the local power source is removed or unavailable - - oc-delay-{100us,4ms,8ms,16ms} : set over current timer delay (default is 8ms) + - oc-delay-us : Delay time (in microseconds) for filtering the over-current + sense inputs. Valid values are 100, 4000, 8000 (default) and 16000. If + an invalid value is given, the default is used instead. - compound-device : indicated the hub is part of a compound device - port-mapping-mode : enable port mapping mode - string-support : enable string descriptor support (required for manufacturer, @@ -40,9 +42,9 @@ Optional properties : device connected. - sp-disabled-ports : Specifies the ports which will be self-power disabled - bp-disabled-ports : Specifies the ports which will be bus-power disabled - - power-on-time : Specifies the time it takes from the time the host initiates - the power-on sequence to a port until the port has adequate power. The - value is given in ms in a 0 - 510 range (default is 100ms). + - power-on-time-ms : Specifies the time it takes from the time the host + initiates the power-on sequence to a port until the port has adequate + power. The value is given in ms in a 0 - 510 range (default is 100ms). Examples: usb2512b@2c { diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c index 3f9c3060c477..91f66d68bcb7 100644 --- a/drivers/usb/misc/usb251xb.c +++ b/drivers/usb/misc/usb251xb.c @@ -375,18 +375,24 @@ static int usb251xb_get_ofdata(struct usb251xb *hub, if (of_get_property(np, "dynamic-power-switching", NULL)) hub->conf_data2 |= BIT(7); - if (of_get_property(np, "oc-delay-100us", NULL)) { - hub->conf_data2 &= ~BIT(5); - hub->conf_data2 &= ~BIT(4); - } else if (of_get_property(np, "oc-delay-4ms", NULL)) { - hub->conf_data2 &= ~BIT(5); - hub->conf_data2 |= BIT(4); - } else if (of_get_property(np, "oc-delay-8ms", NULL)) { - hub->conf_data2 |= BIT(5); - hub->conf_data2 &= ~BIT(4); - } else if (of_get_property(np, "oc-delay-16ms", NULL)) { - hub->conf_data2 |= BIT(5); - hub->conf_data2 |= BIT(4); + if (!of_property_read_u32(np, "oc-delay-us", property_u32)) { + if (*property_u32 == 100) { + /* 100 us*/ + hub->conf_data2 &= ~BIT(5); + hub->conf_data2 &= ~BIT(4); + } else if (*property_u32 == 4000) { + /* 4 ms */ + hub->conf_data2 &= ~BIT(5); + hub->conf_data2 |= BIT(4); + } else if (*property_u32 == 16000) { + /* 16 ms */ + hub->conf_data2 |= BIT(5); + hub->conf_data2 |= BIT(4); + } else { + /* 8 ms (DEFAULT) */ + hub->conf_data2 |= BIT(5); + hub->conf_data2 &= ~BIT(4); + } } if (of_get_property(np, "compound-device", NULL)) @@ -433,9 +439,8 @@ static int usb251xb_get_ofdata(struct usb251xb *hub, } hub->power_on_time = USB251XB_DEF_POWER_ON_TIME; - if (!of_property_read_u32(np, "power-on-time", property_u32)) - hub->power_on_time = min_t(u8, be32_to_cpu(*property_u32) / 2, - 255); + if (!of_property_read_u32(np, "power-on-time-ms", property_u32)) + hub->power_on_time = min_t(u8, *property_u32 / 2, 255); if (of_property_read_u16_array(np, "language-id", &hub->lang_id, 1)) hub->lang_id = USB251XB_DEF_LANGUAGE_ID; -- cgit v1.2.3 From fa56fe4ca4a1e4d9715f144857c98074e41bc94f Mon Sep 17 00:00:00 2001 From: Richard Leitner Date: Mon, 6 Mar 2017 09:24:22 +0100 Subject: doc: dt-bindings: usb251xb: mark reg as required Mark the reg property as required and furthermore fix some typos and spellings in the documentation. Signed-off-by: Richard Leitner Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/usb251xb.txt | 23 +++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/usb251xb.txt b/Documentation/devicetree/bindings/usb/usb251xb.txt index 91499ae028db..3957d4edaa74 100644 --- a/Documentation/devicetree/bindings/usb/usb251xb.txt +++ b/Documentation/devicetree/bindings/usb/usb251xb.txt @@ -7,18 +7,18 @@ Required properties : - compatible : Should be "microchip,usb251xb" or one of the specific types: "microchip,usb2512b", "microchip,usb2512bi", "microchip,usb2513b", "microchip,usb2513bi", "microchip,usb2514b", "microchip,usb2514bi" - - hub-reset-gpios : Should specify the gpio for hub reset + - reset-gpios : Should specify the gpio for hub reset + - reg : I2C address on the selected bus (default is <0x2C>) Optional properties : - - reg : I2C address on the selected bus (default is <0x2C>) - skip-config : Skip Hub configuration, but only send the USB-Attach command - - vendor-id : USB Vendor ID of the hub (16 bit, default is 0x0424) - - product-id : USB Product ID of the hub (16 bit, default depends on type) - - device-id : USB Device ID of the hub (16 bit, default is 0x0bb3) - - language-id : USB Language ID (16 bit, default is 0x0000) - - manufacturer : USB Manufacturer string (max 31 characters long) - - product : USB Product string (max 31 characters long) - - serial : USB Serial string (max 31 characters long) + - vendor-id : Set USB Vendor ID of the hub (16 bit, default is 0x0424) + - product-id : Set USB Product ID of the hub (16 bit, default depends on type) + - device-id : Set USB Device ID of the hub (16 bit, default is 0x0bb3) + - language-id : Set USB Language ID (16 bit, default is 0x0000) + - manufacturer : Set USB Manufacturer string (max 31 characters long) + - product : Set USB Product string (max 31 characters long) + - serial : Set USB Serial string (max 31 characters long) - {bus,self}-powered : selects between self- and bus-powered operation (default is self-powered) - disable-hi-speed : disable USB Hi-Speed support @@ -34,7 +34,7 @@ Optional properties : - oc-delay-us : Delay time (in microseconds) for filtering the over-current sense inputs. Valid values are 100, 4000, 8000 (default) and 16000. If an invalid value is given, the default is used instead. - - compound-device : indicated the hub is part of a compound device + - compound-device : indicate the hub is part of a compound device - port-mapping-mode : enable port mapping mode - string-support : enable string descriptor support (required for manufacturer, product and serial string configuration) @@ -49,7 +49,8 @@ Optional properties : Examples: usb2512b@2c { compatible = "microchip,usb2512b"; - hub-reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; + reg = <0x2c>; + reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; }; usb2514b@2c { -- cgit v1.2.3 From 829b84db0c0c120ae5855a69cc0c94ff75fafabb Mon Sep 17 00:00:00 2001 From: Richard Leitner Date: Mon, 6 Mar 2017 09:24:23 +0100 Subject: MAINTAINERS: usb251xb: remove reference inexistent file The platform_data header file was dropped in the merged version of the USB251xB driver. Therefore remove its reference from the MAINTAINERS file. Signed-off-by: Richard Leitner Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c265a5fe4848..c776906f67a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8307,7 +8307,6 @@ M: Richard Leitner L: linux-usb@vger.kernel.org S: Maintained F: drivers/usb/misc/usb251xb.c -F: include/linux/platform_data/usb251xb.h F: Documentation/devicetree/bindings/usb/usb251xb.txt MICROSOFT SURFACE PRO 3 BUTTON DRIVER -- cgit v1.2.3 From 2f6821462fe3ace62df3f1b5a9463153e8288298 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 24 Feb 2017 19:11:28 +0100 Subject: USB: serial: digi_acceleport: fix OOB-event processing A recent change claimed to fix an off-by-one error in the OOB-port completion handler, but instead introduced such an error. This could specifically led to modem-status changes going unnoticed, effectively breaking TIOCMGET. Note that the offending commit fixes a loop-condition underflow and is marked for stable, but should not be backported without this fix. Reported-by: Ben Hutchings Fixes: 2d380889215f ("USB: serial: digi_acceleport: fix OOB data sanity check") Cc: stable # v2.6.30 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/digi_acceleport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index ab78111e0968..6537d3ca2797 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -1500,7 +1500,7 @@ static int digi_read_oob_callback(struct urb *urb) return -1; /* handle each oob command */ - for (i = 0; i < urb->actual_length - 4; i += 4) { + for (i = 0; i < urb->actual_length - 3; i += 4) { opcode = buf[i]; line = buf[i + 1]; status = buf[i + 2]; -- cgit v1.2.3 From 05d8d34611139f8435af90ac54b65eb31e82e388 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Tue, 7 Mar 2017 17:51:49 +0100 Subject: KVM: nVMX: do not warn when MSR bitmap address is not backed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before trying to do nested_get_page() in nested_vmx_merge_msr_bitmap(), we have already checked that the MSR bitmap address is valid (4k aligned and within physical limits). SDM doesn't specify what happens if the there is no memory mapped at the valid address, but Intel CPUs treat the situation as if the bitmap was configured to trap all MSRs. KVM already does that by returning false and a correct handling doesn't need the guest-trigerrable warning that was reported by syzkaller: (The warning was originally there to catch some possible bugs in nVMX.) ------------[ cut here ]------------ WARNING: CPU: 0 PID: 7832 at arch/x86/kvm/vmx.c:9709 nested_vmx_merge_msr_bitmap arch/x86/kvm/vmx.c:9709 [inline] WARNING: CPU: 0 PID: 7832 at arch/x86/kvm/vmx.c:9709 nested_get_vmcs12_pages+0xfb6/0x15c0 arch/x86/kvm/vmx.c:9640 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 7832 Comm: syz-executor1 Not tainted 4.10.0+ #229 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 panic+0x1fb/0x412 kernel/panic.c:179 __warn+0x1c4/0x1e0 kernel/panic.c:540 warn_slowpath_null+0x2c/0x40 kernel/panic.c:583 nested_vmx_merge_msr_bitmap arch/x86/kvm/vmx.c:9709 [inline] nested_get_vmcs12_pages+0xfb6/0x15c0 arch/x86/kvm/vmx.c:9640 enter_vmx_non_root_mode arch/x86/kvm/vmx.c:10471 [inline] nested_vmx_run+0x6186/0xaab0 arch/x86/kvm/vmx.c:10561 handle_vmlaunch+0x1a/0x20 arch/x86/kvm/vmx.c:7312 vmx_handle_exit+0xfc0/0x3f00 arch/x86/kvm/vmx.c:8526 vcpu_enter_guest arch/x86/kvm/x86.c:6982 [inline] vcpu_run arch/x86/kvm/x86.c:7044 [inline] kvm_arch_vcpu_ioctl_run+0x1418/0x4840 arch/x86/kvm/x86.c:7205 kvm_vcpu_ioctl+0x673/0x1120 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2570 Reported-by: Dmitry Vyukov Reviewed-by: Jim Mattson [Jim Mattson explained the bare metal behavior: "I believe this behavior would be documented in the chipset data sheet rather than the SDM, since the chipset returns all 1s for an unclaimed read."] Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ab338581b3ec..98e82ee1e699 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9680,10 +9680,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, return false; page = nested_get_page(vcpu, vmcs12->msr_bitmap); - if (!page) { - WARN_ON(1); + if (!page) return false; - } msr_bitmap_l1 = (unsigned long *)kmap(page); memset(msr_bitmap_l0, 0xff, PAGE_SIZE); -- cgit v1.2.3 From 9ad224744218a352964f31007a1420f2420a08a0 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 9 Mar 2017 11:05:33 -0300 Subject: i2c: exynos5: Avoid transaction timeouts due TRANSFER_DONE_AUTO not set After commit 7999eecb7e56 ("i2c: exynos5: fix arbitration lost handling"), some I2C transactions are failing because the TRANSFER_DONE_AUTO field is not set in the I2C_TRANS_STATUS register so the i2c->status value is left to -EINVAL causing the i2c->msg_complete completion to never be signaled. For example, when reading the time of an I2C rtc on an Exynos5800 machine: $ cat /sys/class/rtc/rtc0/time [ 25.924594] exynos5-hsi2c 12e10000.i2c: rx timeout [ 65.028365] max77686-rtc max77802-rtc: Fail to read time reg(-22) cat: /sys/class/rtc/rtc0/time: Invalid argument The Exynos5422 manual states clearly that most I2C_TRANS_STATUS reg bits (including TRANSFER_DONE_AUTO) are cleared after the register is read. So reading has side effects and should only be done if HSI2C_INT_I2C was set. Fixes: 7999eecb7e56 ("i2c: exynos5: fix arbitration lost handling") Signed-off-by: Javier Martinez Canillas Reviewed-by: Andrzej Hajda Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-exynos5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c index cbd93ce0661f..736a82472101 100644 --- a/drivers/i2c/busses/i2c-exynos5.c +++ b/drivers/i2c/busses/i2c-exynos5.c @@ -457,7 +457,6 @@ static irqreturn_t exynos5_i2c_irq(int irqno, void *dev_id) int_status = readl(i2c->regs + HSI2C_INT_STATUS); writel(int_status, i2c->regs + HSI2C_INT_STATUS); - trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS); /* handle interrupt related to the transfer status */ if (i2c->variant->hw == HSI2C_EXYNOS7) { @@ -482,11 +481,13 @@ static irqreturn_t exynos5_i2c_irq(int irqno, void *dev_id) goto stop; } + trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS); if ((trans_status & HSI2C_MASTER_ST_MASK) == HSI2C_MASTER_ST_LOSE) { i2c->state = -EAGAIN; goto stop; } } else if (int_status & HSI2C_INT_I2C) { + trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS); if (trans_status & HSI2C_NO_DEV_ACK) { dev_dbg(i2c->dev, "No ACK from device\n"); i2c->state = -ENXIO; -- cgit v1.2.3 From 8ce0928e6867fda4d208d7bddf251bce96ab8431 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 9 Mar 2017 16:32:17 +0100 Subject: Revert "i2c: add missing of_node_put in i2c_mux_del_adapters" This reverts commit 02dbfa5e5583523035f05636c614a0eca77f1aab. I grabbed the wrong version from the list and will pull the proper one from Peter Rosin's mux tree. Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-mux.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 2178266bca79..83768e85a919 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -429,7 +429,6 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) while (muxc->num_adapters) { struct i2c_adapter *adap = muxc->adapter[--muxc->num_adapters]; struct i2c_mux_priv *priv = adap->algo_data; - struct device_node *np = adap->dev.of_node; muxc->adapter[muxc->num_adapters] = NULL; @@ -439,7 +438,6 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) sysfs_remove_link(&priv->adap.dev.kobj, "mux_device"); i2c_del_adapter(adap); - of_node_put(np); kfree(priv); } } -- cgit v1.2.3 From 806dbb20efde821910b5f747befed794077a9109 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 9 Mar 2017 16:41:48 +0100 Subject: Revert "i2c: copy device properties when using i2c_register_board_info()" This reverts commit b0c1e95ab44feaad8831f2c06a3473c974003b49. It contains a flaw and the next version has more features added which makes me want to move it to the next cycle. Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-boardinfo.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/i2c/i2c-boardinfo.c b/drivers/i2c/i2c-boardinfo.c index 5b8f6c3a6950..6e5fac6a5262 100644 --- a/drivers/i2c/i2c-boardinfo.c +++ b/drivers/i2c/i2c-boardinfo.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -56,7 +55,6 @@ EXPORT_SYMBOL_GPL(__i2c_first_dynamic_bus_num); * * The board info passed can safely be __initdata, but be careful of embedded * pointers (for platform_data, functions, etc) since that won't be copied. - * Device properties are deep-copied though. */ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned len) { @@ -80,14 +78,6 @@ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsig devinfo->busnum = busnum; devinfo->board_info = *info; - - if (info->properties) { - devinfo->board_info.properties = - property_entries_dup(info->properties); - if (IS_ERR(devinfo->board_info.properties)) - return PTR_ERR(devinfo->board_info.properties); - } - list_add_tail(&devinfo->list, &__i2c_board_list); } -- cgit v1.2.3 From 94a631d91ad341b3b4bdac72d1104d9f090e0ca9 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 9 Mar 2017 15:39:34 +0200 Subject: usb: xhci-mtk: check hcc_params after adding primary hcd hcc_params is set in xhci_gen_setup() called from usb_add_hcd(), so checks the Maximum Primary Stream Array Size in the hcc_params register after adding primary hcd. Signed-off-by: Chunfeng Yun Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 9066ec9e0c2e..6ac73a6b4da6 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -678,13 +678,13 @@ static int xhci_mtk_probe(struct platform_device *pdev) goto power_off_phys; } - if (HCC_MAX_PSA(xhci->hcc_params) >= 4) - xhci->shared_hcd->can_do_streams = 1; - ret = usb_add_hcd(hcd, irq, IRQF_SHARED); if (ret) goto put_usb3_hcd; + if (HCC_MAX_PSA(xhci->hcc_params) >= 4) + xhci->shared_hcd->can_do_streams = 1; + ret = usb_add_hcd(xhci->shared_hcd, irq, IRQF_SHARED); if (ret) goto dealloc_usb2_hcd; -- cgit v1.2.3 From 20e4e37e4a2f1bfd43bcc8c3e666e47665036cc3 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 9 Mar 2017 15:39:35 +0200 Subject: usb: xhci: remove dummy extra_priv_size for size of xhci_hcd struct because hcd_priv_size is already size of xhci_hcd struct, extra_priv_size is not needed anymore for MTK and tegra drivers. Signed-off-by: Chunfeng Yun Tested-by: Thierry Reding Acked-by: Thierry Reding Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk.c | 1 - drivers/usb/host/xhci-tegra.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 6ac73a6b4da6..67d5dc79b6b5 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -382,7 +382,6 @@ static int usb_wakeup_of_property_parse(struct xhci_hcd_mtk *mtk, static int xhci_mtk_setup(struct usb_hcd *hcd); static const struct xhci_driver_overrides xhci_mtk_overrides __initconst = { - .extra_priv_size = sizeof(struct xhci_hcd), .reset = xhci_mtk_setup, }; diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index a59fafb4b329..74436f8ca538 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1308,7 +1308,6 @@ static int tegra_xhci_setup(struct usb_hcd *hcd) } static const struct xhci_driver_overrides tegra_xhci_overrides __initconst = { - .extra_priv_size = sizeof(struct xhci_hcd), .reset = tegra_xhci_setup, }; -- cgit v1.2.3 From f95e60a7dbecd2de816bb3ad517b3d4fbc20b507 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 9 Mar 2017 15:39:36 +0200 Subject: usb: host: xhci-dbg: HCIVERSION should be a binary number According to xHCI spec, HCIVERSION containing a BCD encoding of the xHCI specification revision number, 0100h corresponds to xHCI version 1.0. Change "100" as "0x100". Cc: Lu Baolu Cc: stable Fixes: 04abb6de2825 ("xhci: Read and parse new xhci 1.1 capability register") Signed-off-by: Peter Chen Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-dbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-dbg.c b/drivers/usb/host/xhci-dbg.c index 363d125300ea..2b4a00fa735d 100644 --- a/drivers/usb/host/xhci-dbg.c +++ b/drivers/usb/host/xhci-dbg.c @@ -109,7 +109,7 @@ static void xhci_print_cap_regs(struct xhci_hcd *xhci) xhci_dbg(xhci, "RTSOFF 0x%x:\n", temp & RTSOFF_MASK); /* xhci 1.1 controllers have the HCCPARAMS2 register */ - if (hci_version > 100) { + if (hci_version > 0x100) { temp = readl(&xhci->cap_regs->hcc_params2); xhci_dbg(xhci, "HCC PARAMS2 0x%x:\n", (unsigned int) temp); xhci_dbg(xhci, " HC %s Force save context capability", -- cgit v1.2.3 From dcc7620cad5ad1326a78f4031a7bf4f0e5b42984 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 9 Mar 2017 15:39:37 +0200 Subject: usb: host: xhci-plat: Fix timeout on removal of hot pluggable xhci controllers Upstream commit 98d74f9ceaef ("xhci: fix 10 second timeout on removal of PCI hotpluggable xhci controllers") fixes a problem with hot pluggable PCI xhci controllers which can result in excessive timeouts, to the point where the system reports a deadlock. The same problem is seen with hot pluggable xhci controllers using the xhci-plat driver, such as the driver used for Type-C ports on rk3399. Similar to hot-pluggable PCI controllers, the driver for this chip removes the xhci controller from the system when the Type-C cable is disconnected. The solution for PCI devices works just as well for non-PCI devices and avoids the problem. Signed-off-by: Guenter Roeck Cc: stable Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 6d33b42ffcf5..bd02a6cd8e2c 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -286,6 +286,8 @@ static int xhci_plat_remove(struct platform_device *dev) struct xhci_hcd *xhci = hcd_to_xhci(hcd); struct clk *clk = xhci->clk; + xhci->xhc_state |= XHCI_STATE_REMOVING; + usb_remove_hcd(xhci->shared_hcd); usb_phy_shutdown(hcd->usb_phy); -- cgit v1.2.3 From 6fb895692a034393d58679cd8d00c9e229719a5f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:02 +0300 Subject: x86/cpufeature: Add 5-level paging detection Look for 'la57' in /proc/cpuinfo to see if your machine supports 5-level paging. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- arch/x86/include/asm/cpufeatures.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4e7772387c6e..b04bb6dfed7f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -289,7 +289,8 @@ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ -- cgit v1.2.3 From 505a60e225606fbd3d2eadc31ff793d939ba66f1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:03 +0300 Subject: asm-generic: introduce 5level-fixup.h We are going to switch core MM to 5-level paging abstraction. This is preparation step which adds As with 4level-fixup.h, the new header allows quickly make all architectures compatible with 5-level paging in core MM. In long run we would like to switch architectures to properly folded p4d level by using , but it requires more changes to arch-specific code. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/asm-generic/4level-fixup.h | 3 ++- include/asm-generic/5level-fixup.h | 41 ++++++++++++++++++++++++++++++++++++++ include/linux/mm.h | 3 +++ 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 include/asm-generic/5level-fixup.h diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h index 5bdab6bffd23..928fd66b1271 100644 --- a/include/asm-generic/4level-fixup.h +++ b/include/asm-generic/4level-fixup.h @@ -15,7 +15,6 @@ ((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \ NULL: pmd_offset(pud, address)) -#define pud_alloc(mm, pgd, address) (pgd) #define pud_offset(pgd, start) (pgd) #define pud_none(pud) 0 #define pud_bad(pud) 0 @@ -35,4 +34,6 @@ #undef pud_addr_end #define pud_addr_end(addr, end) (end) +#include + #endif diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h new file mode 100644 index 000000000000..b5ca82dc4175 --- /dev/null +++ b/include/asm-generic/5level-fixup.h @@ -0,0 +1,41 @@ +#ifndef _5LEVEL_FIXUP_H +#define _5LEVEL_FIXUP_H + +#define __ARCH_HAS_5LEVEL_HACK +#define __PAGETABLE_P4D_FOLDED + +#define P4D_SHIFT PGDIR_SHIFT +#define P4D_SIZE PGDIR_SIZE +#define P4D_MASK PGDIR_MASK +#define PTRS_PER_P4D 1 + +#define p4d_t pgd_t + +#define pud_alloc(mm, p4d, address) \ + ((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \ + NULL : pud_offset(p4d, address)) + +#define p4d_alloc(mm, pgd, address) (pgd) +#define p4d_offset(pgd, start) (pgd) +#define p4d_none(p4d) 0 +#define p4d_bad(p4d) 0 +#define p4d_present(p4d) 1 +#define p4d_ERROR(p4d) do { } while (0) +#define p4d_clear(p4d) pgd_clear(p4d) +#define p4d_val(p4d) pgd_val(p4d) +#define p4d_populate(mm, p4d, pud) pgd_populate(mm, p4d, pud) +#define p4d_page(p4d) pgd_page(p4d) +#define p4d_page_vaddr(p4d) pgd_page_vaddr(p4d) + +#define __p4d(x) __pgd(x) +#define set_p4d(p4dp, p4d) set_pgd(p4dp, p4d) + +#undef p4d_free_tlb +#define p4d_free_tlb(tlb, x, addr) do { } while (0) +#define p4d_free(mm, x) do { } while (0) +#define __p4d_free_tlb(tlb, x, addr) do { } while (0) + +#undef p4d_addr_end +#define p4d_addr_end(addr, end) (end) + +#endif diff --git a/include/linux/mm.h b/include/linux/mm.h index 0d65dd72c0f4..be1fe264eb37 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1619,11 +1619,14 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); * Remove it when 4level-fixup.h has been removed. */ #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) + +#ifndef __ARCH_HAS_5LEVEL_HACK static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) { return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? NULL: pud_offset(pgd, address); } +#endif /* !__ARCH_HAS_5LEVEL_HACK */ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { -- cgit v1.2.3 From 30ec842660bd0d056d4a7028ac5bd4a82b113d4f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:04 +0300 Subject: asm-generic: introduce __ARCH_USE_5LEVEL_HACK We are going to introduce to provide abstraction for properly (in opposite to 5level-fixup.h hack) folded p4d level. The new header will be included from pgtable-nopud.h. If an architecture uses , we cannot use 5level-fixup.h directly to quickly convert the architecture to 5-level paging as it would conflict with pgtable-nop4d.h. With this patch an architecture can define __ARCH_USE_5LEVEL_HACK before inclusion to use 5level-fixup.h. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable-nop4d-hack.h | 62 ++++++++++++++++++++++++++++++++ include/asm-generic/pgtable-nopud.h | 5 +++ 2 files changed, 67 insertions(+) create mode 100644 include/asm-generic/pgtable-nop4d-hack.h diff --git a/include/asm-generic/pgtable-nop4d-hack.h b/include/asm-generic/pgtable-nop4d-hack.h new file mode 100644 index 000000000000..752fb7511750 --- /dev/null +++ b/include/asm-generic/pgtable-nop4d-hack.h @@ -0,0 +1,62 @@ +#ifndef _PGTABLE_NOP4D_HACK_H +#define _PGTABLE_NOP4D_HACK_H + +#ifndef __ASSEMBLY__ +#include + +#define __PAGETABLE_PUD_FOLDED + +/* + * Having the pud type consist of a pgd gets the size right, and allows + * us to conceptually access the pgd entry that this pud is folded into + * without casting. + */ +typedef struct { pgd_t pgd; } pud_t; + +#define PUD_SHIFT PGDIR_SHIFT +#define PTRS_PER_PUD 1 +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the pud is never bad, and a pud always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +static inline void pgd_clear(pgd_t *pgd) { } +#define pud_ERROR(pud) (pgd_ERROR((pud).pgd)) + +#define pgd_populate(mm, pgd, pud) do { } while (0) +/* + * (puds are folded into pgds so this doesn't get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pgd(pgdptr, pgdval) set_pud((pud_t *)(pgdptr), (pud_t) { pgdval }) + +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) +{ + return (pud_t *)pgd; +} + +#define pud_val(x) (pgd_val((x).pgd)) +#define __pud(x) ((pud_t) { __pgd(x) }) + +#define pgd_page(pgd) (pud_page((pud_t){ pgd })) +#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd })) + +/* + * allocating and freeing a pud is trivial: the 1-entry pud is + * inside the pgd, so has no extra memory associated with it. + */ +#define pud_alloc_one(mm, address) NULL +#define pud_free(mm, x) do { } while (0) +#define __pud_free_tlb(tlb, x, a) do { } while (0) + +#undef pud_addr_end +#define pud_addr_end(addr, end) (end) + +#endif /* __ASSEMBLY__ */ +#endif /* _PGTABLE_NOP4D_HACK_H */ diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index 810431d8351b..5e49430a30a4 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -3,6 +3,10 @@ #ifndef __ASSEMBLY__ +#ifdef __ARCH_USE_5LEVEL_HACK +#include +#else + #define __PAGETABLE_PUD_FOLDED /* @@ -58,4 +62,5 @@ static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) #define pud_addr_end(addr, end) (end) #endif /* __ASSEMBLY__ */ +#endif /* !__ARCH_USE_5LEVEL_HACK */ #endif /* _PGTABLE_NOPUD_H */ -- cgit v1.2.3 From 9849a5697d3defb2087cb6b9be5573a142697889 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:05 +0300 Subject: arch, mm: convert all architectures to use 5level-fixup.h If an architecture uses 4level-fixup.h we don't need to do anything as it includes 5level-fixup.h. If an architecture uses pgtable-nop*d.h, define __ARCH_USE_5LEVEL_HACK before inclusion of the header. It makes asm-generic code to use 5level-fixup.h. If an architecture has 4-level paging or folds levels on its own, include 5level-fixup.h directly. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- arch/arc/include/asm/hugepage.h | 1 + arch/arc/include/asm/pgtable.h | 1 + arch/arm/include/asm/pgtable.h | 1 + arch/arm64/include/asm/pgtable-types.h | 4 ++++ arch/avr32/include/asm/pgtable-2level.h | 1 + arch/cris/include/asm/pgtable.h | 1 + arch/frv/include/asm/pgtable.h | 1 + arch/h8300/include/asm/pgtable.h | 1 + arch/hexagon/include/asm/pgtable.h | 1 + arch/ia64/include/asm/pgtable.h | 2 ++ arch/metag/include/asm/pgtable.h | 1 + arch/microblaze/include/asm/page.h | 3 ++- arch/mips/include/asm/pgtable-32.h | 1 + arch/mips/include/asm/pgtable-64.h | 1 + arch/mn10300/include/asm/page.h | 1 + arch/nios2/include/asm/pgtable.h | 1 + arch/openrisc/include/asm/pgtable.h | 1 + arch/powerpc/include/asm/book3s/32/pgtable.h | 1 + arch/powerpc/include/asm/book3s/64/pgtable.h | 3 +++ arch/powerpc/include/asm/nohash/32/pgtable.h | 1 + arch/powerpc/include/asm/nohash/64/pgtable-4k.h | 3 +++ arch/powerpc/include/asm/nohash/64/pgtable-64k.h | 1 + arch/s390/include/asm/pgtable.h | 1 + arch/score/include/asm/pgtable.h | 1 + arch/sh/include/asm/pgtable-2level.h | 1 + arch/sh/include/asm/pgtable-3level.h | 1 + arch/sparc/include/asm/pgtable_64.h | 1 + arch/tile/include/asm/pgtable_32.h | 1 + arch/tile/include/asm/pgtable_64.h | 1 + arch/um/include/asm/pgtable-2level.h | 1 + arch/um/include/asm/pgtable-3level.h | 1 + arch/unicore32/include/asm/pgtable.h | 1 + arch/x86/include/asm/pgtable_types.h | 4 ++++ arch/xtensa/include/asm/pgtable.h | 1 + 34 files changed, 46 insertions(+), 1 deletion(-) diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index 317ff773e1ca..b18fcb606908 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -11,6 +11,7 @@ #define _ASM_ARC_HUGEPAGE_H #include +#define __ARCH_USE_5LEVEL_HACK #include static inline pte_t pmd_pte(pmd_t pmd) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index e94ca72b974e..ee22d40afef4 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -37,6 +37,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index a8d656d9aec7..1c462381c225 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -20,6 +20,7 @@ #else +#define __ARCH_USE_5LEVEL_HACK #include #include #include diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index 69b2fd41503c..345a072b5856 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -55,9 +55,13 @@ typedef struct { pteval_t pgprot; } pgprot_t; #define __pgprot(x) ((pgprot_t) { (x) } ) #if CONFIG_PGTABLE_LEVELS == 2 +#define __ARCH_USE_5LEVEL_HACK #include #elif CONFIG_PGTABLE_LEVELS == 3 +#define __ARCH_USE_5LEVEL_HACK #include +#elif CONFIG_PGTABLE_LEVELS == 4 +#include #endif #endif /* __ASM_PGTABLE_TYPES_H */ diff --git a/arch/avr32/include/asm/pgtable-2level.h b/arch/avr32/include/asm/pgtable-2level.h index 425dd567b5b9..d5b1c63993ec 100644 --- a/arch/avr32/include/asm/pgtable-2level.h +++ b/arch/avr32/include/asm/pgtable-2level.h @@ -8,6 +8,7 @@ #ifndef __ASM_AVR32_PGTABLE_2LEVEL_H #define __ASM_AVR32_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h index 2a3210ba4c72..fa3a73004cc5 100644 --- a/arch/cris/include/asm/pgtable.h +++ b/arch/cris/include/asm/pgtable.h @@ -6,6 +6,7 @@ #define _CRIS_PGTABLE_H #include +#define __ARCH_USE_5LEVEL_HACK #include #ifndef __ASSEMBLY__ diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h index a0513d463a1f..ab6e7e961b54 100644 --- a/arch/frv/include/asm/pgtable.h +++ b/arch/frv/include/asm/pgtable.h @@ -16,6 +16,7 @@ #ifndef _ASM_PGTABLE_H #define _ASM_PGTABLE_H +#include #include #include #include diff --git a/arch/h8300/include/asm/pgtable.h b/arch/h8300/include/asm/pgtable.h index 8341db67821d..7d265d28ba5e 100644 --- a/arch/h8300/include/asm/pgtable.h +++ b/arch/h8300/include/asm/pgtable.h @@ -1,5 +1,6 @@ #ifndef _H8300_PGTABLE_H #define _H8300_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #include #define pgtable_cache_init() do { } while (0) diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 49eab8136ec3..24a9177fb897 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -26,6 +26,7 @@ */ #include #include +#define __ARCH_USE_5LEVEL_HACK #include /* A handy thing to have if one has the RAM. Declared in head.S */ diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 384794e665fc..6cc22c8d8923 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -587,8 +587,10 @@ extern struct page *zero_page_memmap_ptr; #if CONFIG_PGTABLE_LEVELS == 3 +#define __ARCH_USE_5LEVEL_HACK #include #endif +#include #include #endif /* _ASM_IA64_PGTABLE_H */ diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h index ffa3a3a2ecad..0c151e5af079 100644 --- a/arch/metag/include/asm/pgtable.h +++ b/arch/metag/include/asm/pgtable.h @@ -6,6 +6,7 @@ #define _METAG_PGTABLE_H #include +#define __ARCH_USE_5LEVEL_HACK #include /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */ diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index fd850879854d..d506bb0893f9 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -95,7 +95,8 @@ typedef struct { unsigned long pgd; } pgd_t; # else /* CONFIG_MMU */ typedef struct { unsigned long ste[64]; } pmd_t; typedef struct { pmd_t pue[1]; } pud_t; -typedef struct { pud_t pge[1]; } pgd_t; +typedef struct { pud_t p4e[1]; } p4d_t; +typedef struct { p4d_t pge[1]; } pgd_t; # endif /* CONFIG_MMU */ # define pte_val(x) ((x).pte) diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index d21f3da7bdb6..6f94bed571c4 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -16,6 +16,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #include extern int temp_tlb_entry; diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 514cbc0a6a67..130a2a6c1531 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -17,6 +17,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48) #include #else diff --git a/arch/mn10300/include/asm/page.h b/arch/mn10300/include/asm/page.h index 3810a6f740fd..dfe730a5ede0 100644 --- a/arch/mn10300/include/asm/page.h +++ b/arch/mn10300/include/asm/page.h @@ -57,6 +57,7 @@ typedef struct page *pgtable_t; #define __pgd(x) ((pgd_t) { (x) }) #define __pgprot(x) ((pgprot_t) { (x) }) +#define __ARCH_USE_5LEVEL_HACK #include #endif /* !__ASSEMBLY__ */ diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index 298393c3cb42..db4f7d179220 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -22,6 +22,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #include #define FIRST_USER_ADDRESS 0UL diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 3567aa7be555..ff97374ca069 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -25,6 +25,7 @@ #ifndef __ASM_OPENRISC_PGTABLE_H #define __ASM_OPENRISC_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 012223638815..26ed228d4dc6 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 1eeeb72c7015..13c39b6d5d64 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1,9 +1,12 @@ #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ +#include + #ifndef __ASSEMBLY__ #include #endif + /* * Common bits between hash and Radix page table */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index ba9921bf202e..5134ade2e850 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H #define _ASM_POWERPC_NOHASH_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index d0db98793dd8..9f4de0a1035e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -1,5 +1,8 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H + +#include + /* * Entries per page directory level. The PTE level must use a 64b record * for each page table entry. The PMD and PGD level use a 32b record for diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h index 55b28ef3409a..1facb584dd29 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H +#define __ARCH_USE_5LEVEL_HACK #include diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7ed1972b1920..93e37b12e882 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -24,6 +24,7 @@ * the S390 page table tree. */ #ifndef __ASSEMBLY__ +#include #include #include #include diff --git a/arch/score/include/asm/pgtable.h b/arch/score/include/asm/pgtable.h index 0553e5cd5985..46ff8fd678a7 100644 --- a/arch/score/include/asm/pgtable.h +++ b/arch/score/include/asm/pgtable.h @@ -2,6 +2,7 @@ #define _ASM_SCORE_PGTABLE_H #include +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/sh/include/asm/pgtable-2level.h b/arch/sh/include/asm/pgtable-2level.h index 19bd89db17e7..f75cf4387257 100644 --- a/arch/sh/include/asm/pgtable-2level.h +++ b/arch/sh/include/asm/pgtable-2level.h @@ -1,6 +1,7 @@ #ifndef __ASM_SH_PGTABLE_2LEVEL_H #define __ASM_SH_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index 249a985d9648..9b1e776eca31 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -1,6 +1,7 @@ #ifndef __ASM_SH_PGTABLE_3LEVEL_H #define __ASM_SH_PGTABLE_3LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 56e49c8f770d..8a598528ec1f 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -12,6 +12,7 @@ * the SpitFire page tables. */ +#include #include #include #include diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index d26a42279036..5f8c615cb5e9 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -74,6 +74,7 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */; #define MAXMEM (_VMALLOC_START - PAGE_OFFSET) /* We have no pmd or pud since we are strictly a two-level page table */ +#define __ARCH_USE_5LEVEL_HACK #include static inline int pud_huge_page(pud_t pud) { return 0; } diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index e96cec52f6d8..96fe58b45118 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -59,6 +59,7 @@ #ifndef __ASSEMBLY__ /* We have no pud since we are a three-level page table. */ +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h index cfbe59752469..179c0ea87a0c 100644 --- a/arch/um/include/asm/pgtable-2level.h +++ b/arch/um/include/asm/pgtable-2level.h @@ -8,6 +8,7 @@ #ifndef __UM_PGTABLE_2LEVEL_H #define __UM_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index bae8523a162f..c4d876dfb9ac 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -7,6 +7,7 @@ #ifndef __UM_PGTABLE_3LEVEL_H #define __UM_PGTABLE_3LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h index 818d0f5598e3..a4f2bef37e70 100644 --- a/arch/unicore32/include/asm/pgtable.h +++ b/arch/unicore32/include/asm/pgtable.h @@ -12,6 +12,7 @@ #ifndef __UNICORE_PGTABLE_H__ #define __UNICORE_PGTABLE_H__ +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 8b4de22d6429..62484333673d 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -273,6 +273,8 @@ static inline pgdval_t pgd_flags(pgd_t pgd) } #if CONFIG_PGTABLE_LEVELS > 3 +#include + typedef struct { pudval_t pud; } pud_t; static inline pud_t native_make_pud(pmdval_t val) @@ -285,6 +287,7 @@ static inline pudval_t native_pud_val(pud_t pud) return pud.pud; } #else +#define __ARCH_USE_5LEVEL_HACK #include static inline pudval_t native_pud_val(pud_t pud) @@ -306,6 +309,7 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) return pmd.pmd; } #else +#define __ARCH_USE_5LEVEL_HACK #include static inline pmdval_t native_pmd_val(pmd_t pmd) diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 8aa0e0d9cbb2..30dd5b2e4ad5 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -11,6 +11,7 @@ #ifndef _XTENSA_PGTABLE_H #define _XTENSA_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #include #include -- cgit v1.2.3 From 048456dcf2c56ad6f6248e2899dda92fb6a613f6 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:06 +0300 Subject: asm-generic: introduce Like with pgtable-nopud.h for 4-level paging, this new header is base for converting an architectures to properly folded p4d_t level. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable-nop4d.h | 56 +++++++++++++++++++++++++++++++++++++ include/asm-generic/pgtable-nopud.h | 43 ++++++++++++++-------------- include/asm-generic/tlb.h | 14 ++++++++-- 3 files changed, 89 insertions(+), 24 deletions(-) create mode 100644 include/asm-generic/pgtable-nop4d.h diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h new file mode 100644 index 000000000000..de364ecb8df6 --- /dev/null +++ b/include/asm-generic/pgtable-nop4d.h @@ -0,0 +1,56 @@ +#ifndef _PGTABLE_NOP4D_H +#define _PGTABLE_NOP4D_H + +#ifndef __ASSEMBLY__ + +#define __PAGETABLE_P4D_FOLDED + +typedef struct { pgd_t pgd; } p4d_t; + +#define P4D_SHIFT PGDIR_SHIFT +#define PTRS_PER_P4D 1 +#define P4D_SIZE (1UL << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE-1)) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the p4d is never bad, and a p4d always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +static inline void pgd_clear(pgd_t *pgd) { } +#define p4d_ERROR(p4d) (pgd_ERROR((p4d).pgd)) + +#define pgd_populate(mm, pgd, p4d) do { } while (0) +/* + * (p4ds are folded into pgds so this doesn't get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pgd(pgdptr, pgdval) set_p4d((p4d_t *)(pgdptr), (p4d_t) { pgdval }) + +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +{ + return (p4d_t *)pgd; +} + +#define p4d_val(x) (pgd_val((x).pgd)) +#define __p4d(x) ((p4d_t) { __pgd(x) }) + +#define pgd_page(pgd) (p4d_page((p4d_t){ pgd })) +#define pgd_page_vaddr(pgd) (p4d_page_vaddr((p4d_t){ pgd })) + +/* + * allocating and freeing a p4d is trivial: the 1-entry p4d is + * inside the pgd, so has no extra memory associated with it. + */ +#define p4d_alloc_one(mm, address) NULL +#define p4d_free(mm, x) do { } while (0) +#define __p4d_free_tlb(tlb, x, a) do { } while (0) + +#undef p4d_addr_end +#define p4d_addr_end(addr, end) (end) + +#endif /* __ASSEMBLY__ */ +#endif /* _PGTABLE_NOP4D_H */ diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index 5e49430a30a4..c2b9b96d6268 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -6,53 +6,54 @@ #ifdef __ARCH_USE_5LEVEL_HACK #include #else +#include #define __PAGETABLE_PUD_FOLDED /* - * Having the pud type consist of a pgd gets the size right, and allows - * us to conceptually access the pgd entry that this pud is folded into + * Having the pud type consist of a p4d gets the size right, and allows + * us to conceptually access the p4d entry that this pud is folded into * without casting. */ -typedef struct { pgd_t pgd; } pud_t; +typedef struct { p4d_t p4d; } pud_t; -#define PUD_SHIFT PGDIR_SHIFT +#define PUD_SHIFT P4D_SHIFT #define PTRS_PER_PUD 1 #define PUD_SIZE (1UL << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) /* - * The "pgd_xxx()" functions here are trivial for a folded two-level + * The "p4d_xxx()" functions here are trivial for a folded two-level * setup: the pud is never bad, and a pud always exists (as it's folded - * into the pgd entry) + * into the p4d entry) */ -static inline int pgd_none(pgd_t pgd) { return 0; } -static inline int pgd_bad(pgd_t pgd) { return 0; } -static inline int pgd_present(pgd_t pgd) { return 1; } -static inline void pgd_clear(pgd_t *pgd) { } -#define pud_ERROR(pud) (pgd_ERROR((pud).pgd)) +static inline int p4d_none(p4d_t p4d) { return 0; } +static inline int p4d_bad(p4d_t p4d) { return 0; } +static inline int p4d_present(p4d_t p4d) { return 1; } +static inline void p4d_clear(p4d_t *p4d) { } +#define pud_ERROR(pud) (p4d_ERROR((pud).p4d)) -#define pgd_populate(mm, pgd, pud) do { } while (0) +#define p4d_populate(mm, p4d, pud) do { } while (0) /* - * (puds are folded into pgds so this doesn't get actually called, + * (puds are folded into p4ds so this doesn't get actually called, * but the define is needed for a generic inline function.) */ -#define set_pgd(pgdptr, pgdval) set_pud((pud_t *)(pgdptr), (pud_t) { pgdval }) +#define set_p4d(p4dptr, p4dval) set_pud((pud_t *)(p4dptr), (pud_t) { p4dval }) -static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) { - return (pud_t *)pgd; + return (pud_t *)p4d; } -#define pud_val(x) (pgd_val((x).pgd)) -#define __pud(x) ((pud_t) { __pgd(x) } ) +#define pud_val(x) (p4d_val((x).p4d)) +#define __pud(x) ((pud_t) { __p4d(x) }) -#define pgd_page(pgd) (pud_page((pud_t){ pgd })) -#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd })) +#define p4d_page(p4d) (pud_page((pud_t){ p4d })) +#define p4d_page_vaddr(p4d) (pud_page_vaddr((pud_t){ p4d })) /* * allocating and freeing a pud is trivial: the 1-entry pud is - * inside the pgd, so has no extra memory associated with it. + * inside the p4d, so has no extra memory associated with it. */ #define pud_alloc_one(mm, address) NULL #define pud_free(mm, x) do { } while (0) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 4329bc6ef04b..8afa4335e5b2 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -270,6 +270,12 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, __pte_free_tlb(tlb, ptep, address); \ } while (0) +#define pmd_free_tlb(tlb, pmdp, address) \ + do { \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __pmd_free_tlb(tlb, pmdp, address); \ + } while (0) + #ifndef __ARCH_HAS_4LEVEL_HACK #define pud_free_tlb(tlb, pudp, address) \ do { \ @@ -278,11 +284,13 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, } while (0) #endif -#define pmd_free_tlb(tlb, pmdp, address) \ +#ifndef __ARCH_HAS_5LEVEL_HACK +#define p4d_free_tlb(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ - __pmd_free_tlb(tlb, pmdp, address); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __p4d_free_tlb(tlb, pudp, address); \ } while (0) +#endif #define tlb_migrate_finish(mm) do {} while (0) -- cgit v1.2.3 From c2febafc67734a62196c1b9dfba926412d4077ba Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:07 +0300 Subject: mm: convert generic code to 5-level paging Convert all non-architecture-specific code to 5-level paging. It's mostly mechanical adding handling one more page table level in places where we deal with pud_t. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- drivers/misc/sgi-gru/grufault.c | 9 +- fs/userfaultfd.c | 6 +- include/asm-generic/pgtable.h | 48 +++++++++- include/linux/hugetlb.h | 5 +- include/linux/kasan.h | 1 + include/linux/mm.h | 31 ++++-- lib/ioremap.c | 39 +++++++- mm/gup.c | 46 +++++++-- mm/huge_memory.c | 7 +- mm/hugetlb.c | 29 +++--- mm/kasan/kasan_init.c | 44 ++++++++- mm/memory.c | 207 +++++++++++++++++++++++++++++++++------- mm/mlock.c | 1 + mm/mprotect.c | 26 ++++- mm/mremap.c | 13 ++- mm/page_vma_mapped.c | 6 +- mm/pagewalk.c | 32 ++++++- mm/pgtable-generic.c | 6 ++ mm/rmap.c | 7 +- mm/sparse-vmemmap.c | 22 ++++- mm/swapfile.c | 26 ++++- mm/userfaultfd.c | 23 +++-- mm/vmalloc.c | 81 ++++++++++++---- 23 files changed, 595 insertions(+), 120 deletions(-) diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index 6fb773dbcd0c..93be82fc338a 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -219,15 +219,20 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, int write, unsigned long *paddr, int *pageshift) { pgd_t *pgdp; - pmd_t *pmdp; + p4d_t *p4dp; pud_t *pudp; + pmd_t *pmdp; pte_t pte; pgdp = pgd_offset(vma->vm_mm, vaddr); if (unlikely(pgd_none(*pgdp))) goto err; - pudp = pud_offset(pgdp, vaddr); + p4dp = p4d_offset(pgdp, vaddr); + if (unlikely(p4d_none(*p4dp))) + goto err; + + pudp = pud_offset(p4dp, vaddr); if (unlikely(pud_none(*pudp))) goto err; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 973607df579d..02ce3944d0f5 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -267,6 +267,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, { struct mm_struct *mm = ctx->mm; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd, _pmd; pte_t *pte; @@ -277,7 +278,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) goto out; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + goto out; + pud = pud_offset(p4d, address); if (!pud_present(*pud)) goto out; pmd = pmd_offset(pud, address); diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index f4ca23b158b3..1fad160f35de 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -10,9 +10,9 @@ #include #include -#if 4 - defined(__PAGETABLE_PUD_FOLDED) - defined(__PAGETABLE_PMD_FOLDED) != \ - CONFIG_PGTABLE_LEVELS -#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{PUD,PMD}_FOLDED +#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \ + defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS +#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED #endif /* @@ -424,6 +424,13 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) (__boundary - 1 < (end) - 1)? __boundary: (end); \ }) +#ifndef p4d_addr_end +#define p4d_addr_end(addr, end) \ +({ unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK; \ + (__boundary - 1 < (end) - 1)? __boundary: (end); \ +}) +#endif + #ifndef pud_addr_end #define pud_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \ @@ -444,6 +451,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) * Do the tests inline, but report and clear the bad entry in mm/memory.c. */ void pgd_clear_bad(pgd_t *); +void p4d_clear_bad(p4d_t *); void pud_clear_bad(pud_t *); void pmd_clear_bad(pmd_t *); @@ -458,6 +466,17 @@ static inline int pgd_none_or_clear_bad(pgd_t *pgd) return 0; } +static inline int p4d_none_or_clear_bad(p4d_t *p4d) +{ + if (p4d_none(*p4d)) + return 1; + if (unlikely(p4d_bad(*p4d))) { + p4d_clear_bad(p4d); + return 1; + } + return 0; +} + static inline int pud_none_or_clear_bad(pud_t *pud) { if (pud_none(*pud)) @@ -844,11 +863,30 @@ static inline int pmd_protnone(pmd_t pmd) #endif /* CONFIG_MMU */ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP + +#ifndef __PAGETABLE_P4D_FOLDED +int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot); +int p4d_clear_huge(p4d_t *p4d); +#else +static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) +{ + return 0; +} +static inline int p4d_clear_huge(p4d_t *p4d) +{ + return 0; +} +#endif /* !__PAGETABLE_P4D_FOLDED */ + int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ +static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) +{ + return 0; +} static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { return 0; @@ -857,6 +895,10 @@ static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { return 0; } +static inline int p4d_clear_huge(p4d_t *p4d) +{ + return 0; +} static inline int pud_clear_huge(pud_t *pud) { return 0; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 503099d8aada..b857fc8cc2ec 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -122,7 +122,7 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); int pmd_huge(pmd_t pmd); -int pud_huge(pud_t pmd); +int pud_huge(pud_t pud); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot); @@ -197,6 +197,9 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, #ifndef pgd_huge #define pgd_huge(x) 0 #endif +#ifndef p4d_huge +#define p4d_huge(x) 0 +#endif #ifndef pgd_write static inline int pgd_write(pgd_t pgd) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index ceb3fe78a0d3..1c823bef4c15 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -18,6 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE]; extern pte_t kasan_zero_pte[PTRS_PER_PTE]; extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; extern pud_t kasan_zero_pud[PTRS_PER_PUD]; +extern p4d_t kasan_zero_p4d[PTRS_PER_P4D]; void kasan_populate_zero_shadow(const void *shadow_start, const void *shadow_end); diff --git a/include/linux/mm.h b/include/linux/mm.h index be1fe264eb37..5f01c88f0800 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1560,14 +1560,24 @@ static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, return ptep; } +#ifdef __PAGETABLE_P4D_FOLDED +static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, + unsigned long address) +{ + return 0; +} +#else +int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); +#endif + #ifdef __PAGETABLE_PUD_FOLDED -static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, +static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { return 0; } #else -int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); +int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address); #endif #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU) @@ -1621,10 +1631,18 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) #ifndef __ARCH_HAS_5LEVEL_HACK -static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd, + unsigned long address) +{ + return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ? + NULL : p4d_offset(pgd, address); +} + +static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d, + unsigned long address) { - return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? - NULL: pud_offset(pgd, address); + return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ? + NULL : pud_offset(p4d, address); } #endif /* !__ARCH_HAS_5LEVEL_HACK */ @@ -2388,7 +2406,8 @@ void sparse_mem_maps_populate_node(struct page **map_map, struct page *sparse_mem_map_populate(unsigned long pnum, int nid); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); -pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); +p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); +pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); void *vmemmap_alloc_block(unsigned long size, int node); diff --git a/lib/ioremap.c b/lib/ioremap.c index a3e14ce92a56..4bb30206b942 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -14,6 +14,7 @@ #include #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +static int __read_mostly ioremap_p4d_capable; static int __read_mostly ioremap_pud_capable; static int __read_mostly ioremap_pmd_capable; static int __read_mostly ioremap_huge_disabled; @@ -35,6 +36,11 @@ void __init ioremap_huge_init(void) } } +static inline int ioremap_p4d_enabled(void) +{ + return ioremap_p4d_capable; +} + static inline int ioremap_pud_enabled(void) { return ioremap_pud_capable; @@ -46,6 +52,7 @@ static inline int ioremap_pmd_enabled(void) } #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ +static inline int ioremap_p4d_enabled(void) { return 0; } static inline int ioremap_pud_enabled(void) { return 0; } static inline int ioremap_pmd_enabled(void) { return 0; } #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ @@ -94,14 +101,14 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, return 0; } -static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, +static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pud_t *pud; unsigned long next; phys_addr -= addr; - pud = pud_alloc(&init_mm, pgd, addr); + pud = pud_alloc(&init_mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -120,6 +127,32 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, return 0; } +static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) +{ + p4d_t *p4d; + unsigned long next; + + phys_addr -= addr; + p4d = p4d_alloc(&init_mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + + if (ioremap_p4d_enabled() && + ((next - addr) == P4D_SIZE) && + IS_ALIGNED(phys_addr + addr, P4D_SIZE)) { + if (p4d_set_huge(p4d, phys_addr + addr, prot)) + continue; + } + + if (ioremap_pud_range(p4d, addr, next, phys_addr + addr, prot)) + return -ENOMEM; + } while (p4d++, addr = next, addr != end); + return 0; +} + int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { @@ -135,7 +168,7 @@ int ioremap_page_range(unsigned long addr, pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); - err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot); + err = ioremap_p4d_range(pgd, addr, next, phys_addr+addr, prot); if (err) break; } while (pgd++, addr = next, addr != end); diff --git a/mm/gup.c b/mm/gup.c index 9c047e951aa3..c74bad1bf6e8 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -226,6 +226,7 @@ struct page *follow_page_mask(struct vm_area_struct *vma, unsigned int *page_mask) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; spinlock_t *ptl; @@ -243,8 +244,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma, pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return no_page_table(vma, flags); - - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (p4d_none(*p4d)) + return no_page_table(vma, flags); + BUILD_BUG_ON(p4d_huge(*p4d)); + if (unlikely(p4d_bad(*p4d))) + return no_page_table(vma, flags); + pud = pud_offset(p4d, address); if (pud_none(*pud)) return no_page_table(vma, flags); if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) { @@ -325,6 +331,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, struct page **page) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -338,7 +345,9 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, else pgd = pgd_offset_gate(mm, address); BUG_ON(pgd_none(*pgd)); - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + BUG_ON(p4d_none(*p4d)); + pud = pud_offset(p4d, address); BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, address); if (pmd_none(*pmd)) @@ -1400,13 +1409,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, return 1; } -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, +static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; - pudp = pud_offset(&pgd, addr); + pudp = pud_offset(&p4d, addr); do { pud_t pud = READ_ONCE(*pudp); @@ -1428,6 +1437,31 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, return 1; } +static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + p4d_t *p4dp; + + p4dp = p4d_offset(&pgd, addr); + do { + p4d_t p4d = READ_ONCE(*p4dp); + + next = p4d_addr_end(addr, end); + if (p4d_none(p4d)) + return 0; + BUILD_BUG_ON(p4d_huge(p4d)); + if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) { + if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr, + P4D_SHIFT, next, write, pages, nr)) + return 0; + } else if (!gup_p4d_range(p4d, addr, next, write, pages, nr)) + return 0; + } while (p4dp++, addr = next, addr != end); + + return 1; +} + /* * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to * the regular GUP. It will only return non-negative values. @@ -1478,7 +1512,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, PGDIR_SHIFT, next, write, pages, &nr)) break; - } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr)) break; } while (pgdp++, addr = next, addr != end); local_irq_restore(flags); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d36b2af4d1bf..e4766de25709 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2048,6 +2048,7 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, bool freeze, struct page *page) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -2055,7 +2056,11 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, if (!pgd_present(*pgd)) return; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + return; + + pud = pud_offset(p4d, address); if (!pud_present(*pud)) return; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a7aa811b7d14..3d0aab9ee80d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4555,7 +4555,8 @@ out: int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { pgd_t *pgd = pgd_offset(mm, *addr); - pud_t *pud = pud_offset(pgd, *addr); + p4d_t *p4d = p4d_offset(pgd, *addr); + pud_t *pud = pud_offset(p4d, *addr); BUG_ON(page_count(virt_to_page(ptep)) == 0); if (page_count(virt_to_page(ptep)) == 1) @@ -4586,11 +4587,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pte_t *pte = NULL; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_offset(pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (pud) { if (sz == PUD_SIZE) { pte = (pte_t *)pud; @@ -4610,18 +4613,22 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; - pmd_t *pmd = NULL; + pmd_t *pmd; pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) { - if (pud_huge(*pud)) - return (pte_t *)pud; - pmd = pmd_offset(pud, addr); - } - } + if (!pgd_present(*pgd)) + return NULL; + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) + return NULL; + pud = pud_offset(p4d, addr); + if (!pud_present(*pud)) + return NULL; + if (pud_huge(*pud)) + return (pte_t *)pud; + pmd = pmd_offset(pud, addr); return (pte_t *) pmd; } diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c index 31238dad85fb..b96a5f773d88 100644 --- a/mm/kasan/kasan_init.c +++ b/mm/kasan/kasan_init.c @@ -30,6 +30,9 @@ */ unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; +#if CONFIG_PGTABLE_LEVELS > 4 +p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss; +#endif #if CONFIG_PGTABLE_LEVELS > 3 pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; #endif @@ -82,10 +85,10 @@ static void __init zero_pmd_populate(pud_t *pud, unsigned long addr, } while (pmd++, addr = next, addr != end); } -static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr, +static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end) { - pud_t *pud = pud_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); unsigned long next; do { @@ -107,6 +110,23 @@ static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr, } while (pud++, addr = next, addr != end); } +static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr, + unsigned long end) +{ + p4d_t *p4d = p4d_offset(pgd, addr); + unsigned long next; + + do { + next = p4d_addr_end(addr, end); + + if (p4d_none(*p4d)) { + p4d_populate(&init_mm, p4d, + early_alloc(PAGE_SIZE, NUMA_NO_NODE)); + } + zero_pud_populate(p4d, addr, next); + } while (p4d++, addr = next, addr != end); +} + /** * kasan_populate_zero_shadow - populate shadow memory region with * kasan_zero_page @@ -125,6 +145,7 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, next = pgd_addr_end(addr, end); if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) { + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -135,9 +156,22 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, * 3,2 - level page tables where we don't have * puds,pmds, so pgd_populate(), pud_populate() * is noops. + * + * The ifndef is required to avoid build breakage. + * + * With 5level-fixup.h, pgd_populate() is not nop and + * we reference kasan_zero_p4d. It's not defined + * unless 5-level paging enabled. + * + * The ifndef can be dropped once all KASAN-enabled + * architectures will switch to pgtable-nop4d.h. */ - pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_pud)); - pud = pud_offset(pgd, addr); +#ifndef __ARCH_HAS_5LEVEL_HACK + pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_p4d)); +#endif + p4d = p4d_offset(pgd, addr); + p4d_populate(&init_mm, p4d, lm_alias(kasan_zero_pud)); + pud = pud_offset(p4d, addr); pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd)); pmd = pmd_offset(pud, addr); pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte)); @@ -148,6 +182,6 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, pgd_populate(&init_mm, pgd, early_alloc(PAGE_SIZE, NUMA_NO_NODE)); } - zero_pud_populate(pgd, addr, next); + zero_p4d_populate(pgd, addr, next); } while (pgd++, addr = next, addr != end); } diff --git a/mm/memory.c b/mm/memory.c index a97a4cec2e1f..7f1c2163b3ce 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -445,7 +445,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, mm_dec_nr_pmds(tlb->mm); } -static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, +static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { @@ -454,7 +454,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start; start = addr; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) @@ -462,6 +462,39 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, free_pmd_range(tlb, pud, addr, next, floor, ceiling); } while (pud++, addr = next, addr != end); + start &= P4D_MASK; + if (start < floor) + return; + if (ceiling) { + ceiling &= P4D_MASK; + if (!ceiling) + return; + } + if (end - 1 > ceiling - 1) + return; + + pud = pud_offset(p4d, start); + p4d_clear(p4d); + pud_free_tlb(tlb, pud, start); +} + +static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ + p4d_t *p4d; + unsigned long next; + unsigned long start; + + start = addr; + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + free_pud_range(tlb, p4d, addr, next, floor, ceiling); + } while (p4d++, addr = next, addr != end); + start &= PGDIR_MASK; if (start < floor) return; @@ -473,9 +506,9 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, if (end - 1 > ceiling - 1) return; - pud = pud_offset(pgd, start); + p4d = p4d_offset(pgd, start); pgd_clear(pgd); - pud_free_tlb(tlb, pud, start); + p4d_free_tlb(tlb, p4d, start); } /* @@ -539,7 +572,7 @@ void free_pgd_range(struct mmu_gather *tlb, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - free_pud_range(tlb, pgd, addr, next, floor, ceiling); + free_p4d_range(tlb, pgd, addr, next, floor, ceiling); } while (pgd++, addr = next, addr != end); } @@ -658,7 +691,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, pte_t pte, struct page *page) { pgd_t *pgd = pgd_offset(vma->vm_mm, addr); - pud_t *pud = pud_offset(pgd, addr); + p4d_t *p4d = p4d_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); pmd_t *pmd = pmd_offset(pud, addr); struct address_space *mapping; pgoff_t index; @@ -1023,16 +1057,16 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src } static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, - pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma, + p4d_t *dst_p4d, p4d_t *src_p4d, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { pud_t *src_pud, *dst_pud; unsigned long next; - dst_pud = pud_alloc(dst_mm, dst_pgd, addr); + dst_pud = pud_alloc(dst_mm, dst_p4d, addr); if (!dst_pud) return -ENOMEM; - src_pud = pud_offset(src_pgd, addr); + src_pud = pud_offset(src_p4d, addr); do { next = pud_addr_end(addr, end); if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) { @@ -1056,6 +1090,28 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src return 0; } +static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + p4d_t *src_p4d, *dst_p4d; + unsigned long next; + + dst_p4d = p4d_alloc(dst_mm, dst_pgd, addr); + if (!dst_p4d) + return -ENOMEM; + src_p4d = p4d_offset(src_pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(src_p4d)) + continue; + if (copy_pud_range(dst_mm, src_mm, dst_p4d, src_p4d, + vma, addr, next)) + return -ENOMEM; + } while (dst_p4d++, src_p4d++, addr = next, addr != end); + return 0; +} + int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, struct vm_area_struct *vma) { @@ -1111,7 +1167,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(src_pgd)) continue; - if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, + if (unlikely(copy_p4d_range(dst_mm, src_mm, dst_pgd, src_pgd, vma, addr, next))) { ret = -ENOMEM; break; @@ -1267,14 +1323,14 @@ next: } static inline unsigned long zap_pud_range(struct mmu_gather *tlb, - struct vm_area_struct *vma, pgd_t *pgd, + struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, struct zap_details *details) { pud_t *pud; unsigned long next; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_trans_huge(*pud) || pud_devmap(*pud)) { @@ -1295,6 +1351,25 @@ next: return addr; } +static inline unsigned long zap_p4d_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, pgd_t *pgd, + unsigned long addr, unsigned long end, + struct zap_details *details) +{ + p4d_t *p4d; + unsigned long next; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + next = zap_pud_range(tlb, vma, p4d, addr, next, details); + } while (p4d++, addr = next, addr != end); + + return addr; +} + void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, @@ -1310,7 +1385,7 @@ void unmap_page_range(struct mmu_gather *tlb, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - next = zap_pud_range(tlb, vma, pgd, addr, next, details); + next = zap_p4d_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); } @@ -1465,16 +1540,24 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes); pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl) { - pgd_t *pgd = pgd_offset(mm, addr); - pud_t *pud = pud_alloc(mm, pgd, addr); - if (pud) { - pmd_t *pmd = pmd_alloc(mm, pud, addr); - if (pmd) { - VM_BUG_ON(pmd_trans_huge(*pmd)); - return pte_alloc_map_lock(mm, pmd, addr, ptl); - } - } - return NULL; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return NULL; + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return NULL; + + VM_BUG_ON(pmd_trans_huge(*pmd)); + return pte_alloc_map_lock(mm, pmd, addr, ptl); } /* @@ -1740,7 +1823,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, return 0; } -static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, +static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { @@ -1748,7 +1831,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, unsigned long next; pfn -= addr >> PAGE_SHIFT; - pud = pud_alloc(mm, pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -1760,6 +1843,26 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, return 0; } +static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + unsigned long pfn, pgprot_t prot) +{ + p4d_t *p4d; + unsigned long next; + + pfn -= addr >> PAGE_SHIFT; + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + if (remap_pud_range(mm, p4d, addr, next, + pfn + (addr >> PAGE_SHIFT), prot)) + return -ENOMEM; + } while (p4d++, addr = next, addr != end); + return 0; +} + /** * remap_pfn_range - remap kernel memory to userspace * @vma: user vma to map to @@ -1816,7 +1919,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, flush_cache_range(vma, addr, end); do { next = pgd_addr_end(addr, end); - err = remap_pud_range(mm, pgd, addr, next, + err = remap_p4d_range(mm, pgd, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) break; @@ -1932,7 +2035,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, return err; } -static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, +static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, pte_fn_t fn, void *data) { @@ -1940,7 +2043,7 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, unsigned long next; int err; - pud = pud_alloc(mm, pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -1952,6 +2055,26 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, return err; } +static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + pte_fn_t fn, void *data) +{ + p4d_t *p4d; + unsigned long next; + int err; + + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + err = apply_to_pud_range(mm, p4d, addr, next, fn, data); + if (err) + break; + } while (p4d++, addr = next, addr != end); + return err; +} + /* * Scan a region of virtual memory, filling in page tables as necessary * and calling a provided function on each leaf page table. @@ -1970,7 +2093,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); - err = apply_to_pud_range(mm, pgd, addr, next, fn, data); + err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); if (err) break; } while (pgd++, addr = next, addr != end); @@ -3653,11 +3776,15 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, }; struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; + p4d_t *p4d; int ret; pgd = pgd_offset(mm, address); + p4d = p4d_alloc(mm, pgd, address); + if (!p4d) + return VM_FAULT_OOM; - vmf.pud = pud_alloc(mm, pgd, address); + vmf.pud = pud_alloc(mm, p4d, address); if (!vmf.pud) return VM_FAULT_OOM; if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) { @@ -3784,7 +3911,7 @@ EXPORT_SYMBOL_GPL(handle_mm_fault); * Allocate page upper directory. * We've already handled the fast-path in-line. */ -int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { pud_t *new = pud_alloc_one(mm, address); if (!new) @@ -3793,10 +3920,17 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) smp_wmb(); /* See comment in __pte_alloc */ spin_lock(&mm->page_table_lock); - if (pgd_present(*pgd)) /* Another has populated it */ +#ifndef __ARCH_HAS_5LEVEL_HACK + if (p4d_present(*p4d)) /* Another has populated it */ + pud_free(mm, new); + else + p4d_populate(mm, p4d, new); +#else + if (pgd_present(*p4d)) /* Another has populated it */ pud_free(mm, new); else - pgd_populate(mm, pgd, new); + pgd_populate(mm, p4d, new); +#endif /* __ARCH_HAS_5LEVEL_HACK */ spin_unlock(&mm->page_table_lock); return 0; } @@ -3839,6 +3973,7 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *ptep; @@ -3847,7 +3982,11 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) goto out; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d))) + goto out; + + pud = pud_offset(p4d, address); if (pud_none(*pud) || unlikely(pud_bad(*pud))) goto out; diff --git a/mm/mlock.c b/mm/mlock.c index 1050511f8b2b..945edac46810 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -380,6 +380,7 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, pte = get_locked_pte(vma->vm_mm, start, &ptl); /* Make sure we do not cross the page table boundary */ end = pgd_addr_end(start, end); + end = p4d_addr_end(start, end); end = pud_addr_end(start, end); end = pmd_addr_end(start, end); diff --git a/mm/mprotect.c b/mm/mprotect.c index 848e946b08e5..8edd0d576254 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -193,14 +193,14 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, } static inline unsigned long change_pud_range(struct vm_area_struct *vma, - pgd_t *pgd, unsigned long addr, unsigned long end, + p4d_t *p4d, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) { pud_t *pud; unsigned long next; unsigned long pages = 0; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) @@ -212,6 +212,26 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, return pages; } +static inline unsigned long change_p4d_range(struct vm_area_struct *vma, + pgd_t *pgd, unsigned long addr, unsigned long end, + pgprot_t newprot, int dirty_accountable, int prot_numa) +{ + p4d_t *p4d; + unsigned long next; + unsigned long pages = 0; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + pages += change_pud_range(vma, p4d, addr, next, newprot, + dirty_accountable, prot_numa); + } while (p4d++, addr = next, addr != end); + + return pages; +} + static unsigned long change_protection_range(struct vm_area_struct *vma, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) @@ -230,7 +250,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - pages += change_pud_range(vma, pgd, addr, next, newprot, + pages += change_p4d_range(vma, pgd, addr, next, newprot, dirty_accountable, prot_numa); } while (pgd++, addr = next, addr != end); diff --git a/mm/mremap.c b/mm/mremap.c index 8233b0105c82..cd8a1b199ef9 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -32,6 +32,7 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -39,7 +40,11 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) if (pgd_none_or_clear_bad(pgd)) return NULL; - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + if (p4d_none_or_clear_bad(p4d)) + return NULL; + + pud = pud_offset(p4d, addr); if (pud_none_or_clear_bad(pud)) return NULL; @@ -54,11 +59,15 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, addr); if (!pud) return NULL; diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index a23001a22c15..c4c9def8ffea 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -104,6 +104,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) struct mm_struct *mm = pvmw->vma->vm_mm; struct page *page = pvmw->page; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; /* The only possible pmd mapping has been handled on last iteration */ @@ -133,7 +134,10 @@ restart: pgd = pgd_offset(mm, pvmw->address); if (!pgd_present(*pgd)) return false; - pud = pud_offset(pgd, pvmw->address); + p4d = p4d_offset(pgd, pvmw->address); + if (!p4d_present(*p4d)) + return false; + pud = pud_offset(p4d, pvmw->address); if (!pud_present(*pud)) return false; pvmw->pmd = pmd_offset(pud, pvmw->address); diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 03761577ae86..60f7856e508f 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -69,14 +69,14 @@ again: return err; } -static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, +static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, struct mm_walk *walk) { pud_t *pud; unsigned long next; int err = 0; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { again: next = pud_addr_end(addr, end); @@ -113,6 +113,32 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, return err; } +static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + p4d_t *p4d; + unsigned long next; + int err = 0; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) { + if (walk->pte_hole) + err = walk->pte_hole(addr, next, walk); + if (err) + break; + continue; + } + if (walk->pmd_entry || walk->pte_entry) + err = walk_pud_range(p4d, addr, next, walk); + if (err) + break; + } while (p4d++, addr = next, addr != end); + + return err; +} + static int walk_pgd_range(unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -131,7 +157,7 @@ static int walk_pgd_range(unsigned long addr, unsigned long end, continue; } if (walk->pmd_entry || walk->pte_entry) - err = walk_pud_range(pgd, addr, next, walk); + err = walk_p4d_range(pgd, addr, next, walk); if (err) break; } while (pgd++, addr = next, addr != end); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 4ed5908c65b0..c99d9512a45b 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -22,6 +22,12 @@ void pgd_clear_bad(pgd_t *pgd) pgd_clear(pgd); } +void p4d_clear_bad(p4d_t *p4d) +{ + p4d_ERROR(*p4d); + p4d_clear(p4d); +} + void pud_clear_bad(pud_t *pud) { pud_ERROR(*pud); diff --git a/mm/rmap.c b/mm/rmap.c index 2da487d6cea8..2984403a2424 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -684,6 +684,7 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd = NULL; pmd_t pmde; @@ -692,7 +693,11 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) if (!pgd_present(*pgd)) goto out; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + goto out; + + pud = pud_offset(p4d, address); if (!pud_present(*pud)) goto out; diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 574c67b663fe..a56c3989f773 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -196,9 +196,9 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) return pmd; } -pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node) +pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node) { - pud_t *pud = pud_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); if (pud_none(*pud)) { void *p = vmemmap_alloc_block(PAGE_SIZE, node); if (!p) @@ -208,6 +208,18 @@ pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node) return pud; } +p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node) +{ + p4d_t *p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + void *p = vmemmap_alloc_block(PAGE_SIZE, node); + if (!p) + return NULL; + p4d_populate(&init_mm, p4d, p); + } + return p4d; +} + pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node) { pgd_t *pgd = pgd_offset_k(addr); @@ -225,6 +237,7 @@ int __meminit vmemmap_populate_basepages(unsigned long start, { unsigned long addr = start; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -233,7 +246,10 @@ int __meminit vmemmap_populate_basepages(unsigned long start, pgd = vmemmap_pgd_populate(addr, node); if (!pgd) return -ENOMEM; - pud = vmemmap_pud_populate(pgd, addr, node); + p4d = vmemmap_p4d_populate(pgd, addr, node); + if (!p4d) + return -ENOMEM; + pud = vmemmap_pud_populate(p4d, addr, node); if (!pud) return -ENOMEM; pmd = vmemmap_pmd_populate(pud, addr, node); diff --git a/mm/swapfile.c b/mm/swapfile.c index 521ef9b6064f..178130880b90 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1517,7 +1517,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, return 0; } -static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, +static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, swp_entry_t entry, struct page *page) { @@ -1525,7 +1525,7 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, unsigned long next; int ret; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) @@ -1537,6 +1537,26 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, return 0; } +static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd, + unsigned long addr, unsigned long end, + swp_entry_t entry, struct page *page) +{ + p4d_t *p4d; + unsigned long next; + int ret; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + ret = unuse_pud_range(vma, p4d, addr, next, entry, page); + if (ret) + return ret; + } while (p4d++, addr = next, addr != end); + return 0; +} + static int unuse_vma(struct vm_area_struct *vma, swp_entry_t entry, struct page *page) { @@ -1560,7 +1580,7 @@ static int unuse_vma(struct vm_area_struct *vma, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - ret = unuse_pud_range(vma, pgd, addr, next, entry, page); + ret = unuse_p4d_range(vma, pgd, addr, next, entry, page); if (ret) return ret; } while (pgd++, addr = next, addr != end); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 479e631d43c2..8bcb501bce60 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -128,19 +128,22 @@ out_unlock: static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; - pmd_t *pmd = NULL; pgd = pgd_offset(mm, address); - pud = pud_alloc(mm, pgd, address); - if (pud) - /* - * Note that we didn't run this because the pmd was - * missing, the *pmd may be already established and in - * turn it may also be a trans_huge_pmd. - */ - pmd = pmd_alloc(mm, pud, address); - return pmd; + p4d = p4d_alloc(mm, pgd, address); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, address); + if (!pud) + return NULL; + /* + * Note that we didn't run this because the pmd was + * missing, the *pmd may be already established and in + * turn it may also be a trans_huge_pmd. + */ + return pmd_alloc(mm, pud, address); } #ifdef CONFIG_HUGETLB_PAGE diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b4024d688f38..0dd80222b20b 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -86,12 +86,12 @@ static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end) } while (pmd++, addr = next, addr != end); } -static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) +static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end) { pud_t *pud; unsigned long next; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_clear_huge(pud)) @@ -102,6 +102,22 @@ static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) } while (pud++, addr = next, addr != end); } +static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end) +{ + p4d_t *p4d; + unsigned long next; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_clear_huge(p4d)) + continue; + if (p4d_none_or_clear_bad(p4d)) + continue; + vunmap_pud_range(p4d, addr, next); + } while (p4d++, addr = next, addr != end); +} + static void vunmap_page_range(unsigned long addr, unsigned long end) { pgd_t *pgd; @@ -113,7 +129,7 @@ static void vunmap_page_range(unsigned long addr, unsigned long end) next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - vunmap_pud_range(pgd, addr, next); + vunmap_p4d_range(pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -160,13 +176,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, return 0; } -static int vmap_pud_range(pgd_t *pgd, unsigned long addr, +static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, int *nr) { pud_t *pud; unsigned long next; - pud = pud_alloc(&init_mm, pgd, addr); + pud = pud_alloc(&init_mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -177,6 +193,23 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr, return 0; } +static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, + unsigned long end, pgprot_t prot, struct page **pages, int *nr) +{ + p4d_t *p4d; + unsigned long next; + + p4d = p4d_alloc(&init_mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + if (vmap_pud_range(p4d, addr, next, prot, pages, nr)) + return -ENOMEM; + } while (p4d++, addr = next, addr != end); + return 0; +} + /* * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and * will have pfns corresponding to the "pages" array. @@ -196,7 +229,7 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end, pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); - err = vmap_pud_range(pgd, addr, next, prot, pages, &nr); + err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr); if (err) return err; } while (pgd++, addr = next, addr != end); @@ -237,6 +270,10 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) unsigned long addr = (unsigned long) vmalloc_addr; struct page *page = NULL; pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep, pte; /* * XXX we might need to change this if we add VIRTUAL_BUG_ON for @@ -244,21 +281,23 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) */ VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr)); - if (!pgd_none(*pgd)) { - pud_t *pud = pud_offset(pgd, addr); - if (!pud_none(*pud)) { - pmd_t *pmd = pmd_offset(pud, addr); - if (!pmd_none(*pmd)) { - pte_t *ptep, pte; - - ptep = pte_offset_map(pmd, addr); - pte = *ptep; - if (pte_present(pte)) - page = pte_page(pte); - pte_unmap(ptep); - } - } - } + if (pgd_none(*pgd)) + return NULL; + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + return NULL; + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) + return NULL; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return NULL; + + ptep = pte_offset_map(pmd, addr); + pte = *ptep; + if (pte_present(pte)) + page = pte_page(pte); + pte_unmap(ptep); return page; } EXPORT_SYMBOL(vmalloc_to_page); -- cgit v1.2.3 From 90eceff1a375f6ffa78caf8654e787c0a8a591ef Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:08 +0300 Subject: mm: introduce __p4d_alloc() For full 5-level paging we need a helper to allocate p4d page table. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- mm/memory.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 7f1c2163b3ce..235ba51b2fbf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3906,6 +3906,29 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, } EXPORT_SYMBOL_GPL(handle_mm_fault); +#ifndef __PAGETABLE_P4D_FOLDED +/* + * Allocate p4d page table. + * We've already handled the fast-path in-line. + */ +int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +{ + p4d_t *new = p4d_alloc_one(mm, address); + if (!new) + return -ENOMEM; + + smp_wmb(); /* See comment in __pte_alloc */ + + spin_lock(&mm->page_table_lock); + if (pgd_present(*pgd)) /* Another has populated it */ + p4d_free(mm, new); + else + pgd_populate(mm, pgd, new); + spin_unlock(&mm->page_table_lock); + return 0; +} +#endif /* __PAGETABLE_P4D_FOLDED */ + #ifndef __PAGETABLE_PUD_FOLDED /* * Allocate page upper directory. -- cgit v1.2.3 From 834e0f8ae4e104fa026ffe5cdee58491f3078403 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Mar 2017 17:40:17 -0500 Subject: drm/amdgpu: validate paramaters in the gem ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reject it if there are any invalid flags or domains. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 51d759463384..106cf83c2e6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -202,6 +202,27 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, bool kernel = false; int r; + /* reject invalid gem flags */ + if (args->in.domain_flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_VRAM_CLEARED| + AMDGPU_GEM_CREATE_SHADOW | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { + r = -EINVAL; + goto error_unlock; + } + /* reject invalid gem domains */ + if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | + AMDGPU_GEM_DOMAIN_GTT | + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GDS | + AMDGPU_GEM_DOMAIN_GWS | + AMDGPU_GEM_DOMAIN_OA)) { + r = -EINVAL; + goto error_unlock; + } + /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { -- cgit v1.2.3 From a5b11dac1f57c4b327c2d6eccb8fdd01499f9e17 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Mar 2017 17:23:21 -0500 Subject: drm/amdgpu: bump driver version for some new features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We added new gem ioctl flags and the new fences ioctl, but forgot to bump the version. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 75fc376ba735..f7adbace428a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -59,9 +59,10 @@ * - 3.7.0 - Add support for VCE clock list packet * - 3.8.0 - Add support raster config init in the kernel * - 3.9.0 - Add support for memory query info about VRAM and GTT. + * - 3.10.0 - Add support for new fences ioctl, new gem ioctl flags */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 9 +#define KMS_DRIVER_MINOR 10 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; -- cgit v1.2.3 From 8bd49ac86677ca43d64f08c45864e438283d6a76 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 6 Mar 2017 09:57:17 +0100 Subject: s390: wire up statx system call Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/unistd.h | 4 +++- arch/s390/kernel/compat_wrapper.c | 1 + arch/s390/kernel/syscalls.S | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 4384bc797a54..152de9b796e1 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -313,7 +313,9 @@ #define __NR_copy_file_range 375 #define __NR_preadv2 376 #define __NR_pwritev2 377 -#define NR_syscalls 378 +/* Number 378 is reserved for guarded storage */ +#define __NR_statx 379 +#define NR_syscalls 380 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index ae2cda5eee5a..e89cc2e71db1 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -178,3 +178,4 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 9b59e6212d8f..2659b5cfeddb 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -386,3 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2) SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */ SYSCALL(sys_preadv2,compat_sys_preadv2) SYSCALL(sys_pwritev2,compat_sys_pwritev2) +NI_SYSCALL +SYSCALL(sys_statx,compat_sys_statx) -- cgit v1.2.3 From 6bbc4a4144b1a69743022ac68dfaf6e7d993abb9 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:16:28 -0800 Subject: userfaultfd: shmem: __do_fault requires VM_FAULT_NOPAGE __do_fault assumes vmf->page has been initialized and is valid if VM_FAULT_NOPAGE is not returned by vma->vm_ops->fault(vma, vmf). handle_userfault() in turn should return VM_FAULT_NOPAGE if it doesn't return VM_FAULT_SIGBUS or VM_FAULT_RETRY (the other two possibilities). This VM_FAULT_NOPAGE case is only invoked when signal are pending and it didn't matter for anonymous memory before. It only started to matter since shmem was introduced. hugetlbfs also takes a different path and doesn't exercise __do_fault. Link: http://lkml.kernel.org/r/20170228154201.GH5816@redhat.com Signed-off-by: Andrea Arcangeli Reported-by: Dmitry Vyukov Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 973607df579d..f62199b90fd0 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -490,7 +490,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) * in such case. */ down_read(&mm->mmap_sem); - ret = 0; + ret = VM_FAULT_NOPAGE; } } -- cgit v1.2.3 From 8a1115ff6b6d90cf1066ec3a0c4e51276553eebe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 9 Mar 2017 16:16:31 -0800 Subject: scripts/spelling.txt: add "disble(d)" pattern and fix typo instances Fix typos and add the following to the scripts/spelling.txt: disble||disable disbled||disabled I kept the TSL2563_INT_DISBLED in /drivers/iio/light/tsl2563.c untouched. The macro is not referenced at all, but this commit is touching only comment blocks just in case. Link: http://lkml.kernel.org/r/1481573103-11329-20-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dev-tools/kcov.rst | 2 +- arch/cris/arch-v32/drivers/cryptocop.c | 2 +- arch/x86/kernel/ftrace.c | 2 +- drivers/crypto/ux500/cryp/cryp.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 +- drivers/hv/channel.c | 2 +- drivers/isdn/hisax/st5481_b.c | 2 +- drivers/mtd/spi-nor/spi-nor.c | 2 +- drivers/net/ethernet/qlogic/qlge/qlge.h | 2 +- drivers/scsi/aic7xxx/aic79xx_core.c | 2 +- drivers/usb/gadget/legacy/inode.c | 3 +-- drivers/usb/host/xhci.c | 4 ++-- include/linux/regulator/machine.h | 2 +- kernel/cgroup/cgroup.c | 2 +- kernel/events/core.c | 2 +- scripts/spelling.txt | 2 ++ sound/soc/amd/acp-pcm-dma.c | 2 +- 17 files changed, 19 insertions(+), 18 deletions(-) diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index 2c41b713841f..44886c91e112 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -10,7 +10,7 @@ Note that kcov does not aim to collect as much coverage as possible. It aims to collect more or less stable coverage that is function of syscall inputs. To achieve this goal it does not collect coverage in soft/hard interrupts and instrumentation of some inherently non-deterministic parts of kernel is -disbled (e.g. scheduler, locking). +disabled (e.g. scheduler, locking). Usage ----- diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index ae6903d7fdbe..14970f11bbf2 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -2086,7 +2086,7 @@ static void cryptocop_job_queue_close(void) dma_in_cfg.en = regk_dma_no; REG_WR(dma, IN_DMA_INST, rw_cfg, dma_in_cfg); - /* Disble the cryptocop. */ + /* Disable the cryptocop. */ rw_cfg = REG_RD(strcop, regi_strcop, rw_cfg); rw_cfg.en = 0; REG_WR(strcop, regi_strcop, rw_cfg, rw_cfg); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8639bb2ae058..8f3d9cf26ff9 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -535,7 +535,7 @@ static void run_sync(void) { int enable_irqs = irqs_disabled(); - /* We may be called with interrupts disbled (on bootup). */ + /* We may be called with interrupts disabled (on bootup). */ if (enable_irqs) local_irq_enable(); on_each_cpu(do_sync_core, NULL, 1); diff --git a/drivers/crypto/ux500/cryp/cryp.c b/drivers/crypto/ux500/cryp/cryp.c index 43a0c8a26ab0..00a16ab601cb 100644 --- a/drivers/crypto/ux500/cryp/cryp.c +++ b/drivers/crypto/ux500/cryp/cryp.c @@ -82,7 +82,7 @@ void cryp_activity(struct cryp_device_data *device_data, void cryp_flush_inoutfifo(struct cryp_device_data *device_data) { /* - * We always need to disble the hardware before trying to flush the + * We always need to disable the hardware before trying to flush the * FIFO. This is something that isn't written in the design * specification, but we have been informed by the hardware designers * that this must be done. diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 31375bdde6f1..011800f621c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -788,7 +788,7 @@ static int sdma_v3_0_start(struct amdgpu_device *adev) } } - /* disble sdma engine before programing it */ + /* disable sdma engine before programing it */ sdma_v3_0_ctx_switch_enable(adev, false); sdma_v3_0_enable(adev, false); diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 81a80c82f1bd..bd0d1988feb2 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -543,7 +543,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel) /* * In case a device driver's probe() fails (e.g., * util_probe() -> vmbus_open() returns -ENOMEM) and the device is - * rescinded later (e.g., we dynamically disble an Integrated Service + * rescinded later (e.g., we dynamically disable an Integrated Service * in Hyper-V Manager), the driver's remove() invokes vmbus_close(): * here we should skip most of the below cleanup work. */ diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c index 409849165838..f64a36007800 100644 --- a/drivers/isdn/hisax/st5481_b.c +++ b/drivers/isdn/hisax/st5481_b.c @@ -239,7 +239,7 @@ static void st5481B_mode(struct st5481_bcs *bcs, int mode) } } } else { - // Disble B channel interrupts + // Disable B channel interrupts st5481_usb_device_ctrl_msg(adapter, FFMSK_B1+(bcs->channel * 2), 0, NULL, NULL); // Disable B channel FIFOs diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 1ae872bfc3ba..747645c74134 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -186,7 +186,7 @@ static inline int write_enable(struct spi_nor *nor) } /* - * Send write disble instruction to the chip. + * Send write disable instruction to the chip. */ static inline int write_disable(struct spi_nor *nor) { diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h index 6d31f92ef2b6..90b3b46f85cc 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge.h +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h @@ -1163,7 +1163,7 @@ struct ib_mac_iocb_rsp { u8 opcode; /* 0x20 */ u8 flags1; #define IB_MAC_IOCB_RSP_OI 0x01 /* Overide intr delay */ -#define IB_MAC_IOCB_RSP_I 0x02 /* Disble Intr Generation */ +#define IB_MAC_IOCB_RSP_I 0x02 /* Disable Intr Generation */ #define IB_MAC_CSUM_ERR_MASK 0x1c /* A mask to use for csum errs */ #define IB_MAC_IOCB_RSP_TE 0x04 /* Checksum error */ #define IB_MAC_IOCB_RSP_NU 0x08 /* No checksum rcvd */ diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c index 109e2c99e6c1..95d8f25cbcca 100644 --- a/drivers/scsi/aic7xxx/aic79xx_core.c +++ b/drivers/scsi/aic7xxx/aic79xx_core.c @@ -6278,7 +6278,7 @@ ahd_reset(struct ahd_softc *ahd, int reinit) * does not disable its parity logic prior to * the start of the reset. This may cause a * parity error to be detected and thus a - * spurious SERR or PERR assertion. Disble + * spurious SERR or PERR assertion. Disable * PERR and SERR responses during the CHIPRST. */ mod_cmd = cmd & ~(PCIM_CMD_PERRESPEN|PCIM_CMD_SERRESPEN); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index a2615d64d07c..79a2d8fba6b6 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -84,8 +84,7 @@ static int ep_open(struct inode *, struct file *); /* /dev/gadget/$CHIP represents ep0 and the whole device */ enum ep0_state { - /* DISBLED is the initial state. - */ + /* DISABLED is the initial state. */ STATE_DEV_DISABLED = 0, /* Only one open() of /dev/gadget/$CHIP; only one file tracks diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 6d6c46000e56..50aee8b7718b 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -868,7 +868,7 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) spin_lock_irqsave(&xhci->lock, flags); - /* disble usb3 ports Wake bits*/ + /* disable usb3 ports Wake bits */ port_index = xhci->num_usb3_ports; port_array = xhci->usb3_ports; while (port_index--) { @@ -879,7 +879,7 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) writel(t2, port_array[port_index]); } - /* disble usb2 ports Wake bits*/ + /* disable usb2 ports Wake bits */ port_index = xhci->num_usb2_ports; port_array = xhci->usb2_ports; while (port_index--) { diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index ad3e5158e586..c9f795e9a2ee 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -65,7 +65,7 @@ struct regulator_state { int uV; /* suspend voltage */ unsigned int mode; /* suspend regulator operating mode */ int enabled; /* is regulator enabled in this suspend state */ - int disabled; /* is the regulator disbled in this suspend state */ + int disabled; /* is the regulator disabled in this suspend state */ }; /** diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 0125589c7428..48851327a15e 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2669,7 +2669,7 @@ static bool css_visible(struct cgroup_subsys_state *css) * * Returns 0 on success, -errno on failure. On failure, csses which have * been processed already aren't cleaned up. The caller is responsible for - * cleaning up with cgroup_apply_control_disble(). + * cleaning up with cgroup_apply_control_disable(). */ static int cgroup_apply_control_enable(struct cgroup *cgrp) { diff --git a/kernel/events/core.c b/kernel/events/core.c index 6f41548f2e32..a17ed56c8ce1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -998,7 +998,7 @@ list_update_cgroup_event(struct perf_event *event, */ #define PERF_CPU_HRTIMER (1000 / HZ) /* - * function must be called with interrupts disbled + * function must be called with interrupts disabled */ static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr) { diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 0458b037c8a1..6dae4df472f6 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -372,6 +372,8 @@ disassocation||disassociation disapear||disappear disapeared||disappeared disappared||disappeared +disble||disable +disbled||disabled disconnet||disconnect discontinous||discontinuous dispertion||dispersion diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c index ec1067a679da..08b1399d1da2 100644 --- a/sound/soc/amd/acp-pcm-dma.c +++ b/sound/soc/amd/acp-pcm-dma.c @@ -89,7 +89,7 @@ static void acp_reg_write(u32 val, void __iomem *acp_mmio, u32 reg) writel(val, acp_mmio + (reg * 4)); } -/* Configure a given dma channel parameters - enable/disble, +/* Configure a given dma channel parameters - enable/disable, * number of descriptors, priority */ static void config_acp_dma_channel(void __iomem *acp_mmio, u8 ch_num, -- cgit v1.2.3 From 505d3085d7120a9f4cd0d6ffaa876968854b3baa Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 9 Mar 2017 16:16:33 -0800 Subject: scripts/spelling.txt: add "overide" pattern and fix typo instances Fix typos and add the following to the scripts/spelling.txt: overide||override While we are here, fix the doubled "address" in the touched line Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt. Also, fix the comment block style in the touched hunks in drivers/media/dvb-frontends/drx39xyj/drx_driver.h. Link: http://lkml.kernel.org/r/1481573103-11329-21-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt | 2 +- drivers/block/paride/pcd.c | 2 +- drivers/block/paride/pd.c | 2 +- drivers/block/paride/pf.c | 2 +- drivers/block/paride/pg.c | 2 +- drivers/block/paride/pt.c | 2 +- drivers/media/dvb-frontends/drx39xyj/drx_driver.h | 8 +++----- drivers/net/ethernet/qlogic/qlge/qlge.h | 2 +- include/dt-bindings/sound/cs42l42.h | 2 +- include/net/irda/timer.h | 2 +- kernel/trace/trace_stack.c | 2 +- scripts/spelling.txt | 1 + tools/lguest/lguest.c | 2 +- tools/lib/bpf/Makefile | 2 +- tools/lib/traceevent/Makefile | 2 +- tools/lib/traceevent/event-parse.h | 2 +- 16 files changed, 18 insertions(+), 19 deletions(-) diff --git a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt index c3f6546ebac7..6a23ad9ac53a 100644 --- a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt +++ b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt @@ -45,7 +45,7 @@ Required Properties: Optional Properties: - reg-names: In addition to the required properties, the following are optional - "efuse-address" - Contains efuse base address used to pick up ABB info. - - "ldo-address" - Contains address of ABB LDO overide register address. + - "ldo-address" - Contains address of ABB LDO override register. "efuse-address" is required for this. - ti,ldovbb-vset-mask - Required if ldo-address is set, mask for LDO override register to provide override vset value. diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 10aed84244f5..939641d6e262 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -50,7 +50,7 @@ the slower the port i/o. In some cases, setting this to zero will speed up the device. (default -1) - major You may use this parameter to overide the + major You may use this parameter to override the default major number (46) that this driver will use. Be sure to change the device name as well. diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 644ba0888bd4..9cfd2e06a649 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -61,7 +61,7 @@ first drive found. - major You may use this parameter to overide the + major You may use this parameter to override the default major number (45) that this driver will use. Be sure to change the device name as well. diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index ed93e8badf56..14c5d32f5d8b 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -59,7 +59,7 @@ the slower the port i/o. In some cases, setting this to zero will speed up the device. (default -1) - major You may use this parameter to overide the + major You may use this parameter to override the default major number (47) that this driver will use. Be sure to change the device name as well. diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c index 5db955fe3a94..3b5882bfb736 100644 --- a/drivers/block/paride/pg.c +++ b/drivers/block/paride/pg.c @@ -84,7 +84,7 @@ the slower the port i/o. In some cases, setting this to zero will speed up the device. (default -1) - major You may use this parameter to overide the + major You may use this parameter to override the default major number (97) that this driver will use. Be sure to change the device name as well. diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 61fc6824299a..e815312a00ad 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -61,7 +61,7 @@ the slower the port i/o. In some cases, setting this to zero will speed up the device. (default -1) - major You may use this parameter to overide the + major You may use this parameter to override the default major number (96) that this driver will use. Be sure to change the device name as well. diff --git a/drivers/media/dvb-frontends/drx39xyj/drx_driver.h b/drivers/media/dvb-frontends/drx39xyj/drx_driver.h index 7a681d8202c7..4442e478db72 100644 --- a/drivers/media/dvb-frontends/drx39xyj/drx_driver.h +++ b/drivers/media/dvb-frontends/drx39xyj/drx_driver.h @@ -256,8 +256,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner, * * The actual DAP implementation may be restricted to only one of the modes. * A compiler warning or error will be generated if the DAP implementation -* overides or cannot handle the mode defined below. -* +* overrides or cannot handle the mode defined below. */ #ifndef DRXDAP_SINGLE_MASTER #define DRXDAP_SINGLE_MASTER 1 @@ -272,7 +271,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner, * * This maximum size may be restricted by the actual DAP implementation. * A compiler warning or error will be generated if the DAP implementation -* overides or cannot handle the chunksize defined below. +* overrides or cannot handle the chunksize defined below. * * Beware that the DAP uses DRXDAP_MAX_WCHUNKSIZE to create a temporary data * buffer. Do not undefine or choose too large, unless your system is able to @@ -292,8 +291,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner, * * This maximum size may be restricted by the actual DAP implementation. * A compiler warning or error will be generated if the DAP implementation -* overides or cannot handle the chunksize defined below. -* +* overrides or cannot handle the chunksize defined below. */ #ifndef DRXDAP_MAX_RCHUNKSIZE #define DRXDAP_MAX_RCHUNKSIZE 60 diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h index 90b3b46f85cc..84ac50f92c9c 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge.h +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h @@ -1162,7 +1162,7 @@ struct ob_mac_tso_iocb_rsp { struct ib_mac_iocb_rsp { u8 opcode; /* 0x20 */ u8 flags1; -#define IB_MAC_IOCB_RSP_OI 0x01 /* Overide intr delay */ +#define IB_MAC_IOCB_RSP_OI 0x01 /* Override intr delay */ #define IB_MAC_IOCB_RSP_I 0x02 /* Disable Intr Generation */ #define IB_MAC_CSUM_ERR_MASK 0x1c /* A mask to use for csum errs */ #define IB_MAC_IOCB_RSP_TE 0x04 /* Checksum error */ diff --git a/include/dt-bindings/sound/cs42l42.h b/include/dt-bindings/sound/cs42l42.h index 399a123aed58..db69d84ed7d1 100644 --- a/include/dt-bindings/sound/cs42l42.h +++ b/include/dt-bindings/sound/cs42l42.h @@ -20,7 +20,7 @@ #define CS42L42_HPOUT_LOAD_1NF 0 #define CS42L42_HPOUT_LOAD_10NF 1 -/* HPOUT Clamp to GND Overide */ +/* HPOUT Clamp to GND Override */ #define CS42L42_HPOUT_CLAMP_EN 0 #define CS42L42_HPOUT_CLAMP_DIS 1 diff --git a/include/net/irda/timer.h b/include/net/irda/timer.h index cb2615ccf761..d784f242cf7b 100644 --- a/include/net/irda/timer.h +++ b/include/net/irda/timer.h @@ -59,7 +59,7 @@ struct lap_cb; * Slot timer must never exceed 85 ms, and must always be at least 25 ms, * suggested to 75-85 msec by IrDA lite. This doesn't work with a lot of * devices, and other stackes uses a lot more, so it's best we do it as well - * (Note : this is the default value and sysctl overides it - Jean II) + * (Note : this is the default value and sysctl overrides it - Jean II) */ #define SLOT_TIMEOUT (90*HZ/1000) diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 1d68b5b7ad41..5fb1f2c87e6b 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -65,7 +65,7 @@ void stack_trace_print(void) } /* - * When arch-specific code overides this function, the following + * When arch-specific code overrides this function, the following * data should be filled up, assuming stack_trace_max_lock is held to * prevent concurrent updates. * stack_trace_index[] diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 6dae4df472f6..0545f5a8cabe 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -734,6 +734,7 @@ oustanding||outstanding overaall||overall overhread||overhead overlaping||overlapping +overide||override overrided||overridden overriden||overridden overun||overrun diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index 11c8d9bc762e..5d19fdf80292 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -1387,7 +1387,7 @@ static bool pci_data_iowrite(u16 port, u32 mask, u32 val) /* Allow writing to any other BAR, or expansion ROM */ iowrite(portoff, val, mask, &d->config_words[reg]); return true; - /* We let them overide latency timer and cacheline size */ + /* We let them override latency timer and cacheline size */ } else if (&d->config_words[reg] == (void *)&d->config.cacheline_size) { /* Only let them change the first two fields. */ if (mask == 0xFFFFFFFF) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index e2efddf10231..1f5300e56b44 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -132,7 +132,7 @@ else Q = @ endif -# Disable command line variables (CFLAGS) overide from top +# Disable command line variables (CFLAGS) override from top # level Makefile (perf), otherwise build Makefile will get # the same command line setup. MAKEOVERRIDES= diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 47076b15eebe..9b8555ea3459 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -135,7 +135,7 @@ else Q = @ endif -# Disable command line variables (CFLAGS) overide from top +# Disable command line variables (CFLAGS) override from top # level Makefile (perf), otherwise build Makefile will get # the same command line setup. MAKEOVERRIDES= diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 66342804161c..0c03538df74c 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -140,7 +140,7 @@ struct pevent_plugin_option { * struct pevent_plugin_option PEVENT_PLUGIN_OPTIONS[] = { * { * .name = "option-name", - * .plugin_alias = "overide-file-name", (optional) + * .plugin_alias = "override-file-name", (optional) * .description = "description of option to show users", * }, * { -- cgit v1.2.3 From 52c50ca75c534c0772b71900a29b3a71439b32ef Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 9 Mar 2017 16:16:36 -0800 Subject: powerpc/mm: handle protnone ptes on fork We need to mark pages of parent process read only on fork. Numa fault pte needs a protnone ptes variant with saved write flag set. On fork we need to make sure we remove the saved write bit. Instead of adding the protnone check in the caller update ptep_set_wrprotect variants to clear savedwrite bit. Without this we see random segfaults in application on fork. Fixes: c137a2757b886 ("powerpc/mm/autonuma: switch ppc64 to its own implementation of saved write") Link: http://lkml.kernel.org/r/1488203787-17849-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com Signed-off-by: Aneesh Kumar K.V Cc: Rik van Riel Cc: Mel Gorman Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/pgtable.h | 73 ++++++++++++++++------------ 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 1eeeb72c7015..f0b08acda5eb 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -347,23 +347,53 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, __r; \ }) +static inline int pte_write(pte_t pte) +{ + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); +} + +#ifdef CONFIG_NUMA_BALANCING +#define pte_savedwrite pte_savedwrite +static inline bool pte_savedwrite(pte_t pte) +{ + /* + * Saved write ptes are prot none ptes that doesn't have + * privileged bit sit. We mark prot none as one which has + * present and pviliged bit set and RWX cleared. To mark + * protnone which used to have _PAGE_WRITE set we clear + * the privileged bit. + */ + return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED)); +} +#else +#define pte_savedwrite pte_savedwrite +static inline bool pte_savedwrite(pte_t pte) +{ + return false; +} +#endif + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); + if (pte_write(*ptep)) + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); + else if (unlikely(pte_savedwrite(*ptep))) + pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0); } static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); + /* + * We should not find protnone for hugetlb, but this complete the + * interface. + */ + if (pte_write(*ptep)) + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); + else if (unlikely(pte_savedwrite(*ptep))) + pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1); } #define __HAVE_ARCH_PTEP_GET_AND_CLEAR @@ -397,11 +427,6 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_update(mm, addr, ptep, ~0UL, 0, 0); } -static inline int pte_write(pte_t pte) -{ - return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); -} - static inline int pte_dirty(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DIRTY)); @@ -466,19 +491,6 @@ static inline pte_t pte_clear_savedwrite(pte_t pte) return __pte(pte_val(pte) | _PAGE_PRIVILEGED); } -#define pte_savedwrite pte_savedwrite -static inline bool pte_savedwrite(pte_t pte) -{ - /* - * Saved write ptes are prot none ptes that doesn't have - * privileged bit sit. We mark prot none as one which has - * present and pviliged bit set and RWX cleared. To mark - * protnone which used to have _PAGE_WRITE set we clear - * the privileged bit. - */ - VM_BUG_ON(!pte_protnone(pte)); - return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED)); -} #endif /* CONFIG_NUMA_BALANCING */ static inline int pte_present(pte_t pte) @@ -982,11 +994,10 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - - if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); + if (pmd_write((*pmdp))) + pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); + else if (unlikely(pmd_savedwrite(*pmdp))) + pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED); } static inline int pmd_trans_huge(pmd_t pmd) -- cgit v1.2.3 From d19469e8415813cceaa494b6f538e327b9a95f3b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 9 Mar 2017 16:16:39 -0800 Subject: power/mm: update pte_write and pte_wrprotect to handle savedwrite We use pte_write() to check whethwer the pte entry is writable. This is mostly used to later mark the pte read only if it is writable. The other use of pte_write() is to check whether the pte_entry is writable so that hardware page table entry can be marked accordingly. This is used in kvm where we look at qemu page table entry and update hardware hash page table for the guest with correct write enable bit. With the above, for the first usage we should also check the savedwrite bit so that we can correctly clear the savedwite bit. For the later, we add a new variant __pte_write(). With this we can revert write_protect_page part of 595cd8f256d2 ("mm/ksm: handle protnone saved writes when making page write protect"). But I left it as it is as an example code for savedwrite check. Fixes: c137a2757b886 ("powerpc/mm/autonuma: switch ppc64 to its own implementation of saved write") Link: http://lkml.kernel.org/r/1488203787-17849-2-git-send-email-aneesh.kumar@linux.vnet.ibm.com Signed-off-by: Aneesh Kumar K.V Cc: Rik van Riel Cc: Mel Gorman Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/pgtable.h | 24 +++++++++++++++++++----- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index f0b08acda5eb..ec1e731e6a2d 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -347,7 +347,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, __r; \ }) -static inline int pte_write(pte_t pte) +static inline int __pte_write(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); } @@ -373,11 +373,16 @@ static inline bool pte_savedwrite(pte_t pte) } #endif +static inline int pte_write(pte_t pte) +{ + return __pte_write(pte) || pte_savedwrite(pte); +} + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if (pte_write(*ptep)) + if (__pte_write(*ptep)) pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); else if (unlikely(pte_savedwrite(*ptep))) pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0); @@ -390,7 +395,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, * We should not find protnone for hugetlb, but this complete the * interface. */ - if (pte_write(*ptep)) + if (__pte_write(*ptep)) pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); else if (unlikely(pte_savedwrite(*ptep))) pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1); @@ -490,7 +495,13 @@ static inline pte_t pte_clear_savedwrite(pte_t pte) VM_BUG_ON(!pte_protnone(pte)); return __pte(pte_val(pte) | _PAGE_PRIVILEGED); } - +#else +#define pte_clear_savedwrite pte_clear_savedwrite +static inline pte_t pte_clear_savedwrite(pte_t pte) +{ + VM_WARN_ON(1); + return __pte(pte_val(pte) & ~_PAGE_WRITE); +} #endif /* CONFIG_NUMA_BALANCING */ static inline int pte_present(pte_t pte) @@ -518,6 +529,8 @@ static inline unsigned long pte_pfn(pte_t pte) /* Generic modifiers for PTE bits */ static inline pte_t pte_wrprotect(pte_t pte) { + if (unlikely(pte_savedwrite(pte))) + return pte_clear_savedwrite(pte); return __pte(pte_val(pte) & ~_PAGE_WRITE); } @@ -938,6 +951,7 @@ static inline int pmd_protnone(pmd_t pmd) #define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) +#define __pmd_write(pmd) __pte_write(pmd_pte(pmd)) #define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -994,7 +1008,7 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (pmd_write((*pmdp))) + if (__pmd_write((*pmdp))) pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); else if (unlikely(pmd_savedwrite(*pmdp))) pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f3158fb16de3..8c68145ba1bd 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -601,7 +601,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, hva, NULL, NULL); if (ptep) { pte = kvmppc_read_update_linux_pte(ptep, 1); - if (pte_write(pte)) + if (__pte_write(pte)) write_ok = 1; } local_irq_restore(flags); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6fca970373ee..ce6f2121fffe 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -256,7 +256,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } pte = kvmppc_read_update_linux_pte(ptep, writing); if (pte_present(pte) && !pte_protnone(pte)) { - if (writing && !pte_write(pte)) + if (writing && !__pte_write(pte)) /* make the actual HPTE be read-only */ ptel = hpte_make_readonly(ptel); is_ci = pte_ci(pte); -- cgit v1.2.3 From ef947b2529f918d9606533eb9c32b187ed6a5ede Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Mar 2017 16:16:42 -0800 Subject: x86, mm: fix gup_pte_range() vs DAX mappings gup_pte_range() fails to check pte_allows_gup() before translating a DAX pte entry, pte_devmap(), to a page. This allows writes to read-only mappings, and bypasses the DAX cacheline dirty tracking due to missed 'mkwrite' faults. The gup_huge_pmd() path and the gup_huge_pud() path correctly check pte_allows_gup() before checking for _devmap() entries. Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Link: http://lkml.kernel.org/r/148804251312.36605.12665024794196605053.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ross Zwisler Signed-off-by: Dan Williams Reported-by: Dave Hansen Reported-by: Ross Zwisler Cc: Xiong Zhou Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/gup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 99c7805a9693..9d32ee608807 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -120,6 +120,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, return 0; } + if (!pte_allows_gup(pte_val(pte), write)) { + pte_unmap(ptep); + return 0; + } + if (pte_devmap(pte)) { pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { @@ -127,8 +132,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, pte_unmap(ptep); return 0; } - } else if (!pte_allows_gup(pte_val(pte), write) || - pte_special(pte)) { + } else if (pte_special(pte)) { pte_unmap(ptep); return 0; } -- cgit v1.2.3 From b2e593e271b0760ebc8999e5f9dd068ae2b9d30a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Mar 2017 16:16:45 -0800 Subject: x86, mm: unify exit paths in gup_pte_range() All exit paths from gup_pte_range() require pte_unmap() of the original pte page before returning. Refactor the code to have a single exit point to do the unmap. This mirrors the flow of the generic gup_pte_range() in mm/gup.c. Link: http://lkml.kernel.org/r/148804251828.36605.14910389618497006945.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Dave Hansen Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/gup.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 9d32ee608807..1f3b6ef105cd 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -106,36 +106,35 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { struct dev_pagemap *pgmap = NULL; - int nr_start = *nr; - pte_t *ptep; + int nr_start = *nr, ret = 0; + pte_t *ptep, *ptem; - ptep = pte_offset_map(&pmd, addr); + /* + * Keep the original mapped PTE value (ptem) around since we + * might increment ptep off the end of the page when finishing + * our loop iteration. + */ + ptem = ptep = pte_offset_map(&pmd, addr); do { pte_t pte = gup_get_pte(ptep); struct page *page; /* Similar to the PMD case, NUMA hinting must take slow path */ - if (pte_protnone(pte)) { - pte_unmap(ptep); - return 0; - } + if (pte_protnone(pte)) + break; - if (!pte_allows_gup(pte_val(pte), write)) { - pte_unmap(ptep); - return 0; - } + if (!pte_allows_gup(pte_val(pte), write)) + break; if (pte_devmap(pte)) { pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { undo_dev_pagemap(nr, nr_start, pages); - pte_unmap(ptep); - return 0; + break; } - } else if (pte_special(pte)) { - pte_unmap(ptep); - return 0; - } + } else if (pte_special(pte)) + break; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); get_page(page); @@ -145,9 +144,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, (*nr)++; } while (ptep++, addr += PAGE_SIZE, addr != end); - pte_unmap(ptep - 1); + if (addr == end) + ret = 1; + pte_unmap(ptem); - return 1; + return ret; } static inline void get_head_page_multiple(struct page *page, int nr) -- cgit v1.2.3 From dd0db88d8094a6d9d4d1fc5fcd56ab619f54ccf8 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:16:49 -0800 Subject: userfaultfd: non-cooperative: rollback userfaultfd_exit Patch series "userfaultfd non-cooperative further update for 4.11 merge window". Unfortunately I noticed one relevant bug in userfaultfd_exit while doing more testing. I've been doing testing before and this was also tested by kbuild bot and exercised by the selftest, but this bug never reproduced before. I dropped userfaultfd_exit as result. I dropped it because of implementation difficulty in receiving signals in __mmput and because I think -ENOSPC as result from the background UFFDIO_COPY should be enough already. Before I decided to remove userfaultfd_exit, I noticed userfaultfd_exit wasn't exercised by the selftest and when I tried to exercise it, after moving it to a more correct place in __mmput where it would make more sense and where the vma list is stable, it resulted in the event_wait_completion in D state. So then I added the second patch to be sure even if we call userfaultfd_event_wait_completion too late during task exit(), we won't risk to generate tasks in D state. The same check exists in handle_userfault() for the same reason, except it makes a difference there, while here is just a robustness check and it's run under WARN_ON_ONCE. While looking at the userfaultfd_event_wait_completion() function I looked back at its callers too while at it and I think it's not ok to stop executing dup_fctx on the fcs list because we relay on userfaultfd_event_wait_completion to execute userfaultfd_ctx_put(fctx->orig) which is paired against userfaultfd_ctx_get(fctx->orig) in dup_userfault just before list_add(fcs). This change only takes care of fctx->orig but this area also needs further review looking for similar problems in fctx->new. The only patch that is urgent is the first because it's an use after free during a SMP race condition that affects all processes if CONFIG_USERFAULTFD=y. Very hard to reproduce though and probably impossible without SLUB poisoning enabled. This patch (of 3): I once reproduced this oops with the userfaultfd selftest, it's not easily reproducible and it requires SLUB poisoning to reproduce. general protection fault: 0000 [#1] SMP Modules linked in: CPU: 2 PID: 18421 Comm: userfaultfd Tainted: G ------------ T 3.10.0+ #15 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014 task: ffff8801f83b9440 ti: ffff8801f833c000 task.ti: ffff8801f833c000 RIP: 0010:[] [] userfaultfd_exit+0x29/0xa0 RSP: 0018:ffff8801f833fe80 EFLAGS: 00010202 RAX: ffff8801f833ffd8 RBX: 6b6b6b6b6b6b6b6b RCX: ffff8801f83b9440 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8800baf18600 RBP: ffff8801f833fee8 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: ffffffff8127ceb3 R12: 0000000000000000 R13: ffff8800baf186b0 R14: ffff8801f83b99f8 R15: 00007faed746c700 FS: 0000000000000000(0000) GS:ffff88023fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007faf0966f028 CR3: 0000000001bc6000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Call Trace: do_exit+0x297/0xd10 SyS_exit+0x17/0x20 tracesys+0xdd/0xe2 Code: 00 00 66 66 66 66 90 55 48 89 e5 41 54 53 48 83 ec 58 48 8b 1f 48 85 db 75 11 eb 73 66 0f 1f 44 00 00 48 8b 5b 10 48 85 db 74 64 <4c> 8b a3 b8 00 00 00 4d 85 e4 74 eb 41 f6 84 24 2c 01 00 00 80 RIP [] userfaultfd_exit+0x29/0xa0 RSP ---[ end trace 9fecd6dcb442846a ]--- In the debugger I located the "mm" pointer in the stack and walking mm->mmap->vm_next through the end shows the vma->vm_next list is fully consistent and it is null terminated list as expected. So this has to be an SMP race condition where userfaultfd_exit was running while the vma list was being modified by another CPU. When userfaultfd_exit() run one of the ->vm_next pointers pointed to SLAB_POISON (RBX is the vma pointer and is 0x6b6b..). The reason is that it's not running in __mmput but while there are still other threads running and it's not holding the mmap_sem (it can't as it has to wait the even to be received by the manager). So this is an use after free that was happening for all processes. One more implementation problem aside from the race condition: userfaultfd_exit has really to check a flag in mm->flags before walking the vma or it's going to slowdown the exit() path for regular tasks. One more implementation problem: at that point signals can't be delivered so it would also create a task in D state if the manager doesn't read the event. The major design issue: it overall looks superfluous as the manager can check for -ENOSPC in the background transfer: if (mmget_not_zero(ctx->mm)) { [..] } else { return -ENOSPC; } It's safer to roll it back and re-introduce it later if at all. [rppt@linux.vnet.ibm.com: documentation fixup after removal of UFFD_EVENT_EXIT] Link: http://lkml.kernel.org/r/1488345437-4364-1-git-send-email-rppt@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/20170224181957.19736-2-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Signed-off-by: Mike Rapoport Acked-by: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/userfaultfd.txt | 4 ---- fs/userfaultfd.c | 28 ---------------------------- include/linux/userfaultfd_k.h | 6 ------ include/uapi/linux/userfaultfd.h | 5 +---- kernel/exit.c | 1 - 5 files changed, 1 insertion(+), 43 deletions(-) diff --git a/Documentation/vm/userfaultfd.txt b/Documentation/vm/userfaultfd.txt index 0e5543a920e5..bb2f945f87ab 100644 --- a/Documentation/vm/userfaultfd.txt +++ b/Documentation/vm/userfaultfd.txt @@ -172,10 +172,6 @@ the same read(2) protocol as for the page fault notifications. The manager has to explicitly enable these events by setting appropriate bits in uffdio_api.features passed to UFFDIO_API ioctl: -UFFD_FEATURE_EVENT_EXIT - enable notification about exit() of the -non-cooperative process. When the monitored process exits, the uffd -manager will get UFFD_EVENT_EXIT. - UFFD_FEATURE_EVENT_FORK - enable userfaultfd hooks for fork(). When this feature is enabled, the userfaultfd context of the parent process is duplicated into the newly created process. The manager receives diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f62199b90fd0..16d0cc600fa9 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -775,34 +775,6 @@ void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf) } } -void userfaultfd_exit(struct mm_struct *mm) -{ - struct vm_area_struct *vma = mm->mmap; - - /* - * We can do the vma walk without locking because the caller - * (exit_mm) knows it now has exclusive access - */ - while (vma) { - struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; - - if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) { - struct userfaultfd_wait_queue ewq; - - userfaultfd_ctx_get(ctx); - - msg_init(&ewq.msg); - ewq.msg.event = UFFD_EVENT_EXIT; - - userfaultfd_event_wait_completion(ctx, &ewq); - - ctx->features &= ~UFFD_FEATURE_EVENT_EXIT; - } - - vma = vma->vm_next; - } -} - static int userfaultfd_release(struct inode *inode, struct file *file) { struct userfaultfd_ctx *ctx = file->private_data; diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 0468548acebf..f2b79bf4c895 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -72,8 +72,6 @@ extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); -extern void userfaultfd_exit(struct mm_struct *mm); - #else /* CONFIG_USERFAULTFD */ /* mm helpers */ @@ -139,10 +137,6 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm, { } -static inline void userfaultfd_exit(struct mm_struct *mm) -{ -} - #endif /* CONFIG_USERFAULTFD */ #endif /* _LINUX_USERFAULTFD_K_H */ diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index c055947c5c98..3b059530dac9 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -18,8 +18,7 @@ * means the userland is reading). */ #define UFFD_API ((__u64)0xAA) -#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_EXIT | \ - UFFD_FEATURE_EVENT_FORK | \ +#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ UFFD_FEATURE_EVENT_REMAP | \ UFFD_FEATURE_EVENT_REMOVE | \ UFFD_FEATURE_EVENT_UNMAP | \ @@ -113,7 +112,6 @@ struct uffd_msg { #define UFFD_EVENT_REMAP 0x14 #define UFFD_EVENT_REMOVE 0x15 #define UFFD_EVENT_UNMAP 0x16 -#define UFFD_EVENT_EXIT 0x17 /* flags for UFFD_EVENT_PAGEFAULT */ #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ @@ -163,7 +161,6 @@ struct uffdio_api { #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) #define UFFD_FEATURE_MISSING_SHMEM (1<<5) #define UFFD_FEATURE_EVENT_UNMAP (1<<6) -#define UFFD_FEATURE_EVENT_EXIT (1<<7) __u64 features; __u64 ioctls; diff --git a/kernel/exit.c b/kernel/exit.c index e126ebf2400c..516acdb0e0ec 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -554,7 +554,6 @@ static void exit_mm(void) enter_lazy_tlb(mm, current); task_unlock(current); mm_update_next_owner(mm); - userfaultfd_exit(mm); mmput(mm); if (test_thread_flag(TIF_MEMDIE)) exit_oom_victim(); -- cgit v1.2.3 From 9a69a829f9b656c2a220d65a94ecf7b5887c5da1 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:16:52 -0800 Subject: userfaultfd: non-cooperative: robustness check Similar to the handle_userfault() case, also make sure to never attempt to send any event past the PF_EXITING point of no return. This is purely a robustness check. Link: http://lkml.kernel.org/r/20170224181957.19736-3-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Acked-by: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 16d0cc600fa9..668bbbd2e04d 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -530,8 +530,13 @@ out: static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, struct userfaultfd_wait_queue *ewq) { - int ret = 0; + int ret; + + ret = -1; + if (WARN_ON_ONCE(current->flags & PF_EXITING)) + goto out; + ret = 0; ewq->ctx = ctx; init_waitqueue_entry(&ewq->wq, current); @@ -566,7 +571,7 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, * ctx may go away after this if the userfault pseudo fd is * already released. */ - +out: userfaultfd_ctx_put(ctx); return ret; } -- cgit v1.2.3 From 8c9e7bb7a41f2bbd54b2caefb274fb3de239819f Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:16:54 -0800 Subject: userfaultfd: non-cooperative: release all ctx in dup_userfaultfd_complete Don't stop running dup_fctx() even if userfaultfd_event_wait_completion fails as it has to run userfaultfd_ctx_put on all ctx to pair against the userfaultfd_ctx_get that was run on all fctx->orig in dup_userfaultfd. Link: http://lkml.kernel.org/r/20170224181957.19736-4-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Acked-by: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 668bbbd2e04d..dd48052e086f 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -527,16 +527,12 @@ out: return ret; } -static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, - struct userfaultfd_wait_queue *ewq) +static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, + struct userfaultfd_wait_queue *ewq) { - int ret; - - ret = -1; if (WARN_ON_ONCE(current->flags & PF_EXITING)) goto out; - ret = 0; ewq->ctx = ctx; init_waitqueue_entry(&ewq->wq, current); @@ -552,7 +548,6 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, break; if (ACCESS_ONCE(ctx->released) || fatal_signal_pending(current)) { - ret = -1; __remove_wait_queue(&ctx->event_wqh, &ewq->wq); break; } @@ -573,7 +568,6 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, */ out: userfaultfd_ctx_put(ctx); - return ret; } static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx, @@ -631,7 +625,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) return 0; } -static int dup_fctx(struct userfaultfd_fork_ctx *fctx) +static void dup_fctx(struct userfaultfd_fork_ctx *fctx) { struct userfaultfd_ctx *ctx = fctx->orig; struct userfaultfd_wait_queue ewq; @@ -641,17 +635,15 @@ static int dup_fctx(struct userfaultfd_fork_ctx *fctx) ewq.msg.event = UFFD_EVENT_FORK; ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new; - return userfaultfd_event_wait_completion(ctx, &ewq); + userfaultfd_event_wait_completion(ctx, &ewq); } void dup_userfaultfd_complete(struct list_head *fcs) { - int ret = 0; struct userfaultfd_fork_ctx *fctx, *n; list_for_each_entry_safe(fctx, n, fcs, list) { - if (!ret) - ret = dup_fctx(fctx); + dup_fctx(fctx); list_del(&fctx->list); kfree(fctx); } -- cgit v1.2.3 From cbfd0c1001bedb4b051cf4a1f5df24f1500381bc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 9 Mar 2017 16:16:57 -0800 Subject: include/linux/fs.h: fix unsigned enum warning with gcc-4.2 With arm-linux-gcc-4.2, almost every file we build in the kernel ends up with this warning: include/linux/fs.h:2648: warning: comparison of unsigned expression < 0 is always false Later versions don't have this problem, but it's easy enough to work around. Link: http://lkml.kernel.org/r/20161216105634.235457-12-arnd@arndb.de Signed-off-by: Arnd Bergmann Cc: Russell King Cc: Brendan Gregg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index aad3fd0ff5f8..7251f7bb45e8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2678,7 +2678,7 @@ static const char * const kernel_read_file_str[] = { static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) { - if (id < 0 || id >= READING_MAX_ID) + if ((unsigned)id >= READING_MAX_ID) return kernel_read_file_str[READING_UNKNOWN]; return kernel_read_file_str[id]; -- cgit v1.2.3 From ce9311cf95ad8baf044a014738d76973d93b739a Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Thu, 9 Mar 2017 16:17:00 -0800 Subject: mm/vmstats: add thp_split_pud event for clarity We added support for PUD-sized transparent hugepages, however we count the event "thp split pud" into thp_split_pmd event. To separate the event count of thp split pud from pmd, add a new event named thp_split_pud. Link: http://lkml.kernel.org/r/1488282380-5076-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Yisheng Xie Cc: Vlastimil Babka Cc: Johannes Weiner Cc: Michal Hocko Cc: Joonsoo Kim Cc: Sebastian Siewior Cc: Hugh Dickins Cc: Christoph Lameter Cc: Kirill A. Shutemov Cc: Aneesh Kumar K.V Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Ebru Akagunduz Cc: David Rientjes Cc: Hanjun Guo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 3 +++ mm/huge_memory.c | 2 +- mm/vmstat.c | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 6aa1b6cb5828..a80b7b59cf33 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -79,6 +79,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_SPLIT_PAGE_FAILED, THP_DEFERRED_SPLIT_PAGE, THP_SPLIT_PMD, +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + THP_SPLIT_PUD, +#endif THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, #endif diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d36b2af4d1bf..8f037e256c54 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1828,7 +1828,7 @@ static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud, VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma); VM_BUG_ON(!pud_trans_huge(*pud) && !pud_devmap(*pud)); - count_vm_event(THP_SPLIT_PMD); + count_vm_event(THP_SPLIT_PUD); pudp_huge_clear_flush_notify(vma, haddr, pud); } diff --git a/mm/vmstat.c b/mm/vmstat.c index 69f9aff39a2e..b1947f0cbee2 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1065,6 +1065,9 @@ const char * const vmstat_text[] = { "thp_split_page_failed", "thp_deferred_split_page", "thp_split_pmd", +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + "thp_split_pud", +#endif "thp_zero_page_alloc", "thp_zero_page_alloc_failed", #endif -- cgit v1.2.3 From f4b7ac68f438fa8521bbbf421f194ff10b0a7577 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Thu, 9 Mar 2017 16:17:03 -0800 Subject: drivers/md/bcache/util.h: remove duplicate inclusion of blkdev.h Link: http://lkml.kernel.org/r/20170226060230.11555-1-standby24x7@gmail.com Signed-off-by: Masanari Iida Acked-by: Coly Li Cc: Kent Overstreet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/bcache/util.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index a126919ed102..5d13930f0f22 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -4,7 +4,6 @@ #include #include -#include #include #include #include -- cgit v1.2.3 From bfc7228b9a9647e1c353e50b40297a2929801759 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Thu, 9 Mar 2017 16:17:06 -0800 Subject: mm/cgroup: avoid panic when init with low memory The system may panic when initialisation is done when almost all the memory is assigned to the huge pages using the kernel command line parameter hugepage=xxxx. Panic may occur like this: Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc000000000302b88 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=2048 [ 0.082424] NUMA pSeries Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.9.0-15-generic #16-Ubuntu task: c00000021ed01600 task.stack: c00000010d108000 NIP: c000000000302b88 LR: c000000000270e04 CTR: c00000000016cfd0 REGS: c00000010d10b2c0 TRAP: 0300 Not tainted (4.9.0-15-generic) MSR: 8000000002009033 [ 0.082770] CR: 28424422 XER: 00000000 CFAR: c0000000003d28b8 DAR: 0000000000000000 DSISR: 40000000 SOFTE: 1 GPR00: c000000000270e04 c00000010d10b540 c00000000141a300 c00000010fff6300 GPR04: 0000000000000000 00000000026012c0 c00000010d10b630 0000000487ab0000 GPR08: 000000010ee90000 c000000001454fd8 0000000000000000 0000000000000000 GPR12: 0000000000004400 c00000000fb80000 00000000026012c0 00000000026012c0 GPR16: 00000000026012c0 0000000000000000 0000000000000000 0000000000000002 GPR20: 000000000000000c 0000000000000000 0000000000000000 00000000024200c0 GPR24: c0000000016eef48 0000000000000000 c00000010fff7d00 00000000026012c0 GPR28: 0000000000000000 c00000010fff7d00 c00000010fff6300 c00000010d10b6d0 NIP mem_cgroup_soft_limit_reclaim+0xf8/0x4f0 LR do_try_to_free_pages+0x1b4/0x450 Call Trace: do_try_to_free_pages+0x1b4/0x450 try_to_free_pages+0xf8/0x270 __alloc_pages_nodemask+0x7a8/0xff0 new_slab+0x104/0x8e0 ___slab_alloc+0x620/0x700 __slab_alloc+0x34/0x60 kmem_cache_alloc_node_trace+0xdc/0x310 mem_cgroup_init+0x158/0x1c8 do_one_initcall+0x68/0x1d0 kernel_init_freeable+0x278/0x360 kernel_init+0x24/0x170 ret_from_kernel_thread+0x5c/0x74 Instruction dump: eb81ffe0 eba1ffe8 ebc1fff0 ebe1fff8 4e800020 3d230001 e9499a42 3d220004 3929acd8 794a1f24 7d295214 eac90100 2fa90000 419eff74 3b200000 ---[ end trace 342f5208b00d01b6 ]--- This is a chicken and egg issue where the kernel try to get free memory when allocating per node data in mem_cgroup_init(), but in that path mem_cgroup_soft_limit_reclaim() is called which assumes that these data are allocated. As mem_cgroup_soft_limit_reclaim() is best effort, it should return when these data are not yet allocated. This patch also fixes potential null pointer access in mem_cgroup_remove_from_trees() and mem_cgroup_update_tree(). Link: http://lkml.kernel.org/r/1487856999-16581-2-git-send-email-ldufour@linux.vnet.ibm.com Signed-off-by: Laurent Dufour Acked-by: Michal Hocko Acked-by: Johannes Weiner Acked-by: Balbir Singh Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c52ec893e241..76f513cc1b0e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -466,6 +466,8 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) struct mem_cgroup_tree_per_node *mctz; mctz = soft_limit_tree_from_page(page); + if (!mctz) + return; /* * Necessary to update all ancestors when hierarchy is used. * because their event counter is not touched. @@ -503,7 +505,8 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) for_each_node(nid) { mz = mem_cgroup_nodeinfo(memcg, nid); mctz = soft_limit_tree_node(nid); - mem_cgroup_remove_exceeded(mz, mctz); + if (mctz) + mem_cgroup_remove_exceeded(mz, mctz); } } @@ -2558,7 +2561,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, * is empty. Do it lockless to prevent lock bouncing. Races * are acceptable as soft limit is best effort anyway. */ - if (RB_EMPTY_ROOT(&mctz->rb_root)) + if (!mctz || RB_EMPTY_ROOT(&mctz->rb_root)) return 0; /* -- cgit v1.2.3 From 7eb76d457fd758d396bc2e65cb0ace5aae614149 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 9 Mar 2017 16:17:09 -0800 Subject: userfaultfd: non-cooperative: fix fork fctx->new memleak We have a memleak in the ->new ctx if the uffd of the parent is closed before the fork event is read, nothing frees the new context. Link: http://lkml.kernel.org/r/20170302173738.18994-2-aarcange@redhat.com Signed-off-by: Mike Rapoport Signed-off-by: Andrea Arcangeli Reported-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index dd48052e086f..2407249998c3 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -549,6 +549,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, if (ACCESS_ONCE(ctx->released) || fatal_signal_pending(current)) { __remove_wait_queue(&ctx->event_wqh, &ewq->wq); + if (ewq->msg.event == UFFD_EVENT_FORK) { + struct userfaultfd_ctx *new; + + new = (struct userfaultfd_ctx *) + (unsigned long) + ewq->msg.arg.reserved.reserved1; + + userfaultfd_ctx_put(new); + } break; } -- cgit v1.2.3 From 70ccb92fdd90b35bb6f9200093d4ffd6cb38156b Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:17:11 -0800 Subject: userfaultfd: non-cooperative: userfaultfd_remove revalidate vma in MADV_DONTNEED userfaultfd_remove() has to be execute before zapping the pagetables or UFFDIO_COPY could keep filling pages after zap_page_range returned, which would result in non zero data after a MADV_DONTNEED. However userfaultfd_remove() may have to release the mmap_sem. This was handled correctly in MADV_REMOVE, but MADV_DONTNEED accessed a potentially stale vma (the very vma passed to zap_page_range(vma, ...)). The fix consists in revalidating the vma in case userfaultfd_remove() had to release the mmap_sem. This also optimizes away an unnecessary down_read/up_read in the MADV_REMOVE case if UFFD_EVENT_FORK had to be delivered. It all remains zero runtime cost in case CONFIG_USERFAULTFD=n as userfaultfd_remove() will be defined as "true" at build time. Link: http://lkml.kernel.org/r/20170302173738.18994-3-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Acked-by: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 9 +++------ include/linux/userfaultfd_k.h | 7 +++---- mm/madvise.c | 44 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 2407249998c3..9fd5e51ffb31 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -695,8 +695,7 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx, userfaultfd_event_wait_completion(ctx, &ewq); } -void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct mm_struct *mm = vma->vm_mm; @@ -705,13 +704,11 @@ void userfaultfd_remove(struct vm_area_struct *vma, ctx = vma->vm_userfaultfd_ctx.ctx; if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE)) - return; + return true; userfaultfd_ctx_get(ctx); up_read(&mm->mmap_sem); - *prev = NULL; /* We wait for ACK w/o the mmap semaphore */ - msg_init(&ewq.msg); ewq.msg.event = UFFD_EVENT_REMOVE; @@ -720,7 +717,7 @@ void userfaultfd_remove(struct vm_area_struct *vma, userfaultfd_event_wait_completion(ctx, &ewq); - down_read(&mm->mmap_sem); + return false; } static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps, diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index f2b79bf4c895..48a3483dccb1 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -61,8 +61,7 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, unsigned long from, unsigned long to, unsigned long len); -extern void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +extern bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -118,11 +117,11 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, { } -static inline void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +static inline bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + return true; } static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, diff --git a/mm/madvise.c b/mm/madvise.c index dc5927c812d3..7a2abf0127ae 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -513,7 +513,43 @@ static long madvise_dontneed(struct vm_area_struct *vma, if (!can_madv_dontneed_vma(vma)) return -EINVAL; - userfaultfd_remove(vma, prev, start, end); + if (!userfaultfd_remove(vma, start, end)) { + *prev = NULL; /* mmap_sem has been dropped, prev is stale */ + + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, start); + if (!vma) + return -ENOMEM; + if (start < vma->vm_start) { + /* + * This "vma" under revalidation is the one + * with the lowest vma->vm_start where start + * is also < vma->vm_end. If start < + * vma->vm_start it means an hole materialized + * in the user address space within the + * virtual range passed to MADV_DONTNEED. + */ + return -ENOMEM; + } + if (!can_madv_dontneed_vma(vma)) + return -EINVAL; + if (end > vma->vm_end) { + /* + * Don't fail if end > vma->vm_end. If the old + * vma was splitted while the mmap_sem was + * released the effect of the concurrent + * operation may not cause MADV_DONTNEED to + * have an undefined result. There may be an + * adjacent next vma that we'll walk + * next. userfaultfd_remove() will generate an + * UFFD_EVENT_REMOVE repetition on the + * end-vma->vm_end range, but the manager can + * handle a repetition fine. + */ + end = vma->vm_end; + } + VM_WARN_ON(start >= end); + } zap_page_range(vma, start, end - start); return 0; } @@ -554,8 +590,10 @@ static long madvise_remove(struct vm_area_struct *vma, * mmap_sem. */ get_file(f); - userfaultfd_remove(vma, prev, start, end); - up_read(¤t->mm->mmap_sem); + if (userfaultfd_remove(vma, start, end)) { + /* mmap_sem was not released by userfaultfd_remove() */ + up_read(¤t->mm->mmap_sem); + } error = vfs_fallocate(f, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, end - start); -- cgit v1.2.3 From 46aa6a302b53f543f8e8b8e1714dc5e449ad36a6 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:17:14 -0800 Subject: userfaultfd: selftest: vm: allow to build in vm/ directory linux/tools/testing/selftests/vm $ make gcc -Wall -I ../../../../usr/include compaction_test.c -lrt -o /compaction_test /usr/lib/gcc/x86_64-pc-linux-gnu/4.9.4/../../../../x86_64-pc-linux-gnu/bin/ld: cannot open output file /compaction_test: Permission denied collect2: error: ld returned 1 exit status make: *** [../lib.mk:54: /compaction_test] Error 1 Since commit a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT") selftests/vm build fails if run from the "selftests/vm" directory, but it works in the selftests/ directory. It's quicker to be able to do a local vm-only build after a tree wipe and this patch allows for it again. Link: http://lkml.kernel.org/r/20170302173738.18994-4-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Cc: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 4cff7e7ddcc4..41642ba5e318 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,5 +1,9 @@ # Makefile for vm selftests +ifndef OUTPUT + OUTPUT := $(shell pwd) +endif + CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) LDLIBS = -lrt TEST_GEN_FILES = compaction_test -- cgit v1.2.3 From c9a1b80daeb50e39f21fd7e62f7224f317ac85f0 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Thu, 9 Mar 2017 16:17:17 -0800 Subject: mm/memblock.c: fix memblock_next_valid_pfn() Obviously, we should not access memblock.memory.regions[right] if 'right' is outside of [0..memblock.memory.cnt>. Fixes: b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns where possible") Link: http://lkml.kernel.org/r/20170303023745.9104-1-takahiro.akashi@linaro.org Signed-off-by: AKASHI Takahiro Cc: Paul Burton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memblock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index b64b47803e52..696f06d17c4e 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1118,7 +1118,10 @@ unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn, } } while (left < right); - return min(PHYS_PFN(type->regions[right].base), max_pfn); + if (right == type->cnt) + return max_pfn; + else + return min(PHYS_PFN(type->regions[right].base), max_pfn); } /** -- cgit v1.2.3 From 8346242a7e32c4c93316684ad8f45473932586b9 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 16:17:20 -0800 Subject: rmap: fix NULL-pointer dereference on THP munlocking The following test case triggers NULL-pointer derefernce in try_to_unmap_one(): #include #include #include #include int main(int argc, char *argv[]) { int fd; system("mount -t tmpfs -o huge=always none /mnt"); fd = open("/mnt/test", O_CREAT | O_RDWR); ftruncate(fd, 2UL << 20); mmap(NULL, 2UL << 20, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED | MAP_LOCKED, fd, 0); mmap(NULL, 2UL << 20, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, fd, 0); munlockall(); return 0; } Apparently, there's a case when we call try_to_unmap() on huge PMDs: it's TTU_MUNLOCK. Let's handle this case correctly. Fixes: c7ab0d2fdc84 ("mm: convert try_to_unmap_one() to use page_vma_mapped_walk()") Link: http://lkml.kernel.org/r/20170302151159.30592-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/rmap.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 2da487d6cea8..250635dc47b7 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1316,12 +1316,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } while (page_vma_mapped_walk(&pvmw)) { - subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); - address = pvmw.address; - - /* Unexpected PMD-mapped THP? */ - VM_BUG_ON_PAGE(!pvmw.pte, page); - /* * If the page is mlock()d, we cannot swap it out. * If it's recently referenced (perhaps page_referenced @@ -1345,6 +1339,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, continue; } + /* Unexpected PMD-mapped THP? */ + VM_BUG_ON_PAGE(!pvmw.pte, page); + + subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); + address = pvmw.address; + + if (!(flags & TTU_IGNORE_ACCESS)) { if (ptep_clear_flush_young_notify(vma, address, pvmw.pte)) { -- cgit v1.2.3 From 6ebb4a1b848fe75323135f93e72c78f8780fd268 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 16:17:23 -0800 Subject: thp: fix another corner case of munlock() vs. THPs The following test case triggers BUG() in munlock_vma_pages_range(): int main(int argc, char *argv[]) { int fd; system("mount -t tmpfs -o huge=always none /mnt"); fd = open("/mnt/test", O_CREAT | O_RDWR); ftruncate(fd, 4UL << 20); mmap(NULL, 4UL << 20, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED | MAP_LOCKED, fd, 0); mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, fd, 0); munlockall(); return 0; } The second mmap() create PTE-mapping of the first huge page in file. It makes kernel munlock the page as we never keep PTE-mapped page mlocked. On munlockall() when we handle vma created by the first mmap(), munlock_vma_page() returns page_mask == 0, as the page is not mlocked anymore. On next iteration follow_page_mask() return tail page, but page_mask is HPAGE_NR_PAGES - 1. It makes us skip to the first tail page of the next huge page and step on VM_BUG_ON_PAGE(PageMlocked(page)). The fix is not use the page_mask from follow_page_mask() at all. It has no use for us. Link: http://lkml.kernel.org/r/20170302150252.34120-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: [4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mlock.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mm/mlock.c b/mm/mlock.c index 1050511f8b2b..02f138244bf5 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -442,7 +442,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, while (start < end) { struct page *page; - unsigned int page_mask; + unsigned int page_mask = 0; unsigned long page_increm; struct pagevec pvec; struct zone *zone; @@ -456,8 +456,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, * suits munlock very well (and if somehow an abnormal page * has sneaked into the range, we won't oops here: great). */ - page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP, - &page_mask); + page = follow_page(vma, start, FOLL_GET | FOLL_DUMP); if (page && !IS_ERR(page)) { if (PageTransTail(page)) { @@ -468,8 +467,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, /* * Any THP page found by follow_page_mask() may * have gotten split before reaching - * munlock_vma_page(), so we need to recompute - * the page_mask here. + * munlock_vma_page(), so we need to compute + * the page_mask here instead. */ page_mask = munlock_vma_page(page); unlock_page(page); -- cgit v1.2.3 From 40e952f9d687928b32db20226f085ae660a7237c Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Thu, 9 Mar 2017 16:17:26 -0800 Subject: mm: do not call mem_cgroup_free() from within mem_cgroup_alloc() mem_cgroup_free() indirectly calls wb_domain_exit() which is not prepared to deal with a struct wb_domain object that hasn't executed wb_domain_init(). For instance, the following warning message is printed by lockdep if alloc_percpu() fails in mem_cgroup_alloc(): INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 1 PID: 1950 Comm: mkdir Not tainted 4.10.0+ #151 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x67/0x99 register_lock_class+0x36d/0x540 __lock_acquire+0x7f/0x1a30 lock_acquire+0xcc/0x200 del_timer_sync+0x3c/0xc0 wb_domain_exit+0x14/0x20 mem_cgroup_free+0x14/0x40 mem_cgroup_css_alloc+0x3f9/0x620 cgroup_apply_control_enable+0x190/0x390 cgroup_mkdir+0x290/0x3d0 kernfs_iop_mkdir+0x58/0x80 vfs_mkdir+0x10e/0x1a0 SyS_mkdirat+0xa8/0xd0 SyS_mkdir+0x14/0x20 entry_SYSCALL_64_fastpath+0x18/0xad Add __mem_cgroup_free() which skips wb_domain_exit(). This is used by both mem_cgroup_free() and mem_cgroup_alloc() clean up. Fixes: 0b8f73e104285 ("mm: memcontrol: clean up alloc, online, offline, free functions") Link: http://lkml.kernel.org/r/20170306192122.24262-1-tahsin@google.com Signed-off-by: Tahsin Erdogan Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 76f513cc1b0e..2bd7541d7c11 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4138,17 +4138,22 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) kfree(memcg->nodeinfo[node]); } -static void mem_cgroup_free(struct mem_cgroup *memcg) +static void __mem_cgroup_free(struct mem_cgroup *memcg) { int node; - memcg_wb_domain_exit(memcg); for_each_node(node) free_mem_cgroup_per_node_info(memcg, node); free_percpu(memcg->stat); kfree(memcg); } +static void mem_cgroup_free(struct mem_cgroup *memcg) +{ + memcg_wb_domain_exit(memcg); + __mem_cgroup_free(memcg); +} + static struct mem_cgroup *mem_cgroup_alloc(void) { struct mem_cgroup *memcg; @@ -4199,7 +4204,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) fail: if (memcg->id.id > 0) idr_remove(&mem_cgroup_idr, memcg->id.id); - mem_cgroup_free(memcg); + __mem_cgroup_free(memcg); return NULL; } -- cgit v1.2.3 From 68fd814a3391c7e017ae6ace8855788748edd981 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 9 Mar 2017 16:17:28 -0800 Subject: kasan: resched in quarantine_remove_cache() We see reported stalls/lockups in quarantine_remove_cache() on machines with large amounts of RAM. quarantine_remove_cache() needs to scan whole quarantine in order to take out all objects belonging to the cache. Quarantine is currently 1/32-th of RAM, e.g. on a machine with 256GB of memory that will be 8GB. Moreover quarantine scanning is a walk over uncached linked list, which is slow. Add cond_resched() after scanning of each non-empty batch of objects. Batches are specifically kept of reasonable size for quarantine_put(). On a machine with 256GB of RAM we should have ~512 non-empty batches, each with 16MB of objects. Link: http://lkml.kernel.org/r/20170308154239.25440-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Andrey Ryabinin Cc: Greg Thelen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/quarantine.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 6f1ed1630873..4ac39f20757a 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -283,8 +283,15 @@ void quarantine_remove_cache(struct kmem_cache *cache) on_each_cpu(per_cpu_remove_cache, cache, 1); spin_lock_irqsave(&quarantine_lock, flags); - for (i = 0; i < QUARANTINE_BATCHES; i++) + for (i = 0; i < QUARANTINE_BATCHES; i++) { + if (qlist_empty(&global_quarantine[i])) + continue; qlist_move_cache(&global_quarantine[i], &to_free, cache); + /* Scanning whole quarantine can take a while. */ + spin_unlock_irqrestore(&quarantine_lock, flags); + cond_resched(); + spin_lock_irqsave(&quarantine_lock, flags); + } spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, cache); -- cgit v1.2.3 From ce5bec54bb5debbbe51b40270d8f209a23cadae4 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 9 Mar 2017 16:17:32 -0800 Subject: kasan: fix races in quarantine_remove_cache() quarantine_remove_cache() frees all pending objects that belong to the cache, before we destroy the cache itself. However there are currently two possibilities how it can fail to do so. First, another thread can hold some of the objects from the cache in temp list in quarantine_put(). quarantine_put() has a windows of enabled interrupts, and on_each_cpu() in quarantine_remove_cache() can finish right in that window. These objects will be later freed into the destroyed cache. Then, quarantine_reduce() has the same problem. It grabs a batch of objects from the global quarantine, then unlocks quarantine_lock and then frees the batch. quarantine_remove_cache() can finish while some objects from the cache are still in the local to_free list in quarantine_reduce(). Fix the race with quarantine_put() by disabling interrupts for the whole duration of quarantine_put(). In combination with on_each_cpu() in quarantine_remove_cache() it ensures that quarantine_remove_cache() either sees the objects in the per-cpu list or in the global list. Fix the race with quarantine_reduce() by protecting quarantine_reduce() with srcu critical section and then doing synchronize_srcu() at the end of quarantine_remove_cache(). I've done some assessment of how good synchronize_srcu() works in this case. And on a 4 CPU VM I see that it blocks waiting for pending read critical sections in about 2-3% of cases. Which looks good to me. I suspect that these races are the root cause of some GPFs that I episodically hit. Previously I did not have any explanation for them. BUG: unable to handle kernel NULL pointer dereference at 00000000000000c8 IP: qlist_free_all+0x2e/0xc0 mm/kasan/quarantine.c:155 PGD 6aeea067 PUD 60ed7067 PMD 0 Oops: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 13667 Comm: syz-executor2 Not tainted 4.10.0+ #60 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88005f948040 task.stack: ffff880069818000 RIP: 0010:qlist_free_all+0x2e/0xc0 mm/kasan/quarantine.c:155 RSP: 0018:ffff88006981f298 EFLAGS: 00010246 RAX: ffffea0000ffff00 RBX: 0000000000000000 RCX: ffffea0000ffff1f RDX: 0000000000000000 RSI: ffff88003fffc3e0 RDI: 0000000000000000 RBP: ffff88006981f2c0 R08: ffff88002fed7bd8 R09: 00000001001f000d R10: 00000000001f000d R11: ffff88006981f000 R12: ffff88003fffc3e0 R13: ffff88006981f2d0 R14: ffffffff81877fae R15: 0000000080000000 FS: 00007fb911a2d700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000c8 CR3: 0000000060ed6000 CR4: 00000000000006f0 Call Trace: quarantine_reduce+0x10e/0x120 mm/kasan/quarantine.c:239 kasan_kmalloc+0xca/0xe0 mm/kasan/kasan.c:590 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:544 slab_post_alloc_hook mm/slab.h:456 [inline] slab_alloc_node mm/slub.c:2718 [inline] kmem_cache_alloc_node+0x1d3/0x280 mm/slub.c:2754 __alloc_skb+0x10f/0x770 net/core/skbuff.c:219 alloc_skb include/linux/skbuff.h:932 [inline] _sctp_make_chunk+0x3b/0x260 net/sctp/sm_make_chunk.c:1388 sctp_make_data net/sctp/sm_make_chunk.c:1420 [inline] sctp_make_datafrag_empty+0x208/0x360 net/sctp/sm_make_chunk.c:746 sctp_datamsg_from_user+0x7e8/0x11d0 net/sctp/chunk.c:266 sctp_sendmsg+0x2611/0x3970 net/sctp/socket.c:1962 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 SYSC_sendto+0x660/0x810 net/socket.c:1685 SyS_sendto+0x40/0x50 net/socket.c:1653 I am not sure about backporting. The bug is quite hard to trigger, I've seen it few times during our massive continuous testing (however, it could be cause of some other episodic stray crashes as it leads to memory corruption...). If it is triggered, the consequences are very bad -- almost definite bad memory corruption. The fix is non trivial and has chances of introducing new bugs. I am also not sure how actively people use KASAN on older releases. [dvyukov@google.com: - sorted includes[ Link: http://lkml.kernel.org/r/20170309094028.51088-1-dvyukov@google.com Link: http://lkml.kernel.org/r/20170308151532.5070-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Andrey Ryabinin Cc: Greg Thelen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/quarantine.c | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 4ac39f20757a..3a8ddf8baf7d 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -103,6 +104,7 @@ static int quarantine_tail; /* Total size of all objects in global_quarantine across all batches. */ static unsigned long quarantine_size; static DEFINE_SPINLOCK(quarantine_lock); +DEFINE_STATIC_SRCU(remove_cache_srcu); /* Maximum size of the global queue. */ static unsigned long quarantine_max_size; @@ -173,17 +175,22 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache) struct qlist_head *q; struct qlist_head temp = QLIST_INIT; + /* + * Note: irq must be disabled until after we move the batch to the + * global quarantine. Otherwise quarantine_remove_cache() can miss + * some objects belonging to the cache if they are in our local temp + * list. quarantine_remove_cache() executes on_each_cpu() at the + * beginning which ensures that it either sees the objects in per-cpu + * lists or in the global quarantine. + */ local_irq_save(flags); q = this_cpu_ptr(&cpu_quarantine); qlist_put(q, &info->quarantine_link, cache->size); - if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) + if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) { qlist_move_all(q, &temp); - local_irq_restore(flags); - - if (unlikely(!qlist_empty(&temp))) { - spin_lock_irqsave(&quarantine_lock, flags); + spin_lock(&quarantine_lock); WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes); qlist_move_all(&temp, &global_quarantine[quarantine_tail]); if (global_quarantine[quarantine_tail].bytes >= @@ -196,20 +203,33 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache) if (new_tail != quarantine_head) quarantine_tail = new_tail; } - spin_unlock_irqrestore(&quarantine_lock, flags); + spin_unlock(&quarantine_lock); } + + local_irq_restore(flags); } void quarantine_reduce(void) { size_t total_size, new_quarantine_size, percpu_quarantines; unsigned long flags; + int srcu_idx; struct qlist_head to_free = QLIST_INIT; if (likely(READ_ONCE(quarantine_size) <= READ_ONCE(quarantine_max_size))) return; + /* + * srcu critical section ensures that quarantine_remove_cache() + * will not miss objects belonging to the cache while they are in our + * local to_free list. srcu is chosen because (1) it gives us private + * grace period domain that does not interfere with anything else, + * and (2) it allows synchronize_srcu() to return without waiting + * if there are no pending read critical sections (which is the + * expected case). + */ + srcu_idx = srcu_read_lock(&remove_cache_srcu); spin_lock_irqsave(&quarantine_lock, flags); /* @@ -237,6 +257,7 @@ void quarantine_reduce(void) spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, NULL); + srcu_read_unlock(&remove_cache_srcu, srcu_idx); } static void qlist_move_cache(struct qlist_head *from, @@ -280,6 +301,13 @@ void quarantine_remove_cache(struct kmem_cache *cache) unsigned long flags, i; struct qlist_head to_free = QLIST_INIT; + /* + * Must be careful to not miss any objects that are being moved from + * per-cpu list to the global quarantine in quarantine_put(), + * nor objects being freed in quarantine_reduce(). on_each_cpu() + * achieves the first goal, while synchronize_srcu() achieves the + * second. + */ on_each_cpu(per_cpu_remove_cache, cache, 1); spin_lock_irqsave(&quarantine_lock, flags); @@ -295,4 +323,6 @@ void quarantine_remove_cache(struct kmem_cache *cache) spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, cache); + + synchronize_srcu(&remove_cache_srcu); } -- cgit v1.2.3 From ca5b58ea3db88b5e69ba2a1f6bc3cf239cdcc64f Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 9 Mar 2017 16:17:34 -0800 Subject: sh: cayman: IDE support fix Remove incorrect CONFIG_IDE ifdef (CONFIG_IDE config option is for internal drivers/ide/ use) and make IDE hardware interface always initialized (not only when IDE subsystem is built-in). This patch allows Cayman board to work with modular IDE subsystem support and removes the requirement of having the whole core IDE subsystem built-in when using libata PATA support. Link: http://lkml.kernel.org/r/1990884.yFoE6lSB9G@amdc3058 Signed-off-by: Bartlomiej Zolnierkiewicz Cc: Yoshinori Sato Cc: Rich Felker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/boards/mach-cayman/setup.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c index 340fd40b381d..9c292c27e0d7 100644 --- a/arch/sh/boards/mach-cayman/setup.c +++ b/arch/sh/boards/mach-cayman/setup.c @@ -128,7 +128,6 @@ static int __init smsc_superio_setup(void) SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_PRIMARY_INT_INDEX); SMSC_SUPERIO_WRITE_INDEXED(12, SMSC_SECONDARY_INT_INDEX); -#ifdef CONFIG_IDE /* * Only IDE1 exists on the Cayman */ @@ -158,7 +157,6 @@ static int __init smsc_superio_setup(void) SMSC_SUPERIO_WRITE_INDEXED(0x01, 0xc5); /* GP45 = IDE1_IRQ */ SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc6); /* GP46 = nIOROP */ SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc7); /* GP47 = nIOWOP */ -#endif /* Exit the configuration state */ outb(SMSC_EXIT_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR); -- cgit v1.2.3 From c0d0e351285161a515396b7b1ee53ec9ffd97e3c Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 9 Mar 2017 16:17:37 -0800 Subject: fat: fix using uninitialized fields of fat_inode/fsinfo_inode Recently fallocate patch was merged and it uses MSDOS_I(inode)->mmu_private at fat_evict_inode(). However, fat_inode/fsinfo_inode that was introduced in past didn't initialize MSDOS_I(inode) properly. With those combinations, it became the cause of accessing random entry in FAT area. Link: http://lkml.kernel.org/r/87pohrj4i8.fsf@mail.parknet.co.jp Signed-off-by: OGAWA Hirofumi Reported-by: Moreno Bartalucci Tested-by: Moreno Bartalucci Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 338d2f73eb29..a2c05f2ada6d 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1359,6 +1359,16 @@ out: return 0; } +static void fat_dummy_inode_init(struct inode *inode) +{ + /* Initialize this dummy inode to work as no-op. */ + MSDOS_I(inode)->mmu_private = 0; + MSDOS_I(inode)->i_start = 0; + MSDOS_I(inode)->i_logstart = 0; + MSDOS_I(inode)->i_attrs = 0; + MSDOS_I(inode)->i_pos = 0; +} + static int fat_read_root(struct inode *inode) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); @@ -1803,12 +1813,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, fat_inode = new_inode(sb); if (!fat_inode) goto out_fail; - MSDOS_I(fat_inode)->i_pos = 0; + fat_dummy_inode_init(fat_inode); sbi->fat_inode = fat_inode; fsinfo_inode = new_inode(sb); if (!fsinfo_inode) goto out_fail; + fat_dummy_inode_init(fsinfo_inode); fsinfo_inode->i_ino = MSDOS_FSINFO_INO; sbi->fsinfo_inode = fsinfo_inode; insert_inode_hash(fsinfo_inode); -- cgit v1.2.3 From 2378cd6181edd94748d699448823609977283b11 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Mar 2017 16:17:40 -0800 Subject: userfaultfd: remove wrong comment from userfaultfd_ctx_get() It's a void function, so there is no return value; Link: http://lkml.kernel.org/r/20170309150817.7510-1-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 9fd5e51ffb31..2bb1c72380f2 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -138,8 +138,6 @@ out: * userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd * context. * @ctx: [in] Pointer to the userfaultfd context. - * - * Returns: In case of success, returns not zero. */ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx) { -- cgit v1.2.3 From 10f2889ba35aeb251b9945ec4c461af8c124c41f Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 14 Dec 2016 17:28:41 -0800 Subject: drm: mxsfb: use bus_format to determine LCD bus width The LCD bus width does not need to align with the pixel format. The LCDIF controller automatically converts between pixel formats and bus width by padding or dropping LSBs. The DRM subsystem has the notion of bus_format which allows to determine what bus_formats are supported by the display. Choose the first available or fallback to 24 bit if none are available. Signed-off-by: Stefan Agner Acked-by: Marek Vasut Signed-off-by: Dave Airlie --- drivers/gpu/drm/mxsfb/mxsfb_crtc.c | 34 ++++++++++++++++++++++++++++++++-- drivers/gpu/drm/mxsfb/mxsfb_regs.h | 1 + 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c index e10a4eda4078..259f71b3a76a 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c @@ -65,13 +65,11 @@ static int mxsfb_set_pixel_fmt(struct mxsfb_drm_private *mxsfb) switch (format) { case DRM_FORMAT_RGB565: dev_dbg(drm->dev, "Setting up RGB565 mode\n"); - ctrl |= CTRL_SET_BUS_WIDTH(STMLCDIF_16BIT); ctrl |= CTRL_SET_WORD_LENGTH(0); ctrl1 |= CTRL1_SET_BYTE_PACKAGING(0xf); break; case DRM_FORMAT_XRGB8888: dev_dbg(drm->dev, "Setting up XRGB8888 mode\n"); - ctrl |= CTRL_SET_BUS_WIDTH(STMLCDIF_24BIT); ctrl |= CTRL_SET_WORD_LENGTH(3); /* Do not use packed pixels = one pixel per word instead. */ ctrl1 |= CTRL1_SET_BYTE_PACKAGING(0x7); @@ -87,6 +85,36 @@ static int mxsfb_set_pixel_fmt(struct mxsfb_drm_private *mxsfb) return 0; } +static void mxsfb_set_bus_fmt(struct mxsfb_drm_private *mxsfb) +{ + struct drm_crtc *crtc = &mxsfb->pipe.crtc; + struct drm_device *drm = crtc->dev; + u32 bus_format = MEDIA_BUS_FMT_RGB888_1X24; + u32 reg; + + reg = readl(mxsfb->base + LCDC_CTRL); + + if (mxsfb->connector.display_info.num_bus_formats) + bus_format = mxsfb->connector.display_info.bus_formats[0]; + + reg &= ~CTRL_BUS_WIDTH_MASK; + switch (bus_format) { + case MEDIA_BUS_FMT_RGB565_1X16: + reg |= CTRL_SET_BUS_WIDTH(STMLCDIF_16BIT); + break; + case MEDIA_BUS_FMT_RGB666_1X18: + reg |= CTRL_SET_BUS_WIDTH(STMLCDIF_18BIT); + break; + case MEDIA_BUS_FMT_RGB888_1X24: + reg |= CTRL_SET_BUS_WIDTH(STMLCDIF_24BIT); + break; + default: + dev_err(drm->dev, "Unknown media bus format %d\n", bus_format); + break; + } + writel(reg, mxsfb->base + LCDC_CTRL); +} + static void mxsfb_enable_controller(struct mxsfb_drm_private *mxsfb) { u32 reg; @@ -175,6 +203,8 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb) writel(vdctrl0, mxsfb->base + LCDC_VDCTRL0); + mxsfb_set_bus_fmt(mxsfb); + /* Frame length in lines. */ writel(m->crtc_vtotal, mxsfb->base + LCDC_VDCTRL1); diff --git a/drivers/gpu/drm/mxsfb/mxsfb_regs.h b/drivers/gpu/drm/mxsfb/mxsfb_regs.h index 31d62cd0d3d7..66a6ba9ec533 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_regs.h +++ b/drivers/gpu/drm/mxsfb/mxsfb_regs.h @@ -44,6 +44,7 @@ #define CTRL_DATA_SELECT (1 << 16) #define CTRL_SET_BUS_WIDTH(x) (((x) & 0x3) << 10) #define CTRL_GET_BUS_WIDTH(x) (((x) >> 10) & 0x3) +#define CTRL_BUS_WIDTH_MASK (0x3 << 10) #define CTRL_SET_WORD_LENGTH(x) (((x) & 0x3) << 8) #define CTRL_GET_WORD_LENGTH(x) (((x) >> 8) & 0x3) #define CTRL_MASTER (1 << 5) -- cgit v1.2.3 From 53990e416bb7adaa59d045f325a47f31a11b75ee Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 14 Dec 2016 12:48:09 -0800 Subject: drm: mxsfb: fix pixel clock polarity The DRM subsystem specifies the pixel clock polarity from a controllers perspective: DRM_BUS_FLAG_PIXDATA_NEGEDGE means the controller drives the data on pixel clocks falling edge. That is the controllers DOTCLK_POL=0 (Default is data launched at negative edge). Also change the data enable logic to be high active by default and only change if explicitly requested via bus_flags. With that defaults are: - Data enable: high active - Pixel clock polarity: controller drives data on negative edge Signed-off-by: Stefan Agner Acked-by: Marek Vasut Signed-off-by: Dave Airlie --- drivers/gpu/drm/mxsfb/mxsfb_crtc.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c index 259f71b3a76a..165abd227436 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c @@ -196,9 +196,16 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb) vdctrl0 |= VDCTRL0_HSYNC_ACT_HIGH; if (m->flags & DRM_MODE_FLAG_PVSYNC) vdctrl0 |= VDCTRL0_VSYNC_ACT_HIGH; - if (bus_flags & DRM_BUS_FLAG_DE_HIGH) + /* Make sure Data Enable is high active by default */ + if (!(bus_flags & DRM_BUS_FLAG_DE_LOW)) vdctrl0 |= VDCTRL0_ENABLE_ACT_HIGH; - if (bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE) + /* + * DRM_BUS_FLAG_PIXDATA_ defines are controller centric, + * controllers VDCTRL0_DOTCLK is display centric. + * Drive on positive edge -> display samples on falling edge + * DRM_BUS_FLAG_PIXDATA_POSEDGE -> VDCTRL0_DOTCLK_ACT_FALLING + */ + if (bus_flags & DRM_BUS_FLAG_PIXDATA_POSEDGE) vdctrl0 |= VDCTRL0_DOTCLK_ACT_FALLING; writel(vdctrl0, mxsfb->base + LCDC_VDCTRL0); -- cgit v1.2.3 From 7ad7a5acfb96215216f46b9848bd2d341663358f Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 28 Jan 2017 18:01:57 +0100 Subject: drm: mxsfb: Fix crash when provided invalid DT bindings The mxsfb driver will crash if the mxsfb DT node has a subnode, but the content of the subnode is not of-graph binding with an endpoint linking to panel. The crash was triggered by providing old-style panel bindings to the mxsfb driver instead of the new of-graph ones. The problem happens in mxsfb_create_output(), which is invoked from mxsfb_load(). The mxsfb_create_output() iterates over all mxsfb DT subnode endpoints and tries to bind a panel on each endpoint. If there is any problem binding the panel, that is, mxsfb->panel == NULL, this function will return an error code, otherwise success 0 is returned. If the subnodes do not specify of-graph binding with an endpoint, the iteration over endpoints in mxsfb_create_output() will have zero cycles and the function will immediatelly return 0, but the mxsfb->panel will remain NULL. This is propagated back into the mxsfb_load(), which does not detect any problem and expects that the mxsfb->panel is valid, thus calls mxsfb_panel_attach(). But since mxsfb->panel == NULL, mxsfb_panel_attach() is called with first argument NULL and this crashes the kernel. This patch fixes the problem by explicitly checking for valid mxsfb->panel at the end of the iteration in mxsfb_create_output(). Signed-off-by: Marek Vasut Cc: Daniel Vetter Cc: Dave Airlie Cc: Stefan Agner Cc: Breno Matheus Lima Tested-by: Breno Lima Signed-off-by: Dave Airlie --- drivers/gpu/drm/mxsfb/mxsfb_out.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/mxsfb/mxsfb_out.c b/drivers/gpu/drm/mxsfb/mxsfb_out.c index fa8d17399407..b8e81422d4e2 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_out.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_out.c @@ -112,6 +112,7 @@ static int mxsfb_attach_endpoint(struct drm_device *drm, int mxsfb_create_output(struct drm_device *drm) { + struct mxsfb_drm_private *mxsfb = drm->dev_private; struct device_node *ep_np = NULL; struct of_endpoint ep; int ret; @@ -127,5 +128,8 @@ int mxsfb_create_output(struct drm_device *drm) } } + if (!mxsfb->panel) + return -EPROBE_DEFER; + return 0; } -- cgit v1.2.3 From d42986b6c600da4215973878f1c889cdabbdd122 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 2 Feb 2017 19:26:38 -0200 Subject: drm: mxsfb_crtc: Fix the framebuffer misplacement Currently the framebuffer content is displayed with incorrect offsets in both the vertical and horizontal directions. The fbdev version of the driver does not show this problem. Breno Lima dumped the eLCDIF controller registers on both the drm and fbdev drivers and noticed that the VDCTRL3 register is configured incorrectly in the drm driver. The fbdev driver calculates the vertical and horizontal wait counts of the VDCTRL3 register by doing: back porch + sync length. Looking at the horizontal and vertical timing diagram from include/drm/drm_modes.h this value corresponds to: crtc_[hv]total - crtc_[hv]sync_start So fix the VDCTRL3 register setting accordingly so that the eLCDIF controller can properly show the framebuffer content in the correct position. Reported-by: Breno Lima Signed-off-by: Fabio Estevam Tested-by: Breno Lima Tested-by: Marek Vasut Signed-off-by: Dave Airlie --- drivers/gpu/drm/mxsfb/mxsfb_crtc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c index 165abd227436..1144e0c9e894 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c @@ -221,8 +221,8 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb) VDCTRL2_SET_HSYNC_PERIOD(m->crtc_htotal), mxsfb->base + LCDC_VDCTRL2); - writel(SET_HOR_WAIT_CNT(m->crtc_hblank_end - m->crtc_hsync_end) | - SET_VERT_WAIT_CNT(m->crtc_vblank_end - m->crtc_vsync_end), + writel(SET_HOR_WAIT_CNT(m->crtc_htotal - m->crtc_hsync_start) | + SET_VERT_WAIT_CNT(m->crtc_vtotal - m->crtc_vsync_start), mxsfb->base + LCDC_VDCTRL3); writel(SET_DOTCLK_H_VALID_DATA_CNT(m->hdisplay), -- cgit v1.2.3 From 3f81e1340706e9a7f854808e2f580c3106805d0c Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 1 Feb 2017 15:19:47 -0200 Subject: drm: mxsfb: Implement drm_panel handling Currently when the 'power-supply' regulator is passed via device tree it does not actually work since drm_panel_prepare()/drm_panel_enable() are never called. Quoting Thierry Reding: "It should really call drm_panel_prepare() and drm_panel_enable() while switching on the display pipeline and drm_panel_disable(), followed by drm_panel_unprepare() while switching off the display pipeline." So do as suggested, so that the 'power-supply' regulator can be functional. Reported-by: Breno Lima Suggested-by: Thierry Reding Signed-off-by: Fabio Estevam Tested-by: Marek Vasut Signed-off-by: Dave Airlie --- drivers/gpu/drm/mxsfb/mxsfb_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index cdfbe0284635..ff6d6a6f842e 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -102,14 +102,18 @@ static void mxsfb_pipe_enable(struct drm_simple_display_pipe *pipe, { struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe); + drm_panel_prepare(mxsfb->panel); mxsfb_crtc_enable(mxsfb); + drm_panel_enable(mxsfb->panel); } static void mxsfb_pipe_disable(struct drm_simple_display_pipe *pipe) { struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe); + drm_panel_disable(mxsfb->panel); mxsfb_crtc_disable(mxsfb); + drm_panel_unprepare(mxsfb->panel); } static void mxsfb_pipe_update(struct drm_simple_display_pipe *pipe, -- cgit v1.2.3 From bba8376aea1dcbbe22bbda118c52abee317c7609 Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Thu, 9 Mar 2017 14:00:17 +0100 Subject: x86/reboot/quirks: Fix typo in ASUS EeeBook X205TA reboot quirk The reboot quirk for ASUS EeeBook X205TA contains a typo in DMI_PRODUCT_NAME, improperly referring to X205TAW instead of X205TA, which prevents the quirk from being triggered. The model X205TAW already has a reboot quirk of its own. This fix simply removes the inappropriate final letter W. Fixes: 90b28ded88dd ("x86/reboot/quirks: Add ASUS EeeBook X205TA reboot quirk") Signed-off-by: Matjaz Hegedic Link: http://lkml.kernel.org/r/1489064417-7445-1-git-send-email-matjaz.hegedic@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/reboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 4194d6f9bb29..067f9813fd2c 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -228,7 +228,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { .ident = "ASUS EeeBook X205TA", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TA"), }, }, { /* Handle problems with rebooting on ASUS EeeBook X205TAW */ -- cgit v1.2.3 From 40c50c1fecdf012a3bf055ec813f0ef2eda2749c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Mar 2017 13:17:18 +0100 Subject: kexec, x86/purgatory: Unbreak it and clean it up The purgatory code defines global variables which are referenced via a symbol lookup in the kexec code (core and arch). A recent commit addressing sparse warnings made these static and thereby broke kexec_file. Why did this happen? Simply because the whole machinery is undocumented and lacks any form of forward declarations. The variable names are unspecific and lack a prefix, so adding forward declarations creates shadow variables in the core code. Aside of that the code relies on magic constants and duplicate struct definitions with no way to ensure that these things stay in sync. The section placement of the purgatory variables happened by chance and not by design. Unbreak kexec and cleanup the mess: - Add proper forward declarations and document the usage - Use common struct definition - Use the proper common defines instead of magic constants - Add a purgatory_ prefix to have a proper name space - Use ARRAY_SIZE() instead of a homebrewn reimplementation - Add proper sections to the purgatory variables [ From Mike ] Fixes: 72042a8c7b01 ("x86/purgatory: Make functions and variables static") Reported-by: Mike Galbraith < Signed-off-by: Thomas Gleixner Cc: Nicholas Mc Guire Cc: Borislav Petkov Cc: Vivek Goyal Cc: "Tobin C. Harding" Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703101315140.3681@nanos Signed-off-by: Thomas Gleixner --- arch/powerpc/purgatory/trampoline.S | 12 ++++++------ arch/x86/include/asm/purgatory.h | 20 ++++++++++++++++++++ arch/x86/kernel/machine_kexec_64.c | 9 ++++++--- arch/x86/purgatory/purgatory.c | 35 +++++++++++++++++------------------ arch/x86/purgatory/purgatory.h | 8 -------- arch/x86/purgatory/setup-x86_64.S | 2 +- arch/x86/purgatory/sha256.h | 1 - include/linux/purgatory.h | 23 +++++++++++++++++++++++ kernel/kexec_file.c | 8 ++++---- kernel/kexec_internal.h | 6 +----- 10 files changed, 78 insertions(+), 46 deletions(-) create mode 100644 arch/x86/include/asm/purgatory.h delete mode 100644 arch/x86/purgatory/purgatory.h create mode 100644 include/linux/purgatory.h diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S index f9760ccf4032..3696ea6c4826 100644 --- a/arch/powerpc/purgatory/trampoline.S +++ b/arch/powerpc/purgatory/trampoline.S @@ -116,13 +116,13 @@ dt_offset: .data .balign 8 -.globl sha256_digest -sha256_digest: +.globl purgatory_sha256_digest +purgatory_sha256_digest: .skip 32 - .size sha256_digest, . - sha256_digest + .size purgatory_sha256_digest, . - purgatory_sha256_digest .balign 8 -.globl sha_regions -sha_regions: +.globl purgatory_sha_regions +purgatory_sha_regions: .skip 8 * 2 * 16 - .size sha_regions, . - sha_regions + .size purgatory_sha_regions, . - purgatory_sha_regions diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h new file mode 100644 index 000000000000..d7da2729903d --- /dev/null +++ b/arch/x86/include/asm/purgatory.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_PURGATORY_H +#define _ASM_X86_PURGATORY_H + +#ifndef __ASSEMBLY__ +#include + +extern void purgatory(void); +/* + * These forward declarations serve two purposes: + * + * 1) Make sparse happy when checking arch/purgatory + * 2) Document that these are required to be global so the symbol + * lookup in kexec works + */ +extern unsigned long purgatory_backup_dest; +extern unsigned long purgatory_backup_src; +extern unsigned long purgatory_backup_sz; +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_PURGATORY_H */ diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 307b1f4543de..857cdbd02867 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -194,19 +194,22 @@ static int arch_update_purgatory(struct kimage *image) /* Setup copying of backup region */ if (image->type == KEXEC_TYPE_CRASH) { - ret = kexec_purgatory_get_set_symbol(image, "backup_dest", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_dest", &image->arch.backup_load_addr, sizeof(image->arch.backup_load_addr), 0); if (ret) return ret; - ret = kexec_purgatory_get_set_symbol(image, "backup_src", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_src", &image->arch.backup_src_start, sizeof(image->arch.backup_src_start), 0); if (ret) return ret; - ret = kexec_purgatory_get_set_symbol(image, "backup_sz", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_sz", &image->arch.backup_src_sz, sizeof(image->arch.backup_src_sz), 0); if (ret) diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index b6d5c8946e66..470edad96bb9 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -10,22 +10,19 @@ * Version 2. See the file COPYING for more details. */ +#include +#include + #include "sha256.h" -#include "purgatory.h" #include "../boot/string.h" -struct sha_region { - unsigned long start; - unsigned long len; -}; - -static unsigned long backup_dest; -static unsigned long backup_src; -static unsigned long backup_sz; +unsigned long purgatory_backup_dest __section(.kexec-purgatory); +unsigned long purgatory_backup_src __section(.kexec-purgatory); +unsigned long purgatory_backup_sz __section(.kexec-purgatory); -static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; +u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory); -struct sha_region sha_regions[16] = {}; +struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory); /* * On x86, second kernel requries first 640K of memory to boot. Copy @@ -34,26 +31,28 @@ struct sha_region sha_regions[16] = {}; */ static int copy_backup_region(void) { - if (backup_dest) - memcpy((void *)backup_dest, (void *)backup_src, backup_sz); - + if (purgatory_backup_dest) { + memcpy((void *)purgatory_backup_dest, + (void *)purgatory_backup_src, purgatory_backup_sz); + } return 0; } static int verify_sha256_digest(void) { - struct sha_region *ptr, *end; + struct kexec_sha_region *ptr, *end; u8 digest[SHA256_DIGEST_SIZE]; struct sha256_state sctx; sha256_init(&sctx); - end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])]; - for (ptr = sha_regions; ptr < end; ptr++) + end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions); + + for (ptr = purgatory_sha_regions; ptr < end; ptr++) sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len); sha256_final(&sctx, digest); - if (memcmp(digest, sha256_digest, sizeof(digest))) + if (memcmp(digest, purgatory_sha256_digest, sizeof(digest))) return 1; return 0; diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h deleted file mode 100644 index e2e365a6c192..000000000000 --- a/arch/x86/purgatory/purgatory.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef PURGATORY_H -#define PURGATORY_H - -#ifndef __ASSEMBLY__ -extern void purgatory(void); -#endif /* __ASSEMBLY__ */ - -#endif /* PURGATORY_H */ diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S index f90e9dfa90bb..dfae9b9e60b5 100644 --- a/arch/x86/purgatory/setup-x86_64.S +++ b/arch/x86/purgatory/setup-x86_64.S @@ -9,7 +9,7 @@ * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. */ -#include "purgatory.h" +#include .text .globl purgatory_start diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h index bd15a4127735..2867d9825a57 100644 --- a/arch/x86/purgatory/sha256.h +++ b/arch/x86/purgatory/sha256.h @@ -10,7 +10,6 @@ #ifndef SHA256_H #define SHA256_H - #include #include diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h new file mode 100644 index 000000000000..d60d4e278609 --- /dev/null +++ b/include/linux/purgatory.h @@ -0,0 +1,23 @@ +#ifndef _LINUX_PURGATORY_H +#define _LINUX_PURGATORY_H + +#include +#include +#include + +struct kexec_sha_region { + unsigned long start; + unsigned long len; +}; + +/* + * These forward declarations serve two purposes: + * + * 1) Make sparse happy when checking arch/purgatory + * 2) Document that these are required to be global so the symbol + * lookup in kexec works + */ +extern struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX]; +extern u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE]; + +#endif diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index b56a558e406d..b118735fea9d 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -614,13 +614,13 @@ static int kexec_calculate_store_digests(struct kimage *image) ret = crypto_shash_final(desc, digest); if (ret) goto out_free_digest; - ret = kexec_purgatory_get_set_symbol(image, "sha_regions", - sha_regions, sha_region_sz, 0); + ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions", + sha_regions, sha_region_sz, 0); if (ret) goto out_free_digest; - ret = kexec_purgatory_get_set_symbol(image, "sha256_digest", - digest, SHA256_DIGEST_SIZE, 0); + ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest", + digest, SHA256_DIGEST_SIZE, 0); if (ret) goto out_free_digest; } diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h index 4cef7e4706b0..799a8a452187 100644 --- a/kernel/kexec_internal.h +++ b/kernel/kexec_internal.h @@ -15,11 +15,7 @@ int kimage_is_destination_range(struct kimage *image, extern struct mutex kexec_mutex; #ifdef CONFIG_KEXEC_FILE -struct kexec_sha_region { - unsigned long start; - unsigned long len; -}; - +#include void kimage_file_post_load_cleanup(struct kimage *image); #else /* CONFIG_KEXEC_FILE */ static inline void kimage_file_post_load_cleanup(struct kimage *image) { } -- cgit v1.2.3 From 0acf611997d9d05dbfb559c3c6e379c861eb5957 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 22 Feb 2017 11:07:57 -0800 Subject: score: Fix implicit includes now failing build after extable change After changing from module.h to extable.h, score builds fail with: arch/score/kernel/traps.c: In function 'do_ri': arch/score/kernel/traps.c:248:4: error: implicit declaration of function 'user_disable_single_step' arch/score/mm/extable.c: In function 'fixup_exception': arch/score/mm/extable.c:32:38: error: dereferencing pointer to incomplete type arch/score/mm/extable.c:34:24: error: dereferencing pointer to incomplete type because extable.h doesn't drag in the same amount of headers as the module.h did. Add in the headers which were implicitly expected. Fixes: 90858794c960 ("module.h: remove extable.h include now users have migrated") Signed-off-by: Guenter Roeck [PG: tweak commit log; refresh for sched header refactoring.] Signed-off-by: Paul Gortmaker --- arch/score/kernel/traps.c | 1 + arch/score/mm/extable.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index e359ec675869..12daf45369b4 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -24,6 +24,7 @@ */ #include +#include #include #include #include diff --git a/arch/score/mm/extable.c b/arch/score/mm/extable.c index ec871355fc2d..6736a3ad6286 100644 --- a/arch/score/mm/extable.c +++ b/arch/score/mm/extable.c @@ -24,6 +24,8 @@ */ #include +#include +#include int fixup_exception(struct pt_regs *regs) { -- cgit v1.2.3 From 2c4ea6e28dbf15ab93632c5c189f3948366b8885 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 11 Mar 2017 01:31:19 +0100 Subject: x86/tlb: Fix tlb flushing when lguest clears PGE Fengguang reported random corruptions from various locations on x86-32 after commits d2852a224050 ("arch: add ARCH_HAS_SET_MEMORY config") and 9d876e79df6a ("bpf: fix unlocking of jited image when module ronx not set") that uses the former. While x86-32 doesn't have a JIT like x86_64, the bpf_prog_lock_ro() and bpf_prog_unlock_ro() got enabled due to ARCH_HAS_SET_MEMORY, whereas Fengguang's test kernel doesn't have module support built in and therefore never had the DEBUG_SET_MODULE_RONX setting enabled. After investigating the crashes further, it turned out that using set_memory_ro() and set_memory_rw() didn't have the desired effect, for example, setting the pages as read-only on x86-32 would still let probe_kernel_write() succeed without error. This behavior would manifest itself in situations where the vmalloc'ed buffer was accessed prior to set_memory_*() such as in case of bpf_prog_alloc(). In cases where it wasn't, the page attribute changes seemed to have taken effect, leading to the conclusion that a TLB invalidate didn't happen. Moreover, it turned out that this issue reproduced with qemu in "-cpu kvm64" mode, but not for "-cpu host". When the issue occurs, change_page_attr_set_clr() did trigger a TLB flush as expected via __flush_tlb_all() through cpa_flush_range(), though. There are 3 variants for issuing a TLB flush: invpcid_flush_all() (depends on CPU feature bits X86_FEATURE_INVPCID, X86_FEATURE_PGE), cr4 based flush (depends on X86_FEATURE_PGE), and cr3 based flush. For "-cpu host" case in my setup, the flush used invpcid_flush_all() variant, whereas for "-cpu kvm64", the flush was cr4 based. Switching the kvm64 case to cr3 manually worked fine, and further investigating the cr4 one turned out that X86_CR4_PGE bit was not set in cr4 register, meaning the __native_flush_tlb_global_irq_disabled() wrote cr4 twice with the same value instead of clearing X86_CR4_PGE in the first write to trigger the flush. It turned out that X86_CR4_PGE was cleared from cr4 during init from lguest_arch_host_init() via adjust_pge(). The X86_FEATURE_PGE bit is also cleared from there due to concerns of using PGE in guest kernel that can lead to hard to trace bugs (see bff672e630a0 ("lguest: documentation V: Host") in init()). The CPU feature bits are cleared in dynamic boot_cpu_data, but they never propagated to __flush_tlb_all() as it uses static_cpu_has() instead of boot_cpu_has() for testing which variant of TLB flushing to use, meaning they still used the old setting of the host kernel. Clearing via setup_clear_cpu_cap(X86_FEATURE_PGE) so this would propagate to static_cpu_has() checks is too late at this point as sections have been patched already, so for now, it seems reasonable to switch back to boot_cpu_has(X86_FEATURE_PGE) as it was prior to commit c109bf95992b ("x86/cpufeature: Remove cpu_has_pge"). This lets the TLB flush trigger via cr3 as originally intended, properly makes the new page attributes visible and thus fixes the crashes seen by Fengguang. Fixes: c109bf95992b ("x86/cpufeature: Remove cpu_has_pge") Reported-by: Fengguang Wu Signed-off-by: Daniel Borkmann Cc: bp@suse.de Cc: Kees Cook Cc: "David S. Miller" Cc: netdev@vger.kernel.org Cc: Rusty Russell Cc: Alexei Starovoitov Cc: Linus Torvalds Cc: lkp@01.org Cc: Laura Abbott Cc: stable@vger.kernel.org Link: http://lkml.kernrl.org/r/20170301125426.l4nf65rx4wahohyl@wfg-t540p.sh.intel.com Link: http://lkml.kernel.org/r/25c41ad9eca164be4db9ad84f768965b7eb19d9e.1489191673.git.daniel@iogearbox.net Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/tlbflush.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6fa85944af83..fc5abff9b7fd 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -188,7 +188,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) static inline void __flush_tlb_all(void) { - if (static_cpu_has(X86_FEATURE_PGE)) + if (boot_cpu_has(X86_FEATURE_PGE)) __flush_tlb_global(); else __flush_tlb(); -- cgit v1.2.3 From 4495c08e84729385774601b5146d51d9e5849f81 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Mar 2017 14:47:08 -0700 Subject: Linux 4.11-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 165cf9783a5d..b841fb36beb2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From f3fbd7ec62dec1528fb8044034e2885f2b257941 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 14 Feb 2017 00:03:38 +0900 Subject: arm: kprobes: Allow to handle reentered kprobe on single-stepping This is arm port of commit 6a5022a56ac3 ("kprobes/x86: Allow to handle reentered kprobe on single-stepping") Since the FIQ handlers can interrupt in the single stepping (or preparing the single stepping, do_debug etc.), we should consider a kprobe is hit in the NMI handler. Even in that case, the kprobe is allowed to be reentered as same as the kprobes hit in kprobe handlers (KPROBE_HIT_ACTIVE or KPROBE_HIT_SSDONE). The real issue will happen when a kprobe hit while another reentered kprobe is processing (KPROBE_REENTER), because we already consumed a saved-area for the previous kprobe. Signed-off-by: Masami Hiramatsu Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index b6dc9d838a9a..35148b46c4f5 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -271,6 +271,7 @@ void __kprobes kprobe_handler(struct pt_regs *regs) switch (kcb->kprobe_status) { case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: + case KPROBE_HIT_SS: /* A pre- or post-handler probe got us here. */ kprobes_inc_nmissed_count(p); save_previous_kprobe(kcb); @@ -279,6 +280,11 @@ void __kprobes kprobe_handler(struct pt_regs *regs) singlestep(p, regs, kcb); restore_previous_kprobe(kcb); break; + case KPROBE_REENTER: + /* A nested probe was hit in FIQ, it is a BUG */ + pr_warn("Unrecoverable kprobe detected at %p.\n", + p->addr); + /* fall through */ default: /* impossible cases */ BUG(); -- cgit v1.2.3 From 91fc862c613ab7a0ef6b0b7755c33619127f4e5a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 14 Feb 2017 00:04:48 +0900 Subject: arm: kprobes: Skip single-stepping in recursing path if possible Kprobes/arm skips single-stepping (moreover handling the event) if the conditional instruction must not be executed. This also apply the rule when we hit the recursing kprobe, so that kprobe does not count nmissed up in that case. Signed-off-by: Masami Hiramatsu Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/core.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 35148b46c4f5..269f66e66ff5 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -266,7 +266,15 @@ void __kprobes kprobe_handler(struct pt_regs *regs) #endif if (p) { - if (cur) { + if (!p->ainsn.insn_check_cc(regs->ARM_cpsr)) { + /* + * Probe hit but conditional execution check failed, + * so just skip the instruction and continue as if + * nothing had happened. + * In this case, we can skip recursing check too. + */ + singlestep_skip(p, regs); + } else if (cur) { /* Kprobe is pending, so we're recursing. */ switch (kcb->kprobe_status) { case KPROBE_HIT_ACTIVE: @@ -289,7 +297,7 @@ void __kprobes kprobe_handler(struct pt_regs *regs) /* impossible cases */ BUG(); } - } else if (p->ainsn.insn_check_cc(regs->ARM_cpsr)) { + } else { /* Probe hit and conditional execution check ok. */ set_current_kprobe(p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; @@ -310,13 +318,6 @@ void __kprobes kprobe_handler(struct pt_regs *regs) } reset_current_kprobe(); } - } else { - /* - * Probe hit but conditional execution check failed, - * so just skip the instruction and continue as if - * nothing had happened. - */ - singlestep_skip(p, regs); } } else if (cur) { /* We probably hit a jprobe. Call its break handler. */ -- cgit v1.2.3 From 06553175f585b52509c7df37d6f4a50aacb7b211 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 14 Feb 2017 00:05:59 +0900 Subject: arm: kprobes: Fix the return address of multiple kretprobes This is arm port of commit 737480a0d525 ("kprobes/x86: Fix the return address of multiple kretprobes"). Fix the return address of subsequent kretprobes when multiple kretprobes are set on the same function. For example: # cd /sys/kernel/debug/tracing # echo "r:event1 sys_symlink" > kprobe_events # echo "r:event2 sys_symlink" >> kprobe_events # echo 1 > events/kprobes/enable # ln -s /tmp/foo /tmp/bar (without this patch) # cat trace | grep -v ^# ln-82 [000] dn.2 68.446525: event1: (kretprobe_trampoline+0x0/0x18 <- SyS_symlink) ln-82 [000] dn.2 68.447831: event2: (ret_fast_syscall+0x0/0x1c <- SyS_symlink) (with this patch) # cat trace | grep -v ^# ln-81 [000] dn.1 39.463469: event1: (ret_fast_syscall+0x0/0x1c <- SyS_symlink) ln-81 [000] dn.1 39.464701: event2: (ret_fast_syscall+0x0/0x1c <- SyS_symlink) Signed-off-by: Masami Hiramatsu Cc: KUMANO Syuhei Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/core.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 269f66e66ff5..ad1f4e6a9e33 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -441,6 +441,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -463,14 +464,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) /* another task is sharing our hash bucket */ continue; + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { __this_cpu_write(current_kprobe, &ri->rp->kp); get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); __this_cpu_write(current_kprobe, NULL); } - orig_ret_address = (unsigned long)ri->ret_addr; recycle_rp_inst(ri, &empty_rp); if (orig_ret_address != trampoline_address) @@ -482,7 +503,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) break; } - kretprobe_assert(ri, orig_ret_address, trampoline_address); kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { -- cgit v1.2.3 From 974310d047f3c7788a51d10c8d255eebdb1fa857 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Thu, 2 Mar 2017 13:04:09 +0000 Subject: arm: kprobes: Align stack to 8-bytes in test code kprobes test cases need to have a stack that is aligned to an 8-byte boundary because they call other functions (and the ARM ABI mandates that alignment) and because test cases include 64-bit accesses to the stack. Unfortunately, GCC doesn't ensure this alignment for inline assembler and for the code in question seems to always misalign it by pushing just the LR register onto the stack. We therefore need to explicitly perform stack alignment at the start of each test case. Without this fix, some test cases will generate alignment faults on systems where alignment is enforced. Even if the kernel is configured to handle these faults in software, triggering them is ugly. It also exposes limitations in the fault handling code which doesn't cope with writes to the stack. E.g. when handling this instruction strd r6, [sp, #-64]! the fault handling code will write to a stack location below the SP value at the point the fault occurred, which coincides with where the exception handler has pushed the saved register context. This results in corruption of those registers. Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/test-core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index c893726aa52d..1c98a87786ca 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -977,7 +977,10 @@ static void coverage_end(void) void __naked __kprobes_test_case_start(void) { __asm__ __volatile__ ( - "stmdb sp!, {r4-r11} \n\t" + "mov r2, sp \n\t" + "bic r3, r2, #7 \n\t" + "mov sp, r3 \n\t" + "stmdb sp!, {r2-r11} \n\t" "sub sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" "bic r0, lr, #1 @ r0 = inline data \n\t" "mov r1, sp \n\t" @@ -997,7 +1000,8 @@ void __naked __kprobes_test_case_end_32(void) "movne pc, r0 \n\t" "mov r0, r4 \n\t" "add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" - "ldmia sp!, {r4-r11} \n\t" + "ldmia sp!, {r2-r11} \n\t" + "mov sp, r2 \n\t" "mov pc, r0 \n\t" ); } @@ -1013,7 +1017,8 @@ void __naked __kprobes_test_case_end_16(void) "bxne r0 \n\t" "mov r0, r4 \n\t" "add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" - "ldmia sp!, {r4-r11} \n\t" + "ldmia sp!, {r2-r11} \n\t" + "mov sp, r2 \n\t" "bx r0 \n\t" ); } -- cgit v1.2.3