From 59495740f79524bebe4ecb8097cf206f86587b25 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Tue, 21 Mar 2023 15:04:18 +0100 Subject: ftrace: Let unregister_ftrace_direct_multi() call ftrace_free_filter() A common pattern when using the ftrace_direct_multi API is to unregister the ops and also immediately free its filter. We've noticed it's very easy for users to miss calling ftrace_free_filter(). This adds a "free_filters" argument to unregister_ftrace_direct_multi() to both remind the user they should free filters and also to make their life easier. Link: https://lkml.kernel.org/r/20230321140424.345218-2-revest@chromium.org Suggested-by: Steven Rostedt Signed-off-by: Florent Revest Acked-by: Mark Rutland Acked-by: Jiri Olsa Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 6 ++++-- kernel/bpf/trampoline.c | 2 +- kernel/trace/ftrace.c | 6 +++++- samples/ftrace/ftrace-direct-multi-modify.c | 3 +-- samples/ftrace/ftrace-direct-multi.c | 3 +-- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 366c730beaa3..5b68ee874bc1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -407,7 +407,8 @@ int ftrace_modify_direct_caller(struct ftrace_func_entry *entry, unsigned long new_addr); unsigned long ftrace_find_rec_direct(unsigned long ip); int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); -int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); +int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr, + bool free_filters); int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr); @@ -446,7 +447,8 @@ static inline int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned { return -ENODEV; } -static inline int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +static inline int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr, + bool free_filters) { return -ENODEV; } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index d0ed7d6f5eec..88bc23f1e10a 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -198,7 +198,7 @@ static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) int ret; if (tr->func.ftrace_managed) - ret = unregister_ftrace_direct_multi(tr->fops, (long)old_addr); + ret = unregister_ftrace_direct_multi(tr->fops, (long)old_addr, false); else ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0feea145bb29..98434196c6a1 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5805,7 +5805,8 @@ EXPORT_SYMBOL_GPL(register_ftrace_direct_multi); * 0 on success * -EINVAL - The @ops object was not properly registered. */ -int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr, + bool free_filters) { struct ftrace_hash *hash = ops->func_hash->filter_hash; int err; @@ -5823,6 +5824,9 @@ int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) /* cleanup for possible another register call */ ops->func = NULL; ops->trampoline = 0; + + if (free_filters) + ftrace_free_filter(ops); return err; } EXPORT_SYMBOL_GPL(unregister_ftrace_direct_multi); diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c index b58c594efb51..196b43971cb5 100644 --- a/samples/ftrace/ftrace-direct-multi-modify.c +++ b/samples/ftrace/ftrace-direct-multi-modify.c @@ -151,8 +151,7 @@ static int __init ftrace_direct_multi_init(void) static void __exit ftrace_direct_multi_exit(void) { kthread_stop(simple_tsk); - unregister_ftrace_direct_multi(&direct, my_tramp); - ftrace_free_filter(&direct); + unregister_ftrace_direct_multi(&direct, my_tramp, true); } module_init(ftrace_direct_multi_init); diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c index c27cf130c319..ea0e88ee5e43 100644 --- a/samples/ftrace/ftrace-direct-multi.c +++ b/samples/ftrace/ftrace-direct-multi.c @@ -78,8 +78,7 @@ static int __init ftrace_direct_multi_init(void) static void __exit ftrace_direct_multi_exit(void) { - unregister_ftrace_direct_multi(&direct, (unsigned long) my_tramp); - ftrace_free_filter(&direct); + unregister_ftrace_direct_multi(&direct, (unsigned long) my_tramp, true); } module_init(ftrace_direct_multi_init); -- cgit v1.2.3 From 23edf48309b14a201481b75fba049802eaf0baa6 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Tue, 21 Mar 2023 15:04:19 +0100 Subject: ftrace: Replace uses of _ftrace_direct APIs with _ftrace_direct_multi The _multi API requires that users keep their own ops but can enforce that an op is only associated to one direct call. Link: https://lkml.kernel.org/r/20230321140424.345218-3-revest@chromium.org Signed-off-by: Florent Revest Acked-by: Mark Rutland Tested-by: Mark Rutland Acked-by: Jiri Olsa Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_selftest.c | 10 ++++++---- samples/ftrace/ftrace-direct-modify.c | 12 ++++++++---- samples/ftrace/ftrace-direct-too.c | 12 +++++++----- samples/ftrace/ftrace-direct.c | 12 +++++++----- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index ff0536cea968..9ce80b3ad06d 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -785,6 +785,7 @@ static struct fgraph_ops fgraph_ops __initdata = { }; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +static struct ftrace_ops direct; #ifndef CALL_DEPTH_ACCOUNT #define CALL_DEPTH_ACCOUNT "" #endif @@ -870,8 +871,8 @@ trace_selftest_startup_function_graph(struct tracer *trace, * Register direct function together with graph tracer * and make sure we get graph trace. */ - ret = register_ftrace_direct((unsigned long) DYN_FTRACE_TEST_NAME, - (unsigned long) trace_direct_tramp); + ftrace_set_filter_ip(&direct, (unsigned long)DYN_FTRACE_TEST_NAME, 0, 0); + ret = register_ftrace_direct_multi(&direct, (unsigned long)trace_direct_tramp); if (ret) goto out; @@ -891,8 +892,9 @@ trace_selftest_startup_function_graph(struct tracer *trace, unregister_ftrace_graph(&fgraph_ops); - ret = unregister_ftrace_direct((unsigned long) DYN_FTRACE_TEST_NAME, - (unsigned long) trace_direct_tramp); + ret = unregister_ftrace_direct_multi(&direct, + (unsigned long) trace_direct_tramp, + true); if (ret) goto out; diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index d93abbcb1f4c..f01ac74bac10 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -96,6 +96,8 @@ asm ( #endif /* CONFIG_S390 */ +static struct ftrace_ops direct; + static unsigned long my_tramp = (unsigned long)my_tramp1; static unsigned long tramps[2] = { (unsigned long)my_tramp1, @@ -114,7 +116,7 @@ static int simple_thread(void *arg) if (ret) continue; t ^= 1; - ret = modify_ftrace_direct(my_ip, my_tramp, tramps[t]); + ret = modify_ftrace_direct_multi(&direct, tramps[t]); if (!ret) my_tramp = tramps[t]; WARN_ON_ONCE(ret); @@ -129,7 +131,9 @@ static int __init ftrace_direct_init(void) { int ret; - ret = register_ftrace_direct(my_ip, my_tramp); + ftrace_set_filter_ip(&direct, (unsigned long) my_ip, 0, 0); + ret = register_ftrace_direct_multi(&direct, my_tramp); + if (!ret) simple_tsk = kthread_run(simple_thread, NULL, "event-sample-fn"); return ret; @@ -138,12 +142,12 @@ static int __init ftrace_direct_init(void) static void __exit ftrace_direct_exit(void) { kthread_stop(simple_tsk); - unregister_ftrace_direct(my_ip, my_tramp); + unregister_ftrace_direct_multi(&direct, my_tramp, true); } module_init(ftrace_direct_init); module_exit(ftrace_direct_exit); MODULE_AUTHOR("Steven Rostedt"); -MODULE_DESCRIPTION("Example use case of using modify_ftrace_direct()"); +MODULE_DESCRIPTION("Example use case of using modify_ftrace_direct_multi()"); MODULE_LICENSE("GPL"); diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index 8139dce2a31c..05c3585ac15e 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -70,21 +70,23 @@ asm ( #endif /* CONFIG_S390 */ +static struct ftrace_ops direct; + static int __init ftrace_direct_init(void) { - return register_ftrace_direct((unsigned long)handle_mm_fault, - (unsigned long)my_tramp); + ftrace_set_filter_ip(&direct, (unsigned long) handle_mm_fault, 0, 0); + + return register_ftrace_direct_multi(&direct, (unsigned long) my_tramp); } static void __exit ftrace_direct_exit(void) { - unregister_ftrace_direct((unsigned long)handle_mm_fault, - (unsigned long)my_tramp); + unregister_ftrace_direct_multi(&direct, (unsigned long)my_tramp, true); } module_init(ftrace_direct_init); module_exit(ftrace_direct_exit); MODULE_AUTHOR("Steven Rostedt"); -MODULE_DESCRIPTION("Another example use case of using register_ftrace_direct()"); +MODULE_DESCRIPTION("Another example use case of using register_ftrace_direct_multi()"); MODULE_LICENSE("GPL"); diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index 1d3d307ca33d..42ec9e39453b 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -63,21 +63,23 @@ asm ( #endif /* CONFIG_S390 */ +static struct ftrace_ops direct; + static int __init ftrace_direct_init(void) { - return register_ftrace_direct((unsigned long)wake_up_process, - (unsigned long)my_tramp); + ftrace_set_filter_ip(&direct, (unsigned long) wake_up_process, 0, 0); + + return register_ftrace_direct_multi(&direct, (unsigned long) my_tramp); } static void __exit ftrace_direct_exit(void) { - unregister_ftrace_direct((unsigned long)wake_up_process, - (unsigned long)my_tramp); + unregister_ftrace_direct_multi(&direct, (unsigned long)my_tramp, true); } module_init(ftrace_direct_init); module_exit(ftrace_direct_exit); MODULE_AUTHOR("Steven Rostedt"); -MODULE_DESCRIPTION("Example use case of using register_ftrace_direct()"); +MODULE_DESCRIPTION("Example use case of using register_ftrace_direct_multi()"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 8788ca164eb4bad9a2a82c7778223c15ef800b8b Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Tue, 21 Mar 2023 15:04:20 +0100 Subject: ftrace: Remove the legacy _ftrace_direct API This API relies on a single global ops, used for all direct calls registered with it. However, to implement arm64 direct calls, we need each ops to point to a single direct call trampoline. Link: https://lkml.kernel.org/r/20230321140424.345218-4-revest@chromium.org Signed-off-by: Florent Revest Acked-by: Mark Rutland Tested-by: Mark Rutland Acked-by: Jiri Olsa Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 32 ---- kernel/trace/ftrace.c | 393 ------------------------------------------------- 2 files changed, 425 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 5b68ee874bc1..2f400c9f0787 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -397,14 +397,6 @@ struct ftrace_func_entry { #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS extern int ftrace_direct_func_count; -int register_ftrace_direct(unsigned long ip, unsigned long addr); -int unregister_ftrace_direct(unsigned long ip, unsigned long addr); -int modify_ftrace_direct(unsigned long ip, unsigned long old_addr, unsigned long new_addr); -struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr); -int ftrace_modify_direct_caller(struct ftrace_func_entry *entry, - struct dyn_ftrace *rec, - unsigned long old_addr, - unsigned long new_addr); unsigned long ftrace_find_rec_direct(unsigned long ip); int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr, @@ -415,30 +407,6 @@ int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr #else struct ftrace_ops; # define ftrace_direct_func_count 0 -static inline int register_ftrace_direct(unsigned long ip, unsigned long addr) -{ - return -ENOTSUPP; -} -static inline int unregister_ftrace_direct(unsigned long ip, unsigned long addr) -{ - return -ENOTSUPP; -} -static inline int modify_ftrace_direct(unsigned long ip, - unsigned long old_addr, unsigned long new_addr) -{ - return -ENOTSUPP; -} -static inline struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr) -{ - return NULL; -} -static inline int ftrace_modify_direct_caller(struct ftrace_func_entry *entry, - struct dyn_ftrace *rec, - unsigned long old_addr, - unsigned long new_addr) -{ - return -ENODEV; -} static inline unsigned long ftrace_find_rec_direct(unsigned long ip) { return 0; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 98434196c6a1..fbc9abfa1a3e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2591,20 +2591,6 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip, arch_ftrace_set_direct_caller(fregs, addr); } - -static struct ftrace_ops direct_ops = { - .func = call_direct_funcs, - .flags = FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS - | FTRACE_OPS_FL_PERMANENT, - /* - * By declaring the main trampoline as this trampoline - * it will never have one allocated for it. Allocated - * trampolines should not call direct functions. - * The direct_ops should only be called by the builtin - * ftrace_regs_caller trampoline. - */ - .trampoline = FTRACE_REGS_ADDR, -}; #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ /** @@ -5301,387 +5287,8 @@ struct ftrace_direct_func { static LIST_HEAD(ftrace_direct_funcs); -/** - * ftrace_find_direct_func - test an address if it is a registered direct caller - * @addr: The address of a registered direct caller - * - * This searches to see if a ftrace direct caller has been registered - * at a specific address, and if so, it returns a descriptor for it. - * - * This can be used by architecture code to see if an address is - * a direct caller (trampoline) attached to a fentry/mcount location. - * This is useful for the function_graph tracer, as it may need to - * do adjustments if it traced a location that also has a direct - * trampoline attached to it. - */ -struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr) -{ - struct ftrace_direct_func *entry; - bool found = false; - - /* May be called by fgraph trampoline (protected by rcu tasks) */ - list_for_each_entry_rcu(entry, &ftrace_direct_funcs, next) { - if (entry->addr == addr) { - found = true; - break; - } - } - if (found) - return entry; - - return NULL; -} - -static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr) -{ - struct ftrace_direct_func *direct; - - direct = kmalloc(sizeof(*direct), GFP_KERNEL); - if (!direct) - return NULL; - direct->addr = addr; - direct->count = 0; - list_add_rcu(&direct->next, &ftrace_direct_funcs); - ftrace_direct_func_count++; - return direct; -} - static int register_ftrace_function_nolock(struct ftrace_ops *ops); -/** - * register_ftrace_direct - Call a custom trampoline directly - * @ip: The address of the nop at the beginning of a function - * @addr: The address of the trampoline to call at @ip - * - * This is used to connect a direct call from the nop location (@ip) - * at the start of ftrace traced functions. The location that it calls - * (@addr) must be able to handle a direct call, and save the parameters - * of the function being traced, and restore them (or inject new ones - * if needed), before returning. - * - * Returns: - * 0 on success - * -EBUSY - Another direct function is already attached (there can be only one) - * -ENODEV - @ip does not point to a ftrace nop location (or not supported) - * -ENOMEM - There was an allocation failure. - */ -int register_ftrace_direct(unsigned long ip, unsigned long addr) -{ - struct ftrace_direct_func *direct; - struct ftrace_func_entry *entry; - struct ftrace_hash *free_hash = NULL; - struct dyn_ftrace *rec; - int ret = -ENODEV; - - mutex_lock(&direct_mutex); - - ip = ftrace_location(ip); - if (!ip) - goto out_unlock; - - /* See if there's a direct function at @ip already */ - ret = -EBUSY; - if (ftrace_find_rec_direct(ip)) - goto out_unlock; - - ret = -ENODEV; - rec = lookup_rec(ip, ip); - if (!rec) - goto out_unlock; - - /* - * Check if the rec says it has a direct call but we didn't - * find one earlier? - */ - if (WARN_ON(rec->flags & FTRACE_FL_DIRECT)) - goto out_unlock; - - /* Make sure the ip points to the exact record */ - if (ip != rec->ip) { - ip = rec->ip; - /* Need to check this ip for a direct. */ - if (ftrace_find_rec_direct(ip)) - goto out_unlock; - } - - ret = -ENOMEM; - direct = ftrace_find_direct_func(addr); - if (!direct) { - direct = ftrace_alloc_direct_func(addr); - if (!direct) - goto out_unlock; - } - - entry = ftrace_add_rec_direct(ip, addr, &free_hash); - if (!entry) - goto out_unlock; - - ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0); - - if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) { - ret = register_ftrace_function_nolock(&direct_ops); - if (ret) - ftrace_set_filter_ip(&direct_ops, ip, 1, 0); - } - - if (ret) { - remove_hash_entry(direct_functions, entry); - kfree(entry); - if (!direct->count) { - list_del_rcu(&direct->next); - synchronize_rcu_tasks(); - kfree(direct); - if (free_hash) - free_ftrace_hash(free_hash); - free_hash = NULL; - ftrace_direct_func_count--; - } - } else { - direct->count++; - } - out_unlock: - mutex_unlock(&direct_mutex); - - if (free_hash) { - synchronize_rcu_tasks(); - free_ftrace_hash(free_hash); - } - - return ret; -} -EXPORT_SYMBOL_GPL(register_ftrace_direct); - -static struct ftrace_func_entry *find_direct_entry(unsigned long *ip, - struct dyn_ftrace **recp) -{ - struct ftrace_func_entry *entry; - struct dyn_ftrace *rec; - - rec = lookup_rec(*ip, *ip); - if (!rec) - return NULL; - - entry = __ftrace_lookup_ip(direct_functions, rec->ip); - if (!entry) { - WARN_ON(rec->flags & FTRACE_FL_DIRECT); - return NULL; - } - - WARN_ON(!(rec->flags & FTRACE_FL_DIRECT)); - - /* Passed in ip just needs to be on the call site */ - *ip = rec->ip; - - if (recp) - *recp = rec; - - return entry; -} - -int unregister_ftrace_direct(unsigned long ip, unsigned long addr) -{ - struct ftrace_direct_func *direct; - struct ftrace_func_entry *entry; - struct ftrace_hash *hash; - int ret = -ENODEV; - - mutex_lock(&direct_mutex); - - ip = ftrace_location(ip); - if (!ip) - goto out_unlock; - - entry = find_direct_entry(&ip, NULL); - if (!entry) - goto out_unlock; - - hash = direct_ops.func_hash->filter_hash; - if (hash->count == 1) - unregister_ftrace_function(&direct_ops); - - ret = ftrace_set_filter_ip(&direct_ops, ip, 1, 0); - - WARN_ON(ret); - - remove_hash_entry(direct_functions, entry); - - direct = ftrace_find_direct_func(addr); - if (!WARN_ON(!direct)) { - /* This is the good path (see the ! before WARN) */ - direct->count--; - WARN_ON(direct->count < 0); - if (!direct->count) { - list_del_rcu(&direct->next); - synchronize_rcu_tasks(); - kfree(direct); - kfree(entry); - ftrace_direct_func_count--; - } - } - out_unlock: - mutex_unlock(&direct_mutex); - - return ret; -} -EXPORT_SYMBOL_GPL(unregister_ftrace_direct); - -static struct ftrace_ops stub_ops = { - .func = ftrace_stub, -}; - -/** - * ftrace_modify_direct_caller - modify ftrace nop directly - * @entry: The ftrace hash entry of the direct helper for @rec - * @rec: The record representing the function site to patch - * @old_addr: The location that the site at @rec->ip currently calls - * @new_addr: The location that the site at @rec->ip should call - * - * An architecture may overwrite this function to optimize the - * changing of the direct callback on an ftrace nop location. - * This is called with the ftrace_lock mutex held, and no other - * ftrace callbacks are on the associated record (@rec). Thus, - * it is safe to modify the ftrace record, where it should be - * currently calling @old_addr directly, to call @new_addr. - * - * This is called with direct_mutex locked. - * - * Safety checks should be made to make sure that the code at - * @rec->ip is currently calling @old_addr. And this must - * also update entry->direct to @new_addr. - */ -int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry, - struct dyn_ftrace *rec, - unsigned long old_addr, - unsigned long new_addr) -{ - unsigned long ip = rec->ip; - int ret; - - lockdep_assert_held(&direct_mutex); - - /* - * The ftrace_lock was used to determine if the record - * had more than one registered user to it. If it did, - * we needed to prevent that from changing to do the quick - * switch. But if it did not (only a direct caller was attached) - * then this function is called. But this function can deal - * with attached callers to the rec that we care about, and - * since this function uses standard ftrace calls that take - * the ftrace_lock mutex, we need to release it. - */ - mutex_unlock(&ftrace_lock); - - /* - * By setting a stub function at the same address, we force - * the code to call the iterator and the direct_ops helper. - * This means that @ip does not call the direct call, and - * we can simply modify it. - */ - ret = ftrace_set_filter_ip(&stub_ops, ip, 0, 0); - if (ret) - goto out_lock; - - ret = register_ftrace_function_nolock(&stub_ops); - if (ret) { - ftrace_set_filter_ip(&stub_ops, ip, 1, 0); - goto out_lock; - } - - entry->direct = new_addr; - - /* - * By removing the stub, we put back the direct call, calling - * the @new_addr. - */ - unregister_ftrace_function(&stub_ops); - ftrace_set_filter_ip(&stub_ops, ip, 1, 0); - - out_lock: - mutex_lock(&ftrace_lock); - - return ret; -} - -/** - * modify_ftrace_direct - Modify an existing direct call to call something else - * @ip: The instruction pointer to modify - * @old_addr: The address that the current @ip calls directly - * @new_addr: The address that the @ip should call - * - * This modifies a ftrace direct caller at an instruction pointer without - * having to disable it first. The direct call will switch over to the - * @new_addr without missing anything. - * - * Returns: zero on success. Non zero on error, which includes: - * -ENODEV : the @ip given has no direct caller attached - * -EINVAL : the @old_addr does not match the current direct caller - */ -int modify_ftrace_direct(unsigned long ip, - unsigned long old_addr, unsigned long new_addr) -{ - struct ftrace_direct_func *direct, *new_direct = NULL; - struct ftrace_func_entry *entry; - struct dyn_ftrace *rec; - int ret = -ENODEV; - - mutex_lock(&direct_mutex); - - mutex_lock(&ftrace_lock); - - ip = ftrace_location(ip); - if (!ip) - goto out_unlock; - - entry = find_direct_entry(&ip, &rec); - if (!entry) - goto out_unlock; - - ret = -EINVAL; - if (entry->direct != old_addr) - goto out_unlock; - - direct = ftrace_find_direct_func(old_addr); - if (WARN_ON(!direct)) - goto out_unlock; - if (direct->count > 1) { - ret = -ENOMEM; - new_direct = ftrace_alloc_direct_func(new_addr); - if (!new_direct) - goto out_unlock; - direct->count--; - new_direct->count++; - } else { - direct->addr = new_addr; - } - - /* - * If there's no other ftrace callback on the rec->ip location, - * then it can be changed directly by the architecture. - * If there is another caller, then we just need to change the - * direct caller helper to point to @new_addr. - */ - if (ftrace_rec_count(rec) == 1) { - ret = ftrace_modify_direct_caller(entry, rec, old_addr, new_addr); - } else { - entry->direct = new_addr; - ret = 0; - } - - if (unlikely(ret && new_direct)) { - direct->count++; - list_del_rcu(&new_direct->next); - synchronize_rcu_tasks(); - kfree(new_direct); - ftrace_direct_func_count--; - } - - out_unlock: - mutex_unlock(&ftrace_lock); - mutex_unlock(&direct_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(modify_ftrace_direct); - #define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS) static int check_direct_multi(struct ftrace_ops *ops) -- cgit v1.2.3 From da8bdfbd422333fbb7c85ac1d7f18592d17d6665 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Tue, 21 Mar 2023 15:04:21 +0100 Subject: ftrace: Rename _ftrace_direct_multi APIs to _ftrace_direct APIs Now that the original _ftrace_direct APIs are gone, the "_multi" suffixes only add confusion. Link: https://lkml.kernel.org/r/20230321140424.345218-5-revest@chromium.org Signed-off-by: Florent Revest Acked-by: Mark Rutland Tested-by: Mark Rutland Acked-by: Jiri Olsa Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 20 ++++++++--------- kernel/bpf/trampoline.c | 12 +++++----- kernel/trace/ftrace.c | 34 ++++++++++++++--------------- kernel/trace/trace_selftest.c | 9 ++++---- samples/Kconfig | 2 +- samples/ftrace/ftrace-direct-modify.c | 8 +++---- samples/ftrace/ftrace-direct-multi-modify.c | 8 +++---- samples/ftrace/ftrace-direct-multi.c | 4 ++-- samples/ftrace/ftrace-direct-too.c | 6 ++--- samples/ftrace/ftrace-direct.c | 6 ++--- 10 files changed, 55 insertions(+), 54 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2f400c9f0787..abee60865fc7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -398,11 +398,11 @@ struct ftrace_func_entry { #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS extern int ftrace_direct_func_count; unsigned long ftrace_find_rec_direct(unsigned long ip); -int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); -int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr, - bool free_filters); -int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); -int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr); +int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr); +int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr, + bool free_filters); +int modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr); +int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned long addr); #else struct ftrace_ops; @@ -411,20 +411,20 @@ static inline unsigned long ftrace_find_rec_direct(unsigned long ip) { return 0; } -static inline int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +static inline int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) { return -ENODEV; } -static inline int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr, - bool free_filters) +static inline int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr, + bool free_filters) { return -ENODEV; } -static inline int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +static inline int modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) { return -ENODEV; } -static inline int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr) +static inline int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned long addr) { return -ENODEV; } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 88bc23f1e10a..a14d0af534b3 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -45,8 +45,8 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd lockdep_assert_held_once(&tr->mutex); /* Instead of updating the trampoline here, we propagate - * -EAGAIN to register_ftrace_direct_multi(). Then we can - * retry register_ftrace_direct_multi() after updating the + * -EAGAIN to register_ftrace_direct(). Then we can + * retry register_ftrace_direct() after updating the * trampoline. */ if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) && @@ -198,7 +198,7 @@ static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) int ret; if (tr->func.ftrace_managed) - ret = unregister_ftrace_direct_multi(tr->fops, (long)old_addr, false); + ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false); else ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL); @@ -215,9 +215,9 @@ static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_ad if (tr->func.ftrace_managed) { if (lock_direct_mutex) - ret = modify_ftrace_direct_multi(tr->fops, (long)new_addr); + ret = modify_ftrace_direct(tr->fops, (long)new_addr); else - ret = modify_ftrace_direct_multi_nolock(tr->fops, (long)new_addr); + ret = modify_ftrace_direct_nolock(tr->fops, (long)new_addr); } else { ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr); } @@ -243,7 +243,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) if (tr->func.ftrace_managed) { ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1); - ret = register_ftrace_direct_multi(tr->fops, (long)new_addr); + ret = register_ftrace_direct(tr->fops, (long)new_addr); } else { ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fbc9abfa1a3e..845c4012630f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5318,7 +5318,7 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long } /** - * register_ftrace_direct_multi - Call a custom trampoline directly + * register_ftrace_direct - Call a custom trampoline directly * for multiple functions registered in @ops * @ops: The address of the struct ftrace_ops object * @addr: The address of the trampoline to call at @ops functions @@ -5339,7 +5339,7 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long * -ENODEV - @ip does not point to a ftrace nop location (or not supported) * -ENOMEM - There was an allocation failure. */ -int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) { struct ftrace_hash *hash, *free_hash = NULL; struct ftrace_func_entry *entry, *new; @@ -5397,11 +5397,11 @@ int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) } return err; } -EXPORT_SYMBOL_GPL(register_ftrace_direct_multi); +EXPORT_SYMBOL_GPL(register_ftrace_direct); /** - * unregister_ftrace_direct_multi - Remove calls to custom trampoline - * previously registered by register_ftrace_direct_multi for @ops object. + * unregister_ftrace_direct - Remove calls to custom trampoline + * previously registered by register_ftrace_direct for @ops object. * @ops: The address of the struct ftrace_ops object * * This is used to remove a direct calls to @addr from the nop locations @@ -5412,8 +5412,8 @@ EXPORT_SYMBOL_GPL(register_ftrace_direct_multi); * 0 on success * -EINVAL - The @ops object was not properly registered. */ -int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr, - bool free_filters) +int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr, + bool free_filters) { struct ftrace_hash *hash = ops->func_hash->filter_hash; int err; @@ -5436,10 +5436,10 @@ int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr, ftrace_free_filter(ops); return err; } -EXPORT_SYMBOL_GPL(unregister_ftrace_direct_multi); +EXPORT_SYMBOL_GPL(unregister_ftrace_direct); static int -__modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +__modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) { struct ftrace_hash *hash; struct ftrace_func_entry *entry, *iter; @@ -5486,7 +5486,7 @@ __modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) } /** - * modify_ftrace_direct_multi_nolock - Modify an existing direct 'multi' call + * modify_ftrace_direct_nolock - Modify an existing direct 'multi' call * to call something else * @ops: The address of the struct ftrace_ops object * @addr: The address of the new trampoline to call at @ops functions @@ -5503,19 +5503,19 @@ __modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) * Returns: zero on success. Non zero on error, which includes: * -EINVAL - The @ops object was not properly registered. */ -int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr) +int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned long addr) { if (check_direct_multi(ops)) return -EINVAL; if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) return -EINVAL; - return __modify_ftrace_direct_multi(ops, addr); + return __modify_ftrace_direct(ops, addr); } -EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi_nolock); +EXPORT_SYMBOL_GPL(modify_ftrace_direct_nolock); /** - * modify_ftrace_direct_multi - Modify an existing direct 'multi' call + * modify_ftrace_direct - Modify an existing direct 'multi' call * to call something else * @ops: The address of the struct ftrace_ops object * @addr: The address of the new trampoline to call at @ops functions @@ -5529,7 +5529,7 @@ EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi_nolock); * Returns: zero on success. Non zero on error, which includes: * -EINVAL - The @ops object was not properly registered. */ -int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +int modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) { int err; @@ -5539,11 +5539,11 @@ int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) return -EINVAL; mutex_lock(&direct_mutex); - err = __modify_ftrace_direct_multi(ops, addr); + err = __modify_ftrace_direct(ops, addr); mutex_unlock(&direct_mutex); return err; } -EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi); +EXPORT_SYMBOL_GPL(modify_ftrace_direct); #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ /** diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 9ce80b3ad06d..84cd7ba31d27 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -872,7 +872,8 @@ trace_selftest_startup_function_graph(struct tracer *trace, * and make sure we get graph trace. */ ftrace_set_filter_ip(&direct, (unsigned long)DYN_FTRACE_TEST_NAME, 0, 0); - ret = register_ftrace_direct_multi(&direct, (unsigned long)trace_direct_tramp); + ret = register_ftrace_direct(&direct, + (unsigned long)trace_direct_tramp); if (ret) goto out; @@ -892,9 +893,9 @@ trace_selftest_startup_function_graph(struct tracer *trace, unregister_ftrace_graph(&fgraph_ops); - ret = unregister_ftrace_direct_multi(&direct, - (unsigned long) trace_direct_tramp, - true); + ret = unregister_ftrace_direct(&direct, + (unsigned long) trace_direct_tramp, + true); if (ret) goto out; diff --git a/samples/Kconfig b/samples/Kconfig index 30ef8bd48ba3..fd24daa99f34 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -38,7 +38,7 @@ config SAMPLE_FTRACE_DIRECT that hooks to wake_up_process and prints the parameters. config SAMPLE_FTRACE_DIRECT_MULTI - tristate "Build register_ftrace_direct_multi() example" + tristate "Build register_ftrace_direct() on multiple ips example" depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS && m depends on HAVE_SAMPLE_FTRACE_DIRECT_MULTI help diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index f01ac74bac10..25fba66f61c0 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -116,7 +116,7 @@ static int simple_thread(void *arg) if (ret) continue; t ^= 1; - ret = modify_ftrace_direct_multi(&direct, tramps[t]); + ret = modify_ftrace_direct(&direct, tramps[t]); if (!ret) my_tramp = tramps[t]; WARN_ON_ONCE(ret); @@ -132,7 +132,7 @@ static int __init ftrace_direct_init(void) int ret; ftrace_set_filter_ip(&direct, (unsigned long) my_ip, 0, 0); - ret = register_ftrace_direct_multi(&direct, my_tramp); + ret = register_ftrace_direct(&direct, my_tramp); if (!ret) simple_tsk = kthread_run(simple_thread, NULL, "event-sample-fn"); @@ -142,12 +142,12 @@ static int __init ftrace_direct_init(void) static void __exit ftrace_direct_exit(void) { kthread_stop(simple_tsk); - unregister_ftrace_direct_multi(&direct, my_tramp, true); + unregister_ftrace_direct(&direct, my_tramp, true); } module_init(ftrace_direct_init); module_exit(ftrace_direct_exit); MODULE_AUTHOR("Steven Rostedt"); -MODULE_DESCRIPTION("Example use case of using modify_ftrace_direct_multi()"); +MODULE_DESCRIPTION("Example use case of using modify_ftrace_direct()"); MODULE_LICENSE("GPL"); diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c index 196b43971cb5..f72623899602 100644 --- a/samples/ftrace/ftrace-direct-multi-modify.c +++ b/samples/ftrace/ftrace-direct-multi-modify.c @@ -123,7 +123,7 @@ static int simple_thread(void *arg) if (ret) continue; t ^= 1; - ret = modify_ftrace_direct_multi(&direct, tramps[t]); + ret = modify_ftrace_direct(&direct, tramps[t]); if (!ret) my_tramp = tramps[t]; WARN_ON_ONCE(ret); @@ -141,7 +141,7 @@ static int __init ftrace_direct_multi_init(void) ftrace_set_filter_ip(&direct, (unsigned long) wake_up_process, 0, 0); ftrace_set_filter_ip(&direct, (unsigned long) schedule, 0, 0); - ret = register_ftrace_direct_multi(&direct, my_tramp); + ret = register_ftrace_direct(&direct, my_tramp); if (!ret) simple_tsk = kthread_run(simple_thread, NULL, "event-sample-fn"); @@ -151,12 +151,12 @@ static int __init ftrace_direct_multi_init(void) static void __exit ftrace_direct_multi_exit(void) { kthread_stop(simple_tsk); - unregister_ftrace_direct_multi(&direct, my_tramp, true); + unregister_ftrace_direct(&direct, my_tramp, true); } module_init(ftrace_direct_multi_init); module_exit(ftrace_direct_multi_exit); MODULE_AUTHOR("Jiri Olsa"); -MODULE_DESCRIPTION("Example use case of using modify_ftrace_direct_multi()"); +MODULE_DESCRIPTION("Example use case of using modify_ftrace_direct()"); MODULE_LICENSE("GPL"); diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c index ea0e88ee5e43..1547c2c6be02 100644 --- a/samples/ftrace/ftrace-direct-multi.c +++ b/samples/ftrace/ftrace-direct-multi.c @@ -73,12 +73,12 @@ static int __init ftrace_direct_multi_init(void) ftrace_set_filter_ip(&direct, (unsigned long) wake_up_process, 0, 0); ftrace_set_filter_ip(&direct, (unsigned long) schedule, 0, 0); - return register_ftrace_direct_multi(&direct, (unsigned long) my_tramp); + return register_ftrace_direct(&direct, (unsigned long) my_tramp); } static void __exit ftrace_direct_multi_exit(void) { - unregister_ftrace_direct_multi(&direct, (unsigned long) my_tramp, true); + unregister_ftrace_direct(&direct, (unsigned long) my_tramp, true); } module_init(ftrace_direct_multi_init); diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index 05c3585ac15e..f28e7b99840f 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -76,17 +76,17 @@ static int __init ftrace_direct_init(void) { ftrace_set_filter_ip(&direct, (unsigned long) handle_mm_fault, 0, 0); - return register_ftrace_direct_multi(&direct, (unsigned long) my_tramp); + return register_ftrace_direct(&direct, (unsigned long) my_tramp); } static void __exit ftrace_direct_exit(void) { - unregister_ftrace_direct_multi(&direct, (unsigned long)my_tramp, true); + unregister_ftrace_direct(&direct, (unsigned long)my_tramp, true); } module_init(ftrace_direct_init); module_exit(ftrace_direct_exit); MODULE_AUTHOR("Steven Rostedt"); -MODULE_DESCRIPTION("Another example use case of using register_ftrace_direct_multi()"); +MODULE_DESCRIPTION("Another example use case of using register_ftrace_direct()"); MODULE_LICENSE("GPL"); diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index 42ec9e39453b..d81a9473b585 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -69,17 +69,17 @@ static int __init ftrace_direct_init(void) { ftrace_set_filter_ip(&direct, (unsigned long) wake_up_process, 0, 0); - return register_ftrace_direct_multi(&direct, (unsigned long) my_tramp); + return register_ftrace_direct(&direct, (unsigned long) my_tramp); } static void __exit ftrace_direct_exit(void) { - unregister_ftrace_direct_multi(&direct, (unsigned long)my_tramp, true); + unregister_ftrace_direct(&direct, (unsigned long)my_tramp, true); } module_init(ftrace_direct_init); module_exit(ftrace_direct_exit); MODULE_AUTHOR("Steven Rostedt"); -MODULE_DESCRIPTION("Example use case of using register_ftrace_direct_multi()"); +MODULE_DESCRIPTION("Example use case of using register_ftrace_direct()"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From dbaccb618fabde8b8596e341f8d76da63a9b0c2f Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Tue, 21 Mar 2023 15:04:22 +0100 Subject: ftrace: Store direct called addresses in their ops All direct calls are now registered using the register_ftrace_direct API so each ops can jump to only one direct-called trampoline. By storing the direct called trampoline address directly in the ops we can save one hashmap lookup in the direct call ops and implement arm64 direct calls on top of call ops. Link: https://lkml.kernel.org/r/20230321140424.345218-6-revest@chromium.org Signed-off-by: Florent Revest Acked-by: Jiri Olsa Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 3 +++ kernel/trace/ftrace.c | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index abee60865fc7..6a532dd6789e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -321,6 +321,9 @@ struct ftrace_ops { unsigned long trampoline_size; struct list_head list; ftrace_ops_func_t ops_func; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + unsigned long direct_call; +#endif #endif }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 845c4012630f..3bef2abc037a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2583,9 +2583,8 @@ ftrace_add_rec_direct(unsigned long ip, unsigned long addr, static void call_direct_funcs(unsigned long ip, unsigned long pip, struct ftrace_ops *ops, struct ftrace_regs *fregs) { - unsigned long addr; + unsigned long addr = READ_ONCE(ops->direct_call); - addr = ftrace_find_rec_direct(ip); if (!addr) return; @@ -5381,6 +5380,7 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) ops->func = call_direct_funcs; ops->flags = MULTI_FLAGS; ops->trampoline = FTRACE_REGS_ADDR; + ops->direct_call = addr; err = register_ftrace_function_nolock(ops); @@ -5455,6 +5455,7 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) /* Enable the tmp_ops to have the same functions as the direct ops */ ftrace_ops_init(&tmp_ops); tmp_ops.func_hash = ops->func_hash; + tmp_ops.direct_call = addr; err = register_ftrace_function_nolock(&tmp_ops); if (err) @@ -5476,6 +5477,8 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) entry->direct = addr; } } + /* Prevent store tearing if a trampoline concurrently accesses the value */ + WRITE_ONCE(ops->direct_call, addr); mutex_unlock(&ftrace_lock); -- cgit v1.2.3 From 60c8971899f3b34ad24857913c0784dab08962f0 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Tue, 21 Mar 2023 15:04:23 +0100 Subject: ftrace: Make DIRECT_CALLS work WITH_ARGS and !WITH_REGS Direct called trampolines can be called in two ways: - either from the ftrace callsite. In this case, they do not access any struct ftrace_regs nor pt_regs - Or, if a ftrace ops is also attached, from the end of a ftrace trampoline. In this case, the call_direct_funcs ops is in charge of setting the direct call trampoline's address in a struct ftrace_regs Since: commit 9705bc709604 ("ftrace: pass fregs to arch_ftrace_set_direct_caller()") The later case no longer requires a full pt_regs. It only needs a struct ftrace_regs so DIRECT_CALLS can work with both WITH_ARGS or WITH_REGS. With architectures like arm64 already abandoning WITH_REGS in favor of WITH_ARGS, it's important to have DIRECT_CALLS work WITH_ARGS only. Link: https://lkml.kernel.org/r/20230321140424.345218-7-revest@chromium.org Signed-off-by: Florent Revest Co-developed-by: Mark Rutland Signed-off-by: Mark Rutland Acked-by: Jiri Olsa Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 6 ++++++ kernel/trace/Kconfig | 2 +- kernel/trace/ftrace.c | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6a532dd6789e..31f1e1df2af3 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -241,6 +241,12 @@ enum { FTRACE_OPS_FL_DIRECT = BIT(17), }; +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +#define FTRACE_OPS_FL_SAVE_ARGS FTRACE_OPS_FL_SAVE_REGS +#else +#define FTRACE_OPS_FL_SAVE_ARGS 0 +#endif + /* * FTRACE_OPS_CMD_* commands allow the ftrace core logic to request changes * to a ftrace_ops. Note, the requests may fail. diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a856d4a34c67..5b1e7fa41ca8 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -257,7 +257,7 @@ config DYNAMIC_FTRACE_WITH_REGS config DYNAMIC_FTRACE_WITH_DIRECT_CALLS def_bool y - depends on DYNAMIC_FTRACE_WITH_REGS + depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS config DYNAMIC_FTRACE_WITH_CALL_OPS diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3bef2abc037a..3b46dba3f69b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5288,7 +5288,7 @@ static LIST_HEAD(ftrace_direct_funcs); static int register_ftrace_function_nolock(struct ftrace_ops *ops); -#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS) +#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_ARGS) static int check_direct_multi(struct ftrace_ops *ops) { -- cgit v1.2.3 From fee86a4ed536f4e521f3a4530242e152dd2a466b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 21 Mar 2023 15:04:24 +0100 Subject: ftrace: selftest: remove broken trace_direct_tramp The ftrace selftest code has a trace_direct_tramp() function which it uses as a direct call trampoline. This happens to work on x86, since the direct call's return address is in the usual place, and can be returned to via a RET, but in general the calling convention for direct calls is different from regular function calls, and requires a trampoline written in assembly. On s390, regular function calls place the return address in %r14, and an ftrace patch-site in an instrumented function places the trampoline's return address (which is within the instrumented function) in %r0, preserving the original %r14 value in-place. As a regular C function will return to the address in %r14, using a C function as the trampoline results in the trampoline returning to the caller of the instrumented function, skipping the body of the instrumented function. Note that the s390 issue is not detcted by the ftrace selftest code, as the instrumented function is trivial, and returning back into the caller happens to be equivalent. On arm64, regular function calls place the return address in x30, and an ftrace patch-site in an instrumented function saves this into r9 and places the trampoline's return address (within the instrumented function) in x30. A regular C function will return to the address in x30, but will not restore x9 into x30. Consequently, using a C function as the trampoline results in returning to the trampoline's return address having corrupted x30, such that when the instrumented function returns, it will return back into itself. To avoid future issues in this area, remove the trace_direct_tramp() function, and require that each architecture with direct calls provides a stub trampoline, named ftrace_stub_direct_tramp. This can be written to handle the architecture's trampoline calling convention, and in future could be used elsewhere (e.g. in the ftrace ops sample, to measure the overhead of direct calls), so we may as well always build it in. Link: https://lkml.kernel.org/r/20230321140424.345218-8-revest@chromium.org Signed-off-by: Mark Rutland Cc: Li Huafei Cc: Xu Kuohai Signed-off-by: Florent Revest Acked-by: Jiri Olsa Signed-off-by: Steven Rostedt (Google) --- arch/s390/kernel/mcount.S | 5 +++++ arch/x86/kernel/ftrace_32.S | 5 +++++ arch/x86/kernel/ftrace_64.S | 4 ++++ include/linux/ftrace.h | 2 ++ kernel/trace/trace_selftest.c | 12 ++---------- 5 files changed, 18 insertions(+), 10 deletions(-) diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 43ff91073d2a..6c10da43b538 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -32,6 +32,11 @@ ENTRY(ftrace_stub) BR_EX %r14 ENDPROC(ftrace_stub) +SYM_CODE_START(ftrace_stub_direct_tramp) + lgr %r1, %r0 + BR_EX %r1 +SYM_CODE_END(ftrace_stub_direct_tramp) + .macro ftrace_regs_entry, allregs=0 stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index a0ed0e4a2c0c..0d9a14528176 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -163,6 +163,11 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) jmp .Lftrace_ret SYM_CODE_END(ftrace_regs_caller) +SYM_FUNC_START(ftrace_stub_direct_tramp) + CALL_DEPTH_ACCOUNT + RET +SYM_FUNC_END(ftrace_stub_direct_tramp) + #ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_CODE_START(ftrace_graph_caller) pushl %eax diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index fb4f1e01b64a..970d8445fdc4 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -309,6 +309,10 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) SYM_FUNC_END(ftrace_regs_caller) STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) +SYM_FUNC_START(ftrace_stub_direct_tramp) + CALL_DEPTH_ACCOUNT + RET +SYM_FUNC_END(ftrace_stub_direct_tramp) #else /* ! CONFIG_DYNAMIC_FTRACE */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 31f1e1df2af3..931f3d904529 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -413,6 +413,8 @@ int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr, int modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr); int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned long addr); +void ftrace_stub_direct_tramp(void); + #else struct ftrace_ops; # define ftrace_direct_func_count 0 diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 84cd7ba31d27..a931d9aaea26 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -786,14 +786,6 @@ static struct fgraph_ops fgraph_ops __initdata = { #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS static struct ftrace_ops direct; -#ifndef CALL_DEPTH_ACCOUNT -#define CALL_DEPTH_ACCOUNT "" -#endif - -noinline __noclone static void trace_direct_tramp(void) -{ - asm(CALL_DEPTH_ACCOUNT); -} #endif /* @@ -873,7 +865,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, */ ftrace_set_filter_ip(&direct, (unsigned long)DYN_FTRACE_TEST_NAME, 0, 0); ret = register_ftrace_direct(&direct, - (unsigned long)trace_direct_tramp); + (unsigned long)ftrace_stub_direct_tramp); if (ret) goto out; @@ -894,7 +886,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, unregister_ftrace_graph(&fgraph_ops); ret = unregister_ftrace_direct(&direct, - (unsigned long) trace_direct_tramp, + (unsigned long)ftrace_stub_direct_tramp, true); if (ret) goto out; -- cgit v1.2.3 From 7755cec63adeecea3cbbf4032047812c37cf7cc3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Mar 2023 15:50:20 -0400 Subject: arm64: perf: Move PMUv3 driver to drivers/perf Having the ARM PMUv3 driver sitting in arch/arm64/kernel is getting in the way of being able to use perf on ARMv8 cores running a 32bit kernel, such as 32bit KVM guests. This patch moves it into drivers/perf/arm_pmuv3.c, with an include file in include/linux/perf/arm_pmuv3.h. The only thing left in arch/arm64 is some mundane perf stuff. Signed-off-by: Marc Zyngier Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-2-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/perf_event.h | 249 ------ arch/arm64/kernel/Makefile | 1 - arch/arm64/kernel/perf_event.c | 1467 ---------------------------------- drivers/perf/Kconfig | 11 + drivers/perf/Makefile | 1 + drivers/perf/arm_pmuv3.c | 1468 +++++++++++++++++++++++++++++++++++ include/kvm/arm_pmu.h | 2 +- include/linux/perf/arm_pmuv3.h | 258 ++++++ 8 files changed, 1739 insertions(+), 1718 deletions(-) delete mode 100644 arch/arm64/kernel/perf_event.c create mode 100644 drivers/perf/arm_pmuv3.c create mode 100644 include/linux/perf/arm_pmuv3.h diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 3eaf462f5752..eb7071c9eb34 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -9,255 +9,6 @@ #include #include -#define ARMV8_PMU_MAX_COUNTERS 32 -#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) - -/* - * Common architectural and microarchitectural event numbers. - */ -#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x0000 -#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x0001 -#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x0002 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x0003 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x0004 -#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x0005 -#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x0006 -#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x0007 -#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x0008 -#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x0009 -#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x000A -#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x000B -#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x000C -#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x000D -#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x000E -#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x000F -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x0010 -#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x0011 -#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x0012 -#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x0013 -#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x0014 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x0015 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x0016 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x0017 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x0018 -#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x0019 -#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x001A -#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x001B -#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x001C -#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x001D -#define ARMV8_PMUV3_PERFCTR_CHAIN 0x001E -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x001F -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x0020 -#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x0021 -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x0022 -#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x0023 -#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x0024 -#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x0025 -#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x0026 -#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x0027 -#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x0028 -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x0029 -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x002A -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x002B -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x002C -#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x002D -#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x002E -#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x002F -#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x0030 -#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x0031 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x0032 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x0033 -#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x0034 -#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x0035 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x0036 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x0037 -#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x0038 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x0039 -#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x003A -#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x003B -#define ARMV8_PMUV3_PERFCTR_STALL 0x003C -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x003D -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x003E -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x003F - -/* Statistical profiling extension microarchitectural events */ -#define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000 -#define ARMV8_SPE_PERFCTR_SAMPLE_FEED 0x4001 -#define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002 -#define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003 - -/* AMUv1 architecture events */ -#define ARMV8_AMU_PERFCTR_CNT_CYCLES 0x4004 -#define ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM 0x4005 - -/* long-latency read miss events */ -#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS 0x4006 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD 0x4009 -#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B - -/* Trace buffer events */ -#define ARMV8_PMUV3_PERFCTR_TRB_WRAP 0x400C -#define ARMV8_PMUV3_PERFCTR_TRB_TRIG 0x400E - -/* Trace unit events */ -#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT0 0x4010 -#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT1 0x4011 -#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT2 0x4012 -#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT3 0x4013 -#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4 0x4018 -#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5 0x4019 -#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6 0x401A -#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7 0x401B - -/* additional latency from alignment events */ -#define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020 -#define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021 -#define ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT 0x4022 - -/* Armv8.5 Memory Tagging Extension events */ -#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED 0x4024 -#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD 0x4025 -#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026 - -/* ARMv8 recommended implementation defined event types */ -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x0040 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x0042 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x0044 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x0045 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x0046 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x0047 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x0048 - -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x0050 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x0051 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x0052 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x0053 - -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x0056 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x0057 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x0058 - -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x005C -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x005D -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x005E -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x005F -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x0062 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x0063 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x0064 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x0065 -#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x0066 -#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x0067 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x0068 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x0069 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x006A - -#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x006C -#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x006D -#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x006E -#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x006F -#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x0070 -#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x0071 -#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x0072 -#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x0073 -#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x0074 -#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x0075 -#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x0076 -#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x0077 -#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x0078 -#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x0079 -#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x007A - -#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x007C -#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x007D -#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x007E - -#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x0081 -#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x0082 -#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x0083 -#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x0084 - -#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x0086 -#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x0087 -#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x0088 - -#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x008A -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x008B -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x008C -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x008D -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x008E -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x008F -#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x0090 -#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x0091 - -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0x00A0 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0x00A1 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0x00A2 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0x00A3 - -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0x00A6 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0x00A7 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0x00A8 - -/* - * Per-CPU PMCR: config reg - */ -#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */ -#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */ -#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */ -#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ -#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */ -#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ -#define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ -#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ -#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ -#define ARMV8_PMU_PMCR_N_MASK 0x1f -#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ - -/* - * PMOVSR: counters overflow flag status reg - */ -#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK - -/* - * PMXEVTYPER: Event selection reg - */ -#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ -#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ - -/* - * Event filters for PMUv3 - */ -#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) -#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) -#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) - -/* - * PMUSERENR: user enable reg - */ -#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */ -#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */ -#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ -#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ -#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ - -/* PMMIR_EL1.SLOTS mask */ -#define ARMV8_PMU_SLOTS_MASK 0xff - -#define ARMV8_PMU_BUS_SLOTS_SHIFT 8 -#define ARMV8_PMU_BUS_SLOTS_MASK 0xff -#define ARMV8_PMU_BUS_WIDTH_SHIFT 16 -#define ARMV8_PMU_BUS_WIDTH_MASK 0xf - #ifdef CONFIG_PERF_EVENTS struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index ceba6792f5b3..7c2bb4e72476 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -45,7 +45,6 @@ obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o -obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o obj-$(CONFIG_CPU_IDLE) += cpuidle.o diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c deleted file mode 100644 index dde06c0f97f3..000000000000 --- a/arch/arm64/kernel/perf_event.c +++ /dev/null @@ -1,1467 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * ARMv8 PMUv3 Performance Events handling code. - * - * Copyright (C) 2012 ARM Limited - * Author: Will Deacon - * - * This code is based heavily on the ARMv7 perf event code. - */ - -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -/* ARMv8 Cortex-A53 specific event types. */ -#define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 - -/* ARMv8 Cavium ThunderX specific event types. */ -#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST 0xE9 -#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS 0xEA -#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS 0xEB -#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS 0xEC -#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS 0xED - -/* - * ARMv8 Architectural defined events, not all of these may - * be supported on any given implementation. Unsupported events will - * be disabled at run-time based on the PMCEID registers. - */ -static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, -}; - -static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL, - [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, - [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, - - [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD, - [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_RD, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, -}; - -static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREF_LINEFILL, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, -}; - -static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, -}; - -static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, -}; - -static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST, - [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS, - [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS, - - [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS, - [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS, - - [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, - [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, -}; - -static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, - - [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, - [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, -}; - -static ssize_t -armv8pmu_events_sysfs_show(struct device *dev, - struct device_attribute *attr, char *page) -{ - struct perf_pmu_events_attr *pmu_attr; - - pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); - - return sprintf(page, "event=0x%04llx\n", pmu_attr->id); -} - -#define ARMV8_EVENT_ATTR(name, config) \ - PMU_EVENT_ATTR_ID(name, armv8pmu_events_sysfs_show, config) - -static struct attribute *armv8_pmuv3_event_attrs[] = { - ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR), - ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL), - ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL), - ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL), - ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE), - ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL), - ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED), - ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED), - ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED), - ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN), - ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN), - ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED), - ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED), - ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED), - ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED), - ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED), - ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED), - ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES), - ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED), - ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS), - ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE), - ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB), - ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE), - ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL), - ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB), - ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS), - ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR), - ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC), - ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED), - ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES), - /* Don't expose the chain event in /sys, since it's useless in isolation */ - ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE), - ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE), - ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED), - ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED), - ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND), - ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND), - ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB), - ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB), - ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE), - ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL), - ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE), - ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL), - ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE), - ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB), - ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL), - ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL), - ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB), - ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB), - ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS), - ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE), - ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS), - ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK), - ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK), - ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD), - ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD), - ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD), - ARMV8_EVENT_ATTR(l1d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD), - ARMV8_EVENT_ATTR(op_retired, ARMV8_PMUV3_PERFCTR_OP_RETIRED), - ARMV8_EVENT_ATTR(op_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC), - ARMV8_EVENT_ATTR(stall, ARMV8_PMUV3_PERFCTR_STALL), - ARMV8_EVENT_ATTR(stall_slot_backend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND), - ARMV8_EVENT_ATTR(stall_slot_frontend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND), - ARMV8_EVENT_ATTR(stall_slot, ARMV8_PMUV3_PERFCTR_STALL_SLOT), - ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP), - ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED), - ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE), - ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION), - ARMV8_EVENT_ATTR(cnt_cycles, ARMV8_AMU_PERFCTR_CNT_CYCLES), - ARMV8_EVENT_ATTR(stall_backend_mem, ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM), - ARMV8_EVENT_ATTR(l1i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS), - ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD), - ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS), - ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD), - ARMV8_EVENT_ATTR(trb_wrap, ARMV8_PMUV3_PERFCTR_TRB_WRAP), - ARMV8_EVENT_ATTR(trb_trig, ARMV8_PMUV3_PERFCTR_TRB_TRIG), - ARMV8_EVENT_ATTR(trcextout0, ARMV8_PMUV3_PERFCTR_TRCEXTOUT0), - ARMV8_EVENT_ATTR(trcextout1, ARMV8_PMUV3_PERFCTR_TRCEXTOUT1), - ARMV8_EVENT_ATTR(trcextout2, ARMV8_PMUV3_PERFCTR_TRCEXTOUT2), - ARMV8_EVENT_ATTR(trcextout3, ARMV8_PMUV3_PERFCTR_TRCEXTOUT3), - ARMV8_EVENT_ATTR(cti_trigout4, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4), - ARMV8_EVENT_ATTR(cti_trigout5, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5), - ARMV8_EVENT_ATTR(cti_trigout6, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6), - ARMV8_EVENT_ATTR(cti_trigout7, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7), - ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT), - ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT), - ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT), - ARMV8_EVENT_ATTR(mem_access_checked, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED), - ARMV8_EVENT_ATTR(mem_access_checked_rd, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD), - ARMV8_EVENT_ATTR(mem_access_checked_wr, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR), - NULL, -}; - -static umode_t -armv8pmu_event_attr_is_visible(struct kobject *kobj, - struct attribute *attr, int unused) -{ - struct device *dev = kobj_to_dev(kobj); - struct pmu *pmu = dev_get_drvdata(dev); - struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); - struct perf_pmu_events_attr *pmu_attr; - - pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); - - if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS && - test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap)) - return attr->mode; - - if (pmu_attr->id >= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE) { - u64 id = pmu_attr->id - ARMV8_PMUV3_EXT_COMMON_EVENT_BASE; - - if (id < ARMV8_PMUV3_MAX_COMMON_EVENTS && - test_bit(id, cpu_pmu->pmceid_ext_bitmap)) - return attr->mode; - } - - return 0; -} - -static const struct attribute_group armv8_pmuv3_events_attr_group = { - .name = "events", - .attrs = armv8_pmuv3_event_attrs, - .is_visible = armv8pmu_event_attr_is_visible, -}; - -PMU_FORMAT_ATTR(event, "config:0-15"); -PMU_FORMAT_ATTR(long, "config1:0"); -PMU_FORMAT_ATTR(rdpmc, "config1:1"); - -static int sysctl_perf_user_access __read_mostly; - -static inline bool armv8pmu_event_is_64bit(struct perf_event *event) -{ - return event->attr.config1 & 0x1; -} - -static inline bool armv8pmu_event_want_user_access(struct perf_event *event) -{ - return event->attr.config1 & 0x2; -} - -static struct attribute *armv8_pmuv3_format_attrs[] = { - &format_attr_event.attr, - &format_attr_long.attr, - &format_attr_rdpmc.attr, - NULL, -}; - -static const struct attribute_group armv8_pmuv3_format_attr_group = { - .name = "format", - .attrs = armv8_pmuv3_format_attrs, -}; - -static ssize_t slots_show(struct device *dev, struct device_attribute *attr, - char *page) -{ - struct pmu *pmu = dev_get_drvdata(dev); - struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); - u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK; - - return sysfs_emit(page, "0x%08x\n", slots); -} - -static DEVICE_ATTR_RO(slots); - -static ssize_t bus_slots_show(struct device *dev, struct device_attribute *attr, - char *page) -{ - struct pmu *pmu = dev_get_drvdata(dev); - struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); - u32 bus_slots = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_SLOTS_SHIFT) - & ARMV8_PMU_BUS_SLOTS_MASK; - - return sysfs_emit(page, "0x%08x\n", bus_slots); -} - -static DEVICE_ATTR_RO(bus_slots); - -static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr, - char *page) -{ - struct pmu *pmu = dev_get_drvdata(dev); - struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); - u32 bus_width = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_WIDTH_SHIFT) - & ARMV8_PMU_BUS_WIDTH_MASK; - u32 val = 0; - - /* Encoded as Log2(number of bytes), plus one */ - if (bus_width > 2 && bus_width < 13) - val = 1 << (bus_width - 1); - - return sysfs_emit(page, "0x%08x\n", val); -} - -static DEVICE_ATTR_RO(bus_width); - -static struct attribute *armv8_pmuv3_caps_attrs[] = { - &dev_attr_slots.attr, - &dev_attr_bus_slots.attr, - &dev_attr_bus_width.attr, - NULL, -}; - -static const struct attribute_group armv8_pmuv3_caps_attr_group = { - .name = "caps", - .attrs = armv8_pmuv3_caps_attrs, -}; - -/* - * Perf Events' indices - */ -#define ARMV8_IDX_CYCLE_COUNTER 0 -#define ARMV8_IDX_COUNTER0 1 -#define ARMV8_IDX_CYCLE_COUNTER_USER 32 - -/* - * We unconditionally enable ARMv8.5-PMU long event counter support - * (64-bit events) where supported. Indicate if this arm_pmu has long - * event counter support. - */ -static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) -{ - return (cpu_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5); -} - -static inline bool armv8pmu_event_has_user_read(struct perf_event *event) -{ - return event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT; -} - -/* - * We must chain two programmable counters for 64 bit events, - * except when we have allocated the 64bit cycle counter (for CPU - * cycles event) or when user space counter access is enabled. - */ -static inline bool armv8pmu_event_is_chained(struct perf_event *event) -{ - int idx = event->hw.idx; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - - return !armv8pmu_event_has_user_read(event) && - armv8pmu_event_is_64bit(event) && - !armv8pmu_has_long_event(cpu_pmu) && - (idx != ARMV8_IDX_CYCLE_COUNTER); -} - -/* - * ARMv8 low level PMU access - */ - -/* - * Perf Event to low level counters mapping - */ -#define ARMV8_IDX_TO_COUNTER(x) \ - (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) - -/* - * This code is really good - */ - -#define PMEVN_CASE(n, case_macro) \ - case n: case_macro(n); break - -#define PMEVN_SWITCH(x, case_macro) \ - do { \ - switch (x) { \ - PMEVN_CASE(0, case_macro); \ - PMEVN_CASE(1, case_macro); \ - PMEVN_CASE(2, case_macro); \ - PMEVN_CASE(3, case_macro); \ - PMEVN_CASE(4, case_macro); \ - PMEVN_CASE(5, case_macro); \ - PMEVN_CASE(6, case_macro); \ - PMEVN_CASE(7, case_macro); \ - PMEVN_CASE(8, case_macro); \ - PMEVN_CASE(9, case_macro); \ - PMEVN_CASE(10, case_macro); \ - PMEVN_CASE(11, case_macro); \ - PMEVN_CASE(12, case_macro); \ - PMEVN_CASE(13, case_macro); \ - PMEVN_CASE(14, case_macro); \ - PMEVN_CASE(15, case_macro); \ - PMEVN_CASE(16, case_macro); \ - PMEVN_CASE(17, case_macro); \ - PMEVN_CASE(18, case_macro); \ - PMEVN_CASE(19, case_macro); \ - PMEVN_CASE(20, case_macro); \ - PMEVN_CASE(21, case_macro); \ - PMEVN_CASE(22, case_macro); \ - PMEVN_CASE(23, case_macro); \ - PMEVN_CASE(24, case_macro); \ - PMEVN_CASE(25, case_macro); \ - PMEVN_CASE(26, case_macro); \ - PMEVN_CASE(27, case_macro); \ - PMEVN_CASE(28, case_macro); \ - PMEVN_CASE(29, case_macro); \ - PMEVN_CASE(30, case_macro); \ - default: WARN(1, "Invalid PMEV* index\n"); \ - } \ - } while (0) - -#define RETURN_READ_PMEVCNTRN(n) \ - return read_sysreg(pmevcntr##n##_el0) -static unsigned long read_pmevcntrn(int n) -{ - PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); - return 0; -} - -#define WRITE_PMEVCNTRN(n) \ - write_sysreg(val, pmevcntr##n##_el0) -static void write_pmevcntrn(int n, unsigned long val) -{ - PMEVN_SWITCH(n, WRITE_PMEVCNTRN); -} - -#define WRITE_PMEVTYPERN(n) \ - write_sysreg(val, pmevtyper##n##_el0) -static void write_pmevtypern(int n, unsigned long val) -{ - PMEVN_SWITCH(n, WRITE_PMEVTYPERN); -} - -static inline u32 armv8pmu_pmcr_read(void) -{ - return read_sysreg(pmcr_el0); -} - -static inline void armv8pmu_pmcr_write(u32 val) -{ - val &= ARMV8_PMU_PMCR_MASK; - isb(); - write_sysreg(val, pmcr_el0); -} - -static inline int armv8pmu_has_overflowed(u32 pmovsr) -{ - return pmovsr & ARMV8_PMU_OVERFLOWED_MASK; -} - -static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) -{ - return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); -} - -static inline u64 armv8pmu_read_evcntr(int idx) -{ - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - - return read_pmevcntrn(counter); -} - -static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) -{ - int idx = event->hw.idx; - u64 val = armv8pmu_read_evcntr(idx); - - if (armv8pmu_event_is_chained(event)) - val = (val << 32) | armv8pmu_read_evcntr(idx - 1); - return val; -} - -/* - * The cycle counter is always a 64-bit counter. When ARMV8_PMU_PMCR_LP - * is set the event counters also become 64-bit counters. Unless the - * user has requested a long counter (attr.config1) then we want to - * interrupt upon 32-bit overflow - we achieve this by applying a bias. - */ -static bool armv8pmu_event_needs_bias(struct perf_event *event) -{ - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - if (armv8pmu_event_is_64bit(event)) - return false; - - if (armv8pmu_has_long_event(cpu_pmu) || - idx == ARMV8_IDX_CYCLE_COUNTER) - return true; - - return false; -} - -static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value) -{ - if (armv8pmu_event_needs_bias(event)) - value |= GENMASK(63, 32); - - return value; -} - -static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value) -{ - if (armv8pmu_event_needs_bias(event)) - value &= ~GENMASK(63, 32); - - return value; -} - -static u64 armv8pmu_read_counter(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - u64 value; - - if (idx == ARMV8_IDX_CYCLE_COUNTER) - value = read_sysreg(pmccntr_el0); - else - value = armv8pmu_read_hw_counter(event); - - return armv8pmu_unbias_long_counter(event, value); -} - -static inline void armv8pmu_write_evcntr(int idx, u64 value) -{ - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - - write_pmevcntrn(counter, value); -} - -static inline void armv8pmu_write_hw_counter(struct perf_event *event, - u64 value) -{ - int idx = event->hw.idx; - - if (armv8pmu_event_is_chained(event)) { - armv8pmu_write_evcntr(idx, upper_32_bits(value)); - armv8pmu_write_evcntr(idx - 1, lower_32_bits(value)); - } else { - armv8pmu_write_evcntr(idx, value); - } -} - -static void armv8pmu_write_counter(struct perf_event *event, u64 value) -{ - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - value = armv8pmu_bias_long_counter(event, value); - - if (idx == ARMV8_IDX_CYCLE_COUNTER) - write_sysreg(value, pmccntr_el0); - else - armv8pmu_write_hw_counter(event, value); -} - -static inline void armv8pmu_write_evtype(int idx, u32 val) -{ - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - - val &= ARMV8_PMU_EVTYPE_MASK; - write_pmevtypern(counter, val); -} - -static inline void armv8pmu_write_event_type(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - /* - * For chained events, the low counter is programmed to count - * the event of interest and the high counter is programmed - * with CHAIN event code with filters set to count at all ELs. - */ - if (armv8pmu_event_is_chained(event)) { - u32 chain_evt = ARMV8_PMUV3_PERFCTR_CHAIN | - ARMV8_PMU_INCLUDE_EL2; - - armv8pmu_write_evtype(idx - 1, hwc->config_base); - armv8pmu_write_evtype(idx, chain_evt); - } else { - if (idx == ARMV8_IDX_CYCLE_COUNTER) - write_sysreg(hwc->config_base, pmccfiltr_el0); - else - armv8pmu_write_evtype(idx, hwc->config_base); - } -} - -static u32 armv8pmu_event_cnten_mask(struct perf_event *event) -{ - int counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); - u32 mask = BIT(counter); - - if (armv8pmu_event_is_chained(event)) - mask |= BIT(counter - 1); - return mask; -} - -static inline void armv8pmu_enable_counter(u32 mask) -{ - /* - * Make sure event configuration register writes are visible before we - * enable the counter. - * */ - isb(); - write_sysreg(mask, pmcntenset_el0); -} - -static inline void armv8pmu_enable_event_counter(struct perf_event *event) -{ - struct perf_event_attr *attr = &event->attr; - u32 mask = armv8pmu_event_cnten_mask(event); - - kvm_set_pmu_events(mask, attr); - - /* We rely on the hypervisor switch code to enable guest counters */ - if (!kvm_pmu_counter_deferred(attr)) - armv8pmu_enable_counter(mask); -} - -static inline void armv8pmu_disable_counter(u32 mask) -{ - write_sysreg(mask, pmcntenclr_el0); - /* - * Make sure the effects of disabling the counter are visible before we - * start configuring the event. - */ - isb(); -} - -static inline void armv8pmu_disable_event_counter(struct perf_event *event) -{ - struct perf_event_attr *attr = &event->attr; - u32 mask = armv8pmu_event_cnten_mask(event); - - kvm_clr_pmu_events(mask); - - /* We rely on the hypervisor switch code to disable guest counters */ - if (!kvm_pmu_counter_deferred(attr)) - armv8pmu_disable_counter(mask); -} - -static inline void armv8pmu_enable_intens(u32 mask) -{ - write_sysreg(mask, pmintenset_el1); -} - -static inline void armv8pmu_enable_event_irq(struct perf_event *event) -{ - u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); - armv8pmu_enable_intens(BIT(counter)); -} - -static inline void armv8pmu_disable_intens(u32 mask) -{ - write_sysreg(mask, pmintenclr_el1); - isb(); - /* Clear the overflow flag in case an interrupt is pending. */ - write_sysreg(mask, pmovsclr_el0); - isb(); -} - -static inline void armv8pmu_disable_event_irq(struct perf_event *event) -{ - u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); - armv8pmu_disable_intens(BIT(counter)); -} - -static inline u32 armv8pmu_getreset_flags(void) -{ - u32 value; - - /* Read */ - value = read_sysreg(pmovsclr_el0); - - /* Write to clear flags */ - value &= ARMV8_PMU_OVSR_MASK; - write_sysreg(value, pmovsclr_el0); - - return value; -} - -static void armv8pmu_disable_user_access(void) -{ - write_sysreg(0, pmuserenr_el0); -} - -static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) -{ - int i; - struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); - - /* Clear any unused counters to avoid leaking their contents */ - for_each_clear_bit(i, cpuc->used_mask, cpu_pmu->num_events) { - if (i == ARMV8_IDX_CYCLE_COUNTER) - write_sysreg(0, pmccntr_el0); - else - armv8pmu_write_evcntr(i, 0); - } - - write_sysreg(0, pmuserenr_el0); - write_sysreg(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR, pmuserenr_el0); -} - -static void armv8pmu_enable_event(struct perf_event *event) -{ - /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. - */ - - /* - * Disable counter - */ - armv8pmu_disable_event_counter(event); - - /* - * Set event. - */ - armv8pmu_write_event_type(event); - - /* - * Enable interrupt for this counter - */ - armv8pmu_enable_event_irq(event); - - /* - * Enable counter - */ - armv8pmu_enable_event_counter(event); -} - -static void armv8pmu_disable_event(struct perf_event *event) -{ - /* - * Disable counter - */ - armv8pmu_disable_event_counter(event); - - /* - * Disable interrupt for this counter - */ - armv8pmu_disable_event_irq(event); -} - -static void armv8pmu_start(struct arm_pmu *cpu_pmu) -{ - struct perf_event_context *ctx; - int nr_user = 0; - - ctx = perf_cpu_task_ctx(); - if (ctx) - nr_user = ctx->nr_user; - - if (sysctl_perf_user_access && nr_user) - armv8pmu_enable_user_access(cpu_pmu); - else - armv8pmu_disable_user_access(); - - /* Enable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); -} - -static void armv8pmu_stop(struct arm_pmu *cpu_pmu) -{ - /* Disable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); -} - -static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) -{ - u32 pmovsr; - struct perf_sample_data data; - struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); - struct pt_regs *regs; - int idx; - - /* - * Get and reset the IRQ flags - */ - pmovsr = armv8pmu_getreset_flags(); - - /* - * Did an overflow occur? - */ - if (!armv8pmu_has_overflowed(pmovsr)) - return IRQ_NONE; - - /* - * Handle the counter(s) overflow(s) - */ - regs = get_irq_regs(); - - /* - * Stop the PMU while processing the counter overflows - * to prevent skews in group events. - */ - armv8pmu_stop(cpu_pmu); - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - /* Ignore if we don't have an event. */ - if (!event) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv8pmu_counter_has_overflowed(pmovsr, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event); - perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event)) - continue; - - /* - * Perf event overflow will queue the processing of the event as - * an irq_work which will be taken care of in the handling of - * IPI_IRQ_WORK. - */ - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); - } - armv8pmu_start(cpu_pmu); - - return IRQ_HANDLED; -} - -static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, - struct arm_pmu *cpu_pmu) -{ - int idx; - - for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } - return -EAGAIN; -} - -static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, - struct arm_pmu *cpu_pmu) -{ - int idx; - - /* - * Chaining requires two consecutive event counters, where - * the lower idx must be even. - */ - for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) { - if (!test_and_set_bit(idx, cpuc->used_mask)) { - /* Check if the preceding even counter is available */ - if (!test_and_set_bit(idx - 1, cpuc->used_mask)) - return idx; - /* Release the Odd counter */ - clear_bit(idx, cpuc->used_mask); - } - } - return -EAGAIN; -} - -static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; - - /* Always prefer to place a cycle counter into the cycle counter. */ - if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { - if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) - return ARMV8_IDX_CYCLE_COUNTER; - else if (armv8pmu_event_is_64bit(event) && - armv8pmu_event_want_user_access(event) && - !armv8pmu_has_long_event(cpu_pmu)) - return -EAGAIN; - } - - /* - * Otherwise use events counters - */ - if (armv8pmu_event_is_chained(event)) - return armv8pmu_get_chain_idx(cpuc, cpu_pmu); - else - return armv8pmu_get_single_idx(cpuc, cpu_pmu); -} - -static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - int idx = event->hw.idx; - - clear_bit(idx, cpuc->used_mask); - if (armv8pmu_event_is_chained(event)) - clear_bit(idx - 1, cpuc->used_mask); -} - -static int armv8pmu_user_event_idx(struct perf_event *event) -{ - if (!sysctl_perf_user_access || !armv8pmu_event_has_user_read(event)) - return 0; - - /* - * We remap the cycle counter index to 32 to - * match the offset applied to the rest of - * the counter indices. - */ - if (event->hw.idx == ARMV8_IDX_CYCLE_COUNTER) - return ARMV8_IDX_CYCLE_COUNTER_USER; - - return event->hw.idx; -} - -/* - * Add an event filter to a given event. - */ -static int armv8pmu_set_event_filter(struct hw_perf_event *event, - struct perf_event_attr *attr) -{ - unsigned long config_base = 0; - - if (attr->exclude_idle) - return -EPERM; - - /* - * If we're running in hyp mode, then we *are* the hypervisor. - * Therefore we ignore exclude_hv in this configuration, since - * there's no hypervisor to sample anyway. This is consistent - * with other architectures (x86 and Power). - */ - if (is_kernel_in_hyp_mode()) { - if (!attr->exclude_kernel && !attr->exclude_host) - config_base |= ARMV8_PMU_INCLUDE_EL2; - if (attr->exclude_guest) - config_base |= ARMV8_PMU_EXCLUDE_EL1; - if (attr->exclude_host) - config_base |= ARMV8_PMU_EXCLUDE_EL0; - } else { - if (!attr->exclude_hv && !attr->exclude_host) - config_base |= ARMV8_PMU_INCLUDE_EL2; - } - - /* - * Filter out !VHE kernels and guest kernels - */ - if (attr->exclude_kernel) - config_base |= ARMV8_PMU_EXCLUDE_EL1; - - if (attr->exclude_user) - config_base |= ARMV8_PMU_EXCLUDE_EL0; - - /* - * Install the filter into config_base as this is used to - * construct the event type. - */ - event->config_base = config_base; - - return 0; -} - -static void armv8pmu_reset(void *info) -{ - struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; - u32 pmcr; - - /* The counter and interrupt enable registers are unknown at reset. */ - armv8pmu_disable_counter(U32_MAX); - armv8pmu_disable_intens(U32_MAX); - - /* Clear the counters we flip at guest entry/exit */ - kvm_clr_pmu_events(U32_MAX); - - /* - * Initialize & Reset PMNC. Request overflow interrupt for - * 64 bit cycle counter but cheat in armv8pmu_write_counter(). - */ - pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC; - - /* Enable long event counter support where available */ - if (armv8pmu_has_long_event(cpu_pmu)) - pmcr |= ARMV8_PMU_PMCR_LP; - - armv8pmu_pmcr_write(pmcr); -} - -static int __armv8_pmuv3_map_event(struct perf_event *event, - const unsigned (*extra_event_map) - [PERF_COUNT_HW_MAX], - const unsigned (*extra_cache_map) - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]) -{ - int hw_event_id; - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - - hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map, - &armv8_pmuv3_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); - - /* - * CHAIN events only work when paired with an adjacent counter, and it - * never makes sense for a user to open one in isolation, as they'll be - * rotated arbitrarily. - */ - if (hw_event_id == ARMV8_PMUV3_PERFCTR_CHAIN) - return -EINVAL; - - if (armv8pmu_event_is_64bit(event)) - event->hw.flags |= ARMPMU_EVT_64BIT; - - /* - * User events must be allocated into a single counter, and so - * must not be chained. - * - * Most 64-bit events require long counter support, but 64-bit - * CPU_CYCLES events can be placed into the dedicated cycle - * counter when this is free. - */ - if (armv8pmu_event_want_user_access(event)) { - if (!(event->attach_state & PERF_ATTACH_TASK)) - return -EINVAL; - if (armv8pmu_event_is_64bit(event) && - (hw_event_id != ARMV8_PMUV3_PERFCTR_CPU_CYCLES) && - !armv8pmu_has_long_event(armpmu)) - return -EOPNOTSUPP; - - event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT; - } - - /* Only expose micro/arch events supported by this PMU */ - if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) - && test_bit(hw_event_id, armpmu->pmceid_bitmap)) { - return hw_event_id; - } - - return armpmu_map_event(event, extra_event_map, extra_cache_map, - ARMV8_PMU_EVTYPE_EVENT); -} - -static int armv8_pmuv3_map_event(struct perf_event *event) -{ - return __armv8_pmuv3_map_event(event, NULL, NULL); -} - -static int armv8_a53_map_event(struct perf_event *event) -{ - return __armv8_pmuv3_map_event(event, NULL, &armv8_a53_perf_cache_map); -} - -static int armv8_a57_map_event(struct perf_event *event) -{ - return __armv8_pmuv3_map_event(event, NULL, &armv8_a57_perf_cache_map); -} - -static int armv8_a73_map_event(struct perf_event *event) -{ - return __armv8_pmuv3_map_event(event, NULL, &armv8_a73_perf_cache_map); -} - -static int armv8_thunder_map_event(struct perf_event *event) -{ - return __armv8_pmuv3_map_event(event, NULL, - &armv8_thunder_perf_cache_map); -} - -static int armv8_vulcan_map_event(struct perf_event *event) -{ - return __armv8_pmuv3_map_event(event, NULL, - &armv8_vulcan_perf_cache_map); -} - -struct armv8pmu_probe_info { - struct arm_pmu *pmu; - bool present; -}; - -static void __armv8pmu_probe_pmu(void *info) -{ - struct armv8pmu_probe_info *probe = info; - struct arm_pmu *cpu_pmu = probe->pmu; - u64 dfr0; - u64 pmceid_raw[2]; - u32 pmceid[2]; - int pmuver; - - dfr0 = read_sysreg(id_aa64dfr0_el1); - pmuver = cpuid_feature_extract_unsigned_field(dfr0, - ID_AA64DFR0_EL1_PMUVer_SHIFT); - if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || - pmuver == ID_AA64DFR0_EL1_PMUVer_NI) - return; - - cpu_pmu->pmuver = pmuver; - probe->present = true; - - /* Read the nb of CNTx counters supported from PMNC */ - cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT) - & ARMV8_PMU_PMCR_N_MASK; - - /* Add the CPU cycles counter */ - cpu_pmu->num_events += 1; - - pmceid[0] = pmceid_raw[0] = read_sysreg(pmceid0_el0); - pmceid[1] = pmceid_raw[1] = read_sysreg(pmceid1_el0); - - bitmap_from_arr32(cpu_pmu->pmceid_bitmap, - pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); - - pmceid[0] = pmceid_raw[0] >> 32; - pmceid[1] = pmceid_raw[1] >> 32; - - bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap, - pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); - - /* store PMMIR_EL1 register for sysfs */ - if (pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4 && (pmceid_raw[1] & BIT(31))) - cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1); - else - cpu_pmu->reg_pmmir = 0; -} - -static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) -{ - struct armv8pmu_probe_info probe = { - .pmu = cpu_pmu, - .present = false, - }; - int ret; - - ret = smp_call_function_any(&cpu_pmu->supported_cpus, - __armv8pmu_probe_pmu, - &probe, 1); - if (ret) - return ret; - - return probe.present ? 0 : -ENODEV; -} - -static void armv8pmu_disable_user_access_ipi(void *unused) -{ - armv8pmu_disable_user_access(); -} - -static int armv8pmu_proc_user_access_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (ret || !write || sysctl_perf_user_access) - return ret; - - on_each_cpu(armv8pmu_disable_user_access_ipi, NULL, 1); - return 0; -} - -static struct ctl_table armv8_pmu_sysctl_table[] = { - { - .procname = "perf_user_access", - .data = &sysctl_perf_user_access, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = armv8pmu_proc_user_access_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { } -}; - -static void armv8_pmu_register_sysctl_table(void) -{ - static u32 tbl_registered = 0; - - if (!cmpxchg_relaxed(&tbl_registered, 0, 1)) - register_sysctl("kernel", armv8_pmu_sysctl_table); -} - -static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, - int (*map_event)(struct perf_event *event), - const struct attribute_group *events, - const struct attribute_group *format, - const struct attribute_group *caps) -{ - int ret = armv8pmu_probe_pmu(cpu_pmu); - if (ret) - return ret; - - cpu_pmu->handle_irq = armv8pmu_handle_irq; - cpu_pmu->enable = armv8pmu_enable_event; - cpu_pmu->disable = armv8pmu_disable_event; - cpu_pmu->read_counter = armv8pmu_read_counter; - cpu_pmu->write_counter = armv8pmu_write_counter; - cpu_pmu->get_event_idx = armv8pmu_get_event_idx; - cpu_pmu->clear_event_idx = armv8pmu_clear_event_idx; - cpu_pmu->start = armv8pmu_start; - cpu_pmu->stop = armv8pmu_stop; - cpu_pmu->reset = armv8pmu_reset; - cpu_pmu->set_event_filter = armv8pmu_set_event_filter; - - cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx; - - cpu_pmu->name = name; - cpu_pmu->map_event = map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = events ? - events : &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ? - format : &armv8_pmuv3_format_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = caps ? - caps : &armv8_pmuv3_caps_attr_group; - - armv8_pmu_register_sysctl_table(); - return 0; -} - -static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name, - int (*map_event)(struct perf_event *event)) -{ - return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL); -} - -#define PMUV3_INIT_SIMPLE(name) \ -static int name##_pmu_init(struct arm_pmu *cpu_pmu) \ -{ \ - return armv8_pmu_init_nogroups(cpu_pmu, #name, armv8_pmuv3_map_event);\ -} - -PMUV3_INIT_SIMPLE(armv8_pmuv3) - -PMUV3_INIT_SIMPLE(armv8_cortex_a34) -PMUV3_INIT_SIMPLE(armv8_cortex_a55) -PMUV3_INIT_SIMPLE(armv8_cortex_a65) -PMUV3_INIT_SIMPLE(armv8_cortex_a75) -PMUV3_INIT_SIMPLE(armv8_cortex_a76) -PMUV3_INIT_SIMPLE(armv8_cortex_a77) -PMUV3_INIT_SIMPLE(armv8_cortex_a78) -PMUV3_INIT_SIMPLE(armv9_cortex_a510) -PMUV3_INIT_SIMPLE(armv9_cortex_a710) -PMUV3_INIT_SIMPLE(armv8_cortex_x1) -PMUV3_INIT_SIMPLE(armv9_cortex_x2) -PMUV3_INIT_SIMPLE(armv8_neoverse_e1) -PMUV3_INIT_SIMPLE(armv8_neoverse_n1) -PMUV3_INIT_SIMPLE(armv9_neoverse_n2) -PMUV3_INIT_SIMPLE(armv8_neoverse_v1) - -PMUV3_INIT_SIMPLE(armv8_nvidia_carmel) -PMUV3_INIT_SIMPLE(armv8_nvidia_denver) - -static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a35", - armv8_a53_map_event); -} - -static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a53", - armv8_a53_map_event); -} - -static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57", - armv8_a57_map_event); -} - -static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72", - armv8_a57_map_event); -} - -static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a73", - armv8_a73_map_event); -} - -static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder", - armv8_thunder_map_event); -} - -static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) -{ - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_brcm_vulcan", - armv8_vulcan_map_event); -} - -static const struct of_device_id armv8_pmu_of_device_ids[] = { - {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_pmu_init}, - {.compatible = "arm,cortex-a34-pmu", .data = armv8_cortex_a34_pmu_init}, - {.compatible = "arm,cortex-a35-pmu", .data = armv8_a35_pmu_init}, - {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, - {.compatible = "arm,cortex-a55-pmu", .data = armv8_cortex_a55_pmu_init}, - {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, - {.compatible = "arm,cortex-a65-pmu", .data = armv8_cortex_a65_pmu_init}, - {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init}, - {.compatible = "arm,cortex-a73-pmu", .data = armv8_a73_pmu_init}, - {.compatible = "arm,cortex-a75-pmu", .data = armv8_cortex_a75_pmu_init}, - {.compatible = "arm,cortex-a76-pmu", .data = armv8_cortex_a76_pmu_init}, - {.compatible = "arm,cortex-a77-pmu", .data = armv8_cortex_a77_pmu_init}, - {.compatible = "arm,cortex-a78-pmu", .data = armv8_cortex_a78_pmu_init}, - {.compatible = "arm,cortex-a510-pmu", .data = armv9_cortex_a510_pmu_init}, - {.compatible = "arm,cortex-a710-pmu", .data = armv9_cortex_a710_pmu_init}, - {.compatible = "arm,cortex-x1-pmu", .data = armv8_cortex_x1_pmu_init}, - {.compatible = "arm,cortex-x2-pmu", .data = armv9_cortex_x2_pmu_init}, - {.compatible = "arm,neoverse-e1-pmu", .data = armv8_neoverse_e1_pmu_init}, - {.compatible = "arm,neoverse-n1-pmu", .data = armv8_neoverse_n1_pmu_init}, - {.compatible = "arm,neoverse-n2-pmu", .data = armv9_neoverse_n2_pmu_init}, - {.compatible = "arm,neoverse-v1-pmu", .data = armv8_neoverse_v1_pmu_init}, - {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init}, - {.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init}, - {.compatible = "nvidia,carmel-pmu", .data = armv8_nvidia_carmel_pmu_init}, - {.compatible = "nvidia,denver-pmu", .data = armv8_nvidia_denver_pmu_init}, - {}, -}; - -static int armv8_pmu_device_probe(struct platform_device *pdev) -{ - return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL); -} - -static struct platform_driver armv8_pmu_driver = { - .driver = { - .name = ARMV8_PMU_PDEV_NAME, - .of_match_table = armv8_pmu_of_device_ids, - .suppress_bind_attrs = true, - }, - .probe = armv8_pmu_device_probe, -}; - -static int __init armv8_pmu_driver_init(void) -{ - if (acpi_disabled) - return platform_driver_register(&armv8_pmu_driver); - else - return arm_pmu_acpi_probe(armv8_pmuv3_pmu_init); -} -device_initcall(armv8_pmu_driver_init) - -void arch_perf_update_userpage(struct perf_event *event, - struct perf_event_mmap_page *userpg, u64 now) -{ - struct clock_read_data *rd; - unsigned int seq; - u64 ns; - - userpg->cap_user_time = 0; - userpg->cap_user_time_zero = 0; - userpg->cap_user_time_short = 0; - userpg->cap_user_rdpmc = armv8pmu_event_has_user_read(event); - - if (userpg->cap_user_rdpmc) { - if (event->hw.flags & ARMPMU_EVT_64BIT) - userpg->pmc_width = 64; - else - userpg->pmc_width = 32; - } - - do { - rd = sched_clock_read_begin(&seq); - - if (rd->read_sched_clock != arch_timer_read_counter) - return; - - userpg->time_mult = rd->mult; - userpg->time_shift = rd->shift; - userpg->time_zero = rd->epoch_ns; - userpg->time_cycles = rd->epoch_cyc; - userpg->time_mask = rd->sched_clock_mask; - - /* - * Subtract the cycle base, such that software that - * doesn't know about cap_user_time_short still 'works' - * assuming no wraps. - */ - ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift); - userpg->time_zero -= ns; - - } while (sched_clock_read_retry(seq)); - - userpg->time_offset = userpg->time_zero - now; - - /* - * time_shift is not expected to be greater than 31 due to - * the original published conversion algorithm shifting a - * 32-bit value (now specifies a 64-bit value) - refer - * perf_event_mmap_page documentation in perf_event.h. - */ - if (userpg->time_shift == 32) { - userpg->time_shift = 31; - userpg->time_mult >>= 1; - } - - /* - * Internal timekeeping for enabled/running/stopped times - * is always computed with the sched_clock. - */ - userpg->cap_user_time = 1; - userpg->cap_user_time_zero = 1; - userpg->cap_user_time_short = 1; -} diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 66c259000a44..defe6b47854b 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -100,6 +100,17 @@ config ARM_SMMU_V3_PMU through the SMMU and allow the resulting information to be filtered based on the Stream ID of the corresponding master. +config ARM_PMUV3 + depends on HW_PERF_EVENTS && ARM64 + bool "ARM PMUv3 support" if !ARM64 + default y + help + Say y if you want to use the ARM performance monitor unit (PMU) + version 3. The PMUv3 is the CPU performance monitors on ARMv8 + (aarch32 and aarch64) systems that implement the PMUv3 + architecture. + Currently, PMUv3 is only supported on aarch64 (arm64) + config ARM_DSU_PMU tristate "ARM DynamIQ Shared Unit (DSU) PMU" depends on ARM64 diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 13e45da61100..dabc859540ce 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_ARM_CMN) += arm-cmn.o obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o +obj-$(CONFIG_ARM_PMUV3) += arm_pmuv3.o obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o obj-$(CONFIG_HISI_PMU) += hisilicon/ diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c new file mode 100644 index 000000000000..c25203f8b940 --- /dev/null +++ b/drivers/perf/arm_pmuv3.c @@ -0,0 +1,1468 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ARMv8 PMUv3 Performance Events handling code. + * + * Copyright (C) 2012 ARM Limited + * Author: Will Deacon + * + * This code is based heavily on the ARMv7 perf event code. + */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* ARMv8 Cortex-A53 specific event types. */ +#define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 + +/* ARMv8 Cavium ThunderX specific event types. */ +#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST 0xE9 +#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS 0xEA +#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS 0xEB +#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS 0xEC +#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS 0xED + +/* + * ARMv8 Architectural defined events, not all of these may + * be supported on any given implementation. Unsupported events will + * be disabled at run-time based on the PMCEID registers. + */ +static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, +}; + +static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL, + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, + [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, + + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD, + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_RD, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, +}; + +static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREF_LINEFILL, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, +}; + +static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, +}; + +static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, +}; + +static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS, + + [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS, + [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS, + + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, +}; + +static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, + + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, +}; + +static ssize_t +armv8pmu_events_sysfs_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + + return sprintf(page, "event=0x%04llx\n", pmu_attr->id); +} + +#define ARMV8_EVENT_ATTR(name, config) \ + PMU_EVENT_ATTR_ID(name, armv8pmu_events_sysfs_show, config) + +static struct attribute *armv8_pmuv3_event_attrs[] = { + ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR), + ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL), + ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL), + ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE), + ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL), + ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED), + ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED), + ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED), + ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN), + ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN), + ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED), + ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED), + ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED), + ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED), + ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED), + ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED), + ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES), + ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED), + ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS), + ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE), + ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB), + ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE), + ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB), + ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS), + ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR), + ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC), + ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED), + ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES), + /* Don't expose the chain event in /sys, since it's useless in isolation */ + ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED), + ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED), + ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND), + ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND), + ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB), + ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB), + ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE), + ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL), + ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE), + ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB), + ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL), + ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL), + ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB), + ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB), + ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS), + ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE), + ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS), + ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK), + ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK), + ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD), + ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD), + ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD), + ARMV8_EVENT_ATTR(l1d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD), + ARMV8_EVENT_ATTR(op_retired, ARMV8_PMUV3_PERFCTR_OP_RETIRED), + ARMV8_EVENT_ATTR(op_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC), + ARMV8_EVENT_ATTR(stall, ARMV8_PMUV3_PERFCTR_STALL), + ARMV8_EVENT_ATTR(stall_slot_backend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND), + ARMV8_EVENT_ATTR(stall_slot_frontend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND), + ARMV8_EVENT_ATTR(stall_slot, ARMV8_PMUV3_PERFCTR_STALL_SLOT), + ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP), + ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED), + ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE), + ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION), + ARMV8_EVENT_ATTR(cnt_cycles, ARMV8_AMU_PERFCTR_CNT_CYCLES), + ARMV8_EVENT_ATTR(stall_backend_mem, ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM), + ARMV8_EVENT_ATTR(l1i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS), + ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD), + ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS), + ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD), + ARMV8_EVENT_ATTR(trb_wrap, ARMV8_PMUV3_PERFCTR_TRB_WRAP), + ARMV8_EVENT_ATTR(trb_trig, ARMV8_PMUV3_PERFCTR_TRB_TRIG), + ARMV8_EVENT_ATTR(trcextout0, ARMV8_PMUV3_PERFCTR_TRCEXTOUT0), + ARMV8_EVENT_ATTR(trcextout1, ARMV8_PMUV3_PERFCTR_TRCEXTOUT1), + ARMV8_EVENT_ATTR(trcextout2, ARMV8_PMUV3_PERFCTR_TRCEXTOUT2), + ARMV8_EVENT_ATTR(trcextout3, ARMV8_PMUV3_PERFCTR_TRCEXTOUT3), + ARMV8_EVENT_ATTR(cti_trigout4, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4), + ARMV8_EVENT_ATTR(cti_trigout5, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5), + ARMV8_EVENT_ATTR(cti_trigout6, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6), + ARMV8_EVENT_ATTR(cti_trigout7, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7), + ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT), + ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT), + ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT), + ARMV8_EVENT_ATTR(mem_access_checked, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED), + ARMV8_EVENT_ATTR(mem_access_checked_rd, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD), + ARMV8_EVENT_ATTR(mem_access_checked_wr, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR), + NULL, +}; + +static umode_t +armv8pmu_event_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); + + if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS && + test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap)) + return attr->mode; + + if (pmu_attr->id >= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE) { + u64 id = pmu_attr->id - ARMV8_PMUV3_EXT_COMMON_EVENT_BASE; + + if (id < ARMV8_PMUV3_MAX_COMMON_EVENTS && + test_bit(id, cpu_pmu->pmceid_ext_bitmap)) + return attr->mode; + } + + return 0; +} + +static const struct attribute_group armv8_pmuv3_events_attr_group = { + .name = "events", + .attrs = armv8_pmuv3_event_attrs, + .is_visible = armv8pmu_event_attr_is_visible, +}; + +PMU_FORMAT_ATTR(event, "config:0-15"); +PMU_FORMAT_ATTR(long, "config1:0"); +PMU_FORMAT_ATTR(rdpmc, "config1:1"); + +static int sysctl_perf_user_access __read_mostly; + +static inline bool armv8pmu_event_is_64bit(struct perf_event *event) +{ + return event->attr.config1 & 0x1; +} + +static inline bool armv8pmu_event_want_user_access(struct perf_event *event) +{ + return event->attr.config1 & 0x2; +} + +static struct attribute *armv8_pmuv3_format_attrs[] = { + &format_attr_event.attr, + &format_attr_long.attr, + &format_attr_rdpmc.attr, + NULL, +}; + +static const struct attribute_group armv8_pmuv3_format_attr_group = { + .name = "format", + .attrs = armv8_pmuv3_format_attrs, +}; + +static ssize_t slots_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK; + + return sysfs_emit(page, "0x%08x\n", slots); +} + +static DEVICE_ATTR_RO(slots); + +static ssize_t bus_slots_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + u32 bus_slots = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_SLOTS_SHIFT) + & ARMV8_PMU_BUS_SLOTS_MASK; + + return sysfs_emit(page, "0x%08x\n", bus_slots); +} + +static DEVICE_ATTR_RO(bus_slots); + +static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + u32 bus_width = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_WIDTH_SHIFT) + & ARMV8_PMU_BUS_WIDTH_MASK; + u32 val = 0; + + /* Encoded as Log2(number of bytes), plus one */ + if (bus_width > 2 && bus_width < 13) + val = 1 << (bus_width - 1); + + return sysfs_emit(page, "0x%08x\n", val); +} + +static DEVICE_ATTR_RO(bus_width); + +static struct attribute *armv8_pmuv3_caps_attrs[] = { + &dev_attr_slots.attr, + &dev_attr_bus_slots.attr, + &dev_attr_bus_width.attr, + NULL, +}; + +static const struct attribute_group armv8_pmuv3_caps_attr_group = { + .name = "caps", + .attrs = armv8_pmuv3_caps_attrs, +}; + +/* + * Perf Events' indices + */ +#define ARMV8_IDX_CYCLE_COUNTER 0 +#define ARMV8_IDX_COUNTER0 1 +#define ARMV8_IDX_CYCLE_COUNTER_USER 32 + +/* + * We unconditionally enable ARMv8.5-PMU long event counter support + * (64-bit events) where supported. Indicate if this arm_pmu has long + * event counter support. + */ +static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) +{ + return (cpu_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5); +} + +static inline bool armv8pmu_event_has_user_read(struct perf_event *event) +{ + return event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT; +} + +/* + * We must chain two programmable counters for 64 bit events, + * except when we have allocated the 64bit cycle counter (for CPU + * cycles event) or when user space counter access is enabled. + */ +static inline bool armv8pmu_event_is_chained(struct perf_event *event) +{ + int idx = event->hw.idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + + return !armv8pmu_event_has_user_read(event) && + armv8pmu_event_is_64bit(event) && + !armv8pmu_has_long_event(cpu_pmu) && + (idx != ARMV8_IDX_CYCLE_COUNTER); +} + +/* + * ARMv8 low level PMU access + */ + +/* + * Perf Event to low level counters mapping + */ +#define ARMV8_IDX_TO_COUNTER(x) \ + (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) + +/* + * This code is really good + */ + +#define PMEVN_CASE(n, case_macro) \ + case n: case_macro(n); break + +#define PMEVN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + PMEVN_CASE(0, case_macro); \ + PMEVN_CASE(1, case_macro); \ + PMEVN_CASE(2, case_macro); \ + PMEVN_CASE(3, case_macro); \ + PMEVN_CASE(4, case_macro); \ + PMEVN_CASE(5, case_macro); \ + PMEVN_CASE(6, case_macro); \ + PMEVN_CASE(7, case_macro); \ + PMEVN_CASE(8, case_macro); \ + PMEVN_CASE(9, case_macro); \ + PMEVN_CASE(10, case_macro); \ + PMEVN_CASE(11, case_macro); \ + PMEVN_CASE(12, case_macro); \ + PMEVN_CASE(13, case_macro); \ + PMEVN_CASE(14, case_macro); \ + PMEVN_CASE(15, case_macro); \ + PMEVN_CASE(16, case_macro); \ + PMEVN_CASE(17, case_macro); \ + PMEVN_CASE(18, case_macro); \ + PMEVN_CASE(19, case_macro); \ + PMEVN_CASE(20, case_macro); \ + PMEVN_CASE(21, case_macro); \ + PMEVN_CASE(22, case_macro); \ + PMEVN_CASE(23, case_macro); \ + PMEVN_CASE(24, case_macro); \ + PMEVN_CASE(25, case_macro); \ + PMEVN_CASE(26, case_macro); \ + PMEVN_CASE(27, case_macro); \ + PMEVN_CASE(28, case_macro); \ + PMEVN_CASE(29, case_macro); \ + PMEVN_CASE(30, case_macro); \ + default: WARN(1, "Invalid PMEV* index\n"); \ + } \ + } while (0) + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(pmevcntr##n##_el0) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, pmevcntr##n##_el0) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, pmevtyper##n##_el0) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); +} + +static inline u32 armv8pmu_pmcr_read(void) +{ + return read_sysreg(pmcr_el0); +} + +static inline void armv8pmu_pmcr_write(u32 val) +{ + val &= ARMV8_PMU_PMCR_MASK; + isb(); + write_sysreg(val, pmcr_el0); +} + +static inline int armv8pmu_has_overflowed(u32 pmovsr) +{ + return pmovsr & ARMV8_PMU_OVERFLOWED_MASK; +} + +static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) +{ + return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); +} + +static inline u64 armv8pmu_read_evcntr(int idx) +{ + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + + return read_pmevcntrn(counter); +} + +static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) +{ + int idx = event->hw.idx; + u64 val = armv8pmu_read_evcntr(idx); + + if (armv8pmu_event_is_chained(event)) + val = (val << 32) | armv8pmu_read_evcntr(idx - 1); + return val; +} + +/* + * The cycle counter is always a 64-bit counter. When ARMV8_PMU_PMCR_LP + * is set the event counters also become 64-bit counters. Unless the + * user has requested a long counter (attr.config1) then we want to + * interrupt upon 32-bit overflow - we achieve this by applying a bias. + */ +static bool armv8pmu_event_needs_bias(struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (armv8pmu_event_is_64bit(event)) + return false; + + if (armv8pmu_has_long_event(cpu_pmu) || + idx == ARMV8_IDX_CYCLE_COUNTER) + return true; + + return false; +} + +static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value) +{ + if (armv8pmu_event_needs_bias(event)) + value |= GENMASK(63, 32); + + return value; +} + +static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value) +{ + if (armv8pmu_event_needs_bias(event)) + value &= ~GENMASK(63, 32); + + return value; +} + +static u64 armv8pmu_read_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + u64 value; + + if (idx == ARMV8_IDX_CYCLE_COUNTER) + value = read_sysreg(pmccntr_el0); + else + value = armv8pmu_read_hw_counter(event); + + return armv8pmu_unbias_long_counter(event, value); +} + +static inline void armv8pmu_write_evcntr(int idx, u64 value) +{ + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + + write_pmevcntrn(counter, value); +} + +static inline void armv8pmu_write_hw_counter(struct perf_event *event, + u64 value) +{ + int idx = event->hw.idx; + + if (armv8pmu_event_is_chained(event)) { + armv8pmu_write_evcntr(idx, upper_32_bits(value)); + armv8pmu_write_evcntr(idx - 1, lower_32_bits(value)); + } else { + armv8pmu_write_evcntr(idx, value); + } +} + +static void armv8pmu_write_counter(struct perf_event *event, u64 value) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + value = armv8pmu_bias_long_counter(event, value); + + if (idx == ARMV8_IDX_CYCLE_COUNTER) + write_sysreg(value, pmccntr_el0); + else + armv8pmu_write_hw_counter(event, value); +} + +static inline void armv8pmu_write_evtype(int idx, u32 val) +{ + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + + val &= ARMV8_PMU_EVTYPE_MASK; + write_pmevtypern(counter, val); +} + +static inline void armv8pmu_write_event_type(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* + * For chained events, the low counter is programmed to count + * the event of interest and the high counter is programmed + * with CHAIN event code with filters set to count at all ELs. + */ + if (armv8pmu_event_is_chained(event)) { + u32 chain_evt = ARMV8_PMUV3_PERFCTR_CHAIN | + ARMV8_PMU_INCLUDE_EL2; + + armv8pmu_write_evtype(idx - 1, hwc->config_base); + armv8pmu_write_evtype(idx, chain_evt); + } else { + if (idx == ARMV8_IDX_CYCLE_COUNTER) + write_sysreg(hwc->config_base, pmccfiltr_el0); + else + armv8pmu_write_evtype(idx, hwc->config_base); + } +} + +static u32 armv8pmu_event_cnten_mask(struct perf_event *event) +{ + int counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); + u32 mask = BIT(counter); + + if (armv8pmu_event_is_chained(event)) + mask |= BIT(counter - 1); + return mask; +} + +static inline void armv8pmu_enable_counter(u32 mask) +{ + /* + * Make sure event configuration register writes are visible before we + * enable the counter. + * */ + isb(); + write_sysreg(mask, pmcntenset_el0); +} + +static inline void armv8pmu_enable_event_counter(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + u32 mask = armv8pmu_event_cnten_mask(event); + + kvm_set_pmu_events(mask, attr); + + /* We rely on the hypervisor switch code to enable guest counters */ + if (!kvm_pmu_counter_deferred(attr)) + armv8pmu_enable_counter(mask); +} + +static inline void armv8pmu_disable_counter(u32 mask) +{ + write_sysreg(mask, pmcntenclr_el0); + /* + * Make sure the effects of disabling the counter are visible before we + * start configuring the event. + */ + isb(); +} + +static inline void armv8pmu_disable_event_counter(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + u32 mask = armv8pmu_event_cnten_mask(event); + + kvm_clr_pmu_events(mask); + + /* We rely on the hypervisor switch code to disable guest counters */ + if (!kvm_pmu_counter_deferred(attr)) + armv8pmu_disable_counter(mask); +} + +static inline void armv8pmu_enable_intens(u32 mask) +{ + write_sysreg(mask, pmintenset_el1); +} + +static inline void armv8pmu_enable_event_irq(struct perf_event *event) +{ + u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); + armv8pmu_enable_intens(BIT(counter)); +} + +static inline void armv8pmu_disable_intens(u32 mask) +{ + write_sysreg(mask, pmintenclr_el1); + isb(); + /* Clear the overflow flag in case an interrupt is pending. */ + write_sysreg(mask, pmovsclr_el0); + isb(); +} + +static inline void armv8pmu_disable_event_irq(struct perf_event *event) +{ + u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); + armv8pmu_disable_intens(BIT(counter)); +} + +static inline u32 armv8pmu_getreset_flags(void) +{ + u32 value; + + /* Read */ + value = read_sysreg(pmovsclr_el0); + + /* Write to clear flags */ + value &= ARMV8_PMU_OVSR_MASK; + write_sysreg(value, pmovsclr_el0); + + return value; +} + +static void armv8pmu_disable_user_access(void) +{ + write_sysreg(0, pmuserenr_el0); +} + +static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) +{ + int i; + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); + + /* Clear any unused counters to avoid leaking their contents */ + for_each_clear_bit(i, cpuc->used_mask, cpu_pmu->num_events) { + if (i == ARMV8_IDX_CYCLE_COUNTER) + write_sysreg(0, pmccntr_el0); + else + armv8pmu_write_evcntr(i, 0); + } + + write_sysreg(0, pmuserenr_el0); + write_sysreg(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR, pmuserenr_el0); +} + +static void armv8pmu_enable_event(struct perf_event *event) +{ + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + + /* + * Disable counter + */ + armv8pmu_disable_event_counter(event); + + /* + * Set event. + */ + armv8pmu_write_event_type(event); + + /* + * Enable interrupt for this counter + */ + armv8pmu_enable_event_irq(event); + + /* + * Enable counter + */ + armv8pmu_enable_event_counter(event); +} + +static void armv8pmu_disable_event(struct perf_event *event) +{ + /* + * Disable counter + */ + armv8pmu_disable_event_counter(event); + + /* + * Disable interrupt for this counter + */ + armv8pmu_disable_event_irq(event); +} + +static void armv8pmu_start(struct arm_pmu *cpu_pmu) +{ + struct perf_event_context *ctx; + int nr_user = 0; + + ctx = perf_cpu_task_ctx(); + if (ctx) + nr_user = ctx->nr_user; + + if (sysctl_perf_user_access && nr_user) + armv8pmu_enable_user_access(cpu_pmu); + else + armv8pmu_disable_user_access(); + + /* Enable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); +} + +static void armv8pmu_stop(struct arm_pmu *cpu_pmu) +{ + /* Disable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); +} + +static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) +{ + u32 pmovsr; + struct perf_sample_data data; + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); + struct pt_regs *regs; + int idx; + + /* + * Get and reset the IRQ flags + */ + pmovsr = armv8pmu_getreset_flags(); + + /* + * Did an overflow occur? + */ + if (!armv8pmu_has_overflowed(pmovsr)) + return IRQ_NONE; + + /* + * Handle the counter(s) overflow(s) + */ + regs = get_irq_regs(); + + /* + * Stop the PMU while processing the counter overflows + * to prevent skews in group events. + */ + armv8pmu_stop(cpu_pmu); + for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + /* Ignore if we don't have an event. */ + if (!event) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv8pmu_counter_has_overflowed(pmovsr, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!armpmu_event_set_period(event)) + continue; + + /* + * Perf event overflow will queue the processing of the event as + * an irq_work which will be taken care of in the handling of + * IPI_IRQ_WORK. + */ + if (perf_event_overflow(event, &data, regs)) + cpu_pmu->disable(event); + } + armv8pmu_start(cpu_pmu); + + return IRQ_HANDLED; +} + +static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, + struct arm_pmu *cpu_pmu) +{ + int idx; + + for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + return -EAGAIN; +} + +static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, + struct arm_pmu *cpu_pmu) +{ + int idx; + + /* + * Chaining requires two consecutive event counters, where + * the lower idx must be even. + */ + for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) { + if (!test_and_set_bit(idx, cpuc->used_mask)) { + /* Check if the preceding even counter is available */ + if (!test_and_set_bit(idx - 1, cpuc->used_mask)) + return idx; + /* Release the Odd counter */ + clear_bit(idx, cpuc->used_mask); + } + } + return -EAGAIN; +} + +static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; + + /* Always prefer to place a cycle counter into the cycle counter. */ + if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { + if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) + return ARMV8_IDX_CYCLE_COUNTER; + else if (armv8pmu_event_is_64bit(event) && + armv8pmu_event_want_user_access(event) && + !armv8pmu_has_long_event(cpu_pmu)) + return -EAGAIN; + } + + /* + * Otherwise use events counters + */ + if (armv8pmu_event_is_chained(event)) + return armv8pmu_get_chain_idx(cpuc, cpu_pmu); + else + return armv8pmu_get_single_idx(cpuc, cpu_pmu); +} + +static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx = event->hw.idx; + + clear_bit(idx, cpuc->used_mask); + if (armv8pmu_event_is_chained(event)) + clear_bit(idx - 1, cpuc->used_mask); +} + +static int armv8pmu_user_event_idx(struct perf_event *event) +{ + if (!sysctl_perf_user_access || !armv8pmu_event_has_user_read(event)) + return 0; + + /* + * We remap the cycle counter index to 32 to + * match the offset applied to the rest of + * the counter indices. + */ + if (event->hw.idx == ARMV8_IDX_CYCLE_COUNTER) + return ARMV8_IDX_CYCLE_COUNTER_USER; + + return event->hw.idx; +} + +/* + * Add an event filter to a given event. + */ +static int armv8pmu_set_event_filter(struct hw_perf_event *event, + struct perf_event_attr *attr) +{ + unsigned long config_base = 0; + + if (attr->exclude_idle) + return -EPERM; + + /* + * If we're running in hyp mode, then we *are* the hypervisor. + * Therefore we ignore exclude_hv in this configuration, since + * there's no hypervisor to sample anyway. This is consistent + * with other architectures (x86 and Power). + */ + if (is_kernel_in_hyp_mode()) { + if (!attr->exclude_kernel && !attr->exclude_host) + config_base |= ARMV8_PMU_INCLUDE_EL2; + if (attr->exclude_guest) + config_base |= ARMV8_PMU_EXCLUDE_EL1; + if (attr->exclude_host) + config_base |= ARMV8_PMU_EXCLUDE_EL0; + } else { + if (!attr->exclude_hv && !attr->exclude_host) + config_base |= ARMV8_PMU_INCLUDE_EL2; + } + + /* + * Filter out !VHE kernels and guest kernels + */ + if (attr->exclude_kernel) + config_base |= ARMV8_PMU_EXCLUDE_EL1; + + if (attr->exclude_user) + config_base |= ARMV8_PMU_EXCLUDE_EL0; + + /* + * Install the filter into config_base as this is used to + * construct the event type. + */ + event->config_base = config_base; + + return 0; +} + +static void armv8pmu_reset(void *info) +{ + struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; + u32 pmcr; + + /* The counter and interrupt enable registers are unknown at reset. */ + armv8pmu_disable_counter(U32_MAX); + armv8pmu_disable_intens(U32_MAX); + + /* Clear the counters we flip at guest entry/exit */ + kvm_clr_pmu_events(U32_MAX); + + /* + * Initialize & Reset PMNC. Request overflow interrupt for + * 64 bit cycle counter but cheat in armv8pmu_write_counter(). + */ + pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC; + + /* Enable long event counter support where available */ + if (armv8pmu_has_long_event(cpu_pmu)) + pmcr |= ARMV8_PMU_PMCR_LP; + + armv8pmu_pmcr_write(pmcr); +} + +static int __armv8_pmuv3_map_event(struct perf_event *event, + const unsigned (*extra_event_map) + [PERF_COUNT_HW_MAX], + const unsigned (*extra_cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]) +{ + int hw_event_id; + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + + hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map, + &armv8_pmuv3_perf_cache_map, + ARMV8_PMU_EVTYPE_EVENT); + + /* + * CHAIN events only work when paired with an adjacent counter, and it + * never makes sense for a user to open one in isolation, as they'll be + * rotated arbitrarily. + */ + if (hw_event_id == ARMV8_PMUV3_PERFCTR_CHAIN) + return -EINVAL; + + if (armv8pmu_event_is_64bit(event)) + event->hw.flags |= ARMPMU_EVT_64BIT; + + /* + * User events must be allocated into a single counter, and so + * must not be chained. + * + * Most 64-bit events require long counter support, but 64-bit + * CPU_CYCLES events can be placed into the dedicated cycle + * counter when this is free. + */ + if (armv8pmu_event_want_user_access(event)) { + if (!(event->attach_state & PERF_ATTACH_TASK)) + return -EINVAL; + if (armv8pmu_event_is_64bit(event) && + (hw_event_id != ARMV8_PMUV3_PERFCTR_CPU_CYCLES) && + !armv8pmu_has_long_event(armpmu)) + return -EOPNOTSUPP; + + event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT; + } + + /* Only expose micro/arch events supported by this PMU */ + if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) + && test_bit(hw_event_id, armpmu->pmceid_bitmap)) { + return hw_event_id; + } + + return armpmu_map_event(event, extra_event_map, extra_cache_map, + ARMV8_PMU_EVTYPE_EVENT); +} + +static int armv8_pmuv3_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, NULL); +} + +static int armv8_a53_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, &armv8_a53_perf_cache_map); +} + +static int armv8_a57_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, &armv8_a57_perf_cache_map); +} + +static int armv8_a73_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, &armv8_a73_perf_cache_map); +} + +static int armv8_thunder_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, + &armv8_thunder_perf_cache_map); +} + +static int armv8_vulcan_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, + &armv8_vulcan_perf_cache_map); +} + +struct armv8pmu_probe_info { + struct arm_pmu *pmu; + bool present; +}; + +static void __armv8pmu_probe_pmu(void *info) +{ + struct armv8pmu_probe_info *probe = info; + struct arm_pmu *cpu_pmu = probe->pmu; + u64 dfr0; + u64 pmceid_raw[2]; + u32 pmceid[2]; + int pmuver; + + dfr0 = read_sysreg(id_aa64dfr0_el1); + pmuver = cpuid_feature_extract_unsigned_field(dfr0, + ID_AA64DFR0_EL1_PMUVer_SHIFT); + if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || + pmuver == ID_AA64DFR0_EL1_PMUVer_NI) + return; + + cpu_pmu->pmuver = pmuver; + probe->present = true; + + /* Read the nb of CNTx counters supported from PMNC */ + cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT) + & ARMV8_PMU_PMCR_N_MASK; + + /* Add the CPU cycles counter */ + cpu_pmu->num_events += 1; + + pmceid[0] = pmceid_raw[0] = read_sysreg(pmceid0_el0); + pmceid[1] = pmceid_raw[1] = read_sysreg(pmceid1_el0); + + bitmap_from_arr32(cpu_pmu->pmceid_bitmap, + pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); + + pmceid[0] = pmceid_raw[0] >> 32; + pmceid[1] = pmceid_raw[1] >> 32; + + bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap, + pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); + + /* store PMMIR_EL1 register for sysfs */ + if (pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4 && (pmceid_raw[1] & BIT(31))) + cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1); + else + cpu_pmu->reg_pmmir = 0; +} + +static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) +{ + struct armv8pmu_probe_info probe = { + .pmu = cpu_pmu, + .present = false, + }; + int ret; + + ret = smp_call_function_any(&cpu_pmu->supported_cpus, + __armv8pmu_probe_pmu, + &probe, 1); + if (ret) + return ret; + + return probe.present ? 0 : -ENODEV; +} + +static void armv8pmu_disable_user_access_ipi(void *unused) +{ + armv8pmu_disable_user_access(); +} + +static int armv8pmu_proc_user_access_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write || sysctl_perf_user_access) + return ret; + + on_each_cpu(armv8pmu_disable_user_access_ipi, NULL, 1); + return 0; +} + +static struct ctl_table armv8_pmu_sysctl_table[] = { + { + .procname = "perf_user_access", + .data = &sysctl_perf_user_access, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = armv8pmu_proc_user_access_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; + +static void armv8_pmu_register_sysctl_table(void) +{ + static u32 tbl_registered = 0; + + if (!cmpxchg_relaxed(&tbl_registered, 0, 1)) + register_sysctl("kernel", armv8_pmu_sysctl_table); +} + +static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, + int (*map_event)(struct perf_event *event), + const struct attribute_group *events, + const struct attribute_group *format, + const struct attribute_group *caps) +{ + int ret = armv8pmu_probe_pmu(cpu_pmu); + if (ret) + return ret; + + cpu_pmu->handle_irq = armv8pmu_handle_irq; + cpu_pmu->enable = armv8pmu_enable_event; + cpu_pmu->disable = armv8pmu_disable_event; + cpu_pmu->read_counter = armv8pmu_read_counter; + cpu_pmu->write_counter = armv8pmu_write_counter; + cpu_pmu->get_event_idx = armv8pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv8pmu_clear_event_idx; + cpu_pmu->start = armv8pmu_start; + cpu_pmu->stop = armv8pmu_stop; + cpu_pmu->reset = armv8pmu_reset; + cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + + cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx; + + cpu_pmu->name = name; + cpu_pmu->map_event = map_event; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = events ? + events : &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ? + format : &armv8_pmuv3_format_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = caps ? + caps : &armv8_pmuv3_caps_attr_group; + + armv8_pmu_register_sysctl_table(); + return 0; +} + +static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name, + int (*map_event)(struct perf_event *event)) +{ + return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL); +} + +#define PMUV3_INIT_SIMPLE(name) \ +static int name##_pmu_init(struct arm_pmu *cpu_pmu) \ +{ \ + return armv8_pmu_init_nogroups(cpu_pmu, #name, armv8_pmuv3_map_event);\ +} + +PMUV3_INIT_SIMPLE(armv8_pmuv3) + +PMUV3_INIT_SIMPLE(armv8_cortex_a34) +PMUV3_INIT_SIMPLE(armv8_cortex_a55) +PMUV3_INIT_SIMPLE(armv8_cortex_a65) +PMUV3_INIT_SIMPLE(armv8_cortex_a75) +PMUV3_INIT_SIMPLE(armv8_cortex_a76) +PMUV3_INIT_SIMPLE(armv8_cortex_a77) +PMUV3_INIT_SIMPLE(armv8_cortex_a78) +PMUV3_INIT_SIMPLE(armv9_cortex_a510) +PMUV3_INIT_SIMPLE(armv9_cortex_a710) +PMUV3_INIT_SIMPLE(armv8_cortex_x1) +PMUV3_INIT_SIMPLE(armv9_cortex_x2) +PMUV3_INIT_SIMPLE(armv8_neoverse_e1) +PMUV3_INIT_SIMPLE(armv8_neoverse_n1) +PMUV3_INIT_SIMPLE(armv9_neoverse_n2) +PMUV3_INIT_SIMPLE(armv8_neoverse_v1) + +PMUV3_INIT_SIMPLE(armv8_nvidia_carmel) +PMUV3_INIT_SIMPLE(armv8_nvidia_denver) + +static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a35", + armv8_a53_map_event); +} + +static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a53", + armv8_a53_map_event); +} + +static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57", + armv8_a57_map_event); +} + +static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72", + armv8_a57_map_event); +} + +static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a73", + armv8_a73_map_event); +} + +static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder", + armv8_thunder_map_event); +} + +static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_brcm_vulcan", + armv8_vulcan_map_event); +} + +static const struct of_device_id armv8_pmu_of_device_ids[] = { + {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_pmu_init}, + {.compatible = "arm,cortex-a34-pmu", .data = armv8_cortex_a34_pmu_init}, + {.compatible = "arm,cortex-a35-pmu", .data = armv8_a35_pmu_init}, + {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, + {.compatible = "arm,cortex-a55-pmu", .data = armv8_cortex_a55_pmu_init}, + {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, + {.compatible = "arm,cortex-a65-pmu", .data = armv8_cortex_a65_pmu_init}, + {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init}, + {.compatible = "arm,cortex-a73-pmu", .data = armv8_a73_pmu_init}, + {.compatible = "arm,cortex-a75-pmu", .data = armv8_cortex_a75_pmu_init}, + {.compatible = "arm,cortex-a76-pmu", .data = armv8_cortex_a76_pmu_init}, + {.compatible = "arm,cortex-a77-pmu", .data = armv8_cortex_a77_pmu_init}, + {.compatible = "arm,cortex-a78-pmu", .data = armv8_cortex_a78_pmu_init}, + {.compatible = "arm,cortex-a510-pmu", .data = armv9_cortex_a510_pmu_init}, + {.compatible = "arm,cortex-a710-pmu", .data = armv9_cortex_a710_pmu_init}, + {.compatible = "arm,cortex-x1-pmu", .data = armv8_cortex_x1_pmu_init}, + {.compatible = "arm,cortex-x2-pmu", .data = armv9_cortex_x2_pmu_init}, + {.compatible = "arm,neoverse-e1-pmu", .data = armv8_neoverse_e1_pmu_init}, + {.compatible = "arm,neoverse-n1-pmu", .data = armv8_neoverse_n1_pmu_init}, + {.compatible = "arm,neoverse-n2-pmu", .data = armv9_neoverse_n2_pmu_init}, + {.compatible = "arm,neoverse-v1-pmu", .data = armv8_neoverse_v1_pmu_init}, + {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init}, + {.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init}, + {.compatible = "nvidia,carmel-pmu", .data = armv8_nvidia_carmel_pmu_init}, + {.compatible = "nvidia,denver-pmu", .data = armv8_nvidia_denver_pmu_init}, + {}, +}; + +static int armv8_pmu_device_probe(struct platform_device *pdev) +{ + return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL); +} + +static struct platform_driver armv8_pmu_driver = { + .driver = { + .name = ARMV8_PMU_PDEV_NAME, + .of_match_table = armv8_pmu_of_device_ids, + .suppress_bind_attrs = true, + }, + .probe = armv8_pmu_device_probe, +}; + +static int __init armv8_pmu_driver_init(void) +{ + if (acpi_disabled) + return platform_driver_register(&armv8_pmu_driver); + else + return arm_pmu_acpi_probe(armv8_pmuv3_pmu_init); +} +device_initcall(armv8_pmu_driver_init) + +void arch_perf_update_userpage(struct perf_event *event, + struct perf_event_mmap_page *userpg, u64 now) +{ + struct clock_read_data *rd; + unsigned int seq; + u64 ns; + + userpg->cap_user_time = 0; + userpg->cap_user_time_zero = 0; + userpg->cap_user_time_short = 0; + userpg->cap_user_rdpmc = armv8pmu_event_has_user_read(event); + + if (userpg->cap_user_rdpmc) { + if (event->hw.flags & ARMPMU_EVT_64BIT) + userpg->pmc_width = 64; + else + userpg->pmc_width = 32; + } + + do { + rd = sched_clock_read_begin(&seq); + + if (rd->read_sched_clock != arch_timer_read_counter) + return; + + userpg->time_mult = rd->mult; + userpg->time_shift = rd->shift; + userpg->time_zero = rd->epoch_ns; + userpg->time_cycles = rd->epoch_cyc; + userpg->time_mask = rd->sched_clock_mask; + + /* + * Subtract the cycle base, such that software that + * doesn't know about cap_user_time_short still 'works' + * assuming no wraps. + */ + ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift); + userpg->time_zero -= ns; + + } while (sched_clock_read_retry(seq)); + + userpg->time_offset = userpg->time_zero - now; + + /* + * time_shift is not expected to be greater than 31 due to + * the original published conversion algorithm shifting a + * 32-bit value (now specifies a 64-bit value) - refer + * perf_event_mmap_page documentation in perf_event.h. + */ + if (userpg->time_shift == 32) { + userpg->time_shift = 31; + userpg->time_mult >>= 1; + } + + /* + * Internal timekeeping for enabled/running/stopped times + * is always computed with the sched_clock. + */ + userpg->cap_user_time = 1; + userpg->cap_user_time_zero = 1; + userpg->cap_user_time_short = 1; +} diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 628775334d5e..1a6a695ca67a 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -8,7 +8,7 @@ #define __ASM_ARM_KVM_PMU_H #include -#include +#include #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h new file mode 100644 index 000000000000..53173abfc022 --- /dev/null +++ b/include/linux/perf/arm_pmuv3.h @@ -0,0 +1,258 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Ltd. + */ + +#ifndef __PERF_ARM_PMUV3_H +#define __PERF_ARM_PMUV3_H + +#define ARMV8_PMU_MAX_COUNTERS 32 +#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) + +/* + * Common architectural and microarchitectural event numbers. + */ +#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x0000 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x0001 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x0002 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x0003 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x0004 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x0005 +#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x0006 +#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x0007 +#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x0008 +#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x0009 +#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x000A +#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x000B +#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x000C +#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x000D +#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x000E +#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x000F +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x0010 +#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x0011 +#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x0012 +#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x0013 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x0014 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x0015 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x0016 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x0017 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x0018 +#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x0019 +#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x001A +#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x001B +#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x001C +#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x001D +#define ARMV8_PMUV3_PERFCTR_CHAIN 0x001E +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x001F +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x0020 +#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x0021 +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x0022 +#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x0023 +#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x0024 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x0025 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x0026 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x0027 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x0028 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x0029 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x002A +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x002B +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x002C +#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x002D +#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x002E +#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x002F +#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x0030 +#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x0031 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x0032 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x0033 +#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x0034 +#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x0035 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x0036 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x0037 +#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x0038 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x0039 +#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x003A +#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x003B +#define ARMV8_PMUV3_PERFCTR_STALL 0x003C +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x003D +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x003E +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x003F + +/* Statistical profiling extension microarchitectural events */ +#define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000 +#define ARMV8_SPE_PERFCTR_SAMPLE_FEED 0x4001 +#define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002 +#define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003 + +/* AMUv1 architecture events */ +#define ARMV8_AMU_PERFCTR_CNT_CYCLES 0x4004 +#define ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM 0x4005 + +/* long-latency read miss events */ +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS 0x4006 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD 0x4009 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B + +/* Trace buffer events */ +#define ARMV8_PMUV3_PERFCTR_TRB_WRAP 0x400C +#define ARMV8_PMUV3_PERFCTR_TRB_TRIG 0x400E + +/* Trace unit events */ +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT0 0x4010 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT1 0x4011 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT2 0x4012 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT3 0x4013 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4 0x4018 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5 0x4019 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6 0x401A +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7 0x401B + +/* additional latency from alignment events */ +#define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020 +#define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021 +#define ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT 0x4022 + +/* Armv8.5 Memory Tagging Extension events */ +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED 0x4024 +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD 0x4025 +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026 + +/* ARMv8 recommended implementation defined event types */ +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x0040 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x0042 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x0044 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x0045 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x0046 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x0047 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x0048 + +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x0050 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x0051 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x0052 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x0053 + +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x0056 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x0057 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x0058 + +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x005C +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x005D +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x005E +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x005F +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x0062 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x0063 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x0064 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x0065 +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x0066 +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x0067 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x0068 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x0069 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x006A + +#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x006C +#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x006D +#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x006E +#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x006F +#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x0070 +#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x0071 +#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x0072 +#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x0073 +#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x0074 +#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x0075 +#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x0076 +#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x0077 +#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x0078 +#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x0079 +#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x007A + +#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x007C +#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x007D +#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x007E + +#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x0081 +#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x0082 +#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x0083 +#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x0084 + +#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x0086 +#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x0087 +#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x0088 + +#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x008A +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x008B +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x008C +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x008D +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x008E +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x008F +#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x0090 +#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x0091 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0x00A0 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0x00A1 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0x00A2 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0x00A3 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0x00A6 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0x00A7 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0x00A8 + +/* + * Per-CPU PMCR: config reg + */ +#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */ +#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */ +#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */ +#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */ +#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ +#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ +#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ +#define ARMV8_PMU_PMCR_N_MASK 0x1f +#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ + +/* + * PMOVSR: counters overflow flag status reg + */ +#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK + +/* + * PMXEVTYPER: Event selection reg + */ +#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ +#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ + +/* + * Event filters for PMUv3 + */ +#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) +#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) +#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) + +/* + * PMUSERENR: user enable reg + */ +#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */ +#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */ +#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ +#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ +#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ + +/* PMMIR_EL1.SLOTS mask */ +#define ARMV8_PMU_SLOTS_MASK 0xff + +#define ARMV8_PMU_BUS_SLOTS_SHIFT 8 +#define ARMV8_PMU_BUS_SLOTS_MASK 0xff +#define ARMV8_PMU_BUS_WIDTH_SHIFT 16 +#define ARMV8_PMU_BUS_WIDTH_MASK 0xf + +#endif -- cgit v1.2.3 From df29ddf4f04b00cf60669686dc7f534c3ed7c433 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Mar 2023 15:50:21 -0400 Subject: arm64: perf: Abstract system register accesses away As we want to enable 32bit support, we need to distanciate the PMUv3 driver from the AArch64 system register names. This patch moves all system register accesses to an architecture specific include file, allowing the 32bit counterpart to be slotted in at a later time. Signed-off-by: Marc Zyngier Co-developed-by: Zaid Al-Bassam Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-3-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/arm_pmuv3.h | 137 +++++++++++++++++++++++++++++++++++++ drivers/perf/arm_pmuv3.c | 115 +++++++------------------------ include/linux/perf/arm_pmuv3.h | 45 ++++++++++++ 3 files changed, 205 insertions(+), 92 deletions(-) create mode 100644 arch/arm64/include/asm/arm_pmuv3.h diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h new file mode 100644 index 000000000000..c444cbfb3acd --- /dev/null +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Ltd. + */ + +#ifndef __ASM_PMUV3_H +#define __ASM_PMUV3_H + +#include +#include + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(pmevcntr##n##_el0) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, pmevcntr##n##_el0) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, pmevtyper##n##_el0) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); +} + +static inline unsigned long read_pmmir(void) +{ + return read_cpuid(PMMIR_EL1); +} + +static inline u32 read_pmuver(void) +{ + u64 dfr0 = read_sysreg(id_aa64dfr0_el1); + + return cpuid_feature_extract_unsigned_field(dfr0, + ID_AA64DFR0_EL1_PMUVer_SHIFT); +} + +static inline void write_pmcr(u32 val) +{ + write_sysreg(val, pmcr_el0); +} + +static inline u32 read_pmcr(void) +{ + return read_sysreg(pmcr_el0); +} + +static inline void write_pmselr(u32 val) +{ + write_sysreg(val, pmselr_el0); +} + +static inline void write_pmccntr(u64 val) +{ + write_sysreg(val, pmccntr_el0); +} + +static inline u64 read_pmccntr(void) +{ + return read_sysreg(pmccntr_el0); +} + +static inline void write_pmxevcntr(u32 val) +{ + write_sysreg(val, pmxevcntr_el0); +} + +static inline u32 read_pmxevcntr(void) +{ + return read_sysreg(pmxevcntr_el0); +} + +static inline void write_pmxevtyper(u32 val) +{ + write_sysreg(val, pmxevtyper_el0); +} + +static inline void write_pmcntenset(u32 val) +{ + write_sysreg(val, pmcntenset_el0); +} + +static inline void write_pmcntenclr(u32 val) +{ + write_sysreg(val, pmcntenclr_el0); +} + +static inline void write_pmintenset(u32 val) +{ + write_sysreg(val, pmintenset_el1); +} + +static inline void write_pmintenclr(u32 val) +{ + write_sysreg(val, pmintenclr_el1); +} + +static inline void write_pmccfiltr(u32 val) +{ + write_sysreg(val, pmccfiltr_el0); +} + +static inline void write_pmovsclr(u32 val) +{ + write_sysreg(val, pmovsclr_el0); +} + +static inline u32 read_pmovsclr(void) +{ + return read_sysreg(pmovsclr_el0); +} + +static inline void write_pmuserenr(u32 val) +{ + write_sysreg(val, pmuserenr_el0); +} + +static inline u32 read_pmceid0(void) +{ + return read_sysreg(pmceid0_el0); +} + +static inline u32 read_pmceid1(void) +{ + return read_sysreg(pmceid1_el0); +} + +#endif diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index c25203f8b940..f783f068d612 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -10,7 +10,6 @@ #include #include -#include #include #include @@ -25,6 +24,8 @@ #include #include +#include + /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 @@ -425,83 +426,16 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event) #define ARMV8_IDX_TO_COUNTER(x) \ (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) -/* - * This code is really good - */ - -#define PMEVN_CASE(n, case_macro) \ - case n: case_macro(n); break - -#define PMEVN_SWITCH(x, case_macro) \ - do { \ - switch (x) { \ - PMEVN_CASE(0, case_macro); \ - PMEVN_CASE(1, case_macro); \ - PMEVN_CASE(2, case_macro); \ - PMEVN_CASE(3, case_macro); \ - PMEVN_CASE(4, case_macro); \ - PMEVN_CASE(5, case_macro); \ - PMEVN_CASE(6, case_macro); \ - PMEVN_CASE(7, case_macro); \ - PMEVN_CASE(8, case_macro); \ - PMEVN_CASE(9, case_macro); \ - PMEVN_CASE(10, case_macro); \ - PMEVN_CASE(11, case_macro); \ - PMEVN_CASE(12, case_macro); \ - PMEVN_CASE(13, case_macro); \ - PMEVN_CASE(14, case_macro); \ - PMEVN_CASE(15, case_macro); \ - PMEVN_CASE(16, case_macro); \ - PMEVN_CASE(17, case_macro); \ - PMEVN_CASE(18, case_macro); \ - PMEVN_CASE(19, case_macro); \ - PMEVN_CASE(20, case_macro); \ - PMEVN_CASE(21, case_macro); \ - PMEVN_CASE(22, case_macro); \ - PMEVN_CASE(23, case_macro); \ - PMEVN_CASE(24, case_macro); \ - PMEVN_CASE(25, case_macro); \ - PMEVN_CASE(26, case_macro); \ - PMEVN_CASE(27, case_macro); \ - PMEVN_CASE(28, case_macro); \ - PMEVN_CASE(29, case_macro); \ - PMEVN_CASE(30, case_macro); \ - default: WARN(1, "Invalid PMEV* index\n"); \ - } \ - } while (0) - -#define RETURN_READ_PMEVCNTRN(n) \ - return read_sysreg(pmevcntr##n##_el0) -static unsigned long read_pmevcntrn(int n) -{ - PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); - return 0; -} - -#define WRITE_PMEVCNTRN(n) \ - write_sysreg(val, pmevcntr##n##_el0) -static void write_pmevcntrn(int n, unsigned long val) -{ - PMEVN_SWITCH(n, WRITE_PMEVCNTRN); -} - -#define WRITE_PMEVTYPERN(n) \ - write_sysreg(val, pmevtyper##n##_el0) -static void write_pmevtypern(int n, unsigned long val) -{ - PMEVN_SWITCH(n, WRITE_PMEVTYPERN); -} - static inline u32 armv8pmu_pmcr_read(void) { - return read_sysreg(pmcr_el0); + return read_pmcr(); } static inline void armv8pmu_pmcr_write(u32 val) { val &= ARMV8_PMU_PMCR_MASK; isb(); - write_sysreg(val, pmcr_el0); + write_pmcr(val); } static inline int armv8pmu_has_overflowed(u32 pmovsr) @@ -576,7 +510,7 @@ static u64 armv8pmu_read_counter(struct perf_event *event) u64 value; if (idx == ARMV8_IDX_CYCLE_COUNTER) - value = read_sysreg(pmccntr_el0); + value = read_pmccntr(); else value = armv8pmu_read_hw_counter(event); @@ -611,7 +545,7 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value) value = armv8pmu_bias_long_counter(event, value); if (idx == ARMV8_IDX_CYCLE_COUNTER) - write_sysreg(value, pmccntr_el0); + write_pmccntr(value); else armv8pmu_write_hw_counter(event, value); } @@ -642,7 +576,7 @@ static inline void armv8pmu_write_event_type(struct perf_event *event) armv8pmu_write_evtype(idx, chain_evt); } else { if (idx == ARMV8_IDX_CYCLE_COUNTER) - write_sysreg(hwc->config_base, pmccfiltr_el0); + write_pmccfiltr(hwc->config_base); else armv8pmu_write_evtype(idx, hwc->config_base); } @@ -665,7 +599,7 @@ static inline void armv8pmu_enable_counter(u32 mask) * enable the counter. * */ isb(); - write_sysreg(mask, pmcntenset_el0); + write_pmcntenset(mask); } static inline void armv8pmu_enable_event_counter(struct perf_event *event) @@ -682,7 +616,7 @@ static inline void armv8pmu_enable_event_counter(struct perf_event *event) static inline void armv8pmu_disable_counter(u32 mask) { - write_sysreg(mask, pmcntenclr_el0); + write_pmcntenclr(mask); /* * Make sure the effects of disabling the counter are visible before we * start configuring the event. @@ -704,7 +638,7 @@ static inline void armv8pmu_disable_event_counter(struct perf_event *event) static inline void armv8pmu_enable_intens(u32 mask) { - write_sysreg(mask, pmintenset_el1); + write_pmintenset(mask); } static inline void armv8pmu_enable_event_irq(struct perf_event *event) @@ -715,10 +649,10 @@ static inline void armv8pmu_enable_event_irq(struct perf_event *event) static inline void armv8pmu_disable_intens(u32 mask) { - write_sysreg(mask, pmintenclr_el1); + write_pmintenclr(mask); isb(); /* Clear the overflow flag in case an interrupt is pending. */ - write_sysreg(mask, pmovsclr_el0); + write_pmovsclr(mask); isb(); } @@ -733,18 +667,18 @@ static inline u32 armv8pmu_getreset_flags(void) u32 value; /* Read */ - value = read_sysreg(pmovsclr_el0); + value = read_pmovsclr(); /* Write to clear flags */ value &= ARMV8_PMU_OVSR_MASK; - write_sysreg(value, pmovsclr_el0); + write_pmovsclr(value); return value; } static void armv8pmu_disable_user_access(void) { - write_sysreg(0, pmuserenr_el0); + write_pmuserenr(0); } static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) @@ -755,13 +689,13 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) /* Clear any unused counters to avoid leaking their contents */ for_each_clear_bit(i, cpuc->used_mask, cpu_pmu->num_events) { if (i == ARMV8_IDX_CYCLE_COUNTER) - write_sysreg(0, pmccntr_el0); + write_pmccntr(0); else armv8pmu_write_evcntr(i, 0); } - write_sysreg(0, pmuserenr_el0); - write_sysreg(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR, pmuserenr_el0); + write_pmuserenr(0); + write_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR); } static void armv8pmu_enable_event(struct perf_event *event) @@ -1145,14 +1079,11 @@ static void __armv8pmu_probe_pmu(void *info) { struct armv8pmu_probe_info *probe = info; struct arm_pmu *cpu_pmu = probe->pmu; - u64 dfr0; u64 pmceid_raw[2]; u32 pmceid[2]; int pmuver; - dfr0 = read_sysreg(id_aa64dfr0_el1); - pmuver = cpuid_feature_extract_unsigned_field(dfr0, - ID_AA64DFR0_EL1_PMUVer_SHIFT); + pmuver = read_pmuver(); if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || pmuver == ID_AA64DFR0_EL1_PMUVer_NI) return; @@ -1167,8 +1098,8 @@ static void __armv8pmu_probe_pmu(void *info) /* Add the CPU cycles counter */ cpu_pmu->num_events += 1; - pmceid[0] = pmceid_raw[0] = read_sysreg(pmceid0_el0); - pmceid[1] = pmceid_raw[1] = read_sysreg(pmceid1_el0); + pmceid[0] = pmceid_raw[0] = read_pmceid0(); + pmceid[1] = pmceid_raw[1] = read_pmceid1(); bitmap_from_arr32(cpu_pmu->pmceid_bitmap, pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); @@ -1179,9 +1110,9 @@ static void __armv8pmu_probe_pmu(void *info) bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap, pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); - /* store PMMIR_EL1 register for sysfs */ + /* store PMMIR register for sysfs */ if (pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4 && (pmceid_raw[1] & BIT(31))) - cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1); + cpu_pmu->reg_pmmir = read_pmmir(); else cpu_pmu->reg_pmmir = 0; } diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 53173abfc022..e3899bd77f5c 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -255,4 +255,49 @@ #define ARMV8_PMU_BUS_WIDTH_SHIFT 16 #define ARMV8_PMU_BUS_WIDTH_MASK 0xf +/* + * This code is really good + */ + +#define PMEVN_CASE(n, case_macro) \ + case n: case_macro(n); break + +#define PMEVN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + PMEVN_CASE(0, case_macro); \ + PMEVN_CASE(1, case_macro); \ + PMEVN_CASE(2, case_macro); \ + PMEVN_CASE(3, case_macro); \ + PMEVN_CASE(4, case_macro); \ + PMEVN_CASE(5, case_macro); \ + PMEVN_CASE(6, case_macro); \ + PMEVN_CASE(7, case_macro); \ + PMEVN_CASE(8, case_macro); \ + PMEVN_CASE(9, case_macro); \ + PMEVN_CASE(10, case_macro); \ + PMEVN_CASE(11, case_macro); \ + PMEVN_CASE(12, case_macro); \ + PMEVN_CASE(13, case_macro); \ + PMEVN_CASE(14, case_macro); \ + PMEVN_CASE(15, case_macro); \ + PMEVN_CASE(16, case_macro); \ + PMEVN_CASE(17, case_macro); \ + PMEVN_CASE(18, case_macro); \ + PMEVN_CASE(19, case_macro); \ + PMEVN_CASE(20, case_macro); \ + PMEVN_CASE(21, case_macro); \ + PMEVN_CASE(22, case_macro); \ + PMEVN_CASE(23, case_macro); \ + PMEVN_CASE(24, case_macro); \ + PMEVN_CASE(25, case_macro); \ + PMEVN_CASE(26, case_macro); \ + PMEVN_CASE(27, case_macro); \ + PMEVN_CASE(28, case_macro); \ + PMEVN_CASE(29, case_macro); \ + PMEVN_CASE(30, case_macro); \ + default: WARN(1, "Invalid PMEV* index\n"); \ + } \ + } while (0) + #endif -- cgit v1.2.3 From 711432770f78e5a9ae235a1a55decfb71993c958 Mon Sep 17 00:00:00 2001 From: Zaid Al-Bassam Date: Fri, 17 Mar 2023 15:50:22 -0400 Subject: perf: pmuv3: Abstract PMU version checks The current PMU version definitions are available for arm64 only, As we want to add PMUv3 support to arm (32-bit), abstracts these definitions by using arch-specific helpers. Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-4-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/arm_pmuv3.h | 16 ++++++++++++++++ drivers/perf/arm_pmuv3.c | 7 +++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index c444cbfb3acd..80cdfa4c3e88 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -134,4 +134,20 @@ static inline u32 read_pmceid1(void) return read_sysreg(pmceid1_el0); } +static inline bool pmuv3_implemented(int pmuver) +{ + return !(pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || + pmuver == ID_AA64DFR0_EL1_PMUVer_NI); +} + +static inline bool is_pmuv3p4(int pmuver) +{ + return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4; +} + +static inline bool is_pmuv3p5(int pmuver) +{ + return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5; +} + #endif diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index f783f068d612..f7d890af8cc1 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -392,7 +392,7 @@ static const struct attribute_group armv8_pmuv3_caps_attr_group = { */ static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) { - return (cpu_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5); + return (is_pmuv3p5(cpu_pmu->pmuver)); } static inline bool armv8pmu_event_has_user_read(struct perf_event *event) @@ -1084,8 +1084,7 @@ static void __armv8pmu_probe_pmu(void *info) int pmuver; pmuver = read_pmuver(); - if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || - pmuver == ID_AA64DFR0_EL1_PMUVer_NI) + if (!pmuv3_implemented(pmuver)) return; cpu_pmu->pmuver = pmuver; @@ -1111,7 +1110,7 @@ static void __armv8pmu_probe_pmu(void *info) pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); /* store PMMIR register for sysfs */ - if (pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4 && (pmceid_raw[1] & BIT(31))) + if (is_pmuv3p4(pmuver) && (pmceid_raw[1] & BIT(31))) cpu_pmu->reg_pmmir = read_pmmir(); else cpu_pmu->reg_pmmir = 0; -- cgit v1.2.3 From 11fba29a8a1f2fca08f301426b15c62d8a0b8040 Mon Sep 17 00:00:00 2001 From: Zaid Al-Bassam Date: Fri, 17 Mar 2023 15:50:23 -0400 Subject: perf: pmuv3: Move inclusion of kvm_host.h to the arch-specific helper KVM host support is available only on arm64. By moving the inclusion of kvm_host.h to an arm64-specific file, the 32bit architecture will be able to implement dummy helpers. Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-5-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/arm_pmuv3.h | 2 ++ drivers/perf/arm_pmuv3.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index 80cdfa4c3e88..d6b51deb7bf0 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -6,6 +6,8 @@ #ifndef __ASM_PMUV3_H #define __ASM_PMUV3_H +#include + #include #include diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index f7d890af8cc1..2cab600af4fd 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -16,7 +16,6 @@ #include #include -#include #include #include #include -- cgit v1.2.3 From b3a070869f39be4fad9ea4d99f2c8fab1fb7eead Mon Sep 17 00:00:00 2001 From: Zaid Al-Bassam Date: Fri, 17 Mar 2023 15:50:24 -0400 Subject: perf: pmuv3: Change GENMASK to GENMASK_ULL GENMASK macro uses "unsigned long" (32-bit wide on arm and 64-bit on arm64), This causes build issues when enabling PMUv3 on arm as it tries to access bits > 31. This patch switches the GENMASK to GENMASK_ULL, which uses "unsigned long long" (64-bit on both arm and arm64). Signed-off-by: Zaid Al-Bassam Acked-by: Marc Zyngier Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-6-zalbassam@google.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 2cab600af4fd..fc8ed3cd0330 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -489,7 +489,7 @@ static bool armv8pmu_event_needs_bias(struct perf_event *event) static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value) { if (armv8pmu_event_needs_bias(event)) - value |= GENMASK(63, 32); + value |= GENMASK_ULL(63, 32); return value; } @@ -497,7 +497,7 @@ static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value) static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value) { if (armv8pmu_event_needs_bias(event)) - value &= ~GENMASK(63, 32); + value &= ~GENMASK_ULL(63, 32); return value; } -- cgit v1.2.3 From 252309adc81f529d42c8c90a4965866ec82cb6ad Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Mar 2023 15:50:25 -0400 Subject: ARM: Make CONFIG_CPU_V7 valid for 32bit ARMv8 implementations ARMv8 is a superset of ARMv7, and all the ARMv8 features are discoverable with a set of ID registers. It means that we can use CPU_V7 to guard ARMv8 features at compile time. This commit simply amends the CPU_V7 configuration symbol comment to reflect that CPU_V7 also covers ARMv8. Signed-off-by: Marc Zyngier Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-7-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm/mm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index c5bbae86f725..be183ed1232d 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -403,7 +403,7 @@ config CPU_V6K select CPU_THUMB_CAPABLE select CPU_TLB_V6 if MMU -# ARMv7 +# ARMv7 and ARMv8 architectures config CPU_V7 bool select CPU_32v6K -- cgit v1.2.3 From 009d6dc87a568db62290b8dc0a517b612217b6da Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Mar 2023 15:50:26 -0400 Subject: ARM: perf: Allow the use of the PMUv3 driver on 32bit ARM The only thing stopping the PMUv3 driver from compiling on 32bit is the lack of defined system registers names and the handful of required helpers. This is easily solved by providing the sysreg accessors and updating the Kconfig entry. Signed-off-by: Marc Zyngier Co-developed-by: Zaid Al-Bassam Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-8-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm/include/asm/arm_pmuv3.h | 247 +++++++++++++++++++++++++++++++++++++++ drivers/perf/Kconfig | 5 +- drivers/perf/arm_pmuv3.c | 5 +- 3 files changed, 253 insertions(+), 4 deletions(-) create mode 100644 arch/arm/include/asm/arm_pmuv3.h diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h new file mode 100644 index 000000000000..78d3d4b82c6c --- /dev/null +++ b/arch/arm/include/asm/arm_pmuv3.h @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Ltd. + */ + +#ifndef __ASM_PMUV3_H +#define __ASM_PMUV3_H + +#include +#include + +#define PMCCNTR __ACCESS_CP15_64(0, c9) + +#define PMCR __ACCESS_CP15(c9, 0, c12, 0) +#define PMCNTENSET __ACCESS_CP15(c9, 0, c12, 1) +#define PMCNTENCLR __ACCESS_CP15(c9, 0, c12, 2) +#define PMOVSR __ACCESS_CP15(c9, 0, c12, 3) +#define PMSELR __ACCESS_CP15(c9, 0, c12, 5) +#define PMCEID0 __ACCESS_CP15(c9, 0, c12, 6) +#define PMCEID1 __ACCESS_CP15(c9, 0, c12, 7) +#define PMXEVTYPER __ACCESS_CP15(c9, 0, c13, 1) +#define PMXEVCNTR __ACCESS_CP15(c9, 0, c13, 2) +#define PMUSERENR __ACCESS_CP15(c9, 0, c14, 0) +#define PMINTENSET __ACCESS_CP15(c9, 0, c14, 1) +#define PMINTENCLR __ACCESS_CP15(c9, 0, c14, 2) +#define PMMIR __ACCESS_CP15(c9, 0, c14, 6) +#define PMCCFILTR __ACCESS_CP15(c14, 0, c15, 7) + +#define PMEVCNTR0 __ACCESS_CP15(c14, 0, c8, 0) +#define PMEVCNTR1 __ACCESS_CP15(c14, 0, c8, 1) +#define PMEVCNTR2 __ACCESS_CP15(c14, 0, c8, 2) +#define PMEVCNTR3 __ACCESS_CP15(c14, 0, c8, 3) +#define PMEVCNTR4 __ACCESS_CP15(c14, 0, c8, 4) +#define PMEVCNTR5 __ACCESS_CP15(c14, 0, c8, 5) +#define PMEVCNTR6 __ACCESS_CP15(c14, 0, c8, 6) +#define PMEVCNTR7 __ACCESS_CP15(c14, 0, c8, 7) +#define PMEVCNTR8 __ACCESS_CP15(c14, 0, c9, 0) +#define PMEVCNTR9 __ACCESS_CP15(c14, 0, c9, 1) +#define PMEVCNTR10 __ACCESS_CP15(c14, 0, c9, 2) +#define PMEVCNTR11 __ACCESS_CP15(c14, 0, c9, 3) +#define PMEVCNTR12 __ACCESS_CP15(c14, 0, c9, 4) +#define PMEVCNTR13 __ACCESS_CP15(c14, 0, c9, 5) +#define PMEVCNTR14 __ACCESS_CP15(c14, 0, c9, 6) +#define PMEVCNTR15 __ACCESS_CP15(c14, 0, c9, 7) +#define PMEVCNTR16 __ACCESS_CP15(c14, 0, c10, 0) +#define PMEVCNTR17 __ACCESS_CP15(c14, 0, c10, 1) +#define PMEVCNTR18 __ACCESS_CP15(c14, 0, c10, 2) +#define PMEVCNTR19 __ACCESS_CP15(c14, 0, c10, 3) +#define PMEVCNTR20 __ACCESS_CP15(c14, 0, c10, 4) +#define PMEVCNTR21 __ACCESS_CP15(c14, 0, c10, 5) +#define PMEVCNTR22 __ACCESS_CP15(c14, 0, c10, 6) +#define PMEVCNTR23 __ACCESS_CP15(c14, 0, c10, 7) +#define PMEVCNTR24 __ACCESS_CP15(c14, 0, c11, 0) +#define PMEVCNTR25 __ACCESS_CP15(c14, 0, c11, 1) +#define PMEVCNTR26 __ACCESS_CP15(c14, 0, c11, 2) +#define PMEVCNTR27 __ACCESS_CP15(c14, 0, c11, 3) +#define PMEVCNTR28 __ACCESS_CP15(c14, 0, c11, 4) +#define PMEVCNTR29 __ACCESS_CP15(c14, 0, c11, 5) +#define PMEVCNTR30 __ACCESS_CP15(c14, 0, c11, 6) + +#define PMEVTYPER0 __ACCESS_CP15(c14, 0, c12, 0) +#define PMEVTYPER1 __ACCESS_CP15(c14, 0, c12, 1) +#define PMEVTYPER2 __ACCESS_CP15(c14, 0, c12, 2) +#define PMEVTYPER3 __ACCESS_CP15(c14, 0, c12, 3) +#define PMEVTYPER4 __ACCESS_CP15(c14, 0, c12, 4) +#define PMEVTYPER5 __ACCESS_CP15(c14, 0, c12, 5) +#define PMEVTYPER6 __ACCESS_CP15(c14, 0, c12, 6) +#define PMEVTYPER7 __ACCESS_CP15(c14, 0, c12, 7) +#define PMEVTYPER8 __ACCESS_CP15(c14, 0, c13, 0) +#define PMEVTYPER9 __ACCESS_CP15(c14, 0, c13, 1) +#define PMEVTYPER10 __ACCESS_CP15(c14, 0, c13, 2) +#define PMEVTYPER11 __ACCESS_CP15(c14, 0, c13, 3) +#define PMEVTYPER12 __ACCESS_CP15(c14, 0, c13, 4) +#define PMEVTYPER13 __ACCESS_CP15(c14, 0, c13, 5) +#define PMEVTYPER14 __ACCESS_CP15(c14, 0, c13, 6) +#define PMEVTYPER15 __ACCESS_CP15(c14, 0, c13, 7) +#define PMEVTYPER16 __ACCESS_CP15(c14, 0, c14, 0) +#define PMEVTYPER17 __ACCESS_CP15(c14, 0, c14, 1) +#define PMEVTYPER18 __ACCESS_CP15(c14, 0, c14, 2) +#define PMEVTYPER19 __ACCESS_CP15(c14, 0, c14, 3) +#define PMEVTYPER20 __ACCESS_CP15(c14, 0, c14, 4) +#define PMEVTYPER21 __ACCESS_CP15(c14, 0, c14, 5) +#define PMEVTYPER22 __ACCESS_CP15(c14, 0, c14, 6) +#define PMEVTYPER23 __ACCESS_CP15(c14, 0, c14, 7) +#define PMEVTYPER24 __ACCESS_CP15(c14, 0, c15, 0) +#define PMEVTYPER25 __ACCESS_CP15(c14, 0, c15, 1) +#define PMEVTYPER26 __ACCESS_CP15(c14, 0, c15, 2) +#define PMEVTYPER27 __ACCESS_CP15(c14, 0, c15, 3) +#define PMEVTYPER28 __ACCESS_CP15(c14, 0, c15, 4) +#define PMEVTYPER29 __ACCESS_CP15(c14, 0, c15, 5) +#define PMEVTYPER30 __ACCESS_CP15(c14, 0, c15, 6) + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(PMEVCNTR##n) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, PMEVCNTR##n) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, PMEVTYPER##n) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); +} + +static inline unsigned long read_pmmir(void) +{ + return read_sysreg(PMMIR); +} + +static inline u32 read_pmuver(void) +{ + /* PMUVers is not a signed field */ + u32 dfr0 = read_cpuid_ext(CPUID_EXT_DFR0); + + return (dfr0 >> 24) & 0xf; +} + +static inline void write_pmcr(u32 val) +{ + write_sysreg(val, PMCR); +} + +static inline u32 read_pmcr(void) +{ + return read_sysreg(PMCR); +} + +static inline void write_pmselr(u32 val) +{ + write_sysreg(val, PMSELR); +} + +static inline void write_pmccntr(u64 val) +{ + write_sysreg(val, PMCCNTR); +} + +static inline u64 read_pmccntr(void) +{ + return read_sysreg(PMCCNTR); +} + +static inline void write_pmxevcntr(u32 val) +{ + write_sysreg(val, PMXEVCNTR); +} + +static inline u32 read_pmxevcntr(void) +{ + return read_sysreg(PMXEVCNTR); +} + +static inline void write_pmxevtyper(u32 val) +{ + write_sysreg(val, PMXEVTYPER); +} + +static inline void write_pmcntenset(u32 val) +{ + write_sysreg(val, PMCNTENSET); +} + +static inline void write_pmcntenclr(u32 val) +{ + write_sysreg(val, PMCNTENCLR); +} + +static inline void write_pmintenset(u32 val) +{ + write_sysreg(val, PMINTENSET); +} + +static inline void write_pmintenclr(u32 val) +{ + write_sysreg(val, PMINTENCLR); +} + +static inline void write_pmccfiltr(u32 val) +{ + write_sysreg(val, PMCCFILTR); +} + +static inline void write_pmovsclr(u32 val) +{ + write_sysreg(val, PMOVSR); +} + +static inline u32 read_pmovsclr(void) +{ + return read_sysreg(PMOVSR); +} + +static inline void write_pmuserenr(u32 val) +{ + write_sysreg(val, PMUSERENR); +} + +static inline u32 read_pmceid0(void) +{ + return read_sysreg(PMCEID0); +} + +static inline u32 read_pmceid1(void) +{ + return read_sysreg(PMCEID1); +} + +static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} +static inline void kvm_clr_pmu_events(u32 clr) {} +static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) +{ + return false; +} + +/* PMU Version in DFR Register */ +#define ARMV8_PMU_DFR_VER_NI 0 +#define ARMV8_PMU_DFR_VER_V3P4 0x5 +#define ARMV8_PMU_DFR_VER_V3P5 0x6 +#define ARMV8_PMU_DFR_VER_IMP_DEF 0xF + +static inline bool pmuv3_implemented(int pmuver) +{ + return !(pmuver == ARMV8_PMU_DFR_VER_IMP_DEF || + pmuver == ARMV8_PMU_DFR_VER_NI); +} + +static inline bool is_pmuv3p4(int pmuver) +{ + return pmuver >= ARMV8_PMU_DFR_VER_V3P4; +} + +static inline bool is_pmuv3p5(int pmuver) +{ + return pmuver >= ARMV8_PMU_DFR_VER_V3P5; +} + +#endif diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index defe6b47854b..711f82400086 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -101,15 +101,14 @@ config ARM_SMMU_V3_PMU based on the Stream ID of the corresponding master. config ARM_PMUV3 - depends on HW_PERF_EVENTS && ARM64 + depends on HW_PERF_EVENTS && ((ARM && CPU_V7) || ARM64) bool "ARM PMUv3 support" if !ARM64 - default y + default ARM64 help Say y if you want to use the ARM performance monitor unit (PMU) version 3. The PMUv3 is the CPU performance monitors on ARMv8 (aarch32 and aarch64) systems that implement the PMUv3 architecture. - Currently, PMUv3 is only supported on aarch64 (arm64) config ARM_DSU_PMU tristate "ARM DynamIQ Shared Unit (DSU) PMU" diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index fc8ed3cd0330..34ed0d5d7898 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -388,10 +388,13 @@ static const struct attribute_group armv8_pmuv3_caps_attr_group = { * We unconditionally enable ARMv8.5-PMU long event counter support * (64-bit events) where supported. Indicate if this arm_pmu has long * event counter support. + * + * On AArch32, long counters make no sense (you can't access the top + * bits), so we only enable this on AArch64. */ static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) { - return (is_pmuv3p5(cpu_pmu->pmuver)); + return (IS_ENABLED(CONFIG_ARM64) && is_pmuv3p5(cpu_pmu->pmuver)); } static inline bool armv8pmu_event_has_user_read(struct perf_event *event) -- cgit v1.2.3 From 3b16f6268e660f15aed0bb97aefe87e893eb8882 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Mar 2023 15:50:27 -0400 Subject: ARM: mach-virt: Select PMUv3 driver by default Since 32bit guests are not unlikely to run on an ARMv8 host, let's select the PMUv3 driver, which allows the PMU to be used on such systems. Signed-off-by: Marc Zyngier Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-9-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e24a9820e12f..a5e5c0b09ff2 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -401,6 +401,7 @@ config ARCH_VIRT select ARM_GIC_V3 select ARM_GIC_V3_ITS if PCI select ARM_PSCI + select ARM_PMUV3 if PERF_EVENTS select HAVE_ARM_ARCH_TIMER config ARCH_AIROHA -- cgit v1.2.3 From f87e9114b5e590c2c6658ca21d7b714ca240bdd0 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 1 Mar 2023 09:55:40 -0800 Subject: perf/arm-cmn: Move overlapping wp_combine field As eventid field was expanded to support new mesh versions, it started to overlap with wp_combine field. Move wp_combine to fix the issue. Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support") Signed-off-by: Ilkka Koskinen Link: https://lore.kernel.org/r/20230301175540.19891-1-ilkka@os.amperecomputing.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index c9689861be3f..9f2edc28d16e 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -166,7 +166,7 @@ #define CMN_EVENT_BYNODEID(event) FIELD_GET(CMN_CONFIG_BYNODEID, (event)->attr.config) #define CMN_EVENT_NODEID(event) FIELD_GET(CMN_CONFIG_NODEID, (event)->attr.config) -#define CMN_CONFIG_WP_COMBINE GENMASK_ULL(27, 24) +#define CMN_CONFIG_WP_COMBINE GENMASK_ULL(30, 27) #define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48) #define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51) /* Note that we don't yet support the tertiary match group on newer IPs */ -- cgit v1.2.3 From d7f4679dc8d1a5cec497dd9f1b315ce77ab8db28 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 15 Mar 2023 10:30:17 +0800 Subject: perf: arm: Use devm_platform_get_and_ioremap_resource() According to commit 890cc39a8799 ("drivers: provide devm_platform_get_and_ioremap_resource()"), convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Yang Li Link: https://lore.kernel.org/r/20230315023017.35789-1-yang.lee@linux.alibaba.com Signed-off-by: Will Deacon --- drivers/perf/arm_dmc620_pmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c index 54aa4658fb36..5de06f9a4dd3 100644 --- a/drivers/perf/arm_dmc620_pmu.c +++ b/drivers/perf/arm_dmc620_pmu.c @@ -655,8 +655,7 @@ static int dmc620_pmu_device_probe(struct platform_device *pdev) .attr_groups = dmc620_pmu_attr_groups, }; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - dmc620_pmu->base = devm_ioremap_resource(&pdev->dev, res); + dmc620_pmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(dmc620_pmu->base)) return PTR_ERR(dmc620_pmu->base); -- cgit v1.2.3 From 8540504c5136146fd5c867afd86bb4618c1ee61a Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 15 Mar 2023 10:31:08 +0800 Subject: perf: qcom: Use devm_platform_get_and_ioremap_resource() According to commit 890cc39a8799 ("drivers: provide devm_platform_get_and_ioremap_resource()"), convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Yang Li Link: https://lore.kernel.org/r/20230315023108.36953-1-yang.lee@linux.alibaba.com Signed-off-by: Will Deacon --- drivers/perf/qcom_l3_pmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c index 346311a05460..2887edb4eb0b 100644 --- a/drivers/perf/qcom_l3_pmu.c +++ b/drivers/perf/qcom_l3_pmu.c @@ -763,8 +763,7 @@ static int qcom_l3_cache_pmu_probe(struct platform_device *pdev) .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; - memrc = platform_get_resource(pdev, IORESOURCE_MEM, 0); - l3pmu->regs = devm_ioremap_resource(&pdev->dev, memrc); + l3pmu->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &memrc); if (IS_ERR(l3pmu->regs)) return PTR_ERR(l3pmu->regs); -- cgit v1.2.3 From a64021d3726a096442d63a958ff0c49c12c3a1f4 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Fri, 17 Feb 2023 14:10:43 +0000 Subject: kbuild, drivers/perf: remove MODULE_LICENSE in non-modules Since commit 8b41fc4454e ("kbuild: create modules.builtin without Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations are used to identify modules. As a consequence, uses of the macro in non-modules will cause modprobe to misidentify their containing object file as a module when it is not (false positives), and modprobe might succeed rather than failing with a suitable error message. So remove it in the files in this commit, none of which can be built as modules. Signed-off-by: Nick Alcock Suggested-by: Luis Chamberlain Cc: Luis Chamberlain Cc: linux-modules@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Hitomi Hasegawa Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230217141059.392471-9-nick.alcock@oracle.com Signed-off-by: Will Deacon --- drivers/perf/apple_m1_cpu_pmu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c index 979a7c2b4f56..7123beeb992f 100644 --- a/drivers/perf/apple_m1_cpu_pmu.c +++ b/drivers/perf/apple_m1_cpu_pmu.c @@ -581,4 +581,3 @@ static struct platform_driver m1_pmu_driver = { }; module_platform_driver(m1_pmu_driver); -MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From bc12f344d5de246d0e2bd1ffcd5f950314e460d9 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 16 Feb 2023 14:34:03 +0800 Subject: drivers/perf: Use devm_platform_get_and_ioremap_resource() Convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Yang Li Reviewed-by: Shuai Xue Link: https://lore.kernel.org/r/20230216063403.9753-1-yang.lee@linux.alibaba.com Signed-off-by: Will Deacon --- drivers/perf/alibaba_uncore_drw_pmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c index a7689fecb49d..5c5be9fc1b15 100644 --- a/drivers/perf/alibaba_uncore_drw_pmu.c +++ b/drivers/perf/alibaba_uncore_drw_pmu.c @@ -656,8 +656,7 @@ static int ali_drw_pmu_probe(struct platform_device *pdev) drw_pmu->dev = &pdev->dev; platform_set_drvdata(pdev, drw_pmu); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - drw_pmu->cfg_base = devm_ioremap_resource(&pdev->dev, res); + drw_pmu->cfg_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(drw_pmu->cfg_base)) return PTR_ERR(drw_pmu->cfg_base); -- cgit v1.2.3 From c61e5720f23273269cc67ffb2908cf9831c8ca9d Mon Sep 17 00:00:00 2001 From: Jiucheng Xu Date: Thu, 9 Feb 2023 19:54:01 +0800 Subject: perf/amlogic: Fix config1/config2 parsing issue The 3th argument of for_each_set_bit is incorrect, fix them. Fixes: 2016e2113d35 ("perf/amlogic: Add support for Amlogic meson G12 SoC DDR PMU driver") Signed-off-by: Jiucheng Xu Link: https://lore.kernel.org/r/20230209115403.521868-1-jiucheng.xu@amlogic.com Signed-off-by: Will Deacon --- drivers/perf/amlogic/meson_ddr_pmu_core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c index b84346dbac2c..0b24dee1ed3c 100644 --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c @@ -156,10 +156,14 @@ static int meson_ddr_perf_event_add(struct perf_event *event, int flags) u64 config2 = event->attr.config2; int i; - for_each_set_bit(i, (const unsigned long *)&config1, sizeof(config1)) + for_each_set_bit(i, + (const unsigned long *)&config1, + BITS_PER_TYPE(config1)) meson_ddr_set_axi_filter(event, i); - for_each_set_bit(i, (const unsigned long *)&config2, sizeof(config2)) + for_each_set_bit(i, + (const unsigned long *)&config2, + BITS_PER_TYPE(config2)) meson_ddr_set_axi_filter(event, i + 64); if (flags & PERF_EF_START) -- cgit v1.2.3 From 16e15834659e9f5c05b9f12da6e86d76165c60a3 Mon Sep 17 00:00:00 2001 From: Besar Wicaksono Date: Thu, 2 Mar 2023 14:57:01 -0600 Subject: perf: arm_cspmu: Fix variable dereference warning Fix warning message from smatch tool: | smatch warnings: | drivers/perf/arm_cspmu/arm_cspmu.c:1075 arm_cspmu_find_cpu_container() | warn: variable dereferenced before check 'cpu_dev' (see line 1073) Reported-by: kernel test robot Reported-by: Dan Carpenter Link: https://lore.kernel.org/r/202302191227.kc0V8fM7-lkp@intel.com/ Reviewed-by: Suzuki K Poulose Signed-off-by: Besar Wicaksono Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20230302205701.35323-1-bwicaksono@nvidia.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/arm_cspmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index e31302ab7e37..a3f1c410b417 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -1078,12 +1078,14 @@ static int arm_cspmu_request_irq(struct arm_cspmu *cspmu) static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid) { u32 acpi_uid; - struct device *cpu_dev = get_cpu_device(cpu); - struct acpi_device *acpi_dev = ACPI_COMPANION(cpu_dev); + struct device *cpu_dev; + struct acpi_device *acpi_dev; + cpu_dev = get_cpu_device(cpu); if (!cpu_dev) return -ENODEV; + acpi_dev = ACPI_COMPANION(cpu_dev); while (acpi_dev) { if (!strcmp(acpi_device_hid(acpi_dev), ACPI_PROCESSOR_CONTAINER_HID) && -- cgit v1.2.3 From 640a3b7a3d138cb2467b75a6830144fac1c26a81 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Tue, 14 Feb 2023 11:38:01 +0100 Subject: dt-bindings: arm-pmu: Add PMU compatible strings for Apple M2 cores The PMUs on the Apple M2 cores avalanche and blizzard CPU are compatible with M1 ones. As on M1 we don't know exactly what the counters count so use a distinct compatible for each micro-architecture. Apple's PMU counter description omits a counter for M2 so there is some variation on the interpretation of the counters. Signed-off-by: Janne Grunau Acked-by: Rob Herring Reviewed-by: Hector Martin Link: https://lore.kernel.org/r/20230214-apple_m2_pmu-v1-1-9c9213ab9b63@jannau.net Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml index dbb6f3dc5ae5..e14358bf0b9c 100644 --- a/Documentation/devicetree/bindings/arm/pmu.yaml +++ b/Documentation/devicetree/bindings/arm/pmu.yaml @@ -20,6 +20,8 @@ properties: items: - enum: - apm,potenza-pmu + - apple,avalanche-pmu + - apple,blizzard-pmu - apple,firestorm-pmu - apple,icestorm-pmu - arm,armv8-pmuv3 # Only for s/w models -- cgit v1.2.3 From 7d0bfb7c997753ef88f4c3a29f3409c8cb052ab1 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Tue, 14 Feb 2023 11:38:02 +0100 Subject: drivers/perf: apple_m1: Add Apple M2 support The PMU itself is compatible with the one found on M1. We still know next to nothing about the counters so keep using CPU uarch specific compatibles/PMU names. Signed-off-by: Janne Grunau Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230214-apple_m2_pmu-v1-2-9c9213ab9b63@jannau.net Signed-off-by: Will Deacon --- drivers/perf/apple_m1_cpu_pmu.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c index 7123beeb992f..8574c6e58c83 100644 --- a/drivers/perf/apple_m1_cpu_pmu.c +++ b/drivers/perf/apple_m1_cpu_pmu.c @@ -559,7 +559,21 @@ static int m1_pmu_fire_init(struct arm_pmu *cpu_pmu) return m1_pmu_init(cpu_pmu); } +static int m2_pmu_avalanche_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "apple_avalanche_pmu"; + return m1_pmu_init(cpu_pmu); +} + +static int m2_pmu_blizzard_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "apple_blizzard_pmu"; + return m1_pmu_init(cpu_pmu); +} + static const struct of_device_id m1_pmu_of_device_ids[] = { + { .compatible = "apple,avalanche-pmu", .data = m2_pmu_avalanche_init, }, + { .compatible = "apple,blizzard-pmu", .data = m2_pmu_blizzard_init, }, { .compatible = "apple,icestorm-pmu", .data = m1_pmu_ice_init, }, { .compatible = "apple,firestorm-pmu", .data = m1_pmu_fire_init, }, { }, -- cgit v1.2.3 From bfa7965b33ab79fc3b2f8adc14704075fe2416cd Mon Sep 17 00:00:00 2001 From: Zhenhua Huang Date: Fri, 17 Mar 2023 23:29:34 +0800 Subject: mm,kfence: decouple kfence from page granularity mapping judgement Kfence only needs its pool to be mapped as page granularity, if it is inited early. Previous judgement was a bit over protected. From [1], Mark suggested to "just map the KFENCE region a page granularity". So I decouple it from judgement and do page granularity mapping for kfence pool only. Need to be noticed that late init of kfence pool still requires page granularity mapping. Page granularity mapping in theory cost more(2M per 1GB) memory on arm64 platform. Like what I've tested on QEMU(emulated 1GB RAM) with gki_defconfig, also turning off rodata protection: Before: [root@liebao ]# cat /proc/meminfo MemTotal: 999484 kB After: [root@liebao ]# cat /proc/meminfo MemTotal: 1001480 kB To implement this, also relocate the kfence pool allocation before the linear mapping setting up, arm64_kfence_alloc_pool is to allocate phys addr, __kfence_pool is to be set after linear mapping set up. LINK: [1] https://lore.kernel.org/linux-arm-kernel/Y+IsdrvDNILA59UN@FVFF77S0Q05N/ Suggested-by: Mark Rutland Signed-off-by: Zhenhua Huang Reviewed-by: Kefeng Wang Reviewed-by: Marco Elver Link: https://lore.kernel.org/r/1679066974-690-1-git-send-email-quic_zhenhuah@quicinc.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/kfence.h | 10 +++++++ arch/arm64/mm/mmu.c | 61 +++++++++++++++++++++++++++++++++++++++++ arch/arm64/mm/pageattr.c | 7 +++-- mm/kfence/core.c | 4 +++ 4 files changed, 80 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h index aa855c6a0ae6..a81937fae9f6 100644 --- a/arch/arm64/include/asm/kfence.h +++ b/arch/arm64/include/asm/kfence.h @@ -19,4 +19,14 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) return true; } +#ifdef CONFIG_KFENCE +extern bool kfence_early_init; +static inline bool arm64_kfence_can_set_direct_map(void) +{ + return !kfence_early_init; +} +#else /* CONFIG_KFENCE */ +static inline bool arm64_kfence_can_set_direct_map(void) { return false; } +#endif /* CONFIG_KFENCE */ + #endif /* __ASM_KFENCE_H */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6f9d8898a025..9813f2a9c825 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -38,6 +39,7 @@ #include #include #include +#include #define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) @@ -525,12 +527,67 @@ static int __init enable_crash_mem_map(char *arg) } early_param("crashkernel", enable_crash_mem_map); +#ifdef CONFIG_KFENCE + +bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL; + +/* early_param() will be parsed before map_mem() below. */ +static int __init parse_kfence_early_init(char *arg) +{ + int val; + + if (get_option(&arg, &val)) + kfence_early_init = !!val; + return 0; +} +early_param("kfence.sample_interval", parse_kfence_early_init); + +static phys_addr_t __init arm64_kfence_alloc_pool(void) +{ + phys_addr_t kfence_pool; + + if (!kfence_early_init) + return 0; + + kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE); + if (!kfence_pool) { + pr_err("failed to allocate kfence pool\n"); + kfence_early_init = false; + return 0; + } + + /* Temporarily mark as NOMAP. */ + memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE); + + return kfence_pool; +} + +static void __init arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) +{ + if (!kfence_pool) + return; + + /* KFENCE pool needs page-level mapping. */ + __map_memblock(pgdp, kfence_pool, kfence_pool + KFENCE_POOL_SIZE, + pgprot_tagged(PAGE_KERNEL), + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); + memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); + __kfence_pool = phys_to_virt(kfence_pool); +} +#else /* CONFIG_KFENCE */ + +static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; } +static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { } + +#endif /* CONFIG_KFENCE */ + static void __init map_mem(pgd_t *pgdp) { static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); phys_addr_t kernel_start = __pa_symbol(_stext); phys_addr_t kernel_end = __pa_symbol(__init_begin); phys_addr_t start, end; + phys_addr_t early_kfence_pool; int flags = NO_EXEC_MAPPINGS; u64 i; @@ -543,6 +600,8 @@ static void __init map_mem(pgd_t *pgdp) */ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); + early_kfence_pool = arm64_kfence_alloc_pool(); + if (can_set_direct_map()) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; @@ -608,6 +667,8 @@ static void __init map_mem(pgd_t *pgdp) } } #endif + + arm64_kfence_map_pool(early_kfence_pool, pgdp); } void mark_rodata_ro(void) diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 79dd201c59d8..8e2017ba5f1b 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -11,6 +11,7 @@ #include #include #include +#include struct page_change_data { pgprot_t set_mask; @@ -22,12 +23,14 @@ bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED bool can_set_direct_map(void) { /* - * rodata_full, DEBUG_PAGEALLOC and KFENCE require linear map to be + * rodata_full and DEBUG_PAGEALLOC require linear map to be * mapped at page granularity, so that it is possible to * protect/unprotect single pages. + * + * KFENCE pool requires page-granular mapping if initialized late. */ return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() || - IS_ENABLED(CONFIG_KFENCE); + arm64_kfence_can_set_direct_map(); } static int change_page_range(pte_t *ptep, unsigned long addr, void *data) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 5349c37a5dac..5abc79f16fe0 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -814,6 +814,10 @@ void __init kfence_alloc_pool(void) if (!kfence_sample_interval) return; + /* if the pool has already been initialized by arch, skip the below. */ + if (__kfence_pool) + return; + __kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE); if (!__kfence_pool) -- cgit v1.2.3 From 7cae569e6209af7082171477de06ee0eeeaab11f Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Mar 2023 16:20:56 -0700 Subject: arm: uaccess: Remove memcpy_page_flushcache() Commit 21b56c847753 ("iov_iter: get rid of separate bvec and xarray callbacks") removed the calls to memcpy_page_flushcache(). Remove the unnecessary memcpy_page_flushcache() call. Cc: Al Viro Cc: "Dan Williams" Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20221230-kmap-x86-v1-3-15f1ecccab50@intel.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/uaccess.h | 2 -- arch/arm64/lib/uaccess_flushcache.c | 6 ------ 2 files changed, 8 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 5c7b2f9d5913..4bf2c0975a82 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -449,8 +449,6 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE -struct page; -void memcpy_page_flushcache(char *to, struct page *page, size_t offset, size_t len); extern unsigned long __must_check __copy_user_flushcache(void *to, const void __user *from, unsigned long n); static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c index baee22961bdb..7510d1a23124 100644 --- a/arch/arm64/lib/uaccess_flushcache.c +++ b/arch/arm64/lib/uaccess_flushcache.c @@ -19,12 +19,6 @@ void memcpy_flushcache(void *dst, const void *src, size_t cnt) } EXPORT_SYMBOL_GPL(memcpy_flushcache); -void memcpy_page_flushcache(char *to, struct page *page, size_t offset, - size_t len) -{ - memcpy_flushcache(to, page_address(page) + offset, len); -} - unsigned long __copy_user_flushcache(void *to, const void __user *from, unsigned long n) { -- cgit v1.2.3 From d2c48b2387eb89e0bf2a2e06e30987cf410acad4 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Thu, 16 Feb 2023 09:49:19 +0100 Subject: firmware: arm_sdei: Fix sleep from invalid context BUG Running a preempt-rt (v6.2-rc3-rt1) based kernel on an Ampere Altra triggers: BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46 in_atomic(): 0, irqs_disabled(): 128, non_block: 0, pid: 24, name: cpuhp/0 preempt_count: 0, expected: 0 RCU nest depth: 0, expected: 0 3 locks held by cpuhp/0/24: #0: ffffda30217c70d0 (cpu_hotplug_lock){++++}-{0:0}, at: cpuhp_thread_fun+0x5c/0x248 #1: ffffda30217c7120 (cpuhp_state-up){+.+.}-{0:0}, at: cpuhp_thread_fun+0x5c/0x248 #2: ffffda3021c711f0 (sdei_list_lock){....}-{3:3}, at: sdei_cpuhp_up+0x3c/0x130 irq event stamp: 36 hardirqs last enabled at (35): [] finish_task_switch+0xb4/0x2b0 hardirqs last disabled at (36): [] cpuhp_thread_fun+0x21c/0x248 softirqs last enabled at (0): [] copy_process+0x63c/0x1ac0 softirqs last disabled at (0): [<0000000000000000>] 0x0 CPU: 0 PID: 24 Comm: cpuhp/0 Not tainted 5.19.0-rc3-rt5-[...] Hardware name: WIWYNN Mt.Jade Server [...] Call trace: dump_backtrace+0x114/0x120 show_stack+0x20/0x70 dump_stack_lvl+0x9c/0xd8 dump_stack+0x18/0x34 __might_resched+0x188/0x228 rt_spin_lock+0x70/0x120 sdei_cpuhp_up+0x3c/0x130 cpuhp_invoke_callback+0x250/0xf08 cpuhp_thread_fun+0x120/0x248 smpboot_thread_fn+0x280/0x320 kthread+0x130/0x140 ret_from_fork+0x10/0x20 sdei_cpuhp_up() is called in the STARTING hotplug section, which runs with interrupts disabled. Use a CPUHP_AP_ONLINE_DYN entry instead to execute the cpuhp cb later, with preemption enabled. SDEI originally got its own cpuhp slot to allow interacting with perf. It got superseded by pNMI and this early slot is not relevant anymore. [1] Some SDEI calls (e.g. SDEI_1_0_FN_SDEI_PE_MASK) take actions on the calling CPU. It is checked that preemption is disabled for them. _ONLINE cpuhp cb are executed in the 'per CPU hotplug thread'. Preemption is enabled in those threads, but their cpumask is limited to 1 CPU. Move 'WARN_ON_ONCE(preemptible())' statements so that SDEI cpuhp cb don't trigger them. Also add a check for the SDEI_1_0_FN_SDEI_PRIVATE_RESET SDEI call which acts on the calling CPU. [1]: https://lore.kernel.org/all/5813b8c5-ae3e-87fd-fccc-94c9cd08816d@arm.com/ Suggested-by: James Morse Signed-off-by: Pierre Gondois Reviewed-by: James Morse Link: https://lore.kernel.org/r/20230216084920.144064-1-pierre.gondois@arm.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 37 ++++++++++++++++++++----------------- include/linux/cpuhotplug.h | 1 - 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 1e1a51510e83..f9040bd61081 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -43,6 +43,8 @@ static asmlinkage void (*sdei_firmware_call)(unsigned long function_id, /* entry point from firmware to arch asm code */ static unsigned long sdei_entry_point; +static int sdei_hp_state; + struct sdei_event { /* These three are protected by the sdei_list_lock */ struct list_head list; @@ -301,8 +303,6 @@ int sdei_mask_local_cpu(void) { int err; - WARN_ON_ONCE(preemptible()); - err = invoke_sdei_fn(SDEI_1_0_FN_SDEI_PE_MASK, 0, 0, 0, 0, 0, NULL); if (err && err != -EIO) { pr_warn_once("failed to mask CPU[%u]: %d\n", @@ -315,6 +315,7 @@ int sdei_mask_local_cpu(void) static void _ipi_mask_cpu(void *ignored) { + WARN_ON_ONCE(preemptible()); sdei_mask_local_cpu(); } @@ -322,8 +323,6 @@ int sdei_unmask_local_cpu(void) { int err; - WARN_ON_ONCE(preemptible()); - err = invoke_sdei_fn(SDEI_1_0_FN_SDEI_PE_UNMASK, 0, 0, 0, 0, 0, NULL); if (err && err != -EIO) { pr_warn_once("failed to unmask CPU[%u]: %d\n", @@ -336,6 +335,7 @@ int sdei_unmask_local_cpu(void) static void _ipi_unmask_cpu(void *ignored) { + WARN_ON_ONCE(preemptible()); sdei_unmask_local_cpu(); } @@ -343,6 +343,8 @@ static void _ipi_private_reset(void *ignored) { int err; + WARN_ON_ONCE(preemptible()); + err = invoke_sdei_fn(SDEI_1_0_FN_SDEI_PRIVATE_RESET, 0, 0, 0, 0, 0, NULL); if (err && err != -EIO) @@ -389,8 +391,6 @@ static void _local_event_enable(void *data) int err; struct sdei_crosscall_args *arg = data; - WARN_ON_ONCE(preemptible()); - err = sdei_api_event_enable(arg->event->event_num); sdei_cross_call_return(arg, err); @@ -479,8 +479,6 @@ static void _local_event_unregister(void *data) int err; struct sdei_crosscall_args *arg = data; - WARN_ON_ONCE(preemptible()); - err = sdei_api_event_unregister(arg->event->event_num); sdei_cross_call_return(arg, err); @@ -561,8 +559,6 @@ static void _local_event_register(void *data) struct sdei_registered_event *reg; struct sdei_crosscall_args *arg = data; - WARN_ON(preemptible()); - reg = per_cpu_ptr(arg->event->private_registered, smp_processor_id()); err = sdei_api_event_register(arg->event->event_num, sdei_entry_point, reg, 0, 0); @@ -717,6 +713,8 @@ static int sdei_pm_notifier(struct notifier_block *nb, unsigned long action, { int rv; + WARN_ON_ONCE(preemptible()); + switch (action) { case CPU_PM_ENTER: rv = sdei_mask_local_cpu(); @@ -765,7 +763,7 @@ static int sdei_device_freeze(struct device *dev) int err; /* unregister private events */ - cpuhp_remove_state(CPUHP_AP_ARM_SDEI_STARTING); + cpuhp_remove_state(sdei_entry_point); err = sdei_unregister_shared(); if (err) @@ -786,12 +784,15 @@ static int sdei_device_thaw(struct device *dev) return err; } - err = cpuhp_setup_state(CPUHP_AP_ARM_SDEI_STARTING, "SDEI", + err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SDEI", &sdei_cpuhp_up, &sdei_cpuhp_down); - if (err) + if (err < 0) { pr_warn("Failed to re-register CPU hotplug notifier...\n"); + return err; + } - return err; + sdei_hp_state = err; + return 0; } static int sdei_device_restore(struct device *dev) @@ -823,7 +824,7 @@ static int sdei_reboot_notifier(struct notifier_block *nb, unsigned long action, * We are going to reset the interface, after this there is no point * doing work when we take CPUs offline. */ - cpuhp_remove_state(CPUHP_AP_ARM_SDEI_STARTING); + cpuhp_remove_state(sdei_hp_state); sdei_platform_reset(); @@ -1003,13 +1004,15 @@ static int sdei_probe(struct platform_device *pdev) goto remove_cpupm; } - err = cpuhp_setup_state(CPUHP_AP_ARM_SDEI_STARTING, "SDEI", + err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SDEI", &sdei_cpuhp_up, &sdei_cpuhp_down); - if (err) { + if (err < 0) { pr_warn("Failed to register CPU hotplug notifier...\n"); goto remove_reboot; } + sdei_hp_state = err; + return 0; remove_reboot: diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index c6fab004104a..5066fa20e3e9 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -163,7 +163,6 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_CSTATE_STARTING, CPUHP_AP_PERF_XTENSA_STARTING, CPUHP_AP_MIPS_OP_LOONGSON3_STARTING, - CPUHP_AP_ARM_SDEI_STARTING, CPUHP_AP_ARM_VFP_STARTING, CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, -- cgit v1.2.3 From 0e2cb49ef104738de2701d6badd08f2178c13aff Mon Sep 17 00:00:00 2001 From: Yu Zhe Date: Fri, 3 Mar 2023 10:50:47 +0800 Subject: arm64: armv8_deprecated: remove unnecessary (void*) conversions Pointer variables of void * type do not require type cast. Signed-off-by: Yu Zhe Link: https://lore.kernel.org/r/20230303025047.19717-1-yuzhe@nfschina.com Signed-off-by: Will Deacon --- arch/arm64/kernel/armv8_deprecated.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 8a9052cf3013..1febd412b4d2 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -420,14 +420,14 @@ static DEFINE_MUTEX(insn_emulation_mutex); static void enable_insn_hw_mode(void *data) { - struct insn_emulation *insn = (struct insn_emulation *)data; + struct insn_emulation *insn = data; if (insn->set_hw_mode) insn->set_hw_mode(true); } static void disable_insn_hw_mode(void *data) { - struct insn_emulation *insn = (struct insn_emulation *)data; + struct insn_emulation *insn = data; if (insn->set_hw_mode) insn->set_hw_mode(false); } -- cgit v1.2.3 From 6915cffd4889bb2d5f1b41714ec56f9d10fa7e67 Mon Sep 17 00:00:00 2001 From: Yu Zhe Date: Fri, 3 Mar 2023 10:57:15 +0800 Subject: arm64: kexec: remove unnecessary (void*) conversions Pointer variables of void * type do not require type cast. Signed-off-by: Yu Zhe Link: https://lore.kernel.org/r/20230303025715.32570-1-yuzhe@nfschina.com Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index ce3d40120f72..d35cb0568f84 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -102,7 +102,7 @@ static void kexec_segment_flush(const struct kimage *kimage) /* Allocates pages for kexec page table */ static void *kexec_page_alloc(void *arg) { - struct kimage *kimage = (struct kimage *)arg; + struct kimage *kimage = arg; struct page *page = kimage_alloc_control_pages(kimage, 0); void *vaddr = NULL; -- cgit v1.2.3 From 4248d043e462bd43dbef60164d35b817d5664eb1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 28 Mar 2023 18:52:14 +0100 Subject: Revert "ARM: mach-virt: Select PMUv3 driver by default" This reverts commit 3b16f6268e660f15aed0bb97aefe87e893eb8882. Selecting a Kconfig option that has its own set of dependencies tends to end badly, and in this case 'randconfig' builds blew up on 32-bit ARM where ARM_PMUV3 was being selecting with HW_PERF_EVENTS=n: | drivers/perf/arm_pmuv3.c:68:5: error: use of undeclared identifier 'DTLB' | [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB, | ^ | fatal error: too many errors emitted, stopping now [-ferror-limit=] | 20 errors generated. | | Kconfig warnings: (for reference only) | WARNING: unmet direct dependencies detected for ARM_PMUV3 | Depends on [n]: PERF_EVENTS [=y] && HW_PERF_EVENTS [=n] && (ARM [=y] && CPU_V7 [=y] || ARM64) | Selected by [y]: | - ARCH_VIRT [=y] && ARCH_MULTI_V7 [=y] && PERF_EVENTS [=y] As suggested by Marc, just drop the 'select' clause altogether by reverting the patch which introduced it. Link: https://lore.kernel.org/r/202303281539.zzI4vpw1-lkp@intel.com Reported-by: kernel test robot Reported-by: Arnd Bergmann Suggested-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a5e5c0b09ff2..e24a9820e12f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -401,7 +401,6 @@ config ARCH_VIRT select ARM_GIC_V3 select ARM_GIC_V3_ITS if PCI select ARM_PSCI - select ARM_PMUV3 if PERF_EVENTS select HAVE_ARM_ARCH_TIMER config ARCH_AIROHA -- cgit v1.2.3 From e5cacb540fd2509484d6849c0d5372bd67d174b9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 14 Mar 2023 15:36:57 +0000 Subject: arm64: atomics: lse: improve cmpxchg implementation For historical reasons, the LSE implementation of cmpxchg*() hard-codes the GPRs to use, and shuffles registers around with MOVs. This is no longer necessary, and can be simplified. When the LSE cmpxchg implementation was added in commit: c342f78217e822d2 ("arm64: cmpxchg: patch in lse instructions when supported by the CPU") ... the LL/SC implementation of cmpxchg() would be placed out-of-line, and the in-line assembly for cmpxchg would default to: NOP BL NOP The LL/SC implementation of each cmpxchg() function accepted arguments as per AAPCS64 rules, to it was necessary to place the pointer in x0, the older value in X1, and the new value in x2, and acquire the return value from x0. The LL/SC implementation required a temporary register (e.g. for the STXR status value). As the LL/SC implementation preserved the old value, the LSE implementation does likewise. Since commit: addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics") ... the LSE and LL/SC implementations of cmpxchg are inlined as separate asm blocks, with another branch choosing between thw two. Due to this, it is no longer necessary for the LSE implementation to match the register constraints of the LL/SC implementation. This was partially dealt with by removing the hard-coded use of x30 in commit: 3337cb5aea594e40 ("arm64: avoid using hard-coded registers for LSE atomics") ... but we didn't clean up the hard-coding of x0, x1, and x2. This patch simplifies the LSE implementation of cmpxchg, removing the register shuffling and directly clobbering the 'old' argument. This gives the compiler greater freedom for register allocation, and avoids redundant work. The new constraints permit 'old' (Rs) and 'new' (Rt) to be allocated to the same register when the initial values of the two are the same, e.g. resulting in: CAS X0, X0, [X1] This is safe as Rs is only written back after the initial values of Rs and Rt are consumed, and there are no UNPREDICTABLE behaviours to avoid when Rs == Rt. The new constraints also permit 'new' to be allocated to the zero register, avoiding a MOV in a few cases. The same cannot be done for 'old' as it is both an input and output, and any caller of cmpxchg() should care about the output value. Note that for CAS* the use of the zero register never affects the ordering (while for SWP* the use of the zero regsiter for the 'old' value drops any ACQUIRE semantic). Compared to v6.2-rc4, a defconfig vmlinux is ~116KiB smaller, though the resulting Image is the same size due to internal alignment and padding: [mark@lakrids:~/src/linux]% ls -al vmlinux-* -rwxr-xr-x 1 mark mark 137269304 Jan 16 11:59 vmlinux-after -rwxr-xr-x 1 mark mark 137387936 Jan 16 10:54 vmlinux-before [mark@lakrids:~/src/linux]% ls -al Image-* -rw-r--r-- 1 mark mark 38711808 Jan 16 11:59 Image-after -rw-r--r-- 1 mark mark 38711808 Jan 16 10:54 Image-before This patch does not touch cmpxchg_double*() as that requires contiguous register pairs, and separate patches will replace it with cmpxchg128*(). There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Robin Murphy Cc: Will Deacon Link: https://lore.kernel.org/r/20230314153700.787701-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic_lse.h | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index a94d6dacc029..319958b95cfd 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -251,22 +251,15 @@ __lse__cmpxchg_case_##name##sz(volatile void *ptr, \ u##sz old, \ u##sz new) \ { \ - register unsigned long x0 asm ("x0") = (unsigned long)ptr; \ - register u##sz x1 asm ("x1") = old; \ - register u##sz x2 asm ("x2") = new; \ - unsigned long tmp; \ - \ asm volatile( \ __LSE_PREAMBLE \ - " mov %" #w "[tmp], %" #w "[old]\n" \ - " cas" #mb #sfx "\t%" #w "[tmp], %" #w "[new], %[v]\n" \ - " mov %" #w "[ret], %" #w "[tmp]" \ - : [ret] "+r" (x0), [v] "+Q" (*(u##sz *)ptr), \ - [tmp] "=&r" (tmp) \ - : [old] "r" (x1), [new] "r" (x2) \ + " cas" #mb #sfx " %" #w "[old], %" #w "[new], %[v]\n" \ + : [v] "+Q" (*(u##sz *)ptr), \ + [old] "+r" (old) \ + : [new] "rZ" (new) \ : cl); \ \ - return x0; \ + return old; \ } __CMPXCHG_CASE(w, b, , 8, ) -- cgit v1.2.3 From 39c8275de81cf7fb524e2ff067aa175447412284 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 14 Mar 2023 15:36:58 +0000 Subject: arm64: uaccess: permit __smp_store_release() to use zero register Currently the asm constraints for __smp_store_release() require that the value is placed in a "real" GPR (i.e. one other than [XW]ZR or SP). This means that for cases such as: __smp_store_release(ptr, 0) ... the compiler has to move '0' into "real" GPR, e.g. mov xN, #0 stlr xN, [] This is unfortunate, as using the zero register would require fewer instructions and save a "real" GPR for other usage, allowing the compiler to generate: stlr xzr, [] Modify the asm constaints for __smp_store_release() to permit the use of the zero register for the value. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Robin Murphy Cc: Will Deacon Link: https://lore.kernel.org/r/20230314153700.787701-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/barrier.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 3dd8982a9ce3..cf2987464c18 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -131,25 +131,25 @@ do { \ case 1: \ asm volatile ("stlrb %w1, %0" \ : "=Q" (*__p) \ - : "r" (*(__u8 *)__u.__c) \ + : "rZ" (*(__u8 *)__u.__c) \ : "memory"); \ break; \ case 2: \ asm volatile ("stlrh %w1, %0" \ : "=Q" (*__p) \ - : "r" (*(__u16 *)__u.__c) \ + : "rZ" (*(__u16 *)__u.__c) \ : "memory"); \ break; \ case 4: \ asm volatile ("stlr %w1, %0" \ : "=Q" (*__p) \ - : "r" (*(__u32 *)__u.__c) \ + : "rZ" (*(__u32 *)__u.__c) \ : "memory"); \ break; \ case 8: \ - asm volatile ("stlr %1, %0" \ + asm volatile ("stlr %x1, %0" \ : "=Q" (*__p) \ - : "r" (*(__u64 *)__u.__c) \ + : "rZ" (*(__u64 *)__u.__c) \ : "memory"); \ break; \ } \ -- cgit v1.2.3 From 4a3f806eca09f2ac042c42db0d6468f5a81ab666 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 14 Mar 2023 15:36:59 +0000 Subject: arm64: uaccess: permit put_{user,kernel} to use zero register Currently the asm constraints for __put_mem_asm() require that the value is placed in a "real" GPR (i.e. one other than [XW]ZR or SP). This means that for cases such as: __put_user(0, addr) ... the compiler has to move '0' into "real" GPR, e.g. mov xN, #0 sttr xN, [] This is unfortunate, as using the zero register would require fewer instructions and save a "real" GPR for other usage, allowing the compiler to generate: sttr xzr, [] Modify the asm constaints for __put_mem_asm() to permit the use of the zero register for the value. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Robin Murphy Cc: Will Deacon Link: https://lore.kernel.org/r/20230314153700.787701-4-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 5c7b2f9d5913..4ee5aa7bd5a2 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -327,7 +327,7 @@ do { \ "2:\n" \ _ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0) \ : "+r" (err) \ - : "r" (x), "r" (addr)) + : "rZ" (x), "r" (addr)) #define __raw_put_mem(str, x, ptr, err, type) \ do { \ -- cgit v1.2.3 From 172420865b29233f9daeb7fb97da84224dcdbf40 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 14 Mar 2023 15:37:00 +0000 Subject: arm64: uaccess: remove unnecessary earlyclobber Currently the asm constraints for __get_mem_asm() mark the value register as an earlyclobber operand. This means that the compiler can't reuse the same register for both the address and value, even when the value is not subsequently used. There's no need for the value register to be marked as earlyclobber, as it's only written to after the address register is consumed, even when the access faults. Remove the unnecessary earlyclobber. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Robin Murphy Cc: Will Deacon Link: https://lore.kernel.org/r/20230314153700.787701-5-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 4ee5aa7bd5a2..deaf4f8f0672 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -237,7 +237,7 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) "1: " load " " reg "1, [%2]\n" \ "2:\n" \ _ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ - : "+r" (err), "=&r" (x) \ + : "+r" (err), "=r" (x) \ : "r" (addr)) #define __raw_get_mem(ldr, x, ptr, err, type) \ -- cgit v1.2.3 From 7bd6680b47fa4cd53ee1047693c09825e212a6f5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 30 Mar 2023 16:23:03 +0100 Subject: Revert "Revert "arm64: dma: Drop cache invalidation from arch_dma_prep_coherent()"" This reverts commit b7d9aae404841d9999b7476170867ae441e948d2. With the Qualcomm remoteproc driver now modified to use a carveout memory region in 57f72170a2b2 ("remoteproc: qcom_q6v5_mss: Use a carveout to authenticate modem headers"), we can reinstate c44094eee32f ("arm64: dma: Drop cache invalidation from arch_dma_prep_coherent()") which relaxes the arm64 implementation of arch_dma_prep_coherent() to perform only a data cache clean operation, rather than a clean-and-invalidate. Signed-off-by: Will Deacon --- arch/arm64/mm/dma-mapping.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 5240f6acad64..3cb101e8cb29 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -36,22 +36,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) { unsigned long start = (unsigned long)page_address(page); - /* - * The architecture only requires a clean to the PoC here in order to - * meet the requirements of the DMA API. However, some vendors (i.e. - * Qualcomm) abuse the DMA API for transferring buffers from the - * non-secure to the secure world, resetting the system if a non-secure - * access shows up after the buffer has been transferred: - * - * https://lore.kernel.org/r/20221114110329.68413-1-manivannan.sadhasivam@linaro.org - * - * Using clean+invalidate appears to make this issue less likely, but - * the drivers themselves still need fixing as the CPU could issue a - * speculative read from the buffer via the linear mapping irrespective - * of the cache maintenance we use. Once the drivers are fixed, we can - * relax this to a clean operation. - */ - dcache_clean_inval_poc(start, start + size); + dcache_clean_poc(start, start + size); } #ifdef CONFIG_IOMMU_DMA -- cgit v1.2.3 From 013ecd4439784bc98fe83bfe3413924db97c3b38 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 6 Mar 2023 11:48:36 +0000 Subject: arm64/sysreg: allow *Enum blocks in SysregFields blocks We'd like to support Enum/SignedEnum/UnsignedEnum blocks within SysregFields blocks, so that we can define enumerations for sets of registers. This isn't currently supported by gen-sysreg.awk due to the way we track the active block, which can't handle more than a single layer of nesting, which imposes an awkward requirement that when ending a block we know what the parent block is when calling change_block() Make this nicer by using a stack of active blocks, with block_push() to start a block, and block_pop() to end a block. Doing so means that we only need to check the active block at the start of parsing a line: for the start of a block we can check the parent is valid, and for the end of a block we check that the active block is valid. This structure makes the block parsing simpler and makes it easy to permit a block to live under several potential parents (e.g. by permitting Enum to start when the active block is Sysreg or SysregFields). It also permits further nesting, if we need that in future. To aid debugging, the stack of active blocks is reported for fatal errors, and an error is raised if the file is terminated without ending the active block. For clarity I've renamed the top-level element from "None" to "Root". The Fields element it intended only for use within Sysreg blocks, and does not make sense within SysregFields blocks, and so remains forbidden within a SysregFields block. I've verified using sha1sum that this patch does not change the current generated contents of . Signed-off-by: Mark Rutland Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Anshuman Khandual Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230306114836.2575432-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/tools/gen-sysreg.awk | 95 +++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 37 deletions(-) diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk index 6fa0468caa00..d1254a056114 100755 --- a/arch/arm64/tools/gen-sysreg.awk +++ b/arch/arm64/tools/gen-sysreg.awk @@ -4,23 +4,35 @@ # # Usage: awk -f gen-sysreg.awk sysregs.txt +function block_current() { + return __current_block[__current_block_depth]; +} + # Log an error and terminate function fatal(msg) { print "Error at " NR ": " msg > "/dev/stderr" + + printf "Current block nesting:" + + for (i = 0; i <= __current_block_depth; i++) { + printf " " __current_block[i] + } + printf "\n" + exit 1 } -# Sanity check that the start or end of a block makes sense at this point in -# the file. If not, produce an error and terminate. -# -# @this - the $Block or $EndBlock -# @prev - the only valid block to already be in (value of @block) -# @new - the new value of @block -function change_block(this, prev, new) { - if (block != prev) - fatal("unexpected " this " (inside " block ")") - - block = new +# Enter a new block, setting the active block to @block +function block_push(block) { + __current_block[++__current_block_depth] = block +} + +# Exit a block, setting the active block to the parent block +function block_pop() { + if (__current_block_depth == 0) + fatal("error: block_pop() in root block") + + __current_block_depth--; } # Sanity check the number of records for a field makes sense. If not, produce @@ -84,10 +96,14 @@ BEGIN { print "/* Generated file - do not edit */" print "" - block = "None" + __current_block_depth = 0 + __current_block[__current_block_depth] = "Root" } END { + if (__current_block_depth != 0) + fatal("Missing terminator for " block_current() " block") + print "#endif /* __ASM_SYSREG_DEFS_H */" } @@ -95,8 +111,9 @@ END { /^$/ { next } /^[\t ]*#/ { next } -/^SysregFields/ { - change_block("SysregFields", "None", "SysregFields") +/^SysregFields/ && block_current() == "Root" { + block_push("SysregFields") + expect_fields(2) reg = $2 @@ -110,12 +127,10 @@ END { next } -/^EndSysregFields/ { +/^EndSysregFields/ && block_current() == "SysregFields" { if (next_bit > 0) fatal("Unspecified bits in " reg) - change_block("EndSysregFields", "SysregFields", "None") - define(reg "_RES0", "(" res0 ")") define(reg "_RES1", "(" res1 ")") define(reg "_UNKN", "(" unkn ")") @@ -126,11 +141,13 @@ END { res1 = null unkn = null + block_pop() next } -/^Sysreg/ { - change_block("Sysreg", "None", "Sysreg") +/^Sysreg/ && block_current() == "Root" { + block_push("Sysreg") + expect_fields(7) reg = $2 @@ -160,12 +177,10 @@ END { next } -/^EndSysreg/ { +/^EndSysreg/ && block_current() == "Sysreg" { if (next_bit > 0) fatal("Unspecified bits in " reg) - change_block("EndSysreg", "Sysreg", "None") - if (res0 != null) define(reg "_RES0", "(" res0 ")") if (res1 != null) @@ -185,12 +200,13 @@ END { res1 = null unkn = null + block_pop() next } # Currently this is effectivey a comment, in future we may want to emit # defines for the fields. -/^Fields/ && (block == "Sysreg") { +/^Fields/ && block_current() == "Sysreg" { expect_fields(2) if (next_bit != 63) @@ -208,7 +224,7 @@ END { } -/^Res0/ && (block == "Sysreg" || block == "SysregFields") { +/^Res0/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { expect_fields(2) parse_bitdef(reg, "RES0", $2) field = "RES0_" msb "_" lsb @@ -218,7 +234,7 @@ END { next } -/^Res1/ && (block == "Sysreg" || block == "SysregFields") { +/^Res1/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { expect_fields(2) parse_bitdef(reg, "RES1", $2) field = "RES1_" msb "_" lsb @@ -228,7 +244,7 @@ END { next } -/^Unkn/ && (block == "Sysreg" || block == "SysregFields") { +/^Unkn/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { expect_fields(2) parse_bitdef(reg, "UNKN", $2) field = "UNKN_" msb "_" lsb @@ -238,7 +254,7 @@ END { next } -/^Field/ && (block == "Sysreg" || block == "SysregFields") { +/^Field/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { expect_fields(3) field = $3 parse_bitdef(reg, field, $2) @@ -249,15 +265,16 @@ END { next } -/^Raz/ && (block == "Sysreg" || block == "SysregFields") { +/^Raz/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { expect_fields(2) parse_bitdef(reg, field, $2) next } -/^SignedEnum/ { - change_block("Enum<", "Sysreg", "Enum") +/^SignedEnum/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { + block_push("Enum") + expect_fields(3) field = $3 parse_bitdef(reg, field, $2) @@ -268,8 +285,9 @@ END { next } -/^UnsignedEnum/ { - change_block("Enum<", "Sysreg", "Enum") +/^UnsignedEnum/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { + block_push("Enum") + expect_fields(3) field = $3 parse_bitdef(reg, field, $2) @@ -280,8 +298,9 @@ END { next } -/^Enum/ { - change_block("Enum", "Sysreg", "Enum") +/^Enum/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { + block_push("Enum") + expect_fields(3) field = $3 parse_bitdef(reg, field, $2) @@ -291,16 +310,18 @@ END { next } -/^EndEnum/ { - change_block("EndEnum", "Enum", "Sysreg") +/^EndEnum/ && block_current() == "Enum" { + field = null msb = null lsb = null print "" + + block_pop() next } -/0b[01]+/ && block == "Enum" { +/0b[01]+/ && block_current() == "Enum" { expect_fields(2) val = $1 name = $2 -- cgit v1.2.3 From cc077e7facbe23856702dca1d78aebcc19683680 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 23 Mar 2023 20:44:53 +0000 Subject: arm64/sysreg: Convert HFG[RW]TR_EL2 to automatic generation Convert the fine grained traps read and write control registers to automatic generation as per DDI0601 2022-12. No functional changes. Reviewed-by: Joey Gouly Acked-by: Catalin Marinas Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230306-arm64-fgt-reg-gen-v3-1-decba93cbaab@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 8 ----- arch/arm64/tools/sysreg | 75 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9e3ecba3c4e6..e5ca9ece1606 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -419,8 +419,6 @@ #define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1) #define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2) #define SYS_HSTR_EL2 sys_reg(3, 4, 1, 1, 3) -#define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4) -#define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5) #define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) #define SYS_HACR_EL2 sys_reg(3, 4, 1, 1, 7) @@ -758,12 +756,6 @@ #define ICH_VTR_TDS_SHIFT 19 #define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT) -/* HFG[WR]TR_EL2 bit definitions */ -#define HFGxTR_EL2_nTPIDR2_EL0_SHIFT 55 -#define HFGxTR_EL2_nTPIDR2_EL0_MASK BIT_MASK(HFGxTR_EL2_nTPIDR2_EL0_SHIFT) -#define HFGxTR_EL2_nSMPRI_EL1_SHIFT 54 -#define HFGxTR_EL2_nSMPRI_EL1_MASK BIT_MASK(HFGxTR_EL2_nSMPRI_EL1_SHIFT) - #define ARM64_FEATURE_FIELD_BITS 4 /* Defined for compatibility only, do not add new users. */ diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index dd5a9c7e310f..60829a9409f0 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1866,6 +1866,81 @@ Field 1 ZA Field 0 SM EndSysreg +SysregFields HFGxTR_EL2 +Field 63 nAMIAIR2_EL1 +Field 62 nMAIR2_EL1 +Field 61 nS2POR_EL1 +Field 60 nPOR_EL1 +Field 59 nPOR_EL0 +Field 58 nPIR_EL1 +Field 57 nPIRE0_EL1 +Field 56 nRCWMASK_EL1 +Field 55 nTPIDR2_EL0 +Field 54 nSMPRI_EL1 +Field 53 nGCS_EL1 +Field 52 nGCS_EL0 +Res0 51 +Field 50 nACCDATA_EL1 +Field 49 ERXADDR_EL1 +Field 48 EXRPFGCDN_EL1 +Field 47 EXPFGCTL_EL1 +Field 46 EXPFGF_EL1 +Field 45 ERXMISCn_EL1 +Field 44 ERXSTATUS_EL1 +Field 43 ERXCTLR_EL1 +Field 42 ERXFR_EL1 +Field 41 ERRSELR_EL1 +Field 40 ERRIDR_EL1 +Field 39 ICC_IGRPENn_EL1 +Field 38 VBAR_EL1 +Field 37 TTBR1_EL1 +Field 36 TTBR0_EL1 +Field 35 TPIDR_EL0 +Field 34 TPIDRRO_EL0 +Field 33 TPIDR_EL1 +Field 32 TCR_EL1 +Field 31 SCTXNUM_EL0 +Field 30 SCTXNUM_EL1 +Field 29 SCTLR_EL1 +Field 28 REVIDR_EL1 +Field 27 PAR_EL1 +Field 26 MPIDR_EL1 +Field 25 MIDR_EL1 +Field 24 MAIR_EL1 +Field 23 LORSA_EL1 +Field 22 LORN_EL1 +Field 21 LORID_EL1 +Field 20 LOREA_EL1 +Field 19 LORC_EL1 +Field 18 ISR_EL1 +Field 17 FAR_EL1 +Field 16 ESR_EL1 +Field 15 DCZID_EL0 +Field 14 CTR_EL0 +Field 13 CSSELR_EL1 +Field 12 CPACR_EL1 +Field 11 CONTEXTIDR_EL1 +Field 10 CLIDR_EL1 +Field 9 CCSIDR_EL1 +Field 8 APIBKey +Field 7 APIAKey +Field 6 APGAKey +Field 5 APDBKey +Field 4 APDAKey +Field 3 AMAIR_EL1 +Field 2 AIDR_EL1 +Field 1 AFSR1_EL1 +Field 0 AFSR0_EL1 +EndSysregFields + +Sysreg HFGRTR_EL2 3 4 1 1 4 +Fields HFGxTR_EL2 +EndSysreg + +Sysreg HFGWTR_EL2 3 4 1 1 5 +Fields HFGxTR_EL2 +EndSysreg + Sysreg ZCR_EL2 3 4 1 2 0 Fields ZCR_ELx EndSysreg -- cgit v1.2.3 From 79260355593f96a7580a1995f07a9662c0d5a9ae Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 8 Mar 2023 15:20:42 +0000 Subject: arm64/sysreg: Update ID_AA64PFR1_EL1 for DDI0601 2022-12 Version 2022-12 of DDI0601 has defined a number of new fields in previously RES0 space in ID_AA64PFR1_EL1, update our definition to include them. No functional changes. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230308-arm64-aa64pfr1-2022-12-v1-1-f03c1ea39611@kernel.org Signed-off-by: Will Deacon --- arch/arm64/tools/sysreg | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 60829a9409f0..2af92b4f4fe4 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -879,7 +879,30 @@ EndEnum EndSysreg Sysreg ID_AA64PFR1_EL1 3 0 0 4 1 -Res0 63:40 +UnsignedEnum 63:60 PFAR + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 59:56 DF2 + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 55:52 MTEX + 0b0000 MTE + 0b0001 MTE4 +EndEnum +UnsignedEnum 51:48 THE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 47:44 GCS + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 43:40 MTE_frac + 0b0000 ASYNC + 0b1111 NI +EndEnum UnsignedEnum 39:36 NMI 0b0000 NI 0b0001 IMP -- cgit v1.2.3 From 23b2fd83948952ffee2e96ce6a982be9d8e96f9f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 3 Apr 2023 12:49:05 +0100 Subject: perf/arm-cmn: Validate cycles events fully DTC cycle count events don't have anything to validate or initialise in themselves, but we should not forget to still validate their whole group context. Otherwise, we may fail to correctly reject a contrived group containing an impossible number of cycles events. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/3124e8c276a1f513c1a415dc839ca4181b3c8bc8.1680522545.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 9f2edc28d16e..5a57c3f10d27 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1546,7 +1546,7 @@ static int arm_cmn_event_init(struct perf_event *event) type = CMN_EVENT_TYPE(event); /* DTC events (i.e. cycles) already have everything they need */ if (type == CMN_TYPE_DTC) - return 0; + return arm_cmn_validate_group(cmn, event); eventid = CMN_EVENT_EVENTID(event); /* For watchpoints we need the actual XP node here */ -- cgit v1.2.3 From a30b87e6bd7db112deb1e3014b23fb90111bc6c4 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 11 Apr 2023 11:38:09 +0200 Subject: arm64: pmuv3: dynamically map PERF_COUNT_HW_BRANCH_INSTRUCTIONS The mapping of perf_events generic hardware events to actual PMU events on ARM PMUv3 may not always be correct. This is in particular true for the PERF_COUNT_HW_BRANCH_INSTRUCTIONS event. Although the mapping points to an architected event, it may not always be available. This can be seen with a simple: $ perf stat -e branches sleep 0 Performance counter stats for 'sleep 0': branches 0.001401081 seconds time elapsed Yet the hardware does have an event that could be used for branches. Dynamically check for a supported hardware event which can be used for PERF_COUNT_HW_BRANCH_INSTRUCTIONS at mapping time. And with that: $ perf stat -e branches sleep 0 Performance counter stats for 'sleep 0': 166,739 branches 0.000832163 seconds time elapsed Co-developed-by: Stephane Eranian Signed-off-by: Stephane Eranian Co-developed-by: Mark Rutland Signed-off-by: Mark Rutland Co-developed-by: Peter Newman Signed-off-by: Peter Newman Link: https://lore.kernel.org/all/YvunKCJHSXKz%2FkZB@FVFF77S0Q05N Link: https://lore.kernel.org/r/20230411093809.657501-1-peternewman@google.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 34ed0d5d7898..c98e4039386d 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -46,7 +46,6 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, @@ -985,6 +984,28 @@ static void armv8pmu_reset(void *info) armv8pmu_pmcr_write(pmcr); } +static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu, + struct perf_event *event) +{ + if (event->attr.type == PERF_TYPE_HARDWARE && + event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) { + + if (test_bit(ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, + armpmu->pmceid_bitmap)) + return ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED; + + if (test_bit(ARMV8_PMUV3_PERFCTR_BR_RETIRED, + armpmu->pmceid_bitmap)) + return ARMV8_PMUV3_PERFCTR_BR_RETIRED; + + return HW_OP_UNSUPPORTED; + } + + return armpmu_map_event(event, &armv8_pmuv3_perf_map, + &armv8_pmuv3_perf_cache_map, + ARMV8_PMU_EVTYPE_EVENT); +} + static int __armv8_pmuv3_map_event(struct perf_event *event, const unsigned (*extra_event_map) [PERF_COUNT_HW_MAX], @@ -996,9 +1017,7 @@ static int __armv8_pmuv3_map_event(struct perf_event *event, int hw_event_id; struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map, - &armv8_pmuv3_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + hw_event_id = __armv8_pmuv3_map_event_id(armpmu, event); /* * CHAIN events only work when paired with an adjacent counter, and it -- cgit v1.2.3 From 2aa6ac03516d078cf0c35aaa273b5cd11ea9734c Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Wed, 5 Apr 2023 20:02:46 +0200 Subject: arm64: ftrace: Add direct call support This builds up on the CALL_OPS work which extends the ftrace patchsite on arm64 with an ops pointer usable by the ftrace trampoline. This ops pointer is valid at all time. Indeed, it is either pointing to ftrace_list_ops or to the single ops which should be called from that patchsite. There are a few cases to distinguish: - If a direct call ops is the only one tracing a function: - If the direct called trampoline is within the reach of a BL instruction -> the ftrace patchsite jumps to the trampoline - Else -> the ftrace patchsite jumps to the ftrace_caller trampoline which reads the ops pointer in the patchsite and jumps to the direct call address stored in the ops - Else -> the ftrace patchsite jumps to the ftrace_caller trampoline and its ops literal points to ftrace_list_ops so it iterates over all registered ftrace ops, including the direct call ops and calls its call_direct_funcs handler which stores the direct called trampoline's address in the ftrace_regs and the ftrace_caller trampoline will return to that address instead of returning to the traced function Signed-off-by: Florent Revest Co-developed-by: Mark Rutland Signed-off-by: Mark Rutland Link: https://lore.kernel.org/r/20230405180250.2046566-2-revest@chromium.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 4 ++ arch/arm64/include/asm/ftrace.h | 22 ++++++++++ arch/arm64/kernel/asm-offsets.c | 6 +++ arch/arm64/kernel/entry-ftrace.S | 90 +++++++++++++++++++++++++++++++++------- arch/arm64/kernel/ftrace.c | 36 +++++++++++++--- 5 files changed, 138 insertions(+), 20 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1023e896d46b..f3503d0cc1b8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -185,6 +185,10 @@ config ARM64 select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_ARGS \ + if $(cc-option,-fpatchable-function-entry=2) + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS \ + if DYNAMIC_FTRACE_WITH_ARGS && DYNAMIC_FTRACE_WITH_CALL_OPS select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS \ if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG && \ !CC_OPTIMIZE_FOR_SIZE) diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 1c2672bbbf37..b87d70b693c6 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -70,10 +70,19 @@ struct ftrace_ops; #define arch_ftrace_get_regs(regs) NULL +/* + * Note: sizeof(struct ftrace_regs) must be a multiple of 16 to ensure correct + * stack alignment + */ struct ftrace_regs { /* x0 - x8 */ unsigned long regs[9]; + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + unsigned long direct_tramp; +#else unsigned long __unused; +#endif unsigned long fp; unsigned long lr; @@ -136,6 +145,19 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); #define ftrace_graph_func ftrace_graph_func + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, + unsigned long addr) +{ + /* + * The ftrace trampoline will return to this address instead of the + * instrumented function. + */ + fregs->direct_tramp = addr; +} +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + #endif #define ftrace_return_address(n) return_address(n) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index ae345b06e9f7..0996094b0d22 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -93,6 +93,9 @@ int main(void) DEFINE(FREGS_LR, offsetof(struct ftrace_regs, lr)); DEFINE(FREGS_SP, offsetof(struct ftrace_regs, sp)); DEFINE(FREGS_PC, offsetof(struct ftrace_regs, pc)); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + DEFINE(FREGS_DIRECT_TRAMP, offsetof(struct ftrace_regs, direct_tramp)); +#endif DEFINE(FREGS_SIZE, sizeof(struct ftrace_regs)); BLANK(); #endif @@ -197,6 +200,9 @@ int main(void) #endif #ifdef CONFIG_FUNCTION_TRACER DEFINE(FTRACE_OPS_FUNC, offsetof(struct ftrace_ops, func)); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call)); +#endif #endif return 0; } diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 350ed81324ac..1c38a60575aa 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -36,6 +36,31 @@ SYM_CODE_START(ftrace_caller) bti c +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + /* + * The literal pointer to the ops is at an 8-byte aligned boundary + * which is either 12 or 16 bytes before the BL instruction in the call + * site. See ftrace_call_adjust() for details. + * + * Therefore here the LR points at `literal + 16` or `literal + 20`, + * and we can find the address of the literal in either case by + * aligning to an 8-byte boundary and subtracting 16. We do the + * alignment first as this allows us to fold the subtraction into the + * LDR. + */ + bic x11, x30, 0x7 + ldr x11, [x11, #-(4 * AARCH64_INSN_SIZE)] // op + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* + * If the op has a direct call, handle it immediately without + * saving/restoring registers. + */ + ldr x17, [x11, #FTRACE_OPS_DIRECT_CALL] // op->direct_call + cbnz x17, ftrace_caller_direct +#endif +#endif + /* Save original SP */ mov x10, sp @@ -49,6 +74,10 @@ SYM_CODE_START(ftrace_caller) stp x6, x7, [sp, #FREGS_X6] str x8, [sp, #FREGS_X8] +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + str xzr, [sp, #FREGS_DIRECT_TRAMP] +#endif + /* Save the callsite's FP, LR, SP */ str x29, [sp, #FREGS_FP] str x9, [sp, #FREGS_LR] @@ -71,20 +100,7 @@ SYM_CODE_START(ftrace_caller) mov x3, sp // regs #ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS - /* - * The literal pointer to the ops is at an 8-byte aligned boundary - * which is either 12 or 16 bytes before the BL instruction in the call - * site. See ftrace_call_adjust() for details. - * - * Therefore here the LR points at `literal + 16` or `literal + 20`, - * and we can find the address of the literal in either case by - * aligning to an 8-byte boundary and subtracting 16. We do the - * alignment first as this allows us to fold the subtraction into the - * LDR. - */ - bic x2, x30, 0x7 - ldr x2, [x2, #-16] // op - + mov x2, x11 // op ldr x4, [x2, #FTRACE_OPS_FUNC] // op->func blr x4 // op->func(ip, parent_ip, op, regs) @@ -107,8 +123,15 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) ldp x6, x7, [sp, #FREGS_X6] ldr x8, [sp, #FREGS_X8] - /* Restore the callsite's FP, LR, PC */ + /* Restore the callsite's FP */ ldr x29, [sp, #FREGS_FP] + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + ldr x17, [sp, #FREGS_DIRECT_TRAMP] + cbnz x17, ftrace_caller_direct_late +#endif + + /* Restore the callsite's LR and PC */ ldr x30, [sp, #FREGS_LR] ldr x9, [sp, #FREGS_PC] @@ -116,8 +139,45 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) add sp, sp, #FREGS_SIZE + 32 ret x9 + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +SYM_INNER_LABEL(ftrace_caller_direct_late, SYM_L_LOCAL) + /* + * Head to a direct trampoline in x17 after having run other tracers. + * The ftrace_regs are live, and x0-x8 and FP have been restored. The + * LR, PC, and SP have not been restored. + */ + + /* + * Restore the callsite's LR and PC matching the trampoline calling + * convention. + */ + ldr x9, [sp, #FREGS_LR] + ldr x30, [sp, #FREGS_PC] + + /* Restore the callsite's SP */ + add sp, sp, #FREGS_SIZE + 32 + +SYM_INNER_LABEL(ftrace_caller_direct, SYM_L_LOCAL) + /* + * Head to a direct trampoline in x17. + * + * We use `BR X17` as this can safely land on a `BTI C` or `PACIASP` in + * the trampoline, and will not unbalance any return stack. + */ + br x17 +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ SYM_CODE_END(ftrace_caller) +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +SYM_CODE_START(ftrace_stub_direct_tramp) + bti c + mov x10, x30 + mov x30, x9 + ret x10 +SYM_CODE_END(ftrace_stub_direct_tramp) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + #else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ /* diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 5545fe1a9012..758436727fba 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -206,6 +206,13 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) return NULL; } +static bool reachable_by_bl(unsigned long addr, unsigned long pc) +{ + long offset = (long)addr - (long)pc; + + return offset >= -SZ_128M && offset < SZ_128M; +} + /* * Find the address the callsite must branch to in order to reach '*addr'. * @@ -220,14 +227,21 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, unsigned long *addr) { unsigned long pc = rec->ip; - long offset = (long)*addr - (long)pc; struct plt_entry *plt; + /* + * If a custom trampoline is unreachable, rely on the ftrace_caller + * trampoline which knows how to indirectly reach that trampoline + * through ops->direct_call. + */ + if (*addr != FTRACE_ADDR && !reachable_by_bl(*addr, pc)) + *addr = FTRACE_ADDR; + /* * When the target is within range of the 'BL' instruction, use 'addr' * as-is and branch to that directly. */ - if (offset >= -SZ_128M && offset < SZ_128M) + if (reachable_by_bl(*addr, pc)) return true; /* @@ -330,12 +344,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - if (WARN_ON_ONCE(old_addr != (unsigned long)ftrace_caller)) + unsigned long pc = rec->ip; + u32 old, new; + int ret; + + ret = ftrace_rec_set_ops(rec, arm64_rec_get_ops(rec)); + if (ret) + return ret; + + if (!ftrace_find_callable_addr(rec, NULL, &old_addr)) return -EINVAL; - if (WARN_ON_ONCE(addr != (unsigned long)ftrace_caller)) + if (!ftrace_find_callable_addr(rec, NULL, &addr)) return -EINVAL; - return ftrace_rec_update_ops(rec); + old = aarch64_insn_gen_branch_imm(pc, old_addr, + AARCH64_INSN_BRANCH_LINK); + new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); + + return ftrace_modify_code(pc, old, new, true); } #endif -- cgit v1.2.3 From 0f59dca63bf2c329e9afeddae2a7ff91cce4cb44 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Wed, 5 Apr 2023 20:02:47 +0200 Subject: arm64: ftrace: Simplify get_ftrace_plt Following recent refactorings, the get_ftrace_plt function only ever gets called with addr = FTRACE_ADDR so its code can be simplified to always return the ftrace trampoline plt. Signed-off-by: Florent Revest Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230405180250.2046566-3-revest@chromium.org Signed-off-by: Will Deacon --- arch/arm64/kernel/ftrace.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 758436727fba..432626c866a8 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -195,15 +195,15 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ftrace_modify_code(pc, 0, new, false); } -static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) +static struct plt_entry *get_ftrace_plt(struct module *mod) { #ifdef CONFIG_ARM64_MODULE_PLTS struct plt_entry *plt = mod->arch.ftrace_trampolines; - if (addr == FTRACE_ADDR) - return &plt[FTRACE_PLT_IDX]; -#endif + return &plt[FTRACE_PLT_IDX]; +#else return NULL; +#endif } static bool reachable_by_bl(unsigned long addr, unsigned long pc) @@ -270,7 +270,7 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, if (WARN_ON(!mod)) return false; - plt = get_ftrace_plt(mod, *addr); + plt = get_ftrace_plt(mod); if (!plt) { pr_err("ftrace: no module PLT for %ps\n", (void *)*addr); return false; -- cgit v1.2.3 From 9e09d445f1cab518eedf4454d119ead27979b8f4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 11 Apr 2023 17:29:41 +0100 Subject: arm64: stacktrace: recover return address for first entry The function which calls the top-level backtracing function may have been instrumented with ftrace and/or kprobes, and hence the first return address may have been rewritten. Factor out the existing fgraph / kretprobes address recovery, and use this for the first address. As the comment for the fgraph case isn't all that helpful, I've also dropped that. Signed-off-by: Mark Rutland Reviewed-by: Kalesh Singh Cc: Catalin Marinas Cc: Madhavan T. Venkataraman Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20230411162943.203199-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/stacktrace.c | 53 ++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 83154303e682..dd03feba8048 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -69,6 +69,32 @@ static __always_inline void unwind_init_from_task(struct unwind_state *state, state->pc = thread_saved_pc(task); } +static __always_inline int +unwind_recover_return_address(struct unwind_state *state) +{ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + if (state->task->ret_stack && + (state->pc == (unsigned long)return_to_handler)) { + unsigned long orig_pc; + orig_pc = ftrace_graph_ret_addr(state->task, NULL, state->pc, + (void *)state->fp); + if (WARN_ON_ONCE(state->pc == orig_pc)) + return -EINVAL; + state->pc = orig_pc; + } +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#ifdef CONFIG_KRETPROBES + if (is_kretprobe_trampoline(state->pc)) { + state->pc = kretprobe_find_ret_addr(state->task, + (void *)state->fp, + &state->kr_cur); + } +#endif /* CONFIG_KRETPROBES */ + + return 0; +} + /* * Unwind from one frame record (A) to the next frame record (B). * @@ -92,35 +118,16 @@ static int notrace unwind_next(struct unwind_state *state) state->pc = ptrauth_strip_insn_pac(state->pc); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - if (tsk->ret_stack && - (state->pc == (unsigned long)return_to_handler)) { - unsigned long orig_pc; - /* - * This is a case where function graph tracer has - * modified a return address (LR) in a stack frame - * to hook a function return. - * So replace it to an original value. - */ - orig_pc = ftrace_graph_ret_addr(tsk, NULL, state->pc, - (void *)state->fp); - if (WARN_ON_ONCE(state->pc == orig_pc)) - return -EINVAL; - state->pc = orig_pc; - } -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -#ifdef CONFIG_KRETPROBES - if (is_kretprobe_trampoline(state->pc)) - state->pc = kretprobe_find_ret_addr(tsk, (void *)state->fp, &state->kr_cur); -#endif - - return 0; + return unwind_recover_return_address(state); } NOKPROBE_SYMBOL(unwind_next); static void notrace unwind(struct unwind_state *state, stack_trace_consume_fn consume_entry, void *cookie) { + if (unwind_recover_return_address(state)) + return; + while (1) { int ret; -- cgit v1.2.3 From ead6122c289eec06bc1bd167442dfab358fafefb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 11 Apr 2023 17:29:42 +0100 Subject: arm64: stacktrace: move dump functions to end of file For historical reasons, the backtrace dumping functions are placed in the middle of stacktrace.c, despite using functions defined later. For clarity, and to make subsequent refactoring easier, move the dumping functions to the end of stacktrace.c There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Kalesh Singh Cc: Catalin Marinas Cc: Madhavan T. Venkataraman Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20230411162943.203199-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/stacktrace.c | 66 +++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index dd03feba8048..ee398cf5141f 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -140,39 +140,6 @@ static void notrace unwind(struct unwind_state *state, } NOKPROBE_SYMBOL(unwind); -static bool dump_backtrace_entry(void *arg, unsigned long where) -{ - char *loglvl = arg; - printk("%s %pSb\n", loglvl, (void *)where); - return true; -} - -void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, - const char *loglvl) -{ - pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); - - if (regs && user_mode(regs)) - return; - - if (!tsk) - tsk = current; - - if (!try_get_task_stack(tsk)) - return; - - printk("%sCall trace:\n", loglvl); - arch_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs); - - put_task_stack(tsk); -} - -void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) -{ - dump_backtrace(NULL, tsk, loglvl); - barrier(); -} - /* * Per-cpu stacks are only accessible when unwinding the current task in a * non-preemptible context. @@ -237,3 +204,36 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, unwind(&state, consume_entry, cookie); } + +static bool dump_backtrace_entry(void *arg, unsigned long where) +{ + char *loglvl = arg; + printk("%s %pSb\n", loglvl, (void *)where); + return true; +} + +void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, + const char *loglvl) +{ + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); + + if (regs && user_mode(regs)) + return; + + if (!tsk) + tsk = current; + + if (!try_get_task_stack(tsk)) + return; + + printk("%sCall trace:\n", loglvl); + arch_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs); + + put_task_stack(tsk); +} + +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) +{ + dump_backtrace(NULL, tsk, loglvl); + barrier(); +} -- cgit v1.2.3 From b5ecc19e684eee1df32a035b7d981932f344fc67 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 11 Apr 2023 17:29:43 +0100 Subject: arm64: stacktrace: always inline core stacktrace functions The arm64 stacktrace code can be used in kprobe context, and so cannot be safely probed. Some (but not all) of the unwind functions are annotated with `NOKPROBE_SYMBOL()` to ensure this, with others markes as `__always_inline`, relying on the top-level unwind function being marked as `noinstr`. This patch has stacktrace.c consistently mark the internal stacktrace functions as `__always_inline`, removing the need for NOKPROBE_SYMBOL() as the top-level unwind function (arch_stack_walk()) is marked as `noinstr`. This is more consistent and is a simpler pattern to follow for future additions to stacktrace.c. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Kalesh Singh Cc: Catalin Marinas Cc: Madhavan T. Venkataraman Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20230411162943.203199-4-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/stacktrace.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index ee398cf5141f..f527fadcb33b 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -25,8 +25,9 @@ * * The regs must be on a stack currently owned by the calling task. */ -static __always_inline void unwind_init_from_regs(struct unwind_state *state, - struct pt_regs *regs) +static __always_inline void +unwind_init_from_regs(struct unwind_state *state, + struct pt_regs *regs) { unwind_init_common(state, current); @@ -42,7 +43,8 @@ static __always_inline void unwind_init_from_regs(struct unwind_state *state, * * The function which invokes this must be noinline. */ -static __always_inline void unwind_init_from_caller(struct unwind_state *state) +static __always_inline void +unwind_init_from_caller(struct unwind_state *state) { unwind_init_common(state, current); @@ -60,8 +62,9 @@ static __always_inline void unwind_init_from_caller(struct unwind_state *state) * duration of the unwind, or the unwind will be bogus. It is never valid to * call this for the current task. */ -static __always_inline void unwind_init_from_task(struct unwind_state *state, - struct task_struct *task) +static __always_inline void +unwind_init_from_task(struct unwind_state *state, + struct task_struct *task) { unwind_init_common(state, task); @@ -102,7 +105,8 @@ unwind_recover_return_address(struct unwind_state *state) * records (e.g. a cycle), determined based on the location and fp value of A * and the location (but not the fp value) of B. */ -static int notrace unwind_next(struct unwind_state *state) +static __always_inline int +unwind_next(struct unwind_state *state) { struct task_struct *tsk = state->task; unsigned long fp = state->fp; @@ -120,10 +124,10 @@ static int notrace unwind_next(struct unwind_state *state) return unwind_recover_return_address(state); } -NOKPROBE_SYMBOL(unwind_next); -static void notrace unwind(struct unwind_state *state, - stack_trace_consume_fn consume_entry, void *cookie) +static __always_inline void +unwind(struct unwind_state *state, stack_trace_consume_fn consume_entry, + void *cookie) { if (unwind_recover_return_address(state)) return; @@ -138,7 +142,6 @@ static void notrace unwind(struct unwind_state *state, break; } } -NOKPROBE_SYMBOL(unwind); /* * Per-cpu stacks are only accessible when unwinding the current task in a -- cgit v1.2.3 From 32f5b6995f790ac8fc048d3afd6b83c82074aedf Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 6 Apr 2023 16:27:57 +0100 Subject: arm64: add FIXADDR_TOT_{START,SIZE} Currently arm64's FIXADDR_{START,SIZE} definitions only cover the runtime fixmap slots (and not the boot-time fixmap slots), but the code for creating the fixmap assumes that these definitions cover the entire fixmap range. This means that the ptdump boundaries are reported in a misleading way, missing the VA region of the runtime slots. In theory this could also cause the fixmap creation to go wrong if the boot-time fixmap slots end up spilling into a separate PMD entry, though luckily this is not currently the case in any configuration. While it seems like we could extend FIXADDR_{START,SIZE} to cover the entire fixmap area, core code relies upon these *only* covering the runtime slots. For example, fix_to_virt() and virt_to_fix() try to reject manipulation of the boot-time slots based upon FIXADDR_{START,SIZE}, while __fix_to_virt() and __virt_to_fix() can handle any fixmap slot. This patch follows the lead of x86 in commit: 55f49fcb879fbeeb ("x86/mm: Fix overlap of i386 CPU_ENTRY_AREA with FIX_BTMAP") ... and add new FIXADDR_TOT_{START,SIZE} definitions which cover the entire fixmap area, using these for the fixmap creation and ptdump code. As the boot-time fixmap slots are now rejected by fix_to_virt(), the early_fixmap_init() code is changed to consistently use __fix_to_virt(), as it already does in a few cases. Signed-off-by: Mark Rutland Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Ryan Roberts Cc: Will Deacon Reviewed-by: Ryan Roberts Link: https://lore.kernel.org/r/20230406152759.4164229-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/fixmap.h | 6 ++++-- arch/arm64/mm/mmu.c | 26 +++++++++++++------------- arch/arm64/mm/ptdump.c | 2 +- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 71ed5fdf718b..508238f73ba8 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -95,8 +95,10 @@ enum fixed_addresses { __end_of_fixed_addresses }; -#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_TOT_START (FIXADDR_TOP - FIXADDR_TOT_SIZE) #define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9813f2a9c825..44954251e14d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -795,14 +795,14 @@ static void __init map_kernel(pgd_t *pgdp) &vmlinux_initdata, 0, VM_NO_GUARD); map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); - if (!READ_ONCE(pgd_val(*pgd_offset_pgd(pgdp, FIXADDR_START)))) { + if (!READ_ONCE(pgd_val(*pgd_offset_pgd(pgdp, FIXADDR_TOT_START)))) { /* * The fixmap falls in a separate pgd to the kernel, and doesn't * live in the carveout for the swapper_pg_dir. We can simply * re-use the existing dir for the fixmap. */ - set_pgd(pgd_offset_pgd(pgdp, FIXADDR_START), - READ_ONCE(*pgd_offset_k(FIXADDR_START))); + set_pgd(pgd_offset_pgd(pgdp, FIXADDR_TOT_START), + READ_ONCE(*pgd_offset_k(FIXADDR_TOT_START))); } else if (CONFIG_PGTABLE_LEVELS > 3) { pgd_t *bm_pgdp; p4d_t *bm_p4dp; @@ -814,9 +814,9 @@ static void __init map_kernel(pgd_t *pgdp) * entry instead. */ BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); - bm_pgdp = pgd_offset_pgd(pgdp, FIXADDR_START); - bm_p4dp = p4d_offset(bm_pgdp, FIXADDR_START); - bm_pudp = pud_set_fixmap_offset(bm_p4dp, FIXADDR_START); + bm_pgdp = pgd_offset_pgd(pgdp, FIXADDR_TOT_START); + bm_p4dp = p4d_offset(bm_pgdp, FIXADDR_TOT_START); + bm_pudp = pud_set_fixmap_offset(bm_p4dp, FIXADDR_TOT_START); pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd)); pud_clear_fixmap(); } else { @@ -1275,7 +1275,7 @@ void __init early_fixmap_init(void) p4d_t *p4dp, p4d; pud_t *pudp; pmd_t *pmdp; - unsigned long addr = FIXADDR_START; + unsigned long addr = FIXADDR_TOT_START; pgdp = pgd_offset_k(addr); p4dp = p4d_offset(pgdp, addr); @@ -1306,16 +1306,16 @@ void __init early_fixmap_init(void) BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); - if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN))) - || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) { + if ((pmdp != fixmap_pmd(__fix_to_virt(FIX_BTMAP_BEGIN))) + || pmdp != fixmap_pmd(__fix_to_virt(FIX_BTMAP_END))) { WARN_ON(1); pr_warn("pmdp %p != %p, %p\n", - pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)), - fixmap_pmd(fix_to_virt(FIX_BTMAP_END))); + pmdp, fixmap_pmd(__fix_to_virt(FIX_BTMAP_BEGIN)), + fixmap_pmd(__fix_to_virt(FIX_BTMAP_END))); pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", - fix_to_virt(FIX_BTMAP_BEGIN)); + __fix_to_virt(FIX_BTMAP_BEGIN)); pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", - fix_to_virt(FIX_BTMAP_END)); + __fix_to_virt(FIX_BTMAP_END)); pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index 9bc4066c5bf3..e305b6593c4e 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -45,7 +45,7 @@ static struct addr_marker address_markers[] = { { MODULES_END, "Modules end" }, { VMALLOC_START, "vmalloc() area" }, { VMALLOC_END, "vmalloc() end" }, - { FIXADDR_START, "Fixmap start" }, + { FIXADDR_TOT_START, "Fixmap start" }, { FIXADDR_TOP, "Fixmap end" }, { PCI_IO_START, "PCI I/O start" }, { PCI_IO_END, "PCI I/O end" }, -- cgit v1.2.3 From b97547761b02cc95e0e6be827dc9ca9da8142761 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 6 Apr 2023 16:27:58 +0100 Subject: arm64: mm: move fixmap code to its own file Over time, arm64's mm/mmu.c has become increasingly large and painful to navigate. Move the fixmap code to its own file where it can be understood in isolation. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Ryan Roberts Cc: Will Deacon Reviewed-by: Ryan Roberts Link: https://lore.kernel.org/r/20230406152759.4164229-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/fixmap.h | 1 + arch/arm64/include/asm/mmu.h | 2 + arch/arm64/mm/Makefile | 2 +- arch/arm64/mm/fixmap.c | 215 ++++++++++++++++++++++++++++++++++++++++ arch/arm64/mm/mmu.c | 197 +----------------------------------- 5 files changed, 222 insertions(+), 195 deletions(-) create mode 100644 arch/arm64/mm/fixmap.c diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 508238f73ba8..42e21c6bc0dd 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -103,6 +103,7 @@ enum fixed_addresses { #define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE) void __init early_fixmap_init(void); +void __init fixmap_copy(pgd_t *pgdir); #define __early_set_fixmap __set_fixmap diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 48f8466a4be9..4384eaa0aeb7 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -65,6 +65,8 @@ extern void paging_init(void); extern void bootmem_init(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); +extern void create_mapping_noalloc(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, bool page_mappings_only); diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index ff1e800ba7a1..dbd1bc95967d 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -2,7 +2,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ cache.o copypage.o flush.o \ ioremap.o mmap.o pgd.o mmu.o \ - context.o proc.o pageattr.o + context.o proc.o pageattr.o fixmap.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c new file mode 100644 index 000000000000..7fbdd6faf500 --- /dev/null +++ b/arch/arm64/mm/fixmap.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Fixmap manipulation code + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; +static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; + +static inline pud_t *fixmap_pud(unsigned long addr) +{ + pgd_t *pgdp = pgd_offset_k(addr); + p4d_t *p4dp = p4d_offset(pgdp, addr); + p4d_t p4d = READ_ONCE(*p4dp); + + BUG_ON(p4d_none(p4d) || p4d_bad(p4d)); + + return pud_offset_kimg(p4dp, addr); +} + +static inline pmd_t *fixmap_pmd(unsigned long addr) +{ + pud_t *pudp = fixmap_pud(addr); + pud_t pud = READ_ONCE(*pudp); + + BUG_ON(pud_none(pud) || pud_bad(pud)); + + return pmd_offset_kimg(pudp, addr); +} + +static inline pte_t *fixmap_pte(unsigned long addr) +{ + return &bm_pte[pte_index(addr)]; +} + +/* + * The p*d_populate functions call virt_to_phys implicitly so they can't be used + * directly on kernel symbols (bm_p*d). This function is called too early to use + * lm_alias so __p*d_populate functions must be used to populate with the + * physical address from __pa_symbol. + */ +void __init early_fixmap_init(void) +{ + pgd_t *pgdp; + p4d_t *p4dp, p4d; + pud_t *pudp; + pmd_t *pmdp; + unsigned long addr = FIXADDR_TOT_START; + + pgdp = pgd_offset_k(addr); + p4dp = p4d_offset(pgdp, addr); + p4d = READ_ONCE(*p4dp); + if (CONFIG_PGTABLE_LEVELS > 3 && + !(p4d_none(p4d) || p4d_page_paddr(p4d) == __pa_symbol(bm_pud))) { + /* + * We only end up here if the kernel mapping and the fixmap + * share the top level pgd entry, which should only happen on + * 16k/4 levels configurations. + */ + BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); + pudp = pud_offset_kimg(p4dp, addr); + } else { + if (p4d_none(p4d)) + __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); + pudp = fixmap_pud(addr); + } + if (pud_none(READ_ONCE(*pudp))) + __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); + pmdp = fixmap_pmd(addr); + __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE); + + /* + * The boot-ioremap range spans multiple pmds, for which + * we are not prepared: + */ + BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); + + if ((pmdp != fixmap_pmd(__fix_to_virt(FIX_BTMAP_BEGIN))) + || pmdp != fixmap_pmd(__fix_to_virt(FIX_BTMAP_END))) { + WARN_ON(1); + pr_warn("pmdp %p != %p, %p\n", + pmdp, fixmap_pmd(__fix_to_virt(FIX_BTMAP_BEGIN)), + fixmap_pmd(__fix_to_virt(FIX_BTMAP_END))); + pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", + __fix_to_virt(FIX_BTMAP_BEGIN)); + pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", + __fix_to_virt(FIX_BTMAP_END)); + + pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); + pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); + } +} + +/* + * Unusually, this is also called in IRQ context (ghes_iounmap_irq) so if we + * ever need to use IPIs for TLB broadcasting, then we're in trouble here. + */ +void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *ptep; + + BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); + + ptep = fixmap_pte(addr); + + if (pgprot_val(flags)) { + set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); + } else { + pte_clear(&init_mm, addr, ptep); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + } +} + +void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) +{ + const u64 dt_virt_base = __fix_to_virt(FIX_FDT); + int offset; + void *dt_virt; + + /* + * Check whether the physical FDT address is set and meets the minimum + * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be + * at least 8 bytes so that we can always access the magic and size + * fields of the FDT header after mapping the first chunk, double check + * here if that is indeed the case. + */ + BUILD_BUG_ON(MIN_FDT_ALIGN < 8); + if (!dt_phys || dt_phys % MIN_FDT_ALIGN) + return NULL; + + /* + * Make sure that the FDT region can be mapped without the need to + * allocate additional translation table pages, so that it is safe + * to call create_mapping_noalloc() this early. + * + * On 64k pages, the FDT will be mapped using PTEs, so we need to + * be in the same PMD as the rest of the fixmap. + * On 4k pages, we'll use section mappings for the FDT so we only + * have to be in the same PUD. + */ + BUILD_BUG_ON(dt_virt_base % SZ_2M); + + BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT != + __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT); + + offset = dt_phys % SWAPPER_BLOCK_SIZE; + dt_virt = (void *)dt_virt_base + offset; + + /* map the first chunk so we can read the size from the header */ + create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), + dt_virt_base, SWAPPER_BLOCK_SIZE, prot); + + if (fdt_magic(dt_virt) != FDT_MAGIC) + return NULL; + + *size = fdt_totalsize(dt_virt); + if (*size > MAX_FDT_SIZE) + return NULL; + + if (offset + *size > SWAPPER_BLOCK_SIZE) + create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, + round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot); + + return dt_virt; +} + +/* + * Copy the fixmap region into a new pgdir. + */ +void __init fixmap_copy(pgd_t *pgdir) +{ + if (!READ_ONCE(pgd_val(*pgd_offset_pgd(pgdir, FIXADDR_TOT_START)))) { + /* + * The fixmap falls in a separate pgd to the kernel, and doesn't + * live in the carveout for the swapper_pg_dir. We can simply + * re-use the existing dir for the fixmap. + */ + set_pgd(pgd_offset_pgd(pgdir, FIXADDR_TOT_START), + READ_ONCE(*pgd_offset_k(FIXADDR_TOT_START))); + } else if (CONFIG_PGTABLE_LEVELS > 3) { + pgd_t *bm_pgdp; + p4d_t *bm_p4dp; + pud_t *bm_pudp; + /* + * The fixmap shares its top level pgd entry with the kernel + * mapping. This can really only occur when we are running + * with 16k/4 levels, so we can simply reuse the pud level + * entry instead. + */ + BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); + bm_pgdp = pgd_offset_pgd(pgdir, FIXADDR_TOT_START); + bm_p4dp = p4d_offset(bm_pgdp, FIXADDR_TOT_START); + bm_pudp = pud_set_fixmap_offset(bm_p4dp, FIXADDR_TOT_START); + pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd)); + pud_clear_fixmap(); + } else { + BUG(); + } +} diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 44954251e14d..ef59c18fbf8c 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -73,10 +73,6 @@ long __section(".mmuoff.data.write") __early_cpu_boot_status; unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; -static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; -static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; - static DEFINE_SPINLOCK(swapper_pgdir_lock); static DEFINE_MUTEX(fixmap_lock); @@ -452,8 +448,8 @@ static phys_addr_t pgd_pgtable_alloc(int shift) * without allocating new levels of table. Note that this permits the * creation of new section or page entries. */ -static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot) +void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) { if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", @@ -795,34 +791,7 @@ static void __init map_kernel(pgd_t *pgdp) &vmlinux_initdata, 0, VM_NO_GUARD); map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); - if (!READ_ONCE(pgd_val(*pgd_offset_pgd(pgdp, FIXADDR_TOT_START)))) { - /* - * The fixmap falls in a separate pgd to the kernel, and doesn't - * live in the carveout for the swapper_pg_dir. We can simply - * re-use the existing dir for the fixmap. - */ - set_pgd(pgd_offset_pgd(pgdp, FIXADDR_TOT_START), - READ_ONCE(*pgd_offset_k(FIXADDR_TOT_START))); - } else if (CONFIG_PGTABLE_LEVELS > 3) { - pgd_t *bm_pgdp; - p4d_t *bm_p4dp; - pud_t *bm_pudp; - /* - * The fixmap shares its top level pgd entry with the kernel - * mapping. This can really only occur when we are running - * with 16k/4 levels, so we can simply reuse the pud level - * entry instead. - */ - BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); - bm_pgdp = pgd_offset_pgd(pgdp, FIXADDR_TOT_START); - bm_p4dp = p4d_offset(bm_pgdp, FIXADDR_TOT_START); - bm_pudp = pud_set_fixmap_offset(bm_p4dp, FIXADDR_TOT_START); - pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd)); - pud_clear_fixmap(); - } else { - BUG(); - } - + fixmap_copy(pgdp); kasan_copy_shadow(pgdp); } @@ -1237,166 +1206,6 @@ void vmemmap_free(unsigned long start, unsigned long end, } #endif /* CONFIG_MEMORY_HOTPLUG */ -static inline pud_t *fixmap_pud(unsigned long addr) -{ - pgd_t *pgdp = pgd_offset_k(addr); - p4d_t *p4dp = p4d_offset(pgdp, addr); - p4d_t p4d = READ_ONCE(*p4dp); - - BUG_ON(p4d_none(p4d) || p4d_bad(p4d)); - - return pud_offset_kimg(p4dp, addr); -} - -static inline pmd_t *fixmap_pmd(unsigned long addr) -{ - pud_t *pudp = fixmap_pud(addr); - pud_t pud = READ_ONCE(*pudp); - - BUG_ON(pud_none(pud) || pud_bad(pud)); - - return pmd_offset_kimg(pudp, addr); -} - -static inline pte_t *fixmap_pte(unsigned long addr) -{ - return &bm_pte[pte_index(addr)]; -} - -/* - * The p*d_populate functions call virt_to_phys implicitly so they can't be used - * directly on kernel symbols (bm_p*d). This function is called too early to use - * lm_alias so __p*d_populate functions must be used to populate with the - * physical address from __pa_symbol. - */ -void __init early_fixmap_init(void) -{ - pgd_t *pgdp; - p4d_t *p4dp, p4d; - pud_t *pudp; - pmd_t *pmdp; - unsigned long addr = FIXADDR_TOT_START; - - pgdp = pgd_offset_k(addr); - p4dp = p4d_offset(pgdp, addr); - p4d = READ_ONCE(*p4dp); - if (CONFIG_PGTABLE_LEVELS > 3 && - !(p4d_none(p4d) || p4d_page_paddr(p4d) == __pa_symbol(bm_pud))) { - /* - * We only end up here if the kernel mapping and the fixmap - * share the top level pgd entry, which should only happen on - * 16k/4 levels configurations. - */ - BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); - pudp = pud_offset_kimg(p4dp, addr); - } else { - if (p4d_none(p4d)) - __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); - pudp = fixmap_pud(addr); - } - if (pud_none(READ_ONCE(*pudp))) - __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); - pmdp = fixmap_pmd(addr); - __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE); - - /* - * The boot-ioremap range spans multiple pmds, for which - * we are not prepared: - */ - BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) - != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); - - if ((pmdp != fixmap_pmd(__fix_to_virt(FIX_BTMAP_BEGIN))) - || pmdp != fixmap_pmd(__fix_to_virt(FIX_BTMAP_END))) { - WARN_ON(1); - pr_warn("pmdp %p != %p, %p\n", - pmdp, fixmap_pmd(__fix_to_virt(FIX_BTMAP_BEGIN)), - fixmap_pmd(__fix_to_virt(FIX_BTMAP_END))); - pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", - __fix_to_virt(FIX_BTMAP_BEGIN)); - pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", - __fix_to_virt(FIX_BTMAP_END)); - - pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); - pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); - } -} - -/* - * Unusually, this is also called in IRQ context (ghes_iounmap_irq) so if we - * ever need to use IPIs for TLB broadcasting, then we're in trouble here. - */ -void __set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags) -{ - unsigned long addr = __fix_to_virt(idx); - pte_t *ptep; - - BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); - - ptep = fixmap_pte(addr); - - if (pgprot_val(flags)) { - set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); - } else { - pte_clear(&init_mm, addr, ptep); - flush_tlb_kernel_range(addr, addr+PAGE_SIZE); - } -} - -void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) -{ - const u64 dt_virt_base = __fix_to_virt(FIX_FDT); - int offset; - void *dt_virt; - - /* - * Check whether the physical FDT address is set and meets the minimum - * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be - * at least 8 bytes so that we can always access the magic and size - * fields of the FDT header after mapping the first chunk, double check - * here if that is indeed the case. - */ - BUILD_BUG_ON(MIN_FDT_ALIGN < 8); - if (!dt_phys || dt_phys % MIN_FDT_ALIGN) - return NULL; - - /* - * Make sure that the FDT region can be mapped without the need to - * allocate additional translation table pages, so that it is safe - * to call create_mapping_noalloc() this early. - * - * On 64k pages, the FDT will be mapped using PTEs, so we need to - * be in the same PMD as the rest of the fixmap. - * On 4k pages, we'll use section mappings for the FDT so we only - * have to be in the same PUD. - */ - BUILD_BUG_ON(dt_virt_base % SZ_2M); - - BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT != - __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT); - - offset = dt_phys % SWAPPER_BLOCK_SIZE; - dt_virt = (void *)dt_virt_base + offset; - - /* map the first chunk so we can read the size from the header */ - create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), - dt_virt_base, SWAPPER_BLOCK_SIZE, prot); - - if (fdt_magic(dt_virt) != FDT_MAGIC) - return NULL; - - *size = fdt_totalsize(dt_virt); - if (*size > MAX_FDT_SIZE) - return NULL; - - if (offset + *size > SWAPPER_BLOCK_SIZE) - create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, - round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot); - - return dt_virt; -} - int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot)); -- cgit v1.2.3 From 414c109bdf496195269bc03d40841fe67fc2f839 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 6 Apr 2023 16:27:59 +0100 Subject: arm64: mm: always map fixmap at page granularity Today the fixmap code largely maps elements at PAGE_SIZE granularity, but we special-case the FDT mapping such that it can be mapped with 2M block mappings when 4K pages are in use. The original rationale for this was simplicity, but it has some unfortunate side-effects, and complicates portions of the fixmap code (i.e. is not so simple after all). The FDT can be up to 2M in size but is only required to have 8-byte alignment, and so it may straddle a 2M boundary. Thus when using 2M block mappings we may map up to 4M of memory surrounding the FDT. This is unfortunate as most of that memory will be unrelated to the FDT, and any pages which happen to share a 2M block with the FDT will by mapped with Normal Write-Back Cacheable attributes, which might not be what we want elsewhere (e.g. for carve-outs using Non-Cacheable attributes). The logic to handle mapping the FDT with 2M blocks requires some special cases in the fixmap code, and ties it to the early page table configuration by virtue of the SWAPPER_TABLE_SHIFT and SWAPPER_BLOCK_SIZE constants used to determine the granularity used to map the FDT. This patch simplifies the FDT logic and removes the unnecessary mappings of surrounding pages by always mapping the FDT at page granularity as with all other fixmap mappings. To do so we statically reserve multiple PTE tables to cover the fixmap VA range. Since the FDT can be at most 2M, for 4K pages we only need to allocate a single additional PTE table, and for 16K and 64K pages the existing single PTE table is sufficient. The PTE table allocation scales with the number of slots reserved in the fixmap, and so this also makes it easier to add more fixmap entries if we require those in future. Our VA layout means that the fixmap will always fall within a single PMD table (and consequently, within a single PUD/P4D/PGD entry), which we can verify at compile time with a static_assert(). With that assert a number of runtime warnings become impossible, and are removed. I've boot-tested this patch with both 4K and 64K pages. Signed-off-by: Mark Rutland Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Ryan Roberts Cc: Will Deacon Reviewed-by: Ryan Roberts Link: https://lore.kernel.org/r/20230406152759.4164229-4-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/fixmap.h | 15 ++-- arch/arm64/include/asm/kernel-pgtable.h | 5 +- arch/arm64/mm/fixmap.c | 148 +++++++++++++++----------------- 3 files changed, 78 insertions(+), 90 deletions(-) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 42e21c6bc0dd..58c294a96676 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -17,6 +17,7 @@ #ifndef __ASSEMBLY__ #include +#include #include #include #include @@ -36,17 +37,13 @@ enum fixed_addresses { FIX_HOLE, /* - * Reserve a virtual window for the FDT that is 2 MB larger than the - * maximum supported size, and put it at the top of the fixmap region. - * The additional space ensures that any FDT that does not exceed - * MAX_FDT_SIZE can be mapped regardless of whether it crosses any - * 2 MB alignment boundaries. - * - * Keep this at the top so it remains 2 MB aligned. + * Reserve a virtual window for the FDT that is a page bigger than the + * maximum supported size. The additional space ensures that any FDT + * that does not exceed MAX_FDT_SIZE can be mapped regardless of + * whether it crosses any page boundary. */ -#define FIX_FDT_SIZE (MAX_FDT_SIZE + SZ_2M) FIX_FDT_END, - FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, + FIX_FDT = FIX_FDT_END + DIV_ROUND_UP(MAX_FDT_SIZE, PAGE_SIZE) + 1, FIX_EARLYCON_MEM_BASE, FIX_TEXT_POKE0, diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index fcd14197756f..186dd7f85b14 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -59,8 +59,11 @@ #define EARLY_KASLR (0) #endif +#define SPAN_NR_ENTRIES(vstart, vend, shift) \ + ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1) + #define EARLY_ENTRIES(vstart, vend, shift, add) \ - ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1 + add) + (SPAN_NR_ENTRIES(vstart, vend, shift) + (add)) #define EARLY_PGDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT, add)) diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index 7fbdd6faf500..c0a3301203bd 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -16,34 +16,77 @@ #include #include -static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +#define NR_BM_PTE_TABLES \ + SPAN_NR_ENTRIES(FIXADDR_TOT_START, FIXADDR_TOP, PMD_SHIFT) +#define NR_BM_PMD_TABLES \ + SPAN_NR_ENTRIES(FIXADDR_TOT_START, FIXADDR_TOP, PUD_SHIFT) + +static_assert(NR_BM_PMD_TABLES == 1); + +#define __BM_TABLE_IDX(addr, shift) \ + (((addr) >> (shift)) - (FIXADDR_TOT_START >> (shift))) + +#define BM_PTE_TABLE_IDX(addr) __BM_TABLE_IDX(addr, PMD_SHIFT) + +static pte_t bm_pte[NR_BM_PTE_TABLES][PTRS_PER_PTE] __page_aligned_bss; static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; -static inline pud_t *fixmap_pud(unsigned long addr) +static inline pte_t *fixmap_pte(unsigned long addr) { - pgd_t *pgdp = pgd_offset_k(addr); - p4d_t *p4dp = p4d_offset(pgdp, addr); - p4d_t p4d = READ_ONCE(*p4dp); + return &bm_pte[BM_PTE_TABLE_IDX(addr)][pte_index(addr)]; +} - BUG_ON(p4d_none(p4d) || p4d_bad(p4d)); +static void __init early_fixmap_init_pte(pmd_t *pmdp, unsigned long addr) +{ + pmd_t pmd = READ_ONCE(*pmdp); + pte_t *ptep; - return pud_offset_kimg(p4dp, addr); + if (pmd_none(pmd)) { + ptep = bm_pte[BM_PTE_TABLE_IDX(addr)]; + __pmd_populate(pmdp, __pa_symbol(ptep), PMD_TYPE_TABLE); + } } -static inline pmd_t *fixmap_pmd(unsigned long addr) +static void __init early_fixmap_init_pmd(pud_t *pudp, unsigned long addr, + unsigned long end) { - pud_t *pudp = fixmap_pud(addr); + unsigned long next; pud_t pud = READ_ONCE(*pudp); + pmd_t *pmdp; - BUG_ON(pud_none(pud) || pud_bad(pud)); + if (pud_none(pud)) + __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); - return pmd_offset_kimg(pudp, addr); + pmdp = pmd_offset_kimg(pudp, addr); + do { + next = pmd_addr_end(addr, end); + early_fixmap_init_pte(pmdp, addr); + } while (pmdp++, addr = next, addr != end); } -static inline pte_t *fixmap_pte(unsigned long addr) + +static void __init early_fixmap_init_pud(p4d_t *p4dp, unsigned long addr, + unsigned long end) { - return &bm_pte[pte_index(addr)]; + p4d_t p4d = READ_ONCE(*p4dp); + pud_t *pudp; + + if (CONFIG_PGTABLE_LEVELS > 3 && !p4d_none(p4d) && + p4d_page_paddr(p4d) != __pa_symbol(bm_pud)) { + /* + * We only end up here if the kernel mapping and the fixmap + * share the top level pgd entry, which should only happen on + * 16k/4 levels configurations. + */ + BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); + } + + if (p4d_none(p4d)) + __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); + + pudp = pud_offset_kimg(p4dp, addr); + early_fixmap_init_pmd(pudp, addr, end); } /* @@ -54,55 +97,13 @@ static inline pte_t *fixmap_pte(unsigned long addr) */ void __init early_fixmap_init(void) { - pgd_t *pgdp; - p4d_t *p4dp, p4d; - pud_t *pudp; - pmd_t *pmdp; unsigned long addr = FIXADDR_TOT_START; + unsigned long end = FIXADDR_TOP; - pgdp = pgd_offset_k(addr); - p4dp = p4d_offset(pgdp, addr); - p4d = READ_ONCE(*p4dp); - if (CONFIG_PGTABLE_LEVELS > 3 && - !(p4d_none(p4d) || p4d_page_paddr(p4d) == __pa_symbol(bm_pud))) { - /* - * We only end up here if the kernel mapping and the fixmap - * share the top level pgd entry, which should only happen on - * 16k/4 levels configurations. - */ - BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); - pudp = pud_offset_kimg(p4dp, addr); - } else { - if (p4d_none(p4d)) - __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); - pudp = fixmap_pud(addr); - } - if (pud_none(READ_ONCE(*pudp))) - __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); - pmdp = fixmap_pmd(addr); - __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE); + pgd_t *pgdp = pgd_offset_k(addr); + p4d_t *p4dp = p4d_offset(pgdp, addr); - /* - * The boot-ioremap range spans multiple pmds, for which - * we are not prepared: - */ - BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) - != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); - - if ((pmdp != fixmap_pmd(__fix_to_virt(FIX_BTMAP_BEGIN))) - || pmdp != fixmap_pmd(__fix_to_virt(FIX_BTMAP_END))) { - WARN_ON(1); - pr_warn("pmdp %p != %p, %p\n", - pmdp, fixmap_pmd(__fix_to_virt(FIX_BTMAP_BEGIN)), - fixmap_pmd(__fix_to_virt(FIX_BTMAP_END))); - pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", - __fix_to_virt(FIX_BTMAP_BEGIN)); - pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", - __fix_to_virt(FIX_BTMAP_END)); - - pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); - pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); - } + early_fixmap_init_pud(p4dp, addr, end); } /* @@ -130,6 +131,7 @@ void __set_fixmap(enum fixed_addresses idx, void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) { const u64 dt_virt_base = __fix_to_virt(FIX_FDT); + phys_addr_t dt_phys_base; int offset; void *dt_virt; @@ -144,27 +146,12 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) if (!dt_phys || dt_phys % MIN_FDT_ALIGN) return NULL; - /* - * Make sure that the FDT region can be mapped without the need to - * allocate additional translation table pages, so that it is safe - * to call create_mapping_noalloc() this early. - * - * On 64k pages, the FDT will be mapped using PTEs, so we need to - * be in the same PMD as the rest of the fixmap. - * On 4k pages, we'll use section mappings for the FDT so we only - * have to be in the same PUD. - */ - BUILD_BUG_ON(dt_virt_base % SZ_2M); - - BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT != - __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT); - - offset = dt_phys % SWAPPER_BLOCK_SIZE; + dt_phys_base = round_down(dt_phys, PAGE_SIZE); + offset = dt_phys % PAGE_SIZE; dt_virt = (void *)dt_virt_base + offset; /* map the first chunk so we can read the size from the header */ - create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), - dt_virt_base, SWAPPER_BLOCK_SIZE, prot); + create_mapping_noalloc(dt_phys_base, dt_virt_base, PAGE_SIZE, prot); if (fdt_magic(dt_virt) != FDT_MAGIC) return NULL; @@ -173,9 +160,10 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) if (*size > MAX_FDT_SIZE) return NULL; - if (offset + *size > SWAPPER_BLOCK_SIZE) - create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, - round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot); + if (offset + *size > PAGE_SIZE) { + create_mapping_noalloc(dt_phys_base, dt_virt_base, + offset + *size, prot); + } return dt_virt; } -- cgit v1.2.3 From 73e68984cf18ca00fd66e6cb53b3bc06492aafa4 Mon Sep 17 00:00:00 2001 From: Teo Couprie Diaz Date: Tue, 14 Mar 2023 14:00:38 +0000 Subject: arm64: compat: Remove defines now in asm-generic Some generic COMPAT definitions have been consolidated in asm-generic/compat.h by commit 84a0c977ab98 ("asm-generic: compat: Cleanup duplicate definitions") Remove those that are already defined to the same value there from arm64 asm/compat.h. Signed-off-by: Teo Couprie Diaz Reviewed-by: Guo Ren Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230314140038.252908-1-teo.coupriediaz@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/compat.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 9f362274a4f7..74575c3d6987 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -83,10 +83,6 @@ struct compat_statfs { int f_spare[4]; }; -#define COMPAT_RLIM_INFINITY 0xffffffff - -#define COMPAT_OFF_T_MAX 0x7fffffff - #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) #define COMPAT_MINSIGSTKSZ 2048 -- cgit v1.2.3 From 0d124e96051b1dcee88624efea972c826b3dfc83 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 7 Apr 2023 09:15:05 +0800 Subject: arm64: kdump : take off the protection on crashkernel memory region Problem: ======= On arm64, block and section mapping is supported to build page tables. However, currently it enforces to take base page mapping for the whole linear mapping if CONFIG_ZONE_DMA or CONFIG_ZONE_DMA32 is enabled and crashkernel kernel parameter is set. This will cause longer time of the linear mapping process during bootup and severe performance degradation during running time. Root cause: ========== On arm64, crashkernel reservation relies on knowing the upper limit of low memory zone because it needs to reserve memory in the zone so that devices' DMA addressing in kdump kernel can be satisfied. However, the upper limit of low memory on arm64 is variant. And the upper limit can only be decided late till bootmem_init() is called [1]. And we need to map the crashkernel region with base page granularity when doing linear mapping, because kdump needs to protect the crashkernel region via set_memory_valid(,0) after kdump kernel loading. However, arm64 doesn't support well on splitting the built block or section mapping due to some cpu reststriction [2]. And unfortunately, the linear mapping is done before bootmem_init(). To resolve the above conflict on arm64, the compromise is enforcing to take base page mapping for the entire linear mapping if crashkernel is set, and CONFIG_ZONE_DMA or CONFIG_ZONE_DMA32 is enabed. Hence performance is sacrificed. Solution: ========= Comparing with the base page mapping for the whole linear region, it's better to take off the protection on crashkernel memory region for the time being because the anticipated stamping on crashkernel memory region could only happen in a chance in one million, while the base page mapping for the whole linear region is mitigating arm64 systems with crashkernel set always. [1] https://lore.kernel.org/all/YrIIJkhKWSuAqkCx@arm.com/T/#u [2] https://lore.kernel.org/linux-arm-kernel/20190911182546.17094-1-nsaenzjulienne@suse.de/T/ Signed-off-by: Baoquan He Acked-by: Catalin Marinas Acked-by: Mike Rapoport (IBM) Reviewed-by: Zhen Lei Link: https://lore.kernel.org/r/20230407011507.17572-2-bhe@redhat.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/kexec.h | 6 ------ arch/arm64/kernel/machine_kexec.c | 20 -------------------- 2 files changed, 26 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 559bfae26715..9ac9572a3bbe 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -102,12 +102,6 @@ void cpu_soft_restart(unsigned long el2_switch, unsigned long entry, int machine_kexec_post_load(struct kimage *image); #define machine_kexec_post_load machine_kexec_post_load - -void arch_kexec_protect_crashkres(void); -#define arch_kexec_protect_crashkres arch_kexec_protect_crashkres - -void arch_kexec_unprotect_crashkres(void); -#define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres #endif #define ARCH_HAS_KIMAGE_ARCH diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index ce3d40120f72..22da7fc1ff50 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -268,26 +268,6 @@ void machine_crash_shutdown(struct pt_regs *regs) pr_info("Starting crashdump kernel...\n"); } -void arch_kexec_protect_crashkres(void) -{ - int i; - - for (i = 0; i < kexec_crash_image->nr_segments; i++) - set_memory_valid( - __phys_to_virt(kexec_crash_image->segment[i].mem), - kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0); -} - -void arch_kexec_unprotect_crashkres(void) -{ - int i; - - for (i = 0; i < kexec_crash_image->nr_segments; i++) - set_memory_valid( - __phys_to_virt(kexec_crash_image->segment[i].mem), - kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1); -} - #ifdef CONFIG_HIBERNATION /* * To preserve the crash dump kernel image, the relevant memory segments -- cgit v1.2.3 From 04a2a7af3d977f5a54aed26ab54cf734b59dcdf3 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 7 Apr 2023 09:15:06 +0800 Subject: arm64: kdump: do not map crashkernel region specifically After taking off the protection functions on crashkernel memory region, there's no need to map crashkernel region with page granularity during linear mapping. With this change, the system can make use of block or section mapping on linear region to largely improve perforcemence during system bootup and running. Signed-off-by: Baoquan He Acked-by: Catalin Marinas Acked-by: Mike Rapoport (IBM) Reviewed-by: Zhen Lei Link: https://lore.kernel.org/r/20230407011507.17572-3-bhe@redhat.com Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 43 ------------------------------------------- 1 file changed, 43 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6f9d8898a025..7556020a27b7 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -510,21 +510,6 @@ void __init mark_linear_text_alias_ro(void) PAGE_KERNEL_RO); } -static bool crash_mem_map __initdata; - -static int __init enable_crash_mem_map(char *arg) -{ - /* - * Proper parameter parsing is done by reserve_crashkernel(). We only - * need to know if the linear map has to avoid block mappings so that - * the crashkernel reservations can be unmapped later. - */ - crash_mem_map = true; - - return 0; -} -early_param("crashkernel", enable_crash_mem_map); - static void __init map_mem(pgd_t *pgdp) { static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); @@ -554,16 +539,6 @@ static void __init map_mem(pgd_t *pgdp) */ memblock_mark_nomap(kernel_start, kernel_end - kernel_start); -#ifdef CONFIG_KEXEC_CORE - if (crash_mem_map) { - if (defer_reserve_crashkernel()) - flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; - else if (crashk_res.end) - memblock_mark_nomap(crashk_res.start, - resource_size(&crashk_res)); - } -#endif - /* map all the memory banks */ for_each_mem_range(i, &start, &end) { if (start >= end) @@ -590,24 +565,6 @@ static void __init map_mem(pgd_t *pgdp) __map_memblock(pgdp, kernel_start, kernel_end, PAGE_KERNEL, NO_CONT_MAPPINGS); memblock_clear_nomap(kernel_start, kernel_end - kernel_start); - - /* - * Use page-level mappings here so that we can shrink the region - * in page granularity and put back unused memory to buddy system - * through /sys/kernel/kexec_crash_size interface. - */ -#ifdef CONFIG_KEXEC_CORE - if (crash_mem_map && !defer_reserve_crashkernel()) { - if (crashk_res.end) { - __map_memblock(pgdp, crashk_res.start, - crashk_res.end + 1, - PAGE_KERNEL, - NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); - memblock_clear_nomap(crashk_res.start, - resource_size(&crashk_res)); - } - } -#endif } void mark_rodata_ro(void) -- cgit v1.2.3 From 504cae453f8222884486f77f1fd3e8e0aa317dd7 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 7 Apr 2023 09:15:07 +0800 Subject: arm64: kdump: defer the crashkernel reservation for platforms with no DMA memory zones In commit 031495635b46 ("arm64: Do not defer reserve_crashkernel() for platforms with no DMA memory zones"), reserve_crashkernel() is called much earlier in arm64_memblock_init() to avoid causing base apge mapping on platforms with no DMA meomry zones. With taking off protection on crashkernel memory region, no need to call reserve_crashkernel() specially in advance. The deferred invocation of reserve_crashkernel() in bootmem_init() can cover all cases. So revert the whole commit now. Signed-off-by: Baoquan He Reviewed-by: Zhen Lei Link: https://lore.kernel.org/r/20230407011507.17572-4-bhe@redhat.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 5 ----- arch/arm64/mm/init.c | 34 +++------------------------------- 2 files changed, 3 insertions(+), 36 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 78e5163836a0..efcd68154a3a 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -374,11 +374,6 @@ static inline void *phys_to_virt(phys_addr_t x) }) void dump_mem_limit(void); - -static inline bool defer_reserve_crashkernel(void) -{ - return IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32); -} #endif /* !ASSEMBLY */ /* diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 58a0bb2c17f1..66e70ca47680 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -61,34 +61,8 @@ EXPORT_SYMBOL(memstart_addr); * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4). * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory, * otherwise it is empty. - * - * Memory reservation for crash kernel either done early or deferred - * depending on DMA memory zones configs (ZONE_DMA) -- - * - * In absence of ZONE_DMA configs arm64_dma_phys_limit initialized - * here instead of max_zone_phys(). This lets early reservation of - * crash kernel memory which has a dependency on arm64_dma_phys_limit. - * Reserving memory early for crash kernel allows linear creation of block - * mappings (greater than page-granularity) for all the memory bank rangs. - * In this scheme a comparatively quicker boot is observed. - * - * If ZONE_DMA configs are defined, crash kernel memory reservation - * is delayed until DMA zone memory range size initialization performed in - * zone_sizes_init(). The defer is necessary to steer clear of DMA zone - * memory range to avoid overlap allocation. So crash kernel memory boundaries - * are not known when mapping all bank memory ranges, which otherwise means - * not possible to exclude crash kernel range from creating block mappings - * so page-granularity mappings are created for the entire memory range. - * Hence a slightly slower boot is observed. - * - * Note: Page-granularity mappings are necessary for crash kernel memory - * range for shrinking its size via /sys/kernel/kexec_crash_size interface. */ -#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32) phys_addr_t __ro_after_init arm64_dma_phys_limit; -#else -phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1; -#endif /* Current arm64 boot protocol requires 2MB alignment */ #define CRASH_ALIGN SZ_2M @@ -248,6 +222,8 @@ static void __init zone_sizes_init(void) if (!arm64_dma_phys_limit) arm64_dma_phys_limit = dma32_phys_limit; #endif + if (!arm64_dma_phys_limit) + arm64_dma_phys_limit = PHYS_MASK + 1; max_zone_pfns[ZONE_NORMAL] = max_pfn; free_area_init(max_zone_pfns); @@ -408,9 +384,6 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - if (!defer_reserve_crashkernel()) - reserve_crashkernel(); - high_memory = __va(memblock_end_of_DRAM() - 1) + 1; } @@ -457,8 +430,7 @@ void __init bootmem_init(void) * request_standard_resources() depends on crashkernel's memory being * reserved, so do it here. */ - if (defer_reserve_crashkernel()) - reserve_crashkernel(); + reserve_crashkernel(); memblock_dump_all(); } -- cgit v1.2.3 From b2ad9d4e249ea28be5c9678bf1feec6d50f07613 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 23 Mar 2023 22:06:32 +0000 Subject: arm64/idreg: Don't disable SME when disabling SVE SVE and SME are separate features which can be implemented without each other but currently if the user specifies arm64.nosve then we disable SME as well as SVE. There is already a separate override for SME so remove the implicit disablement from the SVE override. One usecase for this would be testing SME only support on a system which implements both SVE and SME. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230315-arm64-override-sve-sme-v2-1-bab7593e842b@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/idreg-override.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index d833d78a7f31..9d8edeb6aa46 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -167,7 +167,7 @@ static const struct { } aliases[] __initconst = { { "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" }, { "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" }, - { "arm64.nosve", "id_aa64pfr0.sve=0 id_aa64pfr1.sme=0" }, + { "arm64.nosve", "id_aa64pfr0.sve=0" }, { "arm64.nosme", "id_aa64pfr1.sme=0" }, { "arm64.nobti", "id_aa64pfr1.bt=0" }, { "arm64.nopauth", -- cgit v1.2.3 From e9d14f3f3fb76c1e89f1648c0a2bc890de58bf11 Mon Sep 17 00:00:00 2001 From: Dongxu Sun Date: Fri, 17 Mar 2023 20:49:12 +0800 Subject: arm64/signal: Use system_supports_tpidr2() to check TPIDR2 Since commit a9d6915859501("arm64/sme: Implement support for TPIDR2"), We introduced system_supports_tpidr2() for TPIDR2 handling. Let's use the specific check instead. No functional changes. Signed-off-by: Dongxu Sun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230317124915.1263-2-sundongxu3@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 06a02707f488..032e97f8cae0 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -651,7 +651,7 @@ static int parse_user_sigframe(struct user_ctxs *user, break; case TPIDR2_MAGIC: - if (!system_supports_sme()) + if (!system_supports_tpidr2()) goto invalid; if (user->tpidr2) @@ -802,7 +802,7 @@ static int restore_sigframe(struct pt_regs *regs, err = restore_fpsimd_context(&user); } - if (err == 0 && system_supports_sme() && user.tpidr2) + if (err == 0 && system_supports_tpidr2() && user.tpidr2) err = restore_tpidr2_context(&user); if (err == 0 && system_supports_sme() && user.za) @@ -974,7 +974,7 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, } /* TPIDR2 if supported */ - if (system_supports_sme() && err == 0) { + if (system_supports_tpidr2() && err == 0) { struct tpidr2_context __user *tpidr2_ctx = apply_user_offset(user, user->tpidr2_offset); err |= preserve_tpidr2_context(tpidr2_ctx); -- cgit v1.2.3 From 19e99e7d59bc421373e82af818197576b1a3940f Mon Sep 17 00:00:00 2001 From: Dongxu Sun Date: Fri, 17 Mar 2023 20:49:13 +0800 Subject: arm64/signal: Alloc tpidr2 sigframe after checking system_supports_tpidr2() Move tpidr2 sigframe allocation from under the checking of system_supports_sme() to the checking of system_supports_tpidr2(). Signed-off-by: Dongxu Sun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230317124915.1263-3-sundongxu3@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 032e97f8cae0..2cfc810d0a5b 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -893,6 +893,13 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, return err; } + if (system_supports_tpidr2()) { + err = sigframe_alloc(user, &user->tpidr2_offset, + sizeof(struct tpidr2_context)); + if (err) + return err; + } + if (system_supports_sme()) { unsigned int vl; unsigned int vq = 0; @@ -902,11 +909,6 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, else vl = task_get_sme_vl(current); - err = sigframe_alloc(user, &user->tpidr2_offset, - sizeof(struct tpidr2_context)); - if (err) - return err; - if (thread_za_enabled(¤t->thread)) vq = sve_vq_from_vl(vl); -- cgit v1.2.3 From 97b5576b01166a6aed6a267df059128e5d98b15e Mon Sep 17 00:00:00 2001 From: Dongxu Sun Date: Fri, 17 Mar 2023 20:49:15 +0800 Subject: arm64/sme: Fix some comments of ARM SME When TIF_SME is clear, fpsimd_restore_current_state will disable SME trap during ret_to_user, then SME access trap is impossible in userspace, not SVE. Besides, fix typo: alocated->allocated. Signed-off-by: Dongxu Sun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230317124915.1263-5-sundongxu3@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 9e7e50a0fd76..2fbafa5cc7ac 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -299,7 +299,7 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, /* * TIF_SME controls whether a task can use SME without trapping while * in userspace, when TIF_SME is set then we must have storage - * alocated in sve_state and sme_state to store the contents of both ZA + * allocated in sve_state and sme_state to store the contents of both ZA * and the SVE registers for both streaming and non-streaming modes. * * If both SVCR.ZA and SVCR.SM are disabled then at any point we @@ -1477,7 +1477,7 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs) * * TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state() * would have disabled the SME access trap for userspace during - * ret_to_user, making an SVE access trap impossible in that case. + * ret_to_user, making an SME access trap impossible in that case. */ void do_sme_acc(unsigned long esr, struct pt_regs *regs) { -- cgit v1.2.3 From 9df3f5082ff94c55e84523a28c040445220b2f64 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 12 Apr 2023 17:01:32 +0100 Subject: arm64: avoid redundant PAC stripping in __builtin_return_address() In old versions of GCC and Clang, __builtin_return_address() did not strip the PAC. This was not the behaviour we desired, and so we wrapped this with code to strip the PAC in commit: 689eae42afd7a916 ("arm64: mask PAC bits of __builtin_return_address") Since then, both GCC and Clang decided that __builtin_return_address() *should* strip the PAC, and the existing behaviour was a bug. GCC was fixed in 11.1.0, with those fixes backported to 10.2.0, 9.4.0, 8.5.0, but not earlier: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94891 Clang was fixed in 12.0.0, though this was not backported: https://reviews.llvm.org/D75044 When using a compiler whose __builtin_return_address() strips the PAC, our wrapper to strip the PAC is redundant. Similarly, when pointer authentication is not in use within the kernel pointers will not have a PAC, and so there's no point stripping those pointers. To avoid this redundant work, this patch updates the __builtin_return_address() wrapper to only be used when in-kernel pointer authentication is configured and the compiler's __builtin_return_address() does not strip the PAC. This is a cleanup/optimization, and not a fix that requires backporting. Stripping a PAC should be an idempotent operation, and so redundantly stripping the PAC is not harmful. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Amit Daniel Kachhap Cc: Catalin Marinas Cc: James Morse Cc: Kristina Martsenko Cc: Will Deacon Link: https://lore.kernel.org/r/20230412160134.306148-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 14 ++++++++++++++ arch/arm64/include/asm/compiler.h | 3 +++ 2 files changed, 17 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1023e896d46b..d69b624b3083 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -362,6 +362,20 @@ config ARCH_PROC_KCORE_TEXT config BROKEN_GAS_INST def_bool !$(as-instr,1:\n.inst 0\n.rept . - 1b\n\nnop\n.endr\n) +config BUILTIN_RETURN_ADDRESS_STRIPS_PAC + bool + # Clang's __builtin_return_adddress() strips the PAC since 12.0.0 + # https://reviews.llvm.org/D75044 + default y if CC_IS_CLANG && (CLANG_VERSION >= 120000) + # GCC's __builtin_return_address() strips the PAC since 11.1.0, + # and this was backported to 10.2.0, 9.4.0, 8.5.0, but not earlier + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94891 + default y if CC_IS_GCC && (GCC_VERSION >= 110100) + default y if CC_IS_GCC && (GCC_VERSION >= 100200) && (GCC_VERSION < 110000) + default y if CC_IS_GCC && (GCC_VERSION >= 90400) && (GCC_VERSION < 100000) + default y if CC_IS_GCC && (GCC_VERSION >= 80500) && (GCC_VERSION < 90000) + default n + config KASAN_SHADOW_OFFSET hex depends on KASAN_GENERIC || KASAN_SW_TAGS diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h index 6fb2e6bcc392..9033a0b2f46a 100644 --- a/arch/arm64/include/asm/compiler.h +++ b/arch/arm64/include/asm/compiler.h @@ -20,7 +20,10 @@ ((ptr & BIT_ULL(55)) ? (ptr | ptrauth_kernel_pac_mask()) : \ (ptr & ~ptrauth_user_pac_mask())) +#if defined(CONFIG_ARM64_PTR_AUTH_KERNEL) && \ + !defined(CONFIG_BUILTIN_RETURN_ADDRESS_STRIPS_PAC) #define __builtin_return_address(val) \ (void *)(ptrauth_clear_pac((unsigned long)__builtin_return_address(val))) +#endif #endif /* __ASM_COMPILER_H */ -- cgit v1.2.3 From ca708599ca43567cdd69f799454f95d1c80ffee5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 12 Apr 2023 17:01:33 +0100 Subject: arm64: use XPACLRI to strip PAC Currently we strip the PAC from pointers using C code, which requires generating bitmasks, and conditionally clearing/setting bits depending on bit 55. We can do better by using XPACLRI directly. When the logic was originally written to strip PACs from user pointers, contemporary toolchains used for the kernel had assemblers which were unaware of the PAC instructions. As stripping the PAC from userspace pointers required unconditional clearing of a fixed set of bits (which could be performed with a single instruction), it was simpler to implement the masking in C than it was to make use of XPACI or XPACLRI. When support for in-kernel pointer authentication was added, the stripping logic was extended to cover TTBR1 pointers, requiring several instructions to handle whether to clear/set bits dependent on bit 55 of the pointer. This patch simplifies the stripping of PACs by using XPACLRI directly, as contemporary toolchains do within __builtin_return_address(). This saves a number of instructions, especially where __builtin_return_address() does not implicitly strip the PAC but is heavily used (e.g. with tracepoints). As the kernel might be compiled with an assembler without knowledge of XPACLRI, it is assembled using the 'HINT #7' alias, which results in an identical opcode. At the same time, I've split ptrauth_strip_insn_pac() into ptrauth_strip_user_insn_pac() and ptrauth_strip_kernel_insn_pac() helpers so that we can avoid unnecessary PAC stripping when pointer authentication is not in use in userspace or kernel respectively. The underlying xpaclri() macro uses inline assembly which clobbers x30. The clobber causes the compiler to save/restore the original x30 value in a frame record (protected with PACIASP and AUTIASP when in-kernel authentication is enabled), so this does not provide a gadget to alter the return address. Similarly this does not adversely affect unwinding due to the presence of the frame record. The ptrauth_user_pac_mask() and ptrauth_kernel_pac_mask() are exported from the kernel in ptrace and core dumps, so these are retained. A subsequent patch will move them out of . Signed-off-by: Mark Rutland Cc: Amit Daniel Kachhap Cc: Catalin Marinas Cc: James Morse Cc: Kristina Martsenko Cc: Will Deacon Link: https://lore.kernel.org/r/20230412160134.306148-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/compiler.h | 32 +++++++++++++++++++++++++------- arch/arm64/include/asm/pointer_auth.h | 6 ------ arch/arm64/kernel/perf_callchain.c | 2 +- arch/arm64/kernel/process.c | 2 +- arch/arm64/kernel/stacktrace.c | 2 +- 5 files changed, 28 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h index 9033a0b2f46a..b6d1d62474e5 100644 --- a/arch/arm64/include/asm/compiler.h +++ b/arch/arm64/include/asm/compiler.h @@ -15,15 +15,33 @@ #define ptrauth_user_pac_mask() GENMASK_ULL(54, vabits_actual) #define ptrauth_kernel_pac_mask() GENMASK_ULL(63, vabits_actual) -/* Valid for EL0 TTBR0 and EL1 TTBR1 instruction pointers */ -#define ptrauth_clear_pac(ptr) \ - ((ptr & BIT_ULL(55)) ? (ptr | ptrauth_kernel_pac_mask()) : \ - (ptr & ~ptrauth_user_pac_mask())) +#define xpaclri(ptr) \ +({ \ + register unsigned long __xpaclri_ptr asm("x30") = (ptr); \ + \ + asm( \ + ARM64_ASM_PREAMBLE \ + " hint #7\n" \ + : "+r" (__xpaclri_ptr)); \ + \ + __xpaclri_ptr; \ +}) -#if defined(CONFIG_ARM64_PTR_AUTH_KERNEL) && \ - !defined(CONFIG_BUILTIN_RETURN_ADDRESS_STRIPS_PAC) +#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL +#define ptrauth_strip_kernel_insn_pac(ptr) xpaclri(ptr) +#else +#define ptrauth_strip_kernel_insn_pac(ptr) (ptr) +#endif + +#ifdef CONFIG_ARM64_PTR_AUTH +#define ptrauth_strip_user_insn_pac(ptr) xpaclri(ptr) +#else +#define ptrauth_strip_user_insn_pac(ptr) (ptr) +#endif + +#if !defined(CONFIG_BUILTIN_RETURN_ADDRESS_STRIPS_PAC) #define __builtin_return_address(val) \ - (void *)(ptrauth_clear_pac((unsigned long)__builtin_return_address(val))) + (void *)(ptrauth_strip_kernel_insn_pac((unsigned long)__builtin_return_address(val))) #endif #endif /* __ASM_COMPILER_H */ diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h index efb098de3a84..b0665db86aa6 100644 --- a/arch/arm64/include/asm/pointer_auth.h +++ b/arch/arm64/include/asm/pointer_auth.h @@ -97,11 +97,6 @@ extern int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys, unsigned long enabled); extern int ptrauth_get_enabled_keys(struct task_struct *tsk); -static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr) -{ - return ptrauth_clear_pac(ptr); -} - static __always_inline void ptrauth_enable(void) { if (!system_supports_address_auth()) @@ -133,7 +128,6 @@ static __always_inline void ptrauth_enable(void) #define ptrauth_prctl_reset_keys(tsk, arg) (-EINVAL) #define ptrauth_set_enabled_keys(tsk, keys, enabled) (-EINVAL) #define ptrauth_get_enabled_keys(tsk) (-EINVAL) -#define ptrauth_strip_insn_pac(lr) (lr) #define ptrauth_suspend_exit() #define ptrauth_thread_init_user() #define ptrauth_thread_switch_user(tsk) diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 65b196e3ca6c..6d157f32187b 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -38,7 +38,7 @@ user_backtrace(struct frame_tail __user *tail, if (err) return NULL; - lr = ptrauth_strip_insn_pac(buftail.lr); + lr = ptrauth_strip_user_insn_pac(buftail.lr); perf_callchain_store(entry, lr); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 71d59b5abede..b5bed62483cb 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -217,7 +217,7 @@ void __show_regs(struct pt_regs *regs) if (!user_mode(regs)) { printk("pc : %pS\n", (void *)regs->pc); - printk("lr : %pS\n", (void *)ptrauth_strip_insn_pac(lr)); + printk("lr : %pS\n", (void *)ptrauth_strip_kernel_insn_pac(lr)); } else { printk("pc : %016llx\n", regs->pc); printk("lr : %016llx\n", lr); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index f527fadcb33b..17f66a74c745 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -120,7 +120,7 @@ unwind_next(struct unwind_state *state) if (err) return err; - state->pc = ptrauth_strip_insn_pac(state->pc); + state->pc = ptrauth_strip_kernel_insn_pac(state->pc); return unwind_recover_return_address(state); } -- cgit v1.2.3 From de1702f65feb516d5394e81a78519de9dc567031 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 12 Apr 2023 17:01:34 +0100 Subject: arm64: move PAC masks to Now that we use XPACLRI to strip PACs within the kernel, the ptrauth_user_pac_mask() and ptrauth_kernel_pac_mask() definitions no longer need to live in . Move them to , and ensure that this header is included where they are used. Signed-off-by: Mark Rutland Cc: Amit Daniel Kachhap Cc: Catalin Marinas Cc: James Morse Cc: Kristina Martsenko Cc: Will Deacon Link: https://lore.kernel.org/r/20230412160134.306148-4-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/compiler.h | 7 ------- arch/arm64/include/asm/pointer_auth.h | 7 +++++++ arch/arm64/kernel/crash_core.c | 1 + 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h index b6d1d62474e5..9bbd7b7097ff 100644 --- a/arch/arm64/include/asm/compiler.h +++ b/arch/arm64/include/asm/compiler.h @@ -8,13 +8,6 @@ #define ARM64_ASM_PREAMBLE #endif -/* - * The EL0/EL1 pointer bits used by a pointer authentication code. - * This is dependent on TBI0/TBI1 being enabled, or bits 63:56 would also apply. - */ -#define ptrauth_user_pac_mask() GENMASK_ULL(54, vabits_actual) -#define ptrauth_kernel_pac_mask() GENMASK_ULL(63, vabits_actual) - #define xpaclri(ptr) \ ({ \ register unsigned long __xpaclri_ptr asm("x30") = (ptr); \ diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h index b0665db86aa6..d2e0306e65d3 100644 --- a/arch/arm64/include/asm/pointer_auth.h +++ b/arch/arm64/include/asm/pointer_auth.h @@ -10,6 +10,13 @@ #include #include +/* + * The EL0/EL1 pointer bits used by a pointer authentication code. + * This is dependent on TBI0/TBI1 being enabled, or bits 63:56 would also apply. + */ +#define ptrauth_user_pac_mask() GENMASK_ULL(54, vabits_actual) +#define ptrauth_kernel_pac_mask() GENMASK_ULL(63, vabits_actual) + #define PR_PAC_ENABLED_KEYS_MASK \ (PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY) diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c index 2b65aae332ce..66cde752cd74 100644 --- a/arch/arm64/kernel/crash_core.c +++ b/arch/arm64/kernel/crash_core.c @@ -8,6 +8,7 @@ #include #include #include +#include static inline u64 get_tcr_el1_t1sz(void); -- cgit v1.2.3 From af6c0bd59f4f3ad5daad2f7b777954b1954551d5 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Thu, 2 Feb 2023 13:01:48 +0530 Subject: arm64: kgdb: Set PSTATE.SS to 1 to re-enable single-step Currently only the first attempt to single-step has any effect. After that all further stepping remains "stuck" at the same program counter value. Refer to the ARM Architecture Reference Manual (ARM DDI 0487E.a) D2.12, PSTATE.SS=1 should be set at each step before transferring the PE to the 'Active-not-pending' state. The problem here is PSTATE.SS=1 is not set since the second single-step. After the first single-step, the PE transferes to the 'Inactive' state, with PSTATE.SS=0 and MDSCR.SS=1, thus PSTATE.SS won't be set to 1 due to kernel_active_single_step()=true. Then the PE transferes to the 'Active-pending' state when ERET and returns to the debugger by step exception. Before this patch: ================== Entering kdb (current=0xffff3376039f0000, pid 1) on processor 0 due to Keyboard Entry [0]kdb> [0]kdb> [0]kdb> bp write_sysrq_trigger Instruction(i) BP #0 at 0xffffa45c13d09290 (write_sysrq_trigger) is enabled addr at ffffa45c13d09290, hardtype=0 installed=0 [0]kdb> go $ echo h > /proc/sysrq-trigger Entering kdb (current=0xffff4f7e453f8000, pid 175) on processor 1 due to Breakpoint @ 0xffffad651a309290 [1]kdb> ss Entering kdb (current=0xffff4f7e453f8000, pid 175) on processor 1 due to SS trap @ 0xffffad651a309294 [1]kdb> ss Entering kdb (current=0xffff4f7e453f8000, pid 175) on processor 1 due to SS trap @ 0xffffad651a309294 [1]kdb> After this patch: ================= Entering kdb (current=0xffff6851c39f0000, pid 1) on processor 0 due to Keyboard Entry [0]kdb> bp write_sysrq_trigger Instruction(i) BP #0 at 0xffffc02d2dd09290 (write_sysrq_trigger) is enabled addr at ffffc02d2dd09290, hardtype=0 installed=0 [0]kdb> go $ echo h > /proc/sysrq-trigger Entering kdb (current=0xffff6851c53c1840, pid 174) on processor 1 due to Breakpoint @ 0xffffc02d2dd09290 [1]kdb> ss Entering kdb (current=0xffff6851c53c1840, pid 174) on processor 1 due to SS trap @ 0xffffc02d2dd09294 [1]kdb> ss Entering kdb (current=0xffff6851c53c1840, pid 174) on processor 1 due to SS trap @ 0xffffc02d2dd09298 [1]kdb> ss Entering kdb (current=0xffff6851c53c1840, pid 174) on processor 1 due to SS trap @ 0xffffc02d2dd0929c [1]kdb> Fixes: 44679a4f142b ("arm64: KGDB: Add step debugging support") Co-developed-by: Wei Li Signed-off-by: Wei Li Signed-off-by: Sumit Garg Tested-by: Douglas Anderson Acked-by: Daniel Thompson Tested-by: Daniel Thompson Link: https://lore.kernel.org/r/20230202073148.657746-3-sumit.garg@linaro.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 1 + arch/arm64/kernel/debug-monitors.c | 5 +++++ arch/arm64/kernel/kgdb.c | 2 ++ 3 files changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7b7e05c02691..13d437bcbf58 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -104,6 +104,7 @@ void user_regs_reset_single_step(struct user_pt_regs *regs, void kernel_enable_single_step(struct pt_regs *regs); void kernel_disable_single_step(void); int kernel_active_single_step(void); +void kernel_rewind_single_step(struct pt_regs *regs); #ifdef CONFIG_HAVE_HW_BREAKPOINT int reinstall_suspended_bps(struct pt_regs *regs); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 3da09778267e..64f2ecbdfe5c 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -438,6 +438,11 @@ int kernel_active_single_step(void) } NOKPROBE_SYMBOL(kernel_active_single_step); +void kernel_rewind_single_step(struct pt_regs *regs) +{ + set_regs_spsr_ss(regs); +} + /* ptrace API */ void user_enable_single_step(struct task_struct *task) { diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index cda9c1e9864f..4e1f983df3d1 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -224,6 +224,8 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, */ if (!kernel_active_single_step()) kernel_enable_single_step(linux_regs); + else + kernel_rewind_single_step(linux_regs); err = 0; break; default: -- cgit v1.2.3 From 2ad91e44e6b0c7ef1ed151b3bb2242a2144e6085 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 12 Apr 2023 11:29:40 +0100 Subject: perf/arm-cmn: Fix port detection for CMN-700 When the "extra device ports" configuration was first added, the additional mxp_device_port_connect_info registers were added around the existing mxp_mesh_port_connect_info registers. What I missed about CMN-700 is that it shuffled them around to remove this discontinuity. As such, tweak the definitions and factor out a helper for reading these registers so we can deal with this discrepancy easily, which does at least allow nicely tidying up the callsites. With this we can then also do the nice thing and skip accesses completely rather than relying on RES0 behaviour where we know the extra registers aren't defined. Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support") Reported-by: Jing Zhang Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/71d129241d4d7923cde72a0e5b4c8d2f6084525f.1681295193.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 57 ++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 5a57c3f10d27..47d359f72957 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -57,14 +57,12 @@ #define CMN_INFO_REQ_VC_NUM GENMASK_ULL(1, 0) /* XPs also have some local topology info which has uses too */ -#define CMN_MXP__CONNECT_INFO_P0 0x0008 -#define CMN_MXP__CONNECT_INFO_P1 0x0010 -#define CMN_MXP__CONNECT_INFO_P2 0x0028 -#define CMN_MXP__CONNECT_INFO_P3 0x0030 -#define CMN_MXP__CONNECT_INFO_P4 0x0038 -#define CMN_MXP__CONNECT_INFO_P5 0x0040 +#define CMN_MXP__CONNECT_INFO(p) (0x0008 + 8 * (p)) #define CMN__CONNECT_INFO_DEVICE_TYPE GENMASK_ULL(4, 0) +#define CMN_MAX_PORTS 6 +#define CI700_CONNECT_INFO_P2_5_OFFSET 0x10 + /* PMU registers occupy the 3rd 4KB page of each node's region */ #define CMN_PMU_OFFSET 0x2000 @@ -396,6 +394,25 @@ static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, return NULL; } +static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn, + const struct arm_cmn_node *xp, int port) +{ + int offset = CMN_MXP__CONNECT_INFO(port); + + if (port >= 2) { + if (cmn->model & (CMN600 | CMN650)) + return 0; + /* + * CI-700 may have extra ports, but still has the + * mesh_port_connect_info registers in the way. + */ + if (cmn->model == CI700) + offset += CI700_CONNECT_INFO_P2_5_OFFSET; + } + + return readl_relaxed(xp->pmu_base - CMN_PMU_OFFSET + offset); +} + static struct dentry *arm_cmn_debugfs; #ifdef CONFIG_DEBUG_FS @@ -469,7 +486,7 @@ static int arm_cmn_map_show(struct seq_file *s, void *data) y = cmn->mesh_y; while (y--) { int xp_base = cmn->mesh_x * y; - u8 port[6][CMN_MAX_DIMENSION]; + u8 port[CMN_MAX_PORTS][CMN_MAX_DIMENSION]; for (x = 0; x < cmn->mesh_x; x++) seq_puts(s, "--------+"); @@ -477,14 +494,9 @@ static int arm_cmn_map_show(struct seq_file *s, void *data) seq_printf(s, "\n%d |", y); for (x = 0; x < cmn->mesh_x; x++) { struct arm_cmn_node *xp = cmn->xps + xp_base + x; - void __iomem *base = xp->pmu_base - CMN_PMU_OFFSET; - - port[0][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P0); - port[1][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P1); - port[2][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P2); - port[3][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P3); - port[4][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P4); - port[5][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P5); + + for (p = 0; p < CMN_MAX_PORTS; p++) + port[p][x] = arm_cmn_device_connect_info(cmn, xp, p); seq_printf(s, " XP #%-2d |", xp_base + x); } @@ -2083,18 +2095,9 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) * from this, since in that case we will see at least one XP * with port 2 connected, for the HN-D. */ - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P0)) - xp_ports |= BIT(0); - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P1)) - xp_ports |= BIT(1); - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P2)) - xp_ports |= BIT(2); - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P3)) - xp_ports |= BIT(3); - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P4)) - xp_ports |= BIT(4); - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P5)) - xp_ports |= BIT(5); + for (int p = 0; p < CMN_MAX_PORTS; p++) + if (arm_cmn_device_connect_info(cmn, xp, p)) + xp_ports |= BIT(p); if (cmn->multi_dtm && (xp_ports & 0xc)) arm_cmn_init_dtm(dtm++, xp, 1); -- cgit v1.2.3 From a2a83eb40fbdeee3ed9c6fcc7bb015a2dd1a3e93 Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Wed, 12 Apr 2023 10:02:58 +0530 Subject: arm64: kernel: Fix kernel warning when nokaslr is passed to commandline 'Unknown kernel command line parameters "nokaslr", will be passed to user space' message is noticed in the dmesg when nokaslr is passed to the kernel commandline on ARM64 platform. This is because nokaslr param is handled by early cpufeature detection infrastructure and the parameter is never consumed by a kernel param handler. Fix this warning by providing a dummy kernel param handler for nokaslr. Signed-off-by: Pavankumar Kondeti Link: https://lore.kernel.org/r/20230412043258.397455-1-quic_pkondeti@quicinc.com Signed-off-by: Will Deacon --- arch/arm64/kernel/idreg-override.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index d833d78a7f31..964ba6efc7a0 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -178,6 +178,13 @@ static const struct { { "nokaslr", "kaslr.disabled=1" }, }; +static int __init parse_nokaslr(char *unused) +{ + /* nokaslr param handling is done by early cpufeature code */ + return 0; +} +early_param("nokaslr", parse_nokaslr); + static int __init find_field(const char *cmdline, const struct ftr_set_desc *reg, int f, u64 *v) { -- cgit v1.2.3 From 858a56630a840aba0e291fd4958e9a8f74638d56 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 14 Oct 2022 18:06:23 +0200 Subject: ACPI: AGDI: Improve error reporting for problems during .remove() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returning an error value in a platform driver's remove callback results in a generic error message being emitted by the driver core, but otherwise it doesn't make a difference. The device goes away anyhow. So instead of triggering the generic platform error message, emit a more helpful message if a problem occurs and return 0 to suppress the generic message. This patch is a preparation for making platform remove callbacks return void. Signed-off-by: Uwe Kleine-König Reviewed-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20221014160623.467195-1-u.kleine-koenig@pengutronix.de Signed-off-by: Will Deacon --- drivers/acpi/arm64/agdi.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/arm64/agdi.c b/drivers/acpi/arm64/agdi.c index cf31abd0ed1b..f605302395c3 100644 --- a/drivers/acpi/arm64/agdi.c +++ b/drivers/acpi/arm64/agdi.c @@ -64,8 +64,11 @@ static int agdi_remove(struct platform_device *pdev) int err, i; err = sdei_event_disable(adata->sdei_event); - if (err) - return err; + if (err) { + dev_err(&pdev->dev, "Failed to disable sdei-event #%d (%pe)\n", + adata->sdei_event, ERR_PTR(err)); + return 0; + } for (i = 0; i < 3; i++) { err = sdei_event_unregister(adata->sdei_event); @@ -75,7 +78,11 @@ static int agdi_remove(struct platform_device *pdev) schedule(); } - return err; + if (err) + dev_err(&pdev->dev, "Failed to unregister sdei-event #%d (%pe)\n", + adata->sdei_event, ERR_PTR(err)); + + return 0; } static struct platform_driver agdi_driver = { -- cgit v1.2.3 From bbd329fe723d185485598bf52378a8341912a804 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 12 Apr 2023 17:26:43 +0100 Subject: arm64/sysreg: Convert HFGITR_EL2 to automatic generation Automatically generate the Hypervisor Fine-Grained Instruction Trap Register as per DDI0601 2023-03, currently we only have a definition for the register name not any of the contents. No functional change. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230306-arm64-fgt-reg-gen-v5-1-516a89cb50f6@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/tools/sysreg | 65 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e5ca9ece1606..c48b41c9b0cc 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -419,7 +419,6 @@ #define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1) #define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2) #define SYS_HSTR_EL2 sys_reg(3, 4, 1, 1, 3) -#define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) #define SYS_HACR_EL2 sys_reg(3, 4, 1, 1, 7) #define SYS_TTBR0_EL2 sys_reg(3, 4, 2, 0, 0) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 2af92b4f4fe4..77edce16f4f9 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1964,6 +1964,71 @@ Sysreg HFGWTR_EL2 3 4 1 1 5 Fields HFGxTR_EL2 EndSysreg +Sysreg HFGITR_EL2 3 4 1 1 6 +Res0 63:61 +Field 60 COSPRCTX +Field 59 nGCSEPP +Field 58 nGCSSTR_EL1 +Field 57 nGCSPUSHM_EL1 +Field 56 nBRBIALL +Field 55 nBRBINJ +Field 54 DCCVAC +Field 53 SVC_EL1 +Field 52 SVC_EL0 +Field 51 ERET +Field 50 CPPRCTX +Field 49 DVPRCTX +Field 48 CFPRCTX +Field 47 TLBIVAALE1 +Field 46 TLBIVALE1 +Field 45 TLBIVAAE1 +Field 44 TLBIASIDE1 +Field 43 TLBIVAE1 +Field 42 TLBIVMALLE1 +Field 41 TLBIRVAALE1 +Field 40 TLBIRVALE1 +Field 39 TLBIRVAAE1 +Field 38 TLBIRVAE1 +Field 37 TLBIRVAALE1IS +Field 36 TLBIRVALE1IS +Field 35 TLBIRVAAE1IS +Field 34 TLBIRVAE1IS +Field 33 TLBIVAALE1IS +Field 32 TLBIVALE1IS +Field 31 TLBIVAAE1IS +Field 30 TLBIASIDE1IS +Field 29 TLBIVAE1IS +Field 28 TLBIVMALLE1IS +Field 27 TLBIRVAALE1OS +Field 26 TLBIRVALE1OS +Field 25 TLBIRVAAE1OS +Field 24 TLBIRVAE1OS +Field 23 TLBIVAALE1OS +Field 22 TLBIVALE1OS +Field 21 TLBIVAAE1OS +Field 20 TLBIASIDE1OS +Field 19 TLBIVAE1OS +Field 18 TLBIVMALLE1OS +Field 17 ATS1E1WP +Field 16 ATS1E1RP +Field 15 ATS1E0W +Field 14 ATS1E0R +Field 13 ATS1E1W +Field 12 ATS1E1R +Field 11 DCZVA +Field 10 DCCIVAC +Field 9 DCCVADP +Field 8 DCCVAP +Field 7 DCCVAU +Field 6 DCCISW +Field 5 DCCSW +Field 4 DCISW +Field 3 DCIVAC +Field 2 ICIVAU +Field 1 ICIALLU +Field 0 ICIALLUIS +EndSysreg + Sysreg ZCR_EL2 3 4 1 2 0 Fields ZCR_ELx EndSysreg -- cgit v1.2.3 From 876e3c8efe79d7f21dadd711837ba9a495c2246a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 12 Apr 2023 18:13:29 +0100 Subject: arm64/cpufeature: Pull out helper for CPUID register definitions We use the same structure to match hwcaps and CPU features so we can use the same helper to generate the fields required. Pull the portion of the current hwcaps helper that initialises the fields out into a separate define placed earlier in the file so we can use it for cpufeatures. No functional change. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230303-arm64-cpufeature-helpers-v2-1-4c8f28a6f203@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 2e3e55139777..77862b7c8908 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -140,6 +140,13 @@ void dump_cpu_features(void) pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps); } +#define ARM64_CPUID_FIELDS(reg, field, min_value) \ + .sys_reg = SYS_##reg, \ + .field_pos = reg##_##field##_SHIFT, \ + .field_width = reg##_##field##_WIDTH, \ + .sign = reg##_##field##_SIGNED, \ + .min_field_value = reg##_##field##_##min_value, + #define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ { \ .sign = SIGNED, \ @@ -2776,12 +2783,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = { }; #define HWCAP_CPUID_MATCH(reg, field, min_value) \ - .matches = has_user_cpuid_feature, \ - .sys_reg = SYS_##reg, \ - .field_pos = reg##_##field##_SHIFT, \ - .field_width = reg##_##field##_WIDTH, \ - .sign = reg##_##field##_SIGNED, \ - .min_field_value = reg##_##field##_##min_value, + .matches = has_user_cpuid_feature, \ + ARM64_CPUID_FIELDS(reg, field, min_value) #define __HWCAP_CAP(name, cap_type, cap) \ .desc = name, \ @@ -2811,26 +2814,26 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_PTR_AUTH static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { { - HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, APA, PAuth) + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, APA, PAuth) }, { - HWCAP_CPUID_MATCH(ID_AA64ISAR2_EL1, APA3, PAuth) + ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, APA3, PAuth) }, { - HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, API, PAuth) + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, API, PAuth) }, {}, }; static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = { { - HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, GPA, IMP) + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, GPA, IMP) }, { - HWCAP_CPUID_MATCH(ID_AA64ISAR2_EL1, GPA3, IMP) + ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, GPA3, IMP) }, { - HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, GPI, IMP) + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, GPI, IMP) }, {}, }; -- cgit v1.2.3 From 21642da214a9676727cd32b8e7f2906259502f00 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 12 Apr 2023 18:13:30 +0100 Subject: arm64/cpufeature: Consistently use symbolic constants for min_field_value A number of the cpufeatures use raw numbers for the minimum field values specified rather than symbolic constants. In preparation for the use of helper macros replace all these with the appropriate constants. No change in the generated binary. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230303-arm64-cpufeature-helpers-v2-2-4c8f28a6f203@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 77862b7c8908..1002ac437e8b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2217,7 +2217,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR0_EL1_GIC_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, - .min_field_value = 1, + .min_field_value = ID_AA64PFR0_EL1_GIC_IMP, }, { .desc = "Enhanced Counter Virtualization", @@ -2228,7 +2228,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64MMFR0_EL1_ECV_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, - .min_field_value = 1, + .min_field_value = ID_AA64MMFR0_EL1_ECV_IMP, }, #ifdef CONFIG_ARM64_PAN { @@ -2240,7 +2240,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64MMFR1_EL1_PAN_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, - .min_field_value = 1, + .min_field_value = ID_AA64MMFR1_EL1_PAN_IMP, .cpu_enable = cpu_enable_pan, }, #endif /* CONFIG_ARM64_PAN */ @@ -2254,7 +2254,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64MMFR1_EL1_PAN_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, - .min_field_value = 3, + .min_field_value = ID_AA64MMFR1_EL1_PAN_PAN3, }, #endif /* CONFIG_ARM64_EPAN */ #ifdef CONFIG_ARM64_LSE_ATOMICS @@ -2267,7 +2267,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64ISAR0_EL1_ATOMIC_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, - .min_field_value = 2, + .min_field_value = ID_AA64ISAR0_EL1_ATOMIC_IMP, }, #endif /* CONFIG_ARM64_LSE_ATOMICS */ { @@ -2335,7 +2335,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_EL1_CSV3_SHIFT, .field_width = 4, - .min_field_value = 1, + .min_field_value = ID_AA64PFR0_EL1_CSV3_IMP, .matches = unmap_kernel_at_el0, .cpu_enable = kpti_install_ng_mappings, }, @@ -2355,7 +2355,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .field_pos = ID_AA64ISAR1_EL1_DPB_SHIFT, .field_width = 4, - .min_field_value = 1, + .min_field_value = ID_AA64ISAR1_EL1_DPB_IMP, }, { .desc = "Data cache clean to Point of Deep Persistence", @@ -2366,7 +2366,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_EL1_DPB_SHIFT, .field_width = 4, - .min_field_value = 2, + .min_field_value = ID_AA64ISAR1_EL1_DPB_DPB2, }, #endif #ifdef CONFIG_ARM64_SVE @@ -2437,7 +2437,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR2_EL1_FWB_SHIFT, .field_width = 4, - .min_field_value = 1, + .min_field_value = ID_AA64MMFR2_EL1_FWB_IMP, .matches = has_cpuid_feature, }, { @@ -2448,7 +2448,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR2_EL1_TTL_SHIFT, .field_width = 4, - .min_field_value = 1, + .min_field_value = ID_AA64MMFR2_EL1_TTL_IMP, .matches = has_cpuid_feature, }, { @@ -2478,7 +2478,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR1_EL1_HAFDBS_SHIFT, .field_width = 4, - .min_field_value = 2, + .min_field_value = ID_AA64MMFR1_EL1_HAFDBS_DBM, .matches = has_hw_dbm, .cpu_enable = cpu_enable_hw_dbm, }, @@ -2491,7 +2491,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_EL1_CRC32_SHIFT, .field_width = 4, - .min_field_value = 1, + .min_field_value = ID_AA64ISAR0_EL1_CRC32_IMP, }, { .desc = "Speculative Store Bypassing Safe (SSBS)", @@ -2514,7 +2514,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR2_EL1_CnP_SHIFT, .field_width = 4, - .min_field_value = 1, + .min_field_value = ID_AA64MMFR2_EL1_CnP_IMP, .cpu_enable = cpu_enable_cnp, }, #endif @@ -2527,7 +2527,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64ISAR1_EL1_SB_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, - .min_field_value = 1, + .min_field_value = ID_AA64ISAR1_EL1_SB_IMP, }, #ifdef CONFIG_ARM64_PTR_AUTH { @@ -2636,7 +2636,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_width = 4, .field_pos = ID_AA64MMFR2_EL1_E0PD_SHIFT, .matches = has_cpuid_feature, - .min_field_value = 1, + .min_field_value = ID_AA64MMFR2_EL1_E0PD_IMP, .cpu_enable = cpu_enable_e0pd, }, #endif @@ -2649,7 +2649,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64ISAR0_EL1_RNDR_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, - .min_field_value = 1, + .min_field_value = ID_AA64ISAR0_EL1_RNDR_IMP, }, #ifdef CONFIG_ARM64_BTI { @@ -2703,7 +2703,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64ISAR1_EL1_LRCPC_SHIFT, .field_width = 4, .matches = has_cpuid_feature, - .min_field_value = 1, + .min_field_value = ID_AA64ISAR1_EL1_LRCPC_IMP, }, #ifdef CONFIG_ARM64_SME { -- cgit v1.2.3 From 25d8c25025a46e7621edde2eb6d5f55c6d29ee86 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Mon, 3 Apr 2023 16:14:22 +0800 Subject: drivers/perf: hisi: Remove redundant initialized of pmu->name "pmu->name" is initialized by perf_pmu_register() function, so remove the redundant initialized in hisi_pmu_init(). Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20230403081423.62460-2-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_pmu.c | 4 +--- drivers/perf/hisilicon/hisi_uncore_pmu.h | 3 +-- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 2 +- 8 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index 4c67d57217a7..40f1bc9f9b91 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -316,7 +316,7 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev) if (!name) return -ENOMEM; - hisi_pmu_init(cpa_pmu, name, THIS_MODULE); + hisi_pmu_init(cpa_pmu, THIS_MODULE); /* Power Management should be disabled before using CPA PMU. */ hisi_cpa_pmu_disable_pm(cpa_pmu); diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 8c3ffcbfd4c0..8a3d74ddcd6d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -516,7 +516,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, ddrc_pmu->index_id); - hisi_pmu_init(ddrc_pmu, name, THIS_MODULE); + hisi_pmu_init(ddrc_pmu, THIS_MODULE); ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 806698b9eabf..5701a84edb0e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -519,7 +519,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", hha_pmu->sccl_id, hha_pmu->index_id); - hisi_pmu_init(hha_pmu, name, THIS_MODULE); + hisi_pmu_init(hha_pmu, THIS_MODULE); ret = perf_pmu_register(&hha_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 5b2c35f1658a..68596b566344 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -557,7 +557,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) */ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", l3c_pmu->sccl_id, l3c_pmu->ccl_id); - hisi_pmu_init(l3c_pmu, name, THIS_MODULE); + hisi_pmu_init(l3c_pmu, THIS_MODULE); ret = perf_pmu_register(&l3c_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index afe3419f3f6d..71b6687d6696 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -412,7 +412,7 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) return ret; } - hisi_pmu_init(pa_pmu, name, THIS_MODULE); + hisi_pmu_init(pa_pmu, THIS_MODULE); ret = perf_pmu_register(&pa_pmu->pmu, name, -1); if (ret) { dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index f1b0f5e1a28f..2823f381930d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -531,12 +531,10 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) } EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu); -void hisi_pmu_init(struct hisi_pmu *hisi_pmu, const char *name, - struct module *module) +void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) { struct pmu *pmu = &hisi_pmu->pmu; - pmu->name = name; pmu->module = module; pmu->task_ctx_nr = perf_invalid_context; pmu->event_init = hisi_uncore_pmu_event_init; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index f8e3cc6903d7..07890a8e96ca 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -121,6 +121,5 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev); -void hisi_pmu_init(struct hisi_pmu *hisi_pmu, const char *name, - struct module *module); +void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module); #endif /* __HISI_UNCORE_PMU_H__ */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 1e354433776a..6fe534a665ed 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -445,7 +445,7 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev) return ret; } - hisi_pmu_init(sllc_pmu, name, THIS_MODULE); + hisi_pmu_init(sllc_pmu, THIS_MODULE); ret = perf_pmu_register(&sllc_pmu->pmu, name, -1); if (ret) { -- cgit v1.2.3 From 257aedb72e731082ab514058e57b132f0b29d707 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Mon, 3 Apr 2023 16:14:23 +0800 Subject: drivers/perf: hisi: add NULL check for name When allocations fails that can be NULL now. If the name provided is NULL, then the initialization process of the PMU type and dev will be skipped in function perf_pmu_register(). Consequently, the PMU will not be able to register into the kernel. Moreover, in the case of unregister the PMU, the function device_del() will need to handle NULL pointers, which potentially can cause issues. So move this allocation above the cpuhp_state_add_instance() and directly return if it does fail. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20230403081423.62460-3-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 17 ++++++++++------- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 7 +++++-- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 11 +++++------ 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 8a3d74ddcd6d..ffb039d05d07 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -499,13 +499,6 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) if (ret) return ret; - ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, - &ddrc_pmu->node); - if (ret) { - dev_err(&pdev->dev, "Error %d registering hotplug;\n", ret); - return ret; - } - if (ddrc_pmu->identifier >= HISI_PMU_V2) name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_ddrc%u_%u", @@ -516,6 +509,16 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, ddrc_pmu->index_id); + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, + &ddrc_pmu->node); + if (ret) { + dev_err(&pdev->dev, "Error %d registering hotplug;\n", ret); + return ret; + } + hisi_pmu_init(ddrc_pmu, THIS_MODULE); ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1); diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 5701a84edb0e..15caf99e1eef 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -510,6 +510,11 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) if (ret) return ret; + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", + hha_pmu->sccl_id, hha_pmu->index_id); + if (!name) + return -ENOMEM; + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, &hha_pmu->node); if (ret) { @@ -517,8 +522,6 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) return ret; } - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", - hha_pmu->sccl_id, hha_pmu->index_id); hisi_pmu_init(hha_pmu, THIS_MODULE); ret = perf_pmu_register(&hha_pmu->pmu, name, -1); diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 68596b566344..794dbcd19b7a 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -544,6 +544,11 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) if (ret) return ret; + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", + l3c_pmu->sccl_id, l3c_pmu->ccl_id); + if (!name) + return -ENOMEM; + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, &l3c_pmu->node); if (ret) { @@ -551,12 +556,6 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) return ret; } - /* - * CCL_ID is used to identify the L3C in the same SCCL which was - * used _UID by mistake. - */ - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", - l3c_pmu->sccl_id, l3c_pmu->ccl_id); hisi_pmu_init(l3c_pmu, THIS_MODULE); ret = perf_pmu_register(&l3c_pmu->pmu, name, -1); -- cgit v1.2.3 From 863da0bdb17b5c9f093e042f655a84ea3f169d6b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 12 Apr 2023 18:13:31 +0100 Subject: arm64/cpufeature: Use helper macro to specify ID register for capabilites When defining which value to look for in a system register field we currently manually specify the register, field shift, width and sign and the value to look for. This opens the potential for error with for example the wrong field width or sign being specified, an enumeration value for a different similarly named field or letting something be initialised to 0. Since we now generate defines for all the ID registers we now have named constants for all of these things generated from the system register description, meaning that we can generate initialisation for all the fields used in matching from a minimal specification of register, field and match value. This is both shorter and eliminates or makes build failures several potential errors. No change in the generated binary. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230303-arm64-cpufeature-helpers-v2-3-4c8f28a6f203@kernel.org [will: Drop explicit '.sign' assignment for BTI feature] Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 245 ++++++++--------------------------------- 1 file changed, 44 insertions(+), 201 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 1002ac437e8b..1bdad599e769 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2213,22 +2213,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_GIC_CPUIF_SYSREGS, .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = has_useable_gicv3_cpuif, - .sys_reg = SYS_ID_AA64PFR0_EL1, - .field_pos = ID_AA64PFR0_EL1_GIC_SHIFT, - .field_width = 4, - .sign = FTR_UNSIGNED, - .min_field_value = ID_AA64PFR0_EL1_GIC_IMP, + ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, GIC, IMP) }, { .desc = "Enhanced Counter Virtualization", .capability = ARM64_HAS_ECV, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64MMFR0_EL1, - .field_pos = ID_AA64MMFR0_EL1_ECV_SHIFT, - .field_width = 4, - .sign = FTR_UNSIGNED, - .min_field_value = ID_AA64MMFR0_EL1_ECV_IMP, + ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, ECV, IMP) }, #ifdef CONFIG_ARM64_PAN { @@ -2236,12 +2228,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_PAN, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64MMFR1_EL1, - .field_pos = ID_AA64MMFR1_EL1_PAN_SHIFT, - .field_width = 4, - .sign = FTR_UNSIGNED, - .min_field_value = ID_AA64MMFR1_EL1_PAN_IMP, .cpu_enable = cpu_enable_pan, + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, PAN, IMP) }, #endif /* CONFIG_ARM64_PAN */ #ifdef CONFIG_ARM64_EPAN @@ -2250,11 +2238,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_EPAN, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64MMFR1_EL1, - .field_pos = ID_AA64MMFR1_EL1_PAN_SHIFT, - .field_width = 4, - .sign = FTR_UNSIGNED, - .min_field_value = ID_AA64MMFR1_EL1_PAN_PAN3, + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, PAN, PAN3) }, #endif /* CONFIG_ARM64_EPAN */ #ifdef CONFIG_ARM64_LSE_ATOMICS @@ -2263,11 +2247,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_LSE_ATOMICS, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64ISAR0_EL1, - .field_pos = ID_AA64ISAR0_EL1_ATOMIC_SHIFT, - .field_width = 4, - .sign = FTR_UNSIGNED, - .min_field_value = ID_AA64ISAR0_EL1_ATOMIC_IMP, + ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP) }, #endif /* CONFIG_ARM64_LSE_ATOMICS */ { @@ -2288,21 +2268,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_NESTED_VIRT, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_nested_virt_support, - .sys_reg = SYS_ID_AA64MMFR2_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64MMFR2_EL1_NV_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64MMFR2_EL1_NV_IMP, + ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, IMP) }, { .capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_32bit_el0, - .sys_reg = SYS_ID_AA64PFR0_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64PFR0_EL1_EL0_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64PFR0_EL1_ELx_32BIT_64BIT, + ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, EL0, AARCH32) }, #ifdef CONFIG_KVM { @@ -2310,11 +2282,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_32BIT_EL1, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64PFR0_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64PFR0_EL1_EL1_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64PFR0_EL1_ELx_32BIT_64BIT, + ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, EL1, AARCH32) }, { .desc = "Protected KVM", @@ -2327,17 +2295,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, .type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE, + .cpu_enable = kpti_install_ng_mappings, + .matches = unmap_kernel_at_el0, /* * The ID feature fields below are used to indicate that * the CPU doesn't need KPTI. See unmap_kernel_at_el0 for * more details. */ - .sys_reg = SYS_ID_AA64PFR0_EL1, - .field_pos = ID_AA64PFR0_EL1_CSV3_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64PFR0_EL1_CSV3_IMP, - .matches = unmap_kernel_at_el0, - .cpu_enable = kpti_install_ng_mappings, + ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, CSV3, IMP) }, { /* FP/SIMD is not implemented */ @@ -2352,21 +2317,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_DCPOP, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64ISAR1_EL1, - .field_pos = ID_AA64ISAR1_EL1_DPB_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64ISAR1_EL1_DPB_IMP, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, DPB, IMP) }, { .desc = "Data cache clean to Point of Deep Persistence", .capability = ARM64_HAS_DCPODP, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64ISAR1_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_EL1_DPB_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64ISAR1_EL1_DPB_DPB2, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, DPB, DPB2) }, #endif #ifdef CONFIG_ARM64_SVE @@ -2374,13 +2332,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .desc = "Scalable Vector Extension", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_SVE, - .sys_reg = SYS_ID_AA64PFR0_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64PFR0_EL1_SVE_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64PFR0_EL1_SVE_IMP, - .matches = has_cpuid_feature, .cpu_enable = sve_kernel_enable, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, SVE, IMP) }, #endif /* CONFIG_ARM64_SVE */ #ifdef CONFIG_ARM64_RAS_EXTN @@ -2389,12 +2343,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_RAS_EXTN, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64PFR0_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64PFR0_EL1_RAS_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64PFR0_EL1_RAS_IMP, .cpu_enable = cpu_clear_disr, + ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP) }, #endif /* CONFIG_ARM64_RAS_EXTN */ #ifdef CONFIG_ARM64_AMU_EXTN @@ -2408,12 +2358,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_AMU_EXTN, .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .matches = has_amu, - .sys_reg = SYS_ID_AA64PFR0_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64PFR0_EL1_AMU_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64PFR0_EL1_AMU_IMP, .cpu_enable = cpu_amu_enable, + ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, AMU, IMP) }, #endif /* CONFIG_ARM64_AMU_EXTN */ { @@ -2433,34 +2379,22 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .desc = "Stage-2 Force Write-Back", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_HAS_STAGE2_FWB, - .sys_reg = SYS_ID_AA64MMFR2_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64MMFR2_EL1_FWB_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64MMFR2_EL1_FWB_IMP, .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, FWB, IMP) }, { .desc = "ARMv8.4 Translation Table Level", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_HAS_ARMv8_4_TTL, - .sys_reg = SYS_ID_AA64MMFR2_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64MMFR2_EL1_TTL_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64MMFR2_EL1_TTL_IMP, .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, TTL, IMP) }, { .desc = "TLB range maintenance instructions", .capability = ARM64_HAS_TLB_RANGE, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64ISAR0_EL1, - .field_pos = ID_AA64ISAR0_EL1_TLB_SHIFT, - .field_width = 4, - .sign = FTR_UNSIGNED, - .min_field_value = ID_AA64ISAR0_EL1_TLB_RANGE, + ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, TLB, RANGE) }, #ifdef CONFIG_ARM64_HW_AFDBM { @@ -2474,13 +2408,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { */ .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .capability = ARM64_HW_DBM, - .sys_reg = SYS_ID_AA64MMFR1_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64MMFR1_EL1_HAFDBS_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64MMFR1_EL1_HAFDBS_DBM, .matches = has_hw_dbm, .cpu_enable = cpu_enable_hw_dbm, + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM) }, #endif { @@ -2488,21 +2418,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_CRC32, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64ISAR0_EL1, - .field_pos = ID_AA64ISAR0_EL1_CRC32_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64ISAR0_EL1_CRC32_IMP, + ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, CRC32, IMP) }, { .desc = "Speculative Store Bypassing Safe (SSBS)", .capability = ARM64_SSBS, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64PFR1_EL1, - .field_pos = ID_AA64PFR1_EL1_SSBS_SHIFT, - .field_width = 4, - .sign = FTR_UNSIGNED, - .min_field_value = ID_AA64PFR1_EL1_SSBS_IMP, + ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, SSBS, IMP) }, #ifdef CONFIG_ARM64_CNP { @@ -2510,12 +2433,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_CNP, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_useable_cnp, - .sys_reg = SYS_ID_AA64MMFR2_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64MMFR2_EL1_CnP_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64MMFR2_EL1_CnP_IMP, .cpu_enable = cpu_enable_cnp, + ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, CnP, IMP) }, #endif { @@ -2523,45 +2442,29 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_SB, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64ISAR1_EL1, - .field_pos = ID_AA64ISAR1_EL1_SB_SHIFT, - .field_width = 4, - .sign = FTR_UNSIGNED, - .min_field_value = ID_AA64ISAR1_EL1_SB_IMP, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, SB, IMP) }, #ifdef CONFIG_ARM64_PTR_AUTH { .desc = "Address authentication (architected QARMA5 algorithm)", .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, - .sys_reg = SYS_ID_AA64ISAR1_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_EL1_APA_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64ISAR1_EL1_APA_PAuth, .matches = has_address_auth_cpucap, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, APA, PAuth) }, { .desc = "Address authentication (architected QARMA3 algorithm)", .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, - .sys_reg = SYS_ID_AA64ISAR2_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR2_EL1_APA3_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64ISAR2_EL1_APA3_PAuth, .matches = has_address_auth_cpucap, + ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, APA3, PAuth) }, { .desc = "Address authentication (IMP DEF algorithm)", .capability = ARM64_HAS_ADDRESS_AUTH_IMP_DEF, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, - .sys_reg = SYS_ID_AA64ISAR1_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_EL1_API_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64ISAR1_EL1_API_PAuth, .matches = has_address_auth_cpucap, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, API, PAuth) }, { .capability = ARM64_HAS_ADDRESS_AUTH, @@ -2572,34 +2475,22 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .desc = "Generic authentication (architected QARMA5 algorithm)", .capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5, .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .sys_reg = SYS_ID_AA64ISAR1_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_EL1_GPA_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64ISAR1_EL1_GPA_IMP, .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, GPA, IMP) }, { .desc = "Generic authentication (architected QARMA3 algorithm)", .capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3, .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .sys_reg = SYS_ID_AA64ISAR2_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR2_EL1_GPA3_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64ISAR2_EL1_GPA3_IMP, .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, GPA3, IMP) }, { .desc = "Generic authentication (IMP DEF algorithm)", .capability = ARM64_HAS_GENERIC_AUTH_IMP_DEF, .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .sys_reg = SYS_ID_AA64ISAR1_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_EL1_GPI_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64ISAR1_EL1_GPI_IMP, .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, GPI, IMP) }, { .capability = ARM64_HAS_GENERIC_AUTH, @@ -2631,13 +2522,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .desc = "E0PD", .capability = ARM64_HAS_E0PD, .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .sys_reg = SYS_ID_AA64MMFR2_EL1, - .sign = FTR_UNSIGNED, - .field_width = 4, - .field_pos = ID_AA64MMFR2_EL1_E0PD_SHIFT, - .matches = has_cpuid_feature, - .min_field_value = ID_AA64MMFR2_EL1_E0PD_IMP, .cpu_enable = cpu_enable_e0pd, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, E0PD, IMP) }, #endif { @@ -2645,11 +2532,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_RNG, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64ISAR0_EL1, - .field_pos = ID_AA64ISAR0_EL1_RNDR_SHIFT, - .field_width = 4, - .sign = FTR_UNSIGNED, - .min_field_value = ID_AA64ISAR0_EL1_RNDR_IMP, + ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, RNDR, IMP) }, #ifdef CONFIG_ARM64_BTI { @@ -2662,11 +2545,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #endif .matches = has_cpuid_feature, .cpu_enable = bti_enable, - .sys_reg = SYS_ID_AA64PFR1_EL1, - .field_pos = ID_AA64PFR1_EL1_BT_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64PFR1_EL1_BT_IMP, - .sign = FTR_UNSIGNED, + ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, BT, IMP) }, #endif #ifdef CONFIG_ARM64_MTE @@ -2675,109 +2554,73 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_MTE, .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64PFR1_EL1, - .field_pos = ID_AA64PFR1_EL1_MTE_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64PFR1_EL1_MTE_MTE2, - .sign = FTR_UNSIGNED, .cpu_enable = cpu_enable_mte, + ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, MTE, MTE2) }, { .desc = "Asymmetric MTE Tag Check Fault", .capability = ARM64_MTE_ASYMM, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64PFR1_EL1, - .field_pos = ID_AA64PFR1_EL1_MTE_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64PFR1_EL1_MTE_MTE3, - .sign = FTR_UNSIGNED, + ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, MTE, MTE3) }, #endif /* CONFIG_ARM64_MTE */ { .desc = "RCpc load-acquire (LDAPR)", .capability = ARM64_HAS_LDAPR, .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .sys_reg = SYS_ID_AA64ISAR1_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_EL1_LRCPC_SHIFT, - .field_width = 4, .matches = has_cpuid_feature, - .min_field_value = ID_AA64ISAR1_EL1_LRCPC_IMP, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LRCPC, IMP) }, #ifdef CONFIG_ARM64_SME { .desc = "Scalable Matrix Extension", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_SME, - .sys_reg = SYS_ID_AA64PFR1_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64PFR1_EL1_SME_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64PFR1_EL1_SME_IMP, .matches = has_cpuid_feature, .cpu_enable = sme_kernel_enable, + ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, SME, IMP) }, /* FA64 should be sorted after the base SME capability */ { .desc = "FA64", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_SME_FA64, - .sys_reg = SYS_ID_AA64SMFR0_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64SMFR0_EL1_FA64_SHIFT, - .field_width = 1, - .min_field_value = ID_AA64SMFR0_EL1_FA64_IMP, .matches = has_cpuid_feature, .cpu_enable = fa64_kernel_enable, + ARM64_CPUID_FIELDS(ID_AA64SMFR0_EL1, FA64, IMP) }, { .desc = "SME2", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_SME2, - .sys_reg = SYS_ID_AA64PFR1_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64PFR1_EL1_SME_SHIFT, - .field_width = ID_AA64PFR1_EL1_SME_WIDTH, - .min_field_value = ID_AA64PFR1_EL1_SME_SME2, .matches = has_cpuid_feature, .cpu_enable = sme2_kernel_enable, + ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, SME, SME2) }, #endif /* CONFIG_ARM64_SME */ { .desc = "WFx with timeout", .capability = ARM64_HAS_WFXT, .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .sys_reg = SYS_ID_AA64ISAR2_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR2_EL1_WFxT_SHIFT, - .field_width = 4, .matches = has_cpuid_feature, - .min_field_value = ID_AA64ISAR2_EL1_WFxT_IMP, + ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, WFxT, IMP) }, { .desc = "Trap EL0 IMPLEMENTATION DEFINED functionality", .capability = ARM64_HAS_TIDCP1, .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .sys_reg = SYS_ID_AA64MMFR1_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64MMFR1_EL1_TIDCP1_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64MMFR1_EL1_TIDCP1_IMP, .matches = has_cpuid_feature, .cpu_enable = cpu_trap_el0_impdef, + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, TIDCP1, IMP) }, { .desc = "Data independent timing control (DIT)", .capability = ARM64_HAS_DIT, .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .sys_reg = SYS_ID_AA64PFR0_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64PFR0_EL1_DIT_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64PFR0_EL1_DIT_IMP, .matches = has_cpuid_feature, .cpu_enable = cpu_enable_dit, + ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, DIT, IMP) }, {}, }; -- cgit v1.2.3 From 460e70e2dc9af3128f69e533daf349635c421b0a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 19 Apr 2023 10:58:43 +0300 Subject: arm64: delete dead code in this_cpu_set_vectors() The "slot" variable is an enum, and in this context it is an unsigned int. So the type means it can never be negative and also we never pass invalid data to this function. If something did pass invalid data then this check would be insufficient protection. Signed-off-by: Dan Carpenter Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/73859c9e-dea0-4764-bf01-7ae694fa2e37@kili.mountain Signed-off-by: Will Deacon --- arch/arm64/kernel/proton-pack.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index fca9cc6f5581..05f40c4e18fd 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -966,9 +966,6 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) { const char *v = arm64_get_bp_hardening_vector(slot); - if (slot < 0) - return; - __this_cpu_write(this_cpu_vector, v); /* -- cgit v1.2.3 From b7b4ce84c830ae3ce6429fbbd70a62a4ae1ac78d Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 18 Apr 2023 13:54:00 +0200 Subject: arm64: kexec: include reboot.h Include reboot.h in machine_kexec.c for declaration of machine_crash_shutdown. gcc-12 with W=1 reports: arch/arm64/kernel/machine_kexec.c:257:6: warning: no previous prototype for 'machine_crash_shutdown' [-Wmissing-prototypes] 257 | void machine_crash_shutdown(struct pt_regs *regs) No functional changes intended. Compile tested only. Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20230418-arm64-kexec-include-reboot-v1-1-8453fd4fb3fb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index d35cb0568f84..3604322b2b3b 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 87727ba2bb05cc3cb4233231faa7ab4c7eeb6c73 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 20 Apr 2023 13:33:56 +0100 Subject: KVM: arm64: Ensure CPU PMU probes before pKVM host de-privilege Although pKVM supports CPU PMU emulation for non-protected guests since 722625c6f4c5 ("KVM: arm64: Reenable pmu in Protected Mode"), this relies on the PMU driver probing before the host has de-privileged so that the 'kvm_arm_pmu_available' static key can still be enabled by patching the hypervisor text. As it happens, both of these events hang off device_initcall() but the PMU consistently won the race until 7755cec63ade ("arm64: perf: Move PMUv3 driver to drivers/perf"). Since then, the host will fail to boot when pKVM is enabled: | hw perfevents: enabled with armv8_pmuv3_0 PMU driver, 7 counters available | kvm [1]: nVHE hyp BUG at: [] __kvm_nvhe_handle_host_mem_abort+0x270/0x284! | kvm [1]: Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE | kvm [1]: Hyp Offset: 0xfffea41fbdf70000 | Kernel panic - not syncing: HYP panic: | PS:a00003c9 PC:0000dbe04b0c66e0 ESR:00000000f2000800 | FAR:fffffbfffddfcf00 HPFAR:00000000010b0bf0 PAR:0000000000000000 | VCPU:0000000000000000 | CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.3.0-rc7-00083-g0bce6746d154 #1 | Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 | Call trace: | dump_backtrace+0xec/0x108 | show_stack+0x18/0x2c | dump_stack_lvl+0x50/0x68 | dump_stack+0x18/0x24 | panic+0x13c/0x33c | nvhe_hyp_panic_handler+0x10c/0x190 | aarch64_insn_patch_text_nosync+0x64/0xc8 | arch_jump_label_transform+0x4c/0x5c | __jump_label_update+0x84/0xfc | jump_label_update+0x100/0x134 | static_key_enable_cpuslocked+0x68/0xac | static_key_enable+0x20/0x34 | kvm_host_pmu_init+0x88/0xa4 | armpmu_register+0xf0/0xf4 | arm_pmu_acpi_probe+0x2ec/0x368 | armv8_pmu_driver_init+0x38/0x44 | do_one_initcall+0xcc/0x240 Fix the race properly by deferring the de-privilege step to device_initcall_sync(). This will also be needed in future when probing IOMMU devices and allows us to separate the pKVM de-privilege logic from the core hypervisor initialisation path. Cc: Oliver Upton Cc: Fuad Tabba Cc: Marc Zyngier Fixes: 7755cec63ade ("arm64: perf: Move PMUv3 driver to drivers/perf") Tested-by: Fuad Tabba Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20230420123356.2708-1-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kvm/arm.c | 45 --------------------------------------------- arch/arm64/kvm/pkvm.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 45 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3bd732eaf087..890f730bc3ab 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -46,7 +45,6 @@ #include static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; -DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); @@ -2105,41 +2103,6 @@ out_err: return err; } -static void __init _kvm_host_prot_finalize(void *arg) -{ - int *err = arg; - - if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize))) - WRITE_ONCE(*err, -EINVAL); -} - -static int __init pkvm_drop_host_privileges(void) -{ - int ret = 0; - - /* - * Flip the static key upfront as that may no longer be possible - * once the host stage 2 is installed. - */ - static_branch_enable(&kvm_protected_mode_initialized); - on_each_cpu(_kvm_host_prot_finalize, &ret, 1); - return ret; -} - -static int __init finalize_hyp_mode(void) -{ - if (!is_protected_kvm_enabled()) - return 0; - - /* - * Exclude HYP sections from kmemleak so that they don't get peeked - * at, which would end badly once inaccessible. - */ - kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); - kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size); - return pkvm_drop_host_privileges(); -} - struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) { struct kvm_vcpu *vcpu; @@ -2257,14 +2220,6 @@ static __init int kvm_arm_init(void) if (err) goto out_hyp; - if (!in_hyp_mode) { - err = finalize_hyp_mode(); - if (err) { - kvm_err("Failed to finalize Hyp protection\n"); - goto out_subs; - } - } - if (is_protected_kvm_enabled()) { kvm_info("Protected nVHE mode initialized successfully\n"); } else if (in_hyp_mode) { diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index cf56958b1492..6e9ece1ebbe7 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -4,6 +4,8 @@ * Author: Quentin Perret */ +#include +#include #include #include #include @@ -13,6 +15,8 @@ #include "hyp_constants.h" +DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); + static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory); static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr); @@ -213,3 +217,46 @@ int pkvm_init_host_vm(struct kvm *host_kvm) mutex_init(&host_kvm->lock); return 0; } + +static void __init _kvm_host_prot_finalize(void *arg) +{ + int *err = arg; + + if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize))) + WRITE_ONCE(*err, -EINVAL); +} + +static int __init pkvm_drop_host_privileges(void) +{ + int ret = 0; + + /* + * Flip the static key upfront as that may no longer be possible + * once the host stage 2 is installed. + */ + static_branch_enable(&kvm_protected_mode_initialized); + on_each_cpu(_kvm_host_prot_finalize, &ret, 1); + return ret; +} + +static int __init finalize_pkvm(void) +{ + int ret; + + if (!is_protected_kvm_enabled()) + return 0; + + /* + * Exclude HYP sections from kmemleak so that they don't get peeked + * at, which would end badly once inaccessible. + */ + kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); + kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size); + + ret = pkvm_drop_host_privileges(); + if (ret) + pr_err("Failed to finalize Hyp protection: %d\n", ret); + + return ret; +} +device_initcall_sync(finalize_pkvm); -- cgit v1.2.3