diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/fork.c | 96 | ||||
-rw-r--r-- | kernel/sched/core.c | 1 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 5 | ||||
-rw-r--r-- | kernel/trace/trace_functions_graph.c | 67 |
4 files changed, 141 insertions, 28 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index beb31725f7e2..0c240fd5beba 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -158,19 +158,39 @@ void __weak arch_release_thread_stack(unsigned long *stack) * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a * kmemcache based allocator. */ -# if THREAD_SIZE >= PAGE_SIZE -static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, - int node) +# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) +static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) { +#ifdef CONFIG_VMAP_STACK + void *stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE, + VMALLOC_START, VMALLOC_END, + THREADINFO_GFP | __GFP_HIGHMEM, + PAGE_KERNEL, + 0, node, + __builtin_return_address(0)); + + /* + * We can't call find_vm_area() in interrupt context, and + * free_thread_stack() can be called in interrupt context, + * so cache the vm_struct. + */ + if (stack) + tsk->stack_vm_area = find_vm_area(stack); + return stack; +#else struct page *page = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); return page ? page_address(page) : NULL; +#endif } -static inline void free_thread_stack(unsigned long *stack) +static inline void free_thread_stack(struct task_struct *tsk) { - __free_pages(virt_to_page(stack), THREAD_SIZE_ORDER); + if (task_stack_vm_area(tsk)) + vfree(tsk->stack); + else + __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER); } # else static struct kmem_cache *thread_stack_cache; @@ -181,9 +201,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); } -static void free_thread_stack(unsigned long *stack) +static void free_thread_stack(struct task_struct *tsk) { - kmem_cache_free(thread_stack_cache, stack); + kmem_cache_free(thread_stack_cache, tsk->stack); } void thread_stack_cache_init(void) @@ -213,24 +233,47 @@ struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; -static void account_kernel_stack(unsigned long *stack, int account) +static void account_kernel_stack(struct task_struct *tsk, int account) { - /* All stack pages are in the same zone and belong to the same memcg. */ - struct page *first_page = virt_to_page(stack); + void *stack = task_stack_page(tsk); + struct vm_struct *vm = task_stack_vm_area(tsk); + + BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); + + if (vm) { + int i; + + BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); + + for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { + mod_zone_page_state(page_zone(vm->pages[i]), + NR_KERNEL_STACK_KB, + PAGE_SIZE / 1024 * account); + } - mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, - THREAD_SIZE / 1024 * account); + /* All stack pages belong to the same memcg. */ + memcg_kmem_update_page_stat(vm->pages[0], MEMCG_KERNEL_STACK_KB, + account * (THREAD_SIZE / 1024)); + } else { + /* + * All stack pages are in the same zone and belong to the + * same memcg. + */ + struct page *first_page = virt_to_page(stack); + + mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, + THREAD_SIZE / 1024 * account); - memcg_kmem_update_page_stat( - first_page, MEMCG_KERNEL_STACK_KB, - account * (THREAD_SIZE / 1024)); + memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB, + account * (THREAD_SIZE / 1024)); + } } void free_task(struct task_struct *tsk) { - account_kernel_stack(tsk->stack, -1); + account_kernel_stack(tsk, -1); arch_release_thread_stack(tsk->stack); - free_thread_stack(tsk->stack); + free_thread_stack(tsk); rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); put_seccomp_filter(tsk); @@ -342,6 +385,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) { struct task_struct *tsk; unsigned long *stack; + struct vm_struct *stack_vm_area; int err; if (node == NUMA_NO_NODE) @@ -354,11 +398,23 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (!stack) goto free_tsk; + stack_vm_area = task_stack_vm_area(tsk); + err = arch_dup_task_struct(tsk, orig); + + /* + * arch_dup_task_struct() clobbers the stack-related fields. Make + * sure they're properly initialized before using any stack-related + * functions again. + */ + tsk->stack = stack; +#ifdef CONFIG_VMAP_STACK + tsk->stack_vm_area = stack_vm_area; +#endif + if (err) goto free_stack; - tsk->stack = stack; #ifdef CONFIG_SECCOMP /* * We must handle setting up seccomp filters once we're under @@ -390,14 +446,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->task_frag.page = NULL; tsk->wake_q.next = NULL; - account_kernel_stack(stack, 1); + account_kernel_stack(tsk, 1); kcov_task_init(tsk); return tsk; free_stack: - free_thread_stack(stack); + free_thread_stack(tsk); free_tsk: free_task_struct(tsk); return NULL; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 44817c640e99..0b6238f18da2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3403,7 +3403,6 @@ static void __sched notrace __schedule(bool preempt) balance_callback(rq); } -STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */ static inline void sched_submit_work(struct task_struct *tsk) { diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index f4b86e8ca1e7..ba3326785ca4 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -24,11 +24,6 @@ config HAVE_FUNCTION_GRAPH_TRACER help See Documentation/trace/ftrace-design.txt -config HAVE_FUNCTION_GRAPH_FP_TEST - bool - help - See Documentation/trace/ftrace-design.txt - config HAVE_DYNAMIC_FTRACE bool help diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 7363ccf79512..0cbe38a844fa 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -119,7 +119,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration, /* Add a function return address to the trace stack on thread info.*/ int ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, - unsigned long frame_pointer) + unsigned long frame_pointer, unsigned long *retp) { unsigned long long calltime; int index; @@ -171,7 +171,12 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, current->ret_stack[index].func = func; current->ret_stack[index].calltime = calltime; current->ret_stack[index].subtime = 0; +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST current->ret_stack[index].fp = frame_pointer; +#endif +#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR + current->ret_stack[index].retp = retp; +#endif *depth = current->curr_ret_stack; return 0; @@ -204,7 +209,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, return; } -#if defined(CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST) && !defined(CC_USING_FENTRY) +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST /* * The arch may choose to record the frame pointer used * and check it here to make sure that it is what we expect it @@ -279,6 +284,64 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) return ret; } +/** + * ftrace_graph_ret_addr - convert a potentially modified stack return address + * to its original value + * + * This function can be called by stack unwinding code to convert a found stack + * return address ('ret') to its original value, in case the function graph + * tracer has modified it to be 'return_to_handler'. If the address hasn't + * been modified, the unchanged value of 'ret' is returned. + * + * 'idx' is a state variable which should be initialized by the caller to zero + * before the first call. + * + * 'retp' is a pointer to the return address on the stack. It's ignored if + * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined. + */ +#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR +unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, + unsigned long ret, unsigned long *retp) +{ + int index = task->curr_ret_stack; + int i; + + if (ret != (unsigned long)return_to_handler) + return ret; + + if (index < -1) + index += FTRACE_NOTRACE_DEPTH; + + if (index < 0) + return ret; + + for (i = 0; i <= index; i++) + if (task->ret_stack[i].retp == retp) + return task->ret_stack[i].ret; + + return ret; +} +#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */ +unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, + unsigned long ret, unsigned long *retp) +{ + int task_idx; + + if (ret != (unsigned long)return_to_handler) + return ret; + + task_idx = task->curr_ret_stack; + + if (!task->ret_stack || task_idx < *idx) + return ret; + + task_idx -= *idx; + (*idx)++; + + return task->ret_stack[task_idx].ret; +} +#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */ + int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, unsigned long flags, |