diff options
-rw-r--r-- | arch/x86/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack.c | 351 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack.h | 39 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack_32.c | 307 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack_64.c | 289 | ||||
-rw-r--r-- | arch/x86/kernel/entry_32.S | 4 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 4 | ||||
-rw-r--r-- | arch/x86/kernel/ftrace.c | 54 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 11 | ||||
-rw-r--r-- | include/linux/ftrace.h | 4 | ||||
-rw-r--r-- | include/linux/ring_buffer.h | 5 | ||||
-rw-r--r-- | kernel/fork.c | 9 | ||||
-rw-r--r-- | kernel/lockdep.c | 1 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 1 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 15 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 311 | ||||
-rw-r--r-- | kernel/trace/trace.c | 16 | ||||
-rw-r--r-- | kernel/trace/trace.h | 2 | ||||
-rw-r--r-- | kernel/trace/trace_branch.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_functions_graph.c | 168 | ||||
-rw-r--r-- | kernel/trace/trace_stack.c | 13 |
21 files changed, 884 insertions, 726 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d274425fb076..a3049da61985 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -31,7 +31,7 @@ CFLAGS_tsc.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o -obj-y += time_$(BITS).o ioport.o ldt.o +obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c new file mode 100644 index 000000000000..6b1f6f6f8661 --- /dev/null +++ b/arch/x86/kernel/dumpstack.c @@ -0,0 +1,351 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs + */ +#include <linux/kallsyms.h> +#include <linux/kprobes.h> +#include <linux/uaccess.h> +#include <linux/utsname.h> +#include <linux/hardirq.h> +#include <linux/kdebug.h> +#include <linux/module.h> +#include <linux/ptrace.h> +#include <linux/kexec.h> +#include <linux/bug.h> +#include <linux/nmi.h> +#include <linux/sysfs.h> + +#include <asm/stacktrace.h> + +#include "dumpstack.h" + +int panic_on_unrecovered_nmi; +unsigned int code_bytes = 64; +int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; +static int die_counter; + +void printk_address(unsigned long address, int reliable) +{ + printk(" [<%p>] %s%pS\n", (void *) address, + reliable ? "" : "? 
", (void *) address); +} + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static void +print_ftrace_graph_addr(unsigned long addr, void *data, + const struct stacktrace_ops *ops, + struct thread_info *tinfo, int *graph) +{ + struct task_struct *task = tinfo->task; + unsigned long ret_addr; + int index = task->curr_ret_stack; + + if (addr != (unsigned long)return_to_handler) + return; + + if (!task->ret_stack || index < *graph) + return; + + index -= *graph; + ret_addr = task->ret_stack[index].ret; + + ops->address(data, ret_addr, 1); + + (*graph)++; +} +#else +static inline void +print_ftrace_graph_addr(unsigned long addr, void *data, + const struct stacktrace_ops *ops, + struct thread_info *tinfo, int *graph) +{ } +#endif + +/* + * x86-64 can have up to three kernel stacks: + * process stack + * interrupt stack + * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack + */ + +static inline int valid_stack_ptr(struct thread_info *tinfo, + void *p, unsigned int size, void *end) +{ + void *t = tinfo; + if (end) { + if (p < end && p >= (end-THREAD_SIZE)) + return 1; + else + return 0; + } + return p > t && p < t + THREAD_SIZE - size; +} + +unsigned long +print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end, int *graph) +{ + struct stack_frame *frame = (struct stack_frame *)bp; + + while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { + unsigned long addr; + + addr = *stack; + if (__kernel_text_address(addr)) { + if ((unsigned long) stack == bp + sizeof(long)) { + ops->address(data, addr, 1); + frame = frame->next_frame; + bp = (unsigned long) frame; + } else { + ops->address(data, addr, bp == 0); + } + print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + } + stack++; + } + return bp; +} + + +static void +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) +{ + printk(data); + print_symbol(msg, symbol); + printk("\n"); 
+} + +static void print_trace_warning(void *data, char *msg) +{ + printk("%s%s\n", (char *)data, msg); +} + +static int print_trace_stack(void *data, char *name) +{ + printk("%s <%s> ", (char *)data, name); + return 0; +} + +/* + * Print one address/symbol entries per line. + */ +static void print_trace_address(void *data, unsigned long addr, int reliable) +{ + touch_nmi_watchdog(); + printk(data); + printk_address(addr, reliable); +} + +static const struct stacktrace_ops print_trace_ops = { + .warning = print_trace_warning, + .warning_symbol = print_trace_warning_symbol, + .stack = print_trace_stack, + .address = print_trace_address, +}; + +void +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, char *log_lvl) +{ + printk("%sCall Trace:\n", log_lvl); + dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); +} + +void show_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp) +{ + show_trace_log_lvl(task, regs, stack, bp, ""); +} + +void show_stack(struct task_struct *task, unsigned long *sp) +{ + show_stack_log_lvl(task, NULL, sp, 0, ""); +} + +/* + * The architecture-independent dump_stack generator + */ +void dump_stack(void) +{ + unsigned long bp = 0; + unsigned long stack; + +#ifdef CONFIG_FRAME_POINTER + if (!bp) + get_bp(bp); +#endif + + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + show_trace(NULL, NULL, &stack, bp); +} +EXPORT_SYMBOL(dump_stack); + +static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; + +unsigned __kprobes long oops_begin(void) +{ + int cpu; + unsigned long flags; + + oops_enter(); + + /* racy, but better than risking deadlock. 
*/ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!__raw_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. should stop eventually */; + else + __raw_spin_lock(&die_lock); + } + die_nest_count++; + die_owner = cpu; + console_verbose(); + bust_spinlocks(1); + return flags; +} + +void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) +{ + if (regs && kexec_should_crash(current)) + crash_kexec(regs); + + bust_spinlocks(0); + die_owner = -1; + add_taint(TAINT_DIE); + die_nest_count--; + if (!die_nest_count) + /* Nest count reaches zero, release the lock. */ + __raw_spin_unlock(&die_lock); + raw_local_irq_restore(flags); + oops_exit(); + + if (!signr) + return; + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception"); + do_exit(signr); +} + +int __kprobes __die(const char *str, struct pt_regs *regs, long err) +{ +#ifdef CONFIG_X86_32 + unsigned short ss; + unsigned long sp; +#endif + printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP "); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC"); +#endif + printk("\n"); + sysfs_printk_last_file(); + if (notify_die(DIE_OOPS, str, regs, err, + current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) + return 1; + + show_registers(regs); +#ifdef CONFIG_X86_32 + sp = (unsigned long) (&regs->sp); + savesegment(ss, ss); + if (user_mode(regs)) { + sp = regs->sp; + ss = regs->ss & 0xffff; + } + printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); + print_symbol("%s", regs->ip); + printk(" SS:ESP %04x:%08lx\n", ss, sp); +#else + /* Executive summary in case the oops scrolled away */ + printk(KERN_ALERT "RIP "); + printk_address(regs->ip, 1); + printk(" RSP <%016lx>\n", regs->sp); +#endif + return 0; +} + +/* + * This is gone through when something in the kernel has done something bad + * and is about to be terminated: + 
*/ +void die(const char *str, struct pt_regs *regs, long err) +{ + unsigned long flags = oops_begin(); + int sig = SIGSEGV; + + if (!user_mode_vm(regs)) + report_bug(regs->ip, regs); + + if (__die(str, regs, err)) + sig = 0; + oops_end(flags, regs, sig); +} + +void notrace __kprobes +die_nmi(char *str, struct pt_regs *regs, int do_panic) +{ + unsigned long flags; + + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) + return; + + /* + * We are in trouble anyway, lets at least try + * to get a message out. + */ + flags = oops_begin(); + printk(KERN_EMERG "%s", str); + printk(" on CPU%d, ip %08lx, registers:\n", + smp_processor_id(), regs->ip); + show_registers(regs); + oops_end(flags, regs, 0); + if (do_panic || panic_on_oops) + panic("Non maskable interrupt"); + nmi_exit(); + local_irq_enable(); + do_exit(SIGBUS); +} + +static int __init oops_setup(char *s) +{ + if (!s) + return -EINVAL; + if (!strcmp(s, "panic")) + panic_on_oops = 1; + return 0; +} +early_param("oops", oops_setup); + +static int __init kstack_setup(char *s) +{ + if (!s) + return -EINVAL; + kstack_depth_to_print = simple_strtoul(s, NULL, 0); + return 0; +} +early_param("kstack", kstack_setup); + +static int __init code_bytes_setup(char *s) +{ + code_bytes = simple_strtoul(s, NULL, 0); + if (code_bytes > 8192) + code_bytes = 8192; + + return 1; +} +__setup("code_bytes=", code_bytes_setup); diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h new file mode 100644 index 000000000000..da87590b8698 --- /dev/null +++ b/arch/x86/kernel/dumpstack.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs + */ + +#ifndef DUMPSTACK_H +#define DUMPSTACK_H + +#ifdef CONFIG_X86_32 +#define STACKSLOTS_PER_LINE 8 +#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) +#else +#define STACKSLOTS_PER_LINE 4 +#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) +#endif + +extern unsigned long 
+print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end, int *graph); + +extern void +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, char *log_lvl); + +extern void +show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *sp, unsigned long bp, char *log_lvl); + +extern unsigned int code_bytes; +extern int kstack_depth_to_print; + +/* The form of the top of the frame on the stack */ +struct stack_frame { + struct stack_frame *next_frame; + unsigned long return_address; +}; +#endif diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index b3614752197b..d593cd1f58dc 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -17,69 +17,14 @@ #include <asm/stacktrace.h> -#define STACKSLOTS_PER_LINE 8 -#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) - -int panic_on_unrecovered_nmi; -int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; -static unsigned int code_bytes = 64; -static int die_counter; - -void printk_address(unsigned long address, int reliable) -{ - printk(" [<%p>] %s%pS\n", (void *) address, - reliable ? "" : "? 
", (void *) address); -} - -static inline int valid_stack_ptr(struct thread_info *tinfo, - void *p, unsigned int size, void *end) -{ - void *t = tinfo; - if (end) { - if (p < end && p >= (end-THREAD_SIZE)) - return 1; - else - return 0; - } - return p > t && p < t + THREAD_SIZE - size; -} - -/* The form of the top of the frame on the stack */ -struct stack_frame { - struct stack_frame *next_frame; - unsigned long return_address; -}; - -static inline unsigned long -print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end) -{ - struct stack_frame *frame = (struct stack_frame *)bp; - - while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { - unsigned long addr; - - addr = *stack; - if (__kernel_text_address(addr)) { - if ((unsigned long) stack == bp + sizeof(long)) { - ops->address(data, addr, 1); - frame = frame->next_frame; - bp = (unsigned long) frame; - } else { - ops->address(data, addr, bp == 0); - } - } - stack++; - } - return bp; -} +#include "dumpstack.h" void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) { + int graph = 0; + if (!task) task = current; @@ -107,7 +52,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = print_context_stack(context, stack, bp, ops, data, NULL); + bp = print_context_stack(context, stack, bp, ops, + data, NULL, &graph); stack = (unsigned long *)context->previous_esp; if (!stack) @@ -119,57 +65,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, } EXPORT_SYMBOL(dump_trace); -static void -print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - printk(data); - print_symbol(msg, symbol); - printk("\n"); -} - -static void print_trace_warning(void *data, char *msg) -{ - printk("%s%s\n", (char 
*)data, msg); -} - -static int print_trace_stack(void *data, char *name) -{ - printk("%s <%s> ", (char *)data, name); - return 0; -} - -/* - * Print one address/symbol entries per line. - */ -static void print_trace_address(void *data, unsigned long addr, int reliable) -{ - touch_nmi_watchdog(); - printk(data); - printk_address(addr, reliable); -} - -static const struct stacktrace_ops print_trace_ops = { - .warning = print_trace_warning, - .warning_symbol = print_trace_warning_symbol, - .stack = print_trace_stack, - .address = print_trace_address, -}; - -static void -show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl) -{ - printk("%sCall Trace:\n", log_lvl); - dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); -} - -void show_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp) -{ - show_trace_log_lvl(task, regs, stack, bp, ""); -} - -static void +void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *sp, unsigned long bp, char *log_lvl) { @@ -196,33 +92,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, show_trace_log_lvl(task, regs, sp, bp, log_lvl); } -void show_stack(struct task_struct *task, unsigned long *sp) -{ - show_stack_log_lvl(task, NULL, sp, 0, ""); -} - -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - unsigned long bp = 0; - unsigned long stack; - -#ifdef CONFIG_FRAME_POINTER - if (!bp) - get_bp(bp); -#endif - - printk("Pid: %d, comm: %.20s %s %s %.*s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - show_trace(NULL, NULL, &stack, bp); -} - -EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) { @@ -283,167 +152,3 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } -static raw_spinlock_t die_lock = 
__RAW_SPIN_LOCK_UNLOCKED; -static int die_owner = -1; -static unsigned int die_nest_count; - -unsigned __kprobes long oops_begin(void) -{ - unsigned long flags; - - oops_enter(); - - if (die_owner != raw_smp_processor_id()) { - console_verbose(); - raw_local_irq_save(flags); - __raw_spin_lock(&die_lock); - die_owner = smp_processor_id(); - die_nest_count = 0; - bust_spinlocks(1); - } else { - raw_local_irq_save(flags); - } - die_nest_count++; - return flags; -} - -void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) -{ - bust_spinlocks(0); - die_owner = -1; - add_taint(TAINT_DIE); - __raw_spin_unlock(&die_lock); - raw_local_irq_restore(flags); - - if (!regs) - return; - - if (kexec_should_crash(current)) - crash_kexec(regs); - if (in_interrupt()) - panic("Fatal exception in interrupt"); - if (panic_on_oops) - panic("Fatal exception"); - oops_exit(); - do_exit(signr); -} - -int __kprobes __die(const char *str, struct pt_regs *regs, long err) -{ - unsigned short ss; - unsigned long sp; - - printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP "); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); -#endif - printk("\n"); - sysfs_printk_last_file(); - if (notify_die(DIE_OOPS, str, regs, err, - current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) - return 1; - - show_registers(regs); - /* Executive summary in case the oops scrolled away */ - sp = (unsigned long) (&regs->sp); - savesegment(ss, ss); - if (user_mode(regs)) { - sp = regs->sp; - ss = regs->ss & 0xffff; - } - printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); - print_symbol("%s", regs->ip); - printk(" SS:ESP %04x:%08lx\n", ss, sp); - return 0; -} - -/* - * This is gone through when something in the kernel has done something bad - * and is about to be terminated: - */ -void die(const char *str, struct pt_regs *regs, long err) -{ - unsigned long flags = oops_begin(); - - if 
(die_nest_count < 3) { - report_bug(regs->ip, regs); - - if (__die(str, regs, err)) - regs = NULL; - } else { - printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); - } - - oops_end(flags, regs, SIGSEGV); -} - -static DEFINE_SPINLOCK(nmi_print_lock); - -void notrace __kprobes -die_nmi(char *str, struct pt_regs *regs, int do_panic) -{ - if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - - spin_lock(&nmi_print_lock); - /* - * We are in trouble anyway, lets at least try - * to get a message out: - */ - bust_spinlocks(1); - printk(KERN_EMERG "%s", str); - printk(" on CPU%d, ip %08lx, registers:\n", - smp_processor_id(), regs->ip); - show_registers(regs); - if (do_panic) - panic("Non maskable interrupt"); - console_silent(); - spin_unlock(&nmi_print_lock); - - /* - * If we are in kernel we are probably nested up pretty bad - * and might aswell get out now while we still can: - */ - if (!user_mode_vm(regs)) { - current->thread.trap_no = 2; - crash_kexec(regs); - } - - bust_spinlocks(0); - do_exit(SIGSEGV); -} - -static int __init oops_setup(char *s) -{ - if (!s) - return -EINVAL; - if (!strcmp(s, "panic")) - panic_on_oops = 1; - return 0; -} -early_param("oops", oops_setup); - -static int __init kstack_setup(char *s) -{ - if (!s) - return -EINVAL; - kstack_depth_to_print = simple_strtoul(s, NULL, 0); - return 0; -} -early_param("kstack", kstack_setup); - -static int __init code_bytes_setup(char *s) -{ - code_bytes = simple_strtoul(s, NULL, 0); - if (code_bytes > 8192) - code_bytes = 8192; - - return 1; -} -__setup("code_bytes=", code_bytes_setup); diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 96a5db7da8a7..c302d0707048 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -17,19 +17,7 @@ #include <asm/stacktrace.h> -#define STACKSLOTS_PER_LINE 4 -#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) - -int panic_on_unrecovered_nmi; -int 
kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; -static unsigned int code_bytes = 64; -static int die_counter; - -void printk_address(unsigned long address, int reliable) -{ - printk(" [<%p>] %s%pS\n", (void *) address, - reliable ? "" : "? ", (void *) address); -} +#include "dumpstack.h" static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, unsigned *usedp, char **idp) @@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -static inline int valid_stack_ptr(struct thread_info *tinfo, - void *p, unsigned int size, void *end) -{ - void *t = tinfo; - if (end) { - if (p < end && p >= (end-THREAD_SIZE)) - return 1; - else - return 0; - } - return p > t && p < t + THREAD_SIZE - size; -} - -/* The form of the top of the frame on the stack */ -struct stack_frame { - struct stack_frame *next_frame; - unsigned long return_address; -}; - -static inline unsigned long -print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end) -{ - struct stack_frame *frame = (struct stack_frame *)bp; - - while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { - unsigned long addr; - - addr = *stack; - if (__kernel_text_address(addr)) { - if ((unsigned long) stack == bp + sizeof(long)) { - ops->address(data, addr, 1); - frame = frame->next_frame; - bp = (unsigned long) frame; - } else { - ops->address(data, addr, bp == 0); - } - } - stack++; - } - return bp; -} - void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) @@ -166,6 +109,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; unsigned used = 0; struct thread_info *tinfo; + int graph = 0; if (!task) task = 
current; @@ -206,7 +150,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, break; bp = print_context_stack(tinfo, stack, bp, ops, - data, estack_end); + data, estack_end, &graph); ops->stack(data, "<EOE>"); /* * We link to the next stack via the @@ -225,7 +169,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, "IRQ") < 0) break; bp = print_context_stack(tinfo, stack, bp, - ops, data, irqstack_end); + ops, data, irqstack_end, &graph); /* * We link to the next stack (which would be * the process stack normally) the last @@ -243,62 +187,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, /* * This handles the process stack: */ - bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); + bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph); put_cpu(); } EXPORT_SYMBOL(dump_trace); -static void -print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - printk(data); - print_symbol(msg, symbol); - printk("\n"); -} - -static void print_trace_warning(void *data, char *msg) -{ - printk("%s%s\n", (char *)data, msg); -} - -static int print_trace_stack(void *data, char *name) -{ - printk("%s <%s> ", (char *)data, name); - return 0; -} - -/* - * Print one address/symbol entries per line. 
- */ -static void print_trace_address(void *data, unsigned long addr, int reliable) -{ - touch_nmi_watchdog(); - printk(data); - printk_address(addr, reliable); -} - -static const struct stacktrace_ops print_trace_ops = { - .warning = print_trace_warning, - .warning_symbol = print_trace_warning_symbol, - .stack = print_trace_stack, - .address = print_trace_address, -}; - -static void -show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl) -{ - printk("%sCall Trace:\n", log_lvl); - dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); -} - -void show_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp) -{ - show_trace_log_lvl(task, regs, stack, bp, ""); -} - -static void +void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *sp, unsigned long bp, char *log_lvl) { @@ -342,33 +236,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, show_trace_log_lvl(task, regs, sp, bp, log_lvl); } -void show_stack(struct task_struct *task, unsigned long *sp) -{ - show_stack_log_lvl(task, NULL, sp, 0, ""); -} - -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - unsigned long bp = 0; - unsigned long stack; - -#ifdef CONFIG_FRAME_POINTER - if (!bp) - get_bp(bp); -#endif - - printk("Pid: %d, comm: %.20s %s %s %.*s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - show_trace(NULL, NULL, &stack, bp); -} -EXPORT_SYMBOL(dump_stack); - void show_registers(struct pt_regs *regs) { int i; @@ -429,147 +296,3 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } -static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; -static int die_owner = -1; -static unsigned int die_nest_count; - -unsigned __kprobes long oops_begin(void) -{ - int cpu; - unsigned long flags; - - 
oops_enter(); - - /* racy, but better than risking deadlock. */ - raw_local_irq_save(flags); - cpu = smp_processor_id(); - if (!__raw_spin_trylock(&die_lock)) { - if (cpu == die_owner) - /* nested oops. should stop eventually */; - else - __raw_spin_lock(&die_lock); - } - die_nest_count++; - die_owner = cpu; - console_verbose(); - bust_spinlocks(1); - return flags; -} - -void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) -{ - die_owner = -1; - bust_spinlocks(0); - die_nest_count--; - if (!die_nest_count) - /* Nest count reaches zero, release the lock. */ - __raw_spin_unlock(&die_lock); - raw_local_irq_restore(flags); - if (!regs) { - oops_exit(); - return; - } - if (in_interrupt()) - panic("Fatal exception in interrupt"); - if (panic_on_oops) - panic("Fatal exception"); - oops_exit(); - do_exit(signr); -} - -int __kprobes __die(const char *str, struct pt_regs *regs, long err) -{ - printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP "); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); -#endif - printk("\n"); - sysfs_printk_last_file(); - if (notify_die(DIE_OOPS, str, regs, err, - current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) - return 1; - - show_registers(regs); - add_taint(TAINT_DIE); - /* Executive summary in case the oops scrolled away */ - printk(KERN_ALERT "RIP "); - printk_address(regs->ip, 1); - printk(" RSP <%016lx>\n", regs->sp); - if (kexec_should_crash(current)) - crash_kexec(regs); - return 0; -} - -void die(const char *str, struct pt_regs *regs, long err) -{ - unsigned long flags = oops_begin(); - - if (!user_mode(regs)) - report_bug(regs->ip, regs); - - if (__die(str, regs, err)) - regs = NULL; - oops_end(flags, regs, SIGSEGV); -} - -notrace __kprobes void -die_nmi(char *str, struct pt_regs *regs, int do_panic) -{ - unsigned long flags; - - if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) 
== NOTIFY_STOP) - return; - - flags = oops_begin(); - /* - * We are in trouble anyway, lets at least try - * to get a message out. - */ - printk(KERN_EMERG "%s", str); - printk(" on CPU%d, ip %08lx, registers:\n", - smp_processor_id(), regs->ip); - show_registers(regs); - if (kexec_should_crash(current)) - crash_kexec(regs); - if (do_panic || panic_on_oops) - panic("Non maskable interrupt"); - oops_end(flags, NULL, SIGBUS); - nmi_exit(); - local_irq_enable(); - do_exit(SIGBUS); -} - -static int __init oops_setup(char *s) -{ - if (!s) - return -EINVAL; - if (!strcmp(s, "panic")) - panic_on_oops = 1; - return 0; -} -early_param("oops", oops_setup); - -static int __init kstack_setup(char *s) -{ - if (!s) - return -EINVAL; - kstack_depth_to_print = simple_strtoul(s, NULL, 0); - return 0; -} -early_param("kstack", kstack_setup); - -static int __init code_bytes_setup(char *s) -{ - code_bytes = simple_strtoul(s, NULL, 0); - if (code_bytes > 8192) - code_bytes = 8192; - - return 1; -} -__setup("code_bytes=", code_bytes_setup); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 958af86186c4..43ceb3f454bf 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1196,6 +1196,9 @@ ENTRY(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER cmpl $ftrace_stub, ftrace_graph_return jnz ftrace_graph_caller + + cmpl $ftrace_graph_entry_stub, ftrace_graph_entry + jnz ftrace_graph_caller #endif .globl ftrace_stub ftrace_stub: @@ -1230,6 +1233,7 @@ ENTRY(ftrace_graph_caller) pushl %edx movl 0xc(%esp), %edx lea 0x4(%ebp), %eax + subl $MCOUNT_INSN_SIZE, %edx call prepare_ftrace_return popl %edx popl %ecx diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 2aa0526ac30e..54e0bbdccb99 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -120,6 +120,9 @@ ENTRY(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER cmpq $ftrace_stub, ftrace_graph_return jnz ftrace_graph_caller + + cmpq $ftrace_graph_entry_stub, 
ftrace_graph_entry + jnz ftrace_graph_caller #endif .globl ftrace_stub @@ -173,6 +176,7 @@ ENTRY(ftrace_graph_caller) leaq 8(%rbp), %rdi movq 0x38(%rsp), %rsi + subq $MCOUNT_INSN_SIZE, %rsi call prepare_ftrace_return diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 58832478b94e..f98c4076a170 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -420,12 +420,23 @@ static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) int index; index = current->curr_ret_stack; + + if (unlikely(index < 0)) { + ftrace_graph_stop(); + WARN_ON(1); + /* Might as well panic, otherwise we have no where to go */ + *ret = (unsigned long)panic; + return; + } + *ret = current->ret_stack[index].ret; trace->func = current->ret_stack[index].func; trace->calltime = current->ret_stack[index].calltime; trace->overrun = atomic_read(&current->trace_overrun); trace->depth = index; + barrier(); current->curr_ret_stack--; + } /* @@ -441,6 +452,13 @@ unsigned long ftrace_return_to_handler(void) trace.rettime = cpu_clock(raw_smp_processor_id()); ftrace_graph_return(&trace); + if (unlikely(!ret)) { + ftrace_graph_stop(); + WARN_ON(1); + /* Might as well panic. What else to do? */ + ret = (unsigned long)panic; + } + return ret; } @@ -467,28 +485,16 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) * ignore such a protection. 
*/ asm volatile( -#ifdef CONFIG_X86_64 - "1: movq (%[parent_old]), %[old]\n" - "2: movq %[return_hooker], (%[parent_replaced])\n" -#else - "1: movl (%[parent_old]), %[old]\n" - "2: movl %[return_hooker], (%[parent_replaced])\n" -#endif + "1: " _ASM_MOV " (%[parent_old]), %[old]\n" + "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" " movl $0, %[faulted]\n" ".section .fixup, \"ax\"\n" "3: movl $1, %[faulted]\n" ".previous\n" - ".section __ex_table, \"a\"\n" -#ifdef CONFIG_X86_64 - " .quad 1b, 3b\n" - " .quad 2b, 3b\n" -#else - " .long 1b, 3b\n" - " .long 2b, 3b\n" -#endif - ".previous\n" + _ASM_EXTABLE(1b, 3b) + _ASM_EXTABLE(2b, 3b) : [parent_replaced] "=r" (parent), [old] "=r" (old), [faulted] "=r" (faulted) @@ -496,14 +502,16 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) : "memory" ); - if (WARN_ON(faulted)) { - unregister_ftrace_graph(); + if (unlikely(faulted)) { + ftrace_graph_stop(); + WARN_ON(1); return; } - if (WARN_ON(!__kernel_text_address(old))) { - unregister_ftrace_graph(); + if (unlikely(!__kernel_text_address(old))) { + ftrace_graph_stop(); *parent = old; + WARN_ON(1); return; } @@ -516,7 +524,11 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) } trace.func = self_addr; - ftrace_graph_entry(&trace); + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + *parent = old; + } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 4152d3c3b138..21e996a70d68 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -413,6 +413,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, unsigned long error_code) { unsigned long flags = oops_begin(); + int sig = SIGKILL; struct task_struct *tsk; printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", @@ -423,8 +424,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs 
*regs, tsk->thread.trap_no = 14; tsk->thread.error_code = error_code; if (__die("Bad pagetable", regs, error_code)) - regs = NULL; - oops_end(flags, regs, SIGKILL); + sig = 0; + oops_end(flags, regs, sig); } #endif @@ -590,6 +591,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) int fault; #ifdef CONFIG_X86_64 unsigned long flags; + int sig; #endif tsk = current; @@ -849,11 +851,12 @@ no_context: bust_spinlocks(0); do_exit(SIGKILL); #else + sig = SIGKILL; if (__die("Oops", regs, error_code)) - regs = NULL; + sig = 0; /* Executive summary in case the body of the oops scrolled away */ printk(KERN_EMERG "CR2: %016lx\n", address); - oops_end(flags, regs, SIGKILL); + oops_end(flags, regs, sig); #endif /* diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index afba918c623c..469ceb3e85ba 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -371,11 +371,13 @@ struct ftrace_graph_ret { #define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of the callback handlers for tracing function graph*/ typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ -typedef void (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ +typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ extern int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc); +extern void ftrace_graph_stop(void); + /* The current handlers in use */ extern trace_func_graph_ret_t ftrace_graph_return; extern trace_func_graph_ent_t ftrace_graph_entry; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 3bb87a753fa3..1a350a847edd 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -124,6 +124,11 @@ void tracing_on(void); void tracing_off(void); void tracing_off_permanent(void); +void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); +void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); +int 
ring_buffer_read_page(struct ring_buffer *buffer, + void **data_page, int cpu, int full); + enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, }; diff --git a/kernel/fork.c b/kernel/fork.c index 5f82a999c032..7407ab319875 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1137,6 +1137,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, } } + ftrace_graph_init_task(p); + p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) @@ -1145,7 +1147,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (current->nsproxy != p->nsproxy) { retval = ns_cgroup_clone(p, pid); if (retval) - goto bad_fork_free_pid; + goto bad_fork_free_graph; } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; @@ -1238,7 +1240,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; - goto bad_fork_free_pid; + goto bad_fork_free_graph; } if (clone_flags & CLONE_THREAD) { @@ -1271,11 +1273,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); - ftrace_graph_init_task(p); proc_fork_connector(p); cgroup_post_fork(p); return p; +bad_fork_free_graph: + ftrace_graph_exit_task(p); bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 46a404173db2..74b1878b8bb8 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -25,6 +25,7 @@ * Thanks to Arjan van de Ven for coming up with the initial idea of * mapping lock dependencies runtime. 
*/ +#define DISABLE_BRANCH_PROFILING #include <linux/mutex.h> #include <linux/sched.h> #include <linux/delay.h> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8b6b673b4d6c..bde6f03512d5 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -67,6 +67,7 @@ config FUNCTION_GRAPH_TRACER bool "Kernel Function Graph Tracer" depends on HAVE_FUNCTION_GRAPH_TRACER depends on FUNCTION_TRACER + default y help Enable the kernel to trace a function at both its return and its entry. diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2e78628443e8..65b9e863056b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1636,11 +1636,15 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, static atomic_t ftrace_graph_active; +int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) +{ + return 0; +} + /* The callbacks that hook a function */ trace_func_graph_ret_t ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; -trace_func_graph_ent_t ftrace_graph_entry = - (trace_func_graph_ent_t)ftrace_stub; +trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. 
*/ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) @@ -1738,7 +1742,7 @@ void unregister_ftrace_graph(void) atomic_dec(&ftrace_graph_active); ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; - ftrace_graph_entry = (trace_func_graph_ent_t)ftrace_stub; + ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(FTRACE_STOP_FUNC_RET); mutex_unlock(&ftrace_sysctl_lock); @@ -1769,5 +1773,10 @@ void ftrace_graph_exit_task(struct task_struct *t) kfree(ret_stack); } + +void ftrace_graph_stop(void) +{ + ftrace_stop(); +} #endif diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index e206951603c1..7f69cfeaadf7 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -195,20 +195,24 @@ void *ring_buffer_event_data(struct ring_buffer_event *event) #define TS_MASK ((1ULL << TS_SHIFT) - 1) #define TS_DELTA_TEST (~TS_MASK) -/* - * This hack stolen from mm/slob.c. - * We can store per page timing information in the page frame of the page. - * Thanks to Peter Zijlstra for suggesting this idea. - */ -struct buffer_page { +struct buffer_data_page { u64 time_stamp; /* page time stamp */ - local_t write; /* index for next write */ local_t commit; /* write commited index */ + unsigned char data[]; /* data of buffer page */ +}; + +struct buffer_page { + local_t write; /* index for next write */ unsigned read; /* index for next read */ struct list_head list; /* list of free pages */ - void *page; /* Actual data page */ + struct buffer_data_page *page; /* Actual data page */ }; +static void rb_init_page(struct buffer_data_page *bpage) +{ + local_set(&bpage->commit, 0); +} + /* * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing * this issue out. @@ -230,7 +234,7 @@ static inline int test_time_stamp(u64 delta) return 0; } -#define BUF_PAGE_SIZE PAGE_SIZE +#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page)) /* * head_page == tail_page && head == tail then buffer is empty. 
@@ -294,19 +298,19 @@ struct ring_buffer_iter { static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *head = &cpu_buffer->pages; - struct buffer_page *page, *tmp; + struct buffer_page *bpage, *tmp; if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) return -1; if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) return -1; - list_for_each_entry_safe(page, tmp, head, list) { + list_for_each_entry_safe(bpage, tmp, head, list) { if (RB_WARN_ON(cpu_buffer, - page->list.next->prev != &page->list)) + bpage->list.next->prev != &bpage->list)) return -1; if (RB_WARN_ON(cpu_buffer, - page->list.prev->next != &page->list)) + bpage->list.prev->next != &bpage->list)) return -1; } @@ -317,22 +321,23 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) { struct list_head *head = &cpu_buffer->pages; - struct buffer_page *page, *tmp; + struct buffer_page *bpage, *tmp; unsigned long addr; LIST_HEAD(pages); unsigned i; for (i = 0; i < nr_pages; i++) { - page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), + bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); - if (!page) + if (!bpage) goto free_pages; - list_add(&page->list, &pages); + list_add(&bpage->list, &pages); addr = __get_free_page(GFP_KERNEL); if (!addr) goto free_pages; - page->page = (void *)addr; + bpage->page = (void *)addr; + rb_init_page(bpage->page); } list_splice(&pages, head); @@ -342,9 +347,9 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, return 0; free_pages: - list_for_each_entry_safe(page, tmp, &pages, list) { - list_del_init(&page->list); - free_buffer_page(page); + list_for_each_entry_safe(bpage, tmp, &pages, list) { + list_del_init(&bpage->list); + free_buffer_page(bpage); } return -ENOMEM; } @@ -353,7 +358,7 @@ static struct ring_buffer_per_cpu * rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; - 
struct buffer_page *page; + struct buffer_page *bpage; unsigned long addr; int ret; @@ -368,16 +373,17 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; INIT_LIST_HEAD(&cpu_buffer->pages); - page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), + bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); - if (!page) + if (!bpage) goto fail_free_buffer; - cpu_buffer->reader_page = page; + cpu_buffer->reader_page = bpage; addr = __get_free_page(GFP_KERNEL); if (!addr) goto fail_free_reader; - page->page = (void *)addr; + bpage->page = (void *)addr; + rb_init_page(bpage->page); INIT_LIST_HEAD(&cpu_buffer->reader_page->list); @@ -402,14 +408,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *head = &cpu_buffer->pages; - struct buffer_page *page, *tmp; + struct buffer_page *bpage, *tmp; list_del_init(&cpu_buffer->reader_page->list); free_buffer_page(cpu_buffer->reader_page); - list_for_each_entry_safe(page, tmp, head, list) { - list_del_init(&page->list); - free_buffer_page(page); + list_for_each_entry_safe(bpage, tmp, head, list) { + list_del_init(&bpage->list); + free_buffer_page(bpage); } kfree(cpu_buffer); } @@ -506,7 +512,7 @@ static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); static void rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) { - struct buffer_page *page; + struct buffer_page *bpage; struct list_head *p; unsigned i; @@ -517,9 +523,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) return; p = cpu_buffer->pages.next; - page = list_entry(p, struct buffer_page, list); - list_del_init(&page->list); - free_buffer_page(page); + bpage = list_entry(p, struct buffer_page, list); + list_del_init(&bpage->list); + 
free_buffer_page(bpage); } if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) return; @@ -536,7 +542,7 @@ static void rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, struct list_head *pages, unsigned nr_pages) { - struct buffer_page *page; + struct buffer_page *bpage; struct list_head *p; unsigned i; @@ -547,9 +553,9 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, if (RB_WARN_ON(cpu_buffer, list_empty(pages))) return; p = pages->next; - page = list_entry(p, struct buffer_page, list); - list_del_init(&page->list); - list_add_tail(&page->list, &cpu_buffer->pages); + bpage = list_entry(p, struct buffer_page, list); + list_del_init(&bpage->list); + list_add_tail(&bpage->list, &cpu_buffer->pages); } rb_reset_cpu(cpu_buffer); @@ -576,7 +582,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) { struct ring_buffer_per_cpu *cpu_buffer; unsigned nr_pages, rm_pages, new_pages; - struct buffer_page *page, *tmp; + struct buffer_page *bpage, *tmp; unsigned long buffer_size; unsigned long addr; LIST_HEAD(pages); @@ -637,16 +643,17 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) for_each_buffer_cpu(buffer, cpu) { for (i = 0; i < new_pages; i++) { - page = kzalloc_node(ALIGN(sizeof(*page), + bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); - if (!page) + if (!bpage) goto free_pages; - list_add(&page->list, &pages); + list_add(&bpage->list, &pages); addr = __get_free_page(GFP_KERNEL); if (!addr) goto free_pages; - page->page = (void *)addr; + bpage->page = (void *)addr; + rb_init_page(bpage->page); } } @@ -667,9 +674,9 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) return size; free_pages: - list_for_each_entry_safe(page, tmp, &pages, list) { - list_del_init(&page->list); - free_buffer_page(page); + list_for_each_entry_safe(bpage, tmp, &pages, list) { + list_del_init(&bpage->list); + free_buffer_page(bpage); } 
mutex_unlock(&buffer->mutex); return -ENOMEM; @@ -680,9 +687,15 @@ static inline int rb_null_event(struct ring_buffer_event *event) return event->type == RINGBUF_TYPE_PADDING; } -static inline void *__rb_page_index(struct buffer_page *page, unsigned index) +static inline void * +__rb_data_page_index(struct buffer_data_page *bpage, unsigned index) { - return page->page + index; + return bpage->data + index; +} + +static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) +{ + return bpage->page->data + index; } static inline struct ring_buffer_event * @@ -712,7 +725,7 @@ static inline unsigned rb_page_write(struct buffer_page *bpage) static inline unsigned rb_page_commit(struct buffer_page *bpage) { - return local_read(&bpage->commit); + return local_read(&bpage->page->commit); } /* Size is determined by what has been commited */ @@ -758,14 +771,14 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer) } static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_page **page) + struct buffer_page **bpage) { - struct list_head *p = (*page)->list.next; + struct list_head *p = (*bpage)->list.next; if (p == &cpu_buffer->pages) p = p->next; - *page = list_entry(p, struct buffer_page, list); + *bpage = list_entry(p, struct buffer_page, list); } static inline unsigned @@ -804,14 +817,15 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, if (RB_WARN_ON(cpu_buffer, cpu_buffer->commit_page == cpu_buffer->tail_page)) return; - cpu_buffer->commit_page->commit = + cpu_buffer->commit_page->page->commit = cpu_buffer->commit_page->write; rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); - cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; + cpu_buffer->write_stamp = + cpu_buffer->commit_page->page->time_stamp; } /* Now set the commit to the event's index */ - local_set(&cpu_buffer->commit_page->commit, index); + local_set(&cpu_buffer->commit_page->page->commit, index); } static inline void @@ 
-826,16 +840,17 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) * assign the commit to the tail. */ while (cpu_buffer->commit_page != cpu_buffer->tail_page) { - cpu_buffer->commit_page->commit = + cpu_buffer->commit_page->page->commit = cpu_buffer->commit_page->write; rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); - cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; + cpu_buffer->write_stamp = + cpu_buffer->commit_page->page->time_stamp; /* add barrier to keep gcc from optimizing too much */ barrier(); } while (rb_commit_index(cpu_buffer) != rb_page_write(cpu_buffer->commit_page)) { - cpu_buffer->commit_page->commit = + cpu_buffer->commit_page->page->commit = cpu_buffer->commit_page->write; barrier(); } @@ -843,7 +858,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) { - cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp; + cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp; cpu_buffer->reader_page->read = 0; } @@ -862,7 +877,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter) else rb_inc_page(cpu_buffer, &iter->head_page); - iter->read_stamp = iter->head_page->time_stamp; + iter->read_stamp = iter->head_page->page->time_stamp; iter->head = 0; } @@ -998,12 +1013,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, */ if (tail_page == cpu_buffer->tail_page) { local_set(&next_page->write, 0); - local_set(&next_page->commit, 0); + local_set(&next_page->page->commit, 0); cpu_buffer->tail_page = next_page; /* reread the time stamp */ *ts = ring_buffer_time_stamp(cpu_buffer->cpu); - cpu_buffer->tail_page->time_stamp = *ts; + cpu_buffer->tail_page->page->time_stamp = *ts; } /* @@ -1048,7 +1063,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, * this page's time stamp. 
*/ if (!tail && rb_is_commit(cpu_buffer, event)) - cpu_buffer->commit_page->time_stamp = *ts; + cpu_buffer->commit_page->page->time_stamp = *ts; return event; @@ -1099,7 +1114,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, event->time_delta = *delta & TS_MASK; event->array[0] = *delta >> TS_SHIFT; } else { - cpu_buffer->commit_page->time_stamp = *ts; + cpu_buffer->commit_page->page->time_stamp = *ts; event->time_delta = 0; event->array[0] = 0; } @@ -1552,7 +1567,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) if (iter->head) iter->read_stamp = cpu_buffer->read_stamp; else - iter->read_stamp = iter->head_page->time_stamp; + iter->read_stamp = iter->head_page->page->time_stamp; } /** @@ -1696,7 +1711,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->list.prev = reader->list.prev; local_set(&cpu_buffer->reader_page->write, 0); - local_set(&cpu_buffer->reader_page->commit, 0); + local_set(&cpu_buffer->reader_page->page->commit, 0); /* Make the reader page now replace the head */ reader->list.prev->next = &cpu_buffer->reader_page->list; @@ -2088,7 +2103,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->head_page = list_entry(cpu_buffer->pages.next, struct buffer_page, list); local_set(&cpu_buffer->head_page->write, 0); - local_set(&cpu_buffer->head_page->commit, 0); + local_set(&cpu_buffer->head_page->page->commit, 0); cpu_buffer->head_page->read = 0; @@ -2097,7 +2112,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) INIT_LIST_HEAD(&cpu_buffer->reader_page->list); local_set(&cpu_buffer->reader_page->write, 0); - local_set(&cpu_buffer->reader_page->commit, 0); + local_set(&cpu_buffer->reader_page->page->commit, 0); cpu_buffer->reader_page->read = 0; cpu_buffer->overrun = 0; @@ -2223,6 +2238,166 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, return 0; } +static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, + struct buffer_data_page *bpage) +{ + 
struct ring_buffer_event *event; + unsigned long head; + + __raw_spin_lock(&cpu_buffer->lock); + for (head = 0; head < local_read(&bpage->commit); + head += rb_event_length(event)) { + + event = __rb_data_page_index(bpage, head); + if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) + return; + /* Only count data entries */ + if (event->type != RINGBUF_TYPE_DATA) + continue; + cpu_buffer->entries--; + } + __raw_spin_unlock(&cpu_buffer->lock); +} + +/** + * ring_buffer_alloc_read_page - allocate a page to read from buffer + * @buffer: the buffer to allocate for. + * + * This function is used in conjunction with ring_buffer_read_page. + * When reading a full page from the ring buffer, these functions + * can be used to speed up the process. The calling function should + * allocate a few pages first with this function. Then when it + * needs to get pages from the ring buffer, it passes the result + * of this function into ring_buffer_read_page, which will swap + * the page that was allocated, with the read page of the buffer. + * + * Returns: + * The page allocated, or NULL on error. + */ +void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) +{ + unsigned long addr; + struct buffer_data_page *bpage; + + addr = __get_free_page(GFP_KERNEL); + if (!addr) + return NULL; + + bpage = (void *)addr; + + return bpage; +} + +/** + * ring_buffer_free_read_page - free an allocated read page + * @buffer: the buffer the page was allocate for + * @data: the page to free + * + * Free a page allocated from ring_buffer_alloc_read_page. + */ +void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) +{ + free_page((unsigned long)data); +} + +/** + * ring_buffer_read_page - extract a page from the ring buffer + * @buffer: buffer to extract from + * @data_page: the page to use allocated from ring_buffer_alloc_read_page + * @cpu: the cpu of the buffer to extract + * @full: should the extraction only happen when the page is full. 
+ * + * This function will pull out a page from the ring buffer and consume it. + * @data_page must be the address of the variable that was returned + * from ring_buffer_alloc_read_page. This is because the page might be used + * to swap with a page in the ring buffer. + * + * for example: + * rpage = ring_buffer_alloc_read_page(buffer); + * if (!rpage) + * return error; + * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0); + * if (ret) + * process_page(rpage); + * + * When @full is set, the function will not return true unless + * the writer is off the reader page. + * + * Note: it is up to the calling functions to handle sleeps and wakeups. + * The ring buffer can be used anywhere in the kernel and can not + * blindly call wake_up. The layer that uses the ring buffer must be + * responsible for that. + * + * Returns: + * 1 if data has been transferred + * 0 if no data has been transferred. + */ +int ring_buffer_read_page(struct ring_buffer *buffer, + void **data_page, int cpu, int full) +{ + struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; + struct ring_buffer_event *event; + struct buffer_data_page *bpage; + unsigned long flags; + int ret = 0; + + if (!data_page) + return 0; + + bpage = *data_page; + if (!bpage) + return 0; + + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + + /* + * rb_buffer_peek will get the next ring buffer if + * the current reader page is empty. + */ + event = rb_buffer_peek(buffer, cpu, NULL); + if (!event) + goto out; + + /* check for data */ + if (!local_read(&cpu_buffer->reader_page->page->commit)) + goto out; + /* + * If the writer is already off of the read page, then simply + * switch the read page with the given page. Otherwise + * we need to copy the data from the reader to the writer. 
+ */ + if (cpu_buffer->reader_page == cpu_buffer->commit_page) { + unsigned int read = cpu_buffer->reader_page->read; + + if (full) + goto out; + /* The writer is still on the reader page, we must copy */ + bpage = cpu_buffer->reader_page->page; + memcpy(bpage->data, + cpu_buffer->reader_page->page->data + read, + local_read(&bpage->commit) - read); + + /* consume what was read */ + cpu_buffer->reader_page += read; + + } else { + /* swap the pages */ + rb_init_page(bpage); + bpage = cpu_buffer->reader_page->page; + cpu_buffer->reader_page->page = *data_page; + cpu_buffer->reader_page->read = 0; + *data_page = bpage; + } + ret = 1; + + /* update the entry counter */ + rb_remove_entries(cpu_buffer, bpage); + out: + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + + return ret; +} + static ssize_t rb_simple_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 91887a280ab9..8b6409a62b54 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1200,7 +1200,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -void trace_graph_entry(struct ftrace_graph_ent *trace) +int trace_graph_entry(struct ftrace_graph_ent *trace) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1209,7 +1209,7 @@ void trace_graph_entry(struct ftrace_graph_ent *trace) int cpu; int pc; - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -1218,7 +1218,9 @@ void trace_graph_entry(struct ftrace_graph_ent *trace) __trace_graph_entry(tr, data, trace, flags, pc); } atomic_dec(&data->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); + + return 1; } void trace_graph_return(struct ftrace_graph_ret *trace) @@ -1230,7 +1232,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) int cpu; int pc; - 
raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -1239,7 +1241,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) __trace_graph_return(tr, data, trace, flags, pc); } atomic_dec(&data->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ @@ -2645,7 +2647,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, if (err) goto err_unlock; - raw_local_irq_disable(); + local_irq_disable(); __raw_spin_lock(&ftrace_max_lock); for_each_tracing_cpu(cpu) { /* @@ -2662,7 +2664,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, } } __raw_spin_unlock(&ftrace_max_lock); - raw_local_irq_enable(); + local_irq_enable(); tracing_cpumask = tracing_cpumask_new; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f96f4e787ff3..0565ae9a2210 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -412,7 +412,7 @@ void trace_function(struct trace_array *tr, unsigned long flags, int pc); void trace_graph_return(struct ftrace_graph_ret *trace); -void trace_graph_entry(struct ftrace_graph_ent *trace); +int trace_graph_entry(struct ftrace_graph_ent *trace); void trace_bts(struct trace_array *tr, unsigned long from, unsigned long to); diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index bc972753568d..6c00feb3bac7 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -42,7 +42,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) if (unlikely(!tr)) return; - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) goto out; @@ -74,7 +74,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) out: atomic_dec(&tr->data[cpu]->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } 
static inline diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 894b50bca313..c66578f2fdc2 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -19,6 +19,7 @@ #define TRACE_GRAPH_PRINT_OVERRUN 0x1 #define TRACE_GRAPH_PRINT_CPU 0x2 #define TRACE_GRAPH_PRINT_OVERHEAD 0x4 +#define TRACE_GRAPH_PRINT_PROC 0x8 static struct tracer_opt trace_opts[] = { /* Display overruns ? */ @@ -27,11 +28,13 @@ static struct tracer_opt trace_opts[] = { { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) }, /* Display Overhead ? */ { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) }, + /* Display proc name/pid */ + { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) }, { } /* Empty entry */ }; static struct tracer_flags tracer_flags = { - /* Don't display overruns by default */ + /* Don't display overruns and proc by default */ .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD, .opts = trace_opts }; @@ -104,23 +107,63 @@ print_graph_cpu(struct trace_seq *s, int cpu) return TRACE_TYPE_HANDLED; } +#define TRACE_GRAPH_PROCINFO_LENGTH 14 + +static enum print_line_t +print_graph_proc(struct trace_seq *s, pid_t pid) +{ + int i; + int ret; + int len; + char comm[8]; + int spaces = 0; + /* sign + log10(MAX_INT) + '\0' */ + char pid_str[11]; + + strncpy(comm, trace_find_cmdline(pid), 7); + comm[7] = '\0'; + sprintf(pid_str, "%d", pid); + + /* 1 stands for the "-" character */ + len = strlen(comm) + strlen(pid_str) + 1; + + if (len < TRACE_GRAPH_PROCINFO_LENGTH) + spaces = TRACE_GRAPH_PROCINFO_LENGTH - len; + + /* First spaces to align center */ + for (i = 0; i < spaces / 2; i++) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + ret = trace_seq_printf(s, "%s-%s", comm, pid_str); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Last spaces to align center */ + for (i = 0; i < spaces - (spaces / 2); i++) { + ret = trace_seq_printf(s, " "); + if (!ret) 
+ return TRACE_TYPE_PARTIAL_LINE; + } + return TRACE_TYPE_HANDLED; +} + /* If the pid changed since the last trace, output this event */ -static int verif_pid(struct trace_seq *s, pid_t pid, int cpu) +static enum print_line_t +verif_pid(struct trace_seq *s, pid_t pid, int cpu) { - char *comm, *prev_comm; pid_t prev_pid; int ret; if (last_pid[cpu] != -1 && last_pid[cpu] == pid) - return 1; + return TRACE_TYPE_HANDLED; prev_pid = last_pid[cpu]; last_pid[cpu] = pid; - comm = trace_find_cmdline(pid); - prev_comm = trace_find_cmdline(prev_pid); - /* * Context-switch trace line: @@ -130,11 +173,31 @@ static int verif_pid(struct trace_seq *s, pid_t pid, int cpu) */ ret = trace_seq_printf(s, - " ------------------------------------------\n"); - ret += trace_seq_printf(s, " | %d) %s-%d => %s-%d\n", - cpu, prev_comm, prev_pid, comm, pid); - ret += trace_seq_printf(s, - " ------------------------------------------\n\n"); + "\n ------------------------------------------\n |"); + if (!ret) + TRACE_TYPE_PARTIAL_LINE; + + ret = print_graph_cpu(s, cpu); + if (ret == TRACE_TYPE_PARTIAL_LINE) + TRACE_TYPE_PARTIAL_LINE; + + ret = print_graph_proc(s, prev_pid); + if (ret == TRACE_TYPE_PARTIAL_LINE) + TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, " => "); + if (!ret) + TRACE_TYPE_PARTIAL_LINE; + + ret = print_graph_proc(s, pid); + if (ret == TRACE_TYPE_PARTIAL_LINE) + TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, + "\n ------------------------------------------\n\n"); + if (!ret) + TRACE_TYPE_PARTIAL_LINE; + return ret; } @@ -169,11 +232,50 @@ trace_branch_is_leaf(struct trace_iterator *iter, } -static inline int +static enum print_line_t print_graph_duration(unsigned long long duration, struct trace_seq *s) { unsigned long nsecs_rem = do_div(duration, 1000); - return trace_seq_printf(s, "%4llu.%3lu us | ", duration, nsecs_rem); + /* log10(ULONG_MAX) + '\0' */ + char msecs_str[21]; + char nsecs_str[5]; + int ret, len; + int i; + + sprintf(msecs_str, "%lu", (unsigned 
long) duration); + + /* Print msecs */ + ret = trace_seq_printf(s, msecs_str); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + len = strlen(msecs_str); + + /* Print nsecs (we don't want to exceed 7 numbers) */ + if (len < 7) { + snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem); + ret = trace_seq_printf(s, ".%s", nsecs_str); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + len += strlen(nsecs_str); + } + + ret = trace_seq_printf(s, " us "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Print remaining spaces to fit the row's width */ + for (i = len; i < 7; i++) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + ret = trace_seq_printf(s, "| "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + return TRACE_TYPE_HANDLED; + } /* Signal a overhead of time execution to the output */ @@ -210,10 +312,6 @@ print_graph_entry_leaf(struct trace_iterator *iter, call = &entry->graph_ent; duration = graph_ret->rettime - graph_ret->calltime; - /* Must not exceed 8 characters: 9999.999 us */ - if (duration > 10000000ULL) - duration = 9999999ULL; - /* Overhead */ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { ret = print_graph_overhead(duration, s); @@ -223,7 +321,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, /* Duration */ ret = print_graph_duration(duration, s); - if (!ret) + if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; /* Function */ @@ -288,12 +386,23 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, struct trace_entry *ent = iter->ent; /* Pid */ - if (!verif_pid(s, ent->pid, cpu)) + if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; /* Cpu */ if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { ret = print_graph_cpu(s, cpu); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* Proc */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { + ret = print_graph_proc(s, ent->pid); + if (ret == 
TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, " | "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } @@ -313,17 +422,24 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, int ret; unsigned long long duration = trace->rettime - trace->calltime; - /* Must not exceed 8 characters: xxxx.yyy us */ - if (duration > 10000000ULL) - duration = 9999999ULL; - /* Pid */ - if (!verif_pid(s, ent->pid, cpu)) + if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; /* Cpu */ if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { ret = print_graph_cpu(s, cpu); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* Proc */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { + ret = print_graph_proc(s, ent->pid); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, " | "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } @@ -337,7 +453,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, /* Duration */ ret = print_graph_duration(duration, s); - if (!ret) + if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; /* Closing brace */ diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index fde3be15c642..0b863f2cbc8e 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -48,7 +48,7 @@ static inline void check_stack(void) if (!object_is_on_stack(&this_size)) return; - raw_local_irq_save(flags); + local_irq_save(flags); __raw_spin_lock(&max_stack_lock); /* a race could have already updated it */ @@ -78,6 +78,7 @@ static inline void check_stack(void) * on a new max, so it is far from a fast path. 
*/ while (i < max_stack_trace.nr_entries) { + int found = 0; stack_dump_index[i] = this_size; p = start; @@ -86,17 +87,19 @@ static inline void check_stack(void) if (*p == stack_dump_trace[i]) { this_size = stack_dump_index[i++] = (top - p) * sizeof(unsigned long); + found = 1; /* Start the search from here */ start = p + 1; } } - i++; + if (!found) + i++; } out: __raw_spin_unlock(&max_stack_lock); - raw_local_irq_restore(flags); + local_irq_restore(flags); } static void @@ -162,11 +165,11 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, if (ret < 0) return ret; - raw_local_irq_save(flags); + local_irq_save(flags); __raw_spin_lock(&max_stack_lock); *ptr = val; __raw_spin_unlock(&max_stack_lock); - raw_local_irq_restore(flags); + local_irq_restore(flags); return count; } |