diff options
51 files changed, 2593 insertions, 727 deletions
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index 9cc4d685dde5..753f4de4b175 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -82,7 +82,7 @@ of ftrace. Here is a list of some of the key files: tracer is not adding more data, they will display the same information every time they are read. - iter_ctrl: This file lets the user control the amount of data + trace_options: This file lets the user control the amount of data that is displayed in one of the above output files. @@ -94,10 +94,10 @@ of ftrace. Here is a list of some of the key files: only be recorded if the latency is greater than the value in this file. (in microseconds) - trace_entries: This sets or displays the number of bytes each CPU + buffer_size_kb: This sets or displays the number of kilobytes each CPU buffer can hold. The tracer buffers are the same size for each CPU. The displayed number is the size of the - CPU buffer and not total size of all buffers. The + CPU buffer and not total size of all buffers. The trace buffers are allocated in pages (blocks of memory that the kernel uses for allocation, usually 4 KB in size). If the last page allocated has room for more bytes @@ -316,23 +316,23 @@ The above is mostly meaningful for kernel developers. The rest is the same as the 'trace' file. -iter_ctrl ---------- +trace_options +------------- -The iter_ctrl file is used to control what gets printed in the trace +The trace_options file is used to control what gets printed in the trace output. To see what is available, simply cat the file: - cat /debug/tracing/iter_ctrl + cat /debug/tracing/trace_options print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ noblock nostacktrace nosched-tree To disable one of the options, echo in the option prepended with "no". - echo noprint-parent > /debug/tracing/iter_ctrl + echo noprint-parent > /debug/tracing/trace_options To enable an option, leave off the "no". - echo sym-offset > /debug/tracing/iter_ctrl + echo sym-offset > /debug/tracing/trace_options Here are the available options: @@ -1299,41 +1299,29 @@ trace entries ------------- Having too much or not enough data can be troublesome in diagnosing -an issue in the kernel. The file trace_entries is used to modify +an issue in the kernel. The file buffer_size_kb is used to modify the size of the internal trace buffers. The number listed is the number of entries that can be recorded per CPU. To know the full size, multiply the number of possible CPUS with the number of entries. - # cat /debug/tracing/trace_entries -65620 + # cat /debug/tracing/buffer_size_kb +1408 (units kilobytes) Note, to modify this, you must have tracing completely disabled. To do that, echo "nop" into the current_tracer. If the current_tracer is not set to "nop", an EINVAL error will be returned. # echo nop > /debug/tracing/current_tracer - # echo 100000 > /debug/tracing/trace_entries - # cat /debug/tracing/trace_entries -100045 - - -Notice that we echoed in 100,000 but the size is 100,045. The entries -are held in individual pages. It allocates the number of pages it takes -to fulfill the request. If more entries may fit on the last page -then they will be added. - - # echo 1 > /debug/tracing/trace_entries - # cat /debug/tracing/trace_entries -85 - -This shows us that 85 entries can fit in a single page. + # echo 10000 > /debug/tracing/buffer_size_kb + # cat /debug/tracing/buffer_size_kb +10000 (units kilobytes) The number of pages which will be allocated is limited to a percentage of available memory. Allocating too much will produce an error. - # echo 1000000000000 > /debug/tracing/trace_entries + # echo 1000000000000 > /debug/tracing/buffer_size_kb -bash: echo: write error: Cannot allocate memory - # cat /debug/tracing/trace_entries + # cat /debug/tracing/buffer_size_kb 85 diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c86c07459712..196fd1c62a2f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -765,6 +765,14 @@ and is between 256 and 4096 characters. It is defined in the file parameter will force ia64_sal_cache_flush to call ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. + ftrace=[tracer] + [ftrace] will set and start the specified tracer + as early as possible in order to facilitate early + boot debugging. + + ftrace_dump_on_oops + [ftrace] will dump the trace buffers on oops. + gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad support via parallel port (up to 5 devices per port) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 93224b569187..7780cc8b51d1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -29,6 +29,8 @@ config X86 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_RET_TRACER if X86_32 + select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER select HAVE_ARCH_TRACEHOOK diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 2a3dfbd5e677..fa013f529b74 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -186,14 +186,10 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. -config MMIOTRACE_HOOKS - bool - config MMIOTRACE bool "Memory mapped IO tracing" depends on DEBUG_KERNEL && PCI select TRACING - select MMIOTRACE_HOOKS help Mmiotrace traces Memory Mapped I/O access and is meant for debugging and reverse engineering. It is called from the ioremap diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9e8bc29b8b17..9b6a1fa19e70 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -17,8 +17,33 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) */ return addr - 1; } -#endif - +#endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ +#ifdef CONFIG_FUNCTION_RET_TRACER +#define FTRACE_RET_STACK_SIZE 20 + +#ifndef __ASSEMBLY__ + +/* + * Stack of return addresses for functions + * of a thread. + * Used in struct thread_info + */ +struct ftrace_ret_stack { + unsigned long ret; + unsigned long func; + unsigned long long calltime; +}; + +/* + * Primary handler of a function return. + * It relays on ftrace_return_to_handler. + * Defined in entry32.S + */ +extern void return_to_handler(void); + +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_FUNCTION_RET_TRACER */ + #endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e44d379faad2..a71158369fd4 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -20,6 +20,7 @@ struct task_struct; struct exec_domain; #include <asm/processor.h> +#include <asm/ftrace.h> struct thread_info { struct task_struct *task; /* main task structure */ @@ -38,8 +39,30 @@ struct thread_info { */ __u8 supervisor_stack[0]; #endif + +#ifdef CONFIG_FUNCTION_RET_TRACER + /* Index of current stored adress in ret_stack */ + int curr_ret_stack; + /* Stack of return addresses for return function tracing */ + struct ftrace_ret_stack ret_stack[FTRACE_RET_STACK_SIZE]; +#endif }; +#ifdef CONFIG_FUNCTION_RET_TRACER +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .addr_limit = KERNEL_DS, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ + .curr_ret_stack = -1,\ +} +#else #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ @@ -52,6 +75,7 @@ struct thread_info { .fn = do_no_restart_syscall, \ }, \ } +#endif #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index e489ff9cb3e2..1d8ed95da846 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -14,6 +14,11 @@ CFLAGS_REMOVE_paravirt-spinlocks.o = -pg CFLAGS_REMOVE_ftrace.o = -pg endif +ifdef CONFIG_FUNCTION_RET_TRACER +# Don't trace __switch_to() but let it for function tracer +CFLAGS_REMOVE_process_32.o = -pg +endif + # # vsyscalls (which work on the user stack) should have # no stack-protector checks: @@ -65,6 +70,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_RET_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 28b597ef9ca1..f97621149839 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1157,6 +1157,9 @@ ENTRY(mcount) END(mcount) ENTRY(ftrace_caller) + cmpl $0, function_trace_stop + jne ftrace_stub + pushl %eax pushl %ecx pushl %edx @@ -1180,8 +1183,15 @@ END(ftrace_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) + cmpl $0, function_trace_stop + jne ftrace_stub + cmpl $ftrace_stub, ftrace_trace_function jnz trace +#ifdef CONFIG_FUNCTION_RET_TRACER + cmpl $ftrace_stub, ftrace_function_return + jnz trace_return +#endif .globl ftrace_stub ftrace_stub: ret @@ -1200,8 +1210,34 @@ trace: popl %edx popl %ecx popl %eax + jmp ftrace_stub +#ifdef CONFIG_FUNCTION_RET_TRACER +trace_return: + pushl %eax + pushl %ecx + pushl %edx + movl 0xc(%esp), %edx + lea 0x4(%ebp), %eax + call prepare_ftrace_return + popl %edx + popl %ecx + popl %eax jmp ftrace_stub + +.globl return_to_handler +return_to_handler: + pushl $0 + pushl %eax + pushl %ecx + pushl %edx + call ftrace_return_to_handler + movl %eax, 0xc(%esp) + popl %edx + popl %ecx + popl %eax + ret +#endif /* CONFIG_FUNCTION_RET_TRACER */ END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b86f332c96a6..08aa6b10933c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -68,6 +68,8 @@ ENTRY(mcount) END(mcount) ENTRY(ftrace_caller) + cmpl $0, function_trace_stop + jne ftrace_stub /* taken from glibc */ subq $0x38, %rsp @@ -103,6 +105,9 @@ END(ftrace_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) + cmpl $0, function_trace_stop + jne ftrace_stub + cmpq $ftrace_stub, ftrace_trace_function jnz trace .globl ftrace_stub diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 50ea0ac8c9bf..fe832738e1e2 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -14,14 +14,144 @@ #include <linux/uaccess.h> #include <linux/ftrace.h> #include <linux/percpu.h> +#include <linux/sched.h> #include <linux/init.h> #include <linux/list.h> #include <asm/ftrace.h> +#include <linux/ftrace.h> #include <asm/nops.h> +#include <asm/nmi.h> -static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; + +#ifdef CONFIG_FUNCTION_RET_TRACER + +/* + * These functions are picked from those used on + * this page for dynamic ftrace. They have been + * simplified to ignore all traces in NMI context. + */ +static atomic_t in_nmi; + +void ftrace_nmi_enter(void) +{ + atomic_inc(&in_nmi); +} + +void ftrace_nmi_exit(void) +{ + atomic_dec(&in_nmi); +} + +/* Add a function return address to the trace stack on thread info.*/ +static int push_return_trace(unsigned long ret, unsigned long long time, + unsigned long func) +{ + int index; + struct thread_info *ti = current_thread_info(); + + /* The return trace stack is full */ + if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) + return -EBUSY; + + index = ++ti->curr_ret_stack; + ti->ret_stack[index].ret = ret; + ti->ret_stack[index].func = func; + ti->ret_stack[index].calltime = time; + + return 0; +} + +/* Retrieve a function return address to the trace stack on thread info.*/ +static void pop_return_trace(unsigned long *ret, unsigned long long *time, + unsigned long *func) +{ + int index; + + struct thread_info *ti = current_thread_info(); + index = ti->curr_ret_stack; + *ret = ti->ret_stack[index].ret; + *func = ti->ret_stack[index].func; + *time = ti->ret_stack[index].calltime; + ti->curr_ret_stack--; +} + +/* + * Send the trace to the ring-buffer. + * @return the original return address. + */ +unsigned long ftrace_return_to_handler(void) +{ + struct ftrace_retfunc trace; + pop_return_trace(&trace.ret, &trace.calltime, &trace.func); + trace.rettime = cpu_clock(raw_smp_processor_id()); + ftrace_function_return(&trace); + + return trace.ret; +} + +/* + * Hook the return address and push it in the stack of return addrs + * in current thread info. + */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) +{ + unsigned long old; + unsigned long long calltime; + int faulted; + unsigned long return_hooker = (unsigned long) + &return_to_handler; + + /* Nmi's are currently unsupported */ + if (atomic_read(&in_nmi)) + return; + + /* + * Protect against fault, even if it shouldn't + * happen. This tool is too much intrusive to + * ignore such a protection. + */ + asm volatile( + "1: movl (%[parent_old]), %[old]\n" + "2: movl %[return_hooker], (%[parent_replaced])\n" + " movl $0, %[faulted]\n" + + ".section .fixup, \"ax\"\n" + "3: movl $1, %[faulted]\n" + ".previous\n" + + ".section __ex_table, \"a\"\n" + " .long 1b, 3b\n" + " .long 2b, 3b\n" + ".previous\n" + + : [parent_replaced] "=r" (parent), [old] "=r" (old), + [faulted] "=r" (faulted) + : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) + : "memory" + ); + + if (WARN_ON(faulted)) { + unregister_ftrace_return(); + return; + } + + if (WARN_ON(!__kernel_text_address(old))) { + unregister_ftrace_return(); + *parent = old; + return; + } + + calltime = cpu_clock(raw_smp_processor_id()); + + if (push_return_trace(old, calltime, self_addr) == -EBUSY) + *parent = old; +} + +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE union ftrace_code_union { char code[MCOUNT_INSN_SIZE]; @@ -31,17 +161,11 @@ union ftrace_code_union { } __attribute__((packed)); }; - static int ftrace_calc_offset(long ip, long addr) { return (int)(addr - ip); } -unsigned char *ftrace_nop_replace(void) -{ - return ftrace_nop; -} - unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) { static union ftrace_code_union calc; @@ -56,6 +180,142 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) return calc.code; } +/* + * Modifying code must take extra care. On an SMP machine, if + * the code being modified is also being executed on another CPU + * that CPU will have undefined results and possibly take a GPF. + * We use kstop_machine to stop other CPUS from exectuing code. + * But this does not stop NMIs from happening. We still need + * to protect against that. We separate out the modification of + * the code to take care of this. + * + * Two buffers are added: An IP buffer and a "code" buffer. + * + * 1) Put the instruction pointer into the IP buffer + * and the new code into the "code" buffer. + * 2) Set a flag that says we are modifying code + * 3) Wait for any running NMIs to finish. + * 4) Write the code + * 5) clear the flag. + * 6) Wait for any running NMIs to finish. + * + * If an NMI is executed, the first thing it does is to call + * "ftrace_nmi_enter". This will check if the flag is set to write + * and if it is, it will write what is in the IP and "code" buffers. + * + * The trick is, it does not matter if everyone is writing the same + * content to the code location. Also, if a CPU is executing code + * it is OK to write to that code location if the contents being written + * are the same as what exists. + */ + +static atomic_t in_nmi = ATOMIC_INIT(0); +static int mod_code_status; /* holds return value of text write */ +static int mod_code_write; /* set when NMI should do the write */ +static void *mod_code_ip; /* holds the IP to write to */ +static void *mod_code_newcode; /* holds the text to write to the IP */ + +static unsigned nmi_wait_count; +static atomic_t nmi_update_count = ATOMIC_INIT(0); + +int ftrace_arch_read_dyn_info(char *buf, int size) +{ + int r; + + r = snprintf(buf, size, "%u %u", + nmi_wait_count, + atomic_read(&nmi_update_count)); + return r; +} + +static void ftrace_mod_code(void) +{ + /* + * Yes, more than one CPU process can be writing to mod_code_status. + * (and the code itself) + * But if one were to fail, then they all should, and if one were + * to succeed, then they all should. + */ + mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, + MCOUNT_INSN_SIZE); + +} + +void ftrace_nmi_enter(void) +{ + atomic_inc(&in_nmi); + /* Must have in_nmi seen before reading write flag */ + smp_mb(); + if (mod_code_write) { + ftrace_mod_code(); + atomic_inc(&nmi_update_count); + } +} + +void ftrace_nmi_exit(void) +{ + /* Finish all executions before clearing in_nmi */ + smp_wmb(); + atomic_dec(&in_nmi); +} + +static void wait_for_nmi(void) +{ + int waited = 0; + + while (atomic_read(&in_nmi)) { + waited = 1; + cpu_relax(); + } + + if (waited) + nmi_wait_count++; +} + +static int +do_ftrace_mod_code(unsigned long ip, void *new_code) +{ + mod_code_ip = (void *)ip; + mod_code_newcode = new_code; + + /* The buffers need to be visible before we let NMIs write them */ + smp_wmb(); + + mod_code_write = 1; + + /* Make sure write bit is visible before we wait on NMIs */ + smp_mb(); + + wait_for_nmi(); + + /* Make sure all running NMIs have finished before we write the code */ + smp_mb(); + + ftrace_mod_code(); + + /* Make sure the write happens before clearing the bit */ + smp_wmb(); + + mod_code_write = 0; + + /* make sure NMIs see the cleared bit */ + smp_mb(); + + wait_for_nmi(); + + return mod_code_status; +} + + + + +static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; + +unsigned char *ftrace_nop_replace(void) +{ + return ftrace_nop; +} + int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) @@ -81,7 +341,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return -EINVAL; /* replace the text with the new text */ - if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) + if (do_ftrace_mod_code(ip, new_code)) return -EPERM; sync_core(); @@ -165,3 +425,4 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } +#endif diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 0b8b6690a86d..6f3d3d4cd973 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -17,6 +17,9 @@ * want per guest time just set the kernel.vsyscall64 sysctl to 0. */ +/* Disable profiling for userspace code: */ +#define DISABLE_BRANCH_PROFILING + #include <linux/time.h> #include <linux/init.h> #include <linux/kernel.h> diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index fea4565ff576..d8cc96a2738f 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o -obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o obj-$(CONFIG_MMIOTRACE) += mmiotrace.o -mmiotrace-y := pf_in.o mmio-mod.o +mmiotrace-y := kmmio.o pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o obj-$(CONFIG_NUMA) += numa_$(BITS).o diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 31e8730fa246..4152d3c3b138 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -53,7 +53,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { -#ifdef CONFIG_MMIOTRACE_HOOKS +#ifdef CONFIG_MMIOTRACE if (unlikely(is_kmmio_active())) if (kmmio_handler(regs, addr) == 1) return -1; diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 1ef0f90813d6..d9d35824c56f 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -9,6 +9,9 @@ * Also alternative() doesn't work. */ +/* Disable profiling for userspace code: */ +#define DISABLE_BRANCH_PROFILING + #include <linux/kernel.h> #include <linux/posix-timers.h> #include <linux/time.h> diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index ce0d9da52a8a..94966edfb44d 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -274,6 +274,22 @@ static struct sysrq_key_op sysrq_showstate_blocked_op = { .enable_mask = SYSRQ_ENABLE_DUMP, }; +#ifdef CONFIG_TRACING +#include <linux/ftrace.h> + +static void sysrq_ftrace_dump(int key, struct tty_struct *tty) +{ + ftrace_dump(); +} +static struct sysrq_key_op sysrq_ftrace_dump_op = { + .handler = sysrq_ftrace_dump, + .help_msg = "dumpZ-ftrace-buffer", + .action_msg = "Dump ftrace buffer", + .enable_mask = SYSRQ_ENABLE_DUMP, +}; +#else +#define sysrq_ftrace_dump_op (*(struct sysrq_key_op *)0) +#endif static void sysrq_handle_showmem(int key, struct tty_struct *tty) { @@ -406,7 +422,7 @@ static struct sysrq_key_op *sysrq_key_table[36] = { NULL, /* x */ /* y: May be registered on sparc64 for global register dump */ NULL, /* y */ - NULL /* z */ + &sysrq_ftrace_dump_op, /* z */ }; /* key2index calculation, -1 on invalid index */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 80744606bad1..a5e4ed9baec8 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -45,6 +45,17 @@ #define MCOUNT_REC() #endif +#ifdef CONFIG_TRACE_BRANCH_PROFILING +#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_likely_profile) = .; \ + *(_ftrace_likely) \ + VMLINUX_SYMBOL(__stop_likely_profile) = .; \ + VMLINUX_SYMBOL(__start_unlikely_profile) = .; \ + *(_ftrace_unlikely) \ + VMLINUX_SYMBOL(__stop_unlikely_profile) = .; +#else +#define LIKELY_PROFILE() +#endif + /* .data section */ #define DATA_DATA \ *(.data) \ @@ -62,7 +73,8 @@ VMLINUX_SYMBOL(__stop___markers) = .; \ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ - VMLINUX_SYMBOL(__stop___tracepoints) = .; + VMLINUX_SYMBOL(__stop___tracepoints) = .; \ + LIKELY_PROFILE() #define RO_DATA(align) \ . = ALIGN((align)); \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 98115d9d04da..c7d804a7a4d6 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -59,8 +59,70 @@ extern void __chk_io_ptr(const volatile void __iomem *); * specific implementations come from the above header files */ -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) +struct ftrace_branch_data { + const char *func; + const char *file; + unsigned line; + unsigned long correct; + unsigned long incorrect; +}; + +/* + * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code + * to disable branch tracing on a per file basis. + */ +#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING) +void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); + +#define likely_notrace(x) __builtin_expect(!!(x), 1) +#define unlikely_notrace(x) __builtin_expect(!!(x), 0) + +#define likely_check(x) ({ \ + int ______r; \ + static struct ftrace_branch_data \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_likely"))) \ + ______f = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + ______f.line = __LINE__; \ + ______r = likely_notrace(x); \ + ftrace_likely_update(&______f, ______r, 1); \ + ______r; \ + }) +#define unlikely_check(x) ({ \ + int ______r; \ + static struct ftrace_branch_data \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_unlikely"))) \ + ______f = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + ______f.line = __LINE__; \ + ______r = unlikely_notrace(x); \ + ftrace_likely_update(&______f, ______r, 0); \ + ______r; \ + }) + +/* + * Using __builtin_constant_p(x) to ignore cases where the return + * value is always the same. This idea is taken from a similar patch + * written by Daniel Walker. + */ +# ifndef likely +# define likely(x) (__builtin_constant_p(x) ? !!(x) : likely_check(x)) +# endif +# ifndef unlikely +# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : unlikely_check(x)) +# endif +#else +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +#endif /* Optimization barrier */ #ifndef barrier diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 703eb53cfa2b..4fbc4a8b86a5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -23,6 +23,34 @@ struct ftrace_ops { struct ftrace_ops *next; }; +extern int function_trace_stop; + +/** + * ftrace_stop - stop function tracer. + * + * A quick way to stop the function tracer. Note this an on off switch, + * it is not something that is recursive like preempt_disable. + * This does not disable the calling of mcount, it only stops the + * calling of functions from mcount. + */ +static inline void ftrace_stop(void) +{ + function_trace_stop = 1; +} + +/** + * ftrace_start - start the function tracer. + * + * This function is the inverse of ftrace_stop. This does not enable + * the function tracing if the function tracer is disabled. This only + * sets the function tracer flag to continue calling the functions + * from mcount. + */ +static inline void ftrace_start(void) +{ + function_trace_stop = 0; +} + /* * The ftrace_ops must be a static and should also * be read_mostly. These functions do modify read_mostly variables @@ -41,10 +69,11 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1); # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) static inline void ftrace_kill(void) { } +static inline void ftrace_stop(void) { } +static inline void ftrace_start(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE - enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FAILED = (1 << 1), @@ -74,6 +103,9 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +/* May be defined in arch */ +extern int ftrace_arch_read_dyn_info(char *buf, int size); + /** * ftrace_modify_code - modify code segment * @ip: the address of the code segment @@ -102,7 +134,6 @@ extern void ftrace_release(void *start, unsigned long size); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); - #else # define skip_trace(ip) ({ 0; }) # define ftrace_force_update() ({ 0; }) @@ -181,6 +212,11 @@ static inline void __ftrace_enabled_restore(int enabled) #endif #ifdef CONFIG_TRACING +extern int ftrace_dump_on_oops; + +extern void tracing_start(void); +extern void tracing_stop(void); + extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); @@ -211,6 +247,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } static inline int ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); +static inline void tracing_start(void) { } +static inline void tracing_stop(void) { } static inline int ftrace_printk(const char *fmt, ...) { @@ -229,25 +267,24 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } #endif -struct boot_trace { - pid_t caller; - char func[KSYM_NAME_LEN]; - int result; - unsigned long long duration; /* usecs */ - ktime_t calltime; - ktime_t rettime; +/* + * Structure that defines a return function trace. + */ +struct ftrace_retfunc { + unsigned long ret; /* Return address */ + unsigned long func; /* Current function */ + unsigned long long calltime; + unsigned long long rettime; }; -#ifdef CONFIG_BOOT_TRACER -extern void trace_boot(struct boot_trace *it, initcall_t fn); -extern void start_boot_trace(void); -extern void stop_boot_trace(void); -#else -static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } -static inline void start_boot_trace(void) { } -static inline void stop_boot_trace(void) { } -#endif - +#ifdef CONFIG_FUNCTION_RET_TRACER +/* Type of a callback handler of tracing return function */ +typedef void (*trace_function_return_t)(struct ftrace_retfunc *); +extern void register_ftrace_return(trace_function_return_t func); +/* The current handler in use */ +extern trace_function_return_t ftrace_function_return; +extern void unregister_ftrace_return(void); +#endif #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h new file mode 100644 index 000000000000..0b4df55d7a74 --- /dev/null +++ b/include/linux/ftrace_irq.h @@ -0,0 +1,13 @@ +#ifndef _LINUX_FTRACE_IRQ_H +#define _LINUX_FTRACE_IRQ_H + + +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_RET_TRACER) +extern void ftrace_nmi_enter(void); +extern void ftrace_nmi_exit(void); +#else +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } +#endif + +#endif /* _LINUX_FTRACE_IRQ_H */ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 181006cc94a0..89a56d79e4c6 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -4,6 +4,7 @@ #include <linux/preempt.h> #include <linux/smp_lock.h> #include <linux/lockdep.h> +#include <linux/ftrace_irq.h> #include <asm/hardirq.h> #include <asm/system.h> @@ -161,7 +162,17 @@ extern void irq_enter(void); */ extern void irq_exit(void); -#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) -#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) +#define nmi_enter() \ + do { \ + ftrace_nmi_enter(); \ + lockdep_off(); \ + __irq_enter(); \ + } while (0) +#define nmi_exit() \ + do { \ + __irq_exit(); \ + lockdep_on(); \ + ftrace_nmi_exit(); \ + } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/marker.h b/include/linux/marker.h index 889196c7fbb1..4cf45472d9f5 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -136,8 +136,6 @@ extern marker_probe_func __mark_empty_function; extern void marker_probe_cb(const struct marker *mdata, void *call_private, ...); -extern void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, ...); /* * Connect a probe to a marker. diff --git a/include/linux/sched.h b/include/linux/sched.h index 644ffbda17ca..61c8cc36028a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2006,6 +2006,17 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct { *task_thread_info(p) = *task_thread_info(org); task_thread_info(p)->task = p; + +#ifdef CONFIG_FUNCTION_RET_TRACER + /* + * When fork() creates a child process, this function is called. + * But the child task may not inherit the return adresses traced + * by the return function tracer because it will directly execute + * in userspace and will not return to kernel functions its parent + * used. + */ + task_thread_info(p)->curr_ret_stack = -1; +#endif } static inline unsigned long *end_of_stack(struct task_struct *p) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index c5bb39c7a770..63064e9403f2 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -112,6 +112,10 @@ extern int tracepoint_probe_register(const char *name, void *probe); */ extern int tracepoint_probe_unregister(const char *name, void *probe); +extern int tracepoint_probe_register_noupdate(const char *name, void *probe); +extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe); +extern void tracepoint_probe_update_all(void); + struct tracepoint_iter { struct module *module; struct tracepoint *tracepoint; diff --git a/include/trace/boot.h b/include/trace/boot.h new file mode 100644 index 000000000000..6b54537eab02 --- /dev/null +++ b/include/trace/boot.h @@ -0,0 +1,56 @@ +#ifndef _LINUX_TRACE_BOOT_H +#define _LINUX_TRACE_BOOT_H + +/* + * Structure which defines the trace of an initcall + * while it is called. + * You don't have to fill the func field since it is + * only used internally by the tracer. + */ +struct boot_trace_call { + pid_t caller; + char func[KSYM_NAME_LEN]; +}; + +/* + * Structure which defines the trace of an initcall + * while it returns. + */ +struct boot_trace_ret { + char func[KSYM_NAME_LEN]; + int result; + unsigned long long duration; /* nsecs */ +}; + +#ifdef CONFIG_BOOT_TRACER +/* Append the traces on the ring-buffer */ +extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn); +extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn); + +/* Tells the tracer that smp_pre_initcall is finished. + * So we can start the tracing + */ +extern void start_boot_trace(void); + +/* Resume the tracing of other necessary events + * such as sched switches + */ +extern void enable_boot_trace(void); + +/* Suspend this tracing. Actually, only sched_switches tracing have + * to be suspended. Initcalls doesn't need it.) + */ +extern void disable_boot_trace(void); +#else +static inline +void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { } + +static inline +void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { } + +static inline void start_boot_trace(void) { } +static inline void enable_boot_trace(void) { } +static inline void disable_boot_trace(void) { } +#endif /* CONFIG_BOOT_TRACER */ + +#endif /* __LINUX_TRACE_BOOT_H */ diff --git a/init/main.c b/init/main.c index 7e117a231af1..e810196bf2f2 100644 --- a/init/main.c +++ b/init/main.c @@ -63,6 +63,7 @@ #include <linux/signal.h> #include <linux/idr.h> #include <linux/ftrace.h> +#include <trace/boot.h> #include <asm/io.h> #include <asm/bugs.h> @@ -703,31 +704,35 @@ core_param(initcall_debug, initcall_debug, bool, 0644); int do_one_initcall(initcall_t fn) { int count = preempt_count(); - ktime_t delta; + ktime_t calltime, delta, rettime; char msgbuf[64]; - struct boot_trace it; + struct boot_trace_call call; + struct boot_trace_ret ret; if (initcall_debug) { - it.caller = task_pid_nr(current); - printk("calling %pF @ %i\n", fn, it.caller); - it.calltime = ktime_get(); + call.caller = task_pid_nr(current); + printk("calling %pF @ %i\n", fn, call.caller); + calltime = ktime_get(); + trace_boot_call(&call, fn); + enable_boot_trace(); } - it.result = fn(); + ret.result = fn(); if (initcall_debug) { - it.rettime = ktime_get(); - delta = ktime_sub(it.rettime, it.calltime); - it.duration = (unsigned long long) delta.tv64 >> 10; + disable_boot_trace(); + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + ret.duration = (unsigned long long) delta.tv64 >> 10; + trace_boot_ret(&ret, fn); printk("initcall %pF returned %d after %Ld usecs\n", fn, - it.result, it.duration); - trace_boot(&it, fn); + ret.result, ret.duration); } msgbuf[0] = 0; - if (it.result && it.result != -ENODEV && initcall_debug) - sprintf(msgbuf, "error code %d ", it.result); + if (ret.result && ret.result != -ENODEV && initcall_debug) + sprintf(msgbuf, "error code %d ", ret.result); if (preempt_count() != count) { strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf)); @@ -741,7 +746,7 @@ int do_one_initcall(initcall_t fn) printk("initcall %pF returned with %s\n", fn, msgbuf); } - return it.result; + return ret.result; } @@ -882,7 +887,7 @@ static int __init kernel_init(void * unused) * we're essentially up and running. Get rid of the * initmem segments and start the user-mode stuff.. */ - stop_boot_trace(); + init_post(); return 0; } diff --git a/kernel/Makefile b/kernel/Makefile index 9a3ec66a9d84..af3be57acbbb 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -23,6 +23,10 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg CFLAGS_REMOVE_sched.o = -mno-spe -pg endif +ifdef CONFIG_FUNCTION_RET_TRACER +CFLAGS_REMOVE_extable.o = -pg # For __kernel_text_address() +CFLAGS_REMOVE_module.o = -pg # For __module_text_address() +endif obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o diff --git a/kernel/marker.c b/kernel/marker.c index e9c6b2bc9400..2898b647d415 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -43,6 +43,7 @@ static DEFINE_MUTEX(markers_mutex); */ #define MARKER_HASH_BITS 6 #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) +static struct hlist_head marker_table[MARKER_TABLE_SIZE]; /* * Note about RCU : @@ -64,11 +65,10 @@ struct marker_entry { void *oldptr; int rcu_pending; unsigned char ptype:1; + unsigned char format_allocated:1; char name[0]; /* Contains name'\0'format'\0' */ }; -static struct hlist_head marker_table[MARKER_TABLE_SIZE]; - /** * __mark_empty_function - Empty probe callback * @probe_private: probe private data @@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(marker_probe_cb); * * Should be connected to markers "MARK_NOARGS". */ -void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) +static void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) { va_list args; /* not initialized */ char ptype; @@ -197,7 +197,6 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) } rcu_read_unlock_sched(); } -EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); static void free_old_closure(struct rcu_head *head) { @@ -416,6 +415,7 @@ static struct marker_entry *add_marker(const char *name, const char *format) e->single.probe_private = NULL; e->multi = NULL; e->ptype = 0; + e->format_allocated = 0; e->refcount = 0; e->rcu_pending = 0; hlist_add_head(&e->hlist, head); @@ -447,6 +447,8 @@ static int remove_marker(const char *name) if (e->single.func != __mark_empty_function) return -EBUSY; hlist_del(&e->hlist); + if (e->format_allocated) + kfree(e->format); /* Make sure the call_rcu has been executed */ if (e->rcu_pending) rcu_barrier_sched(); @@ -457,57 +459,34 @@ static int remove_marker(const char *name) /* * Set the mark_entry format to the format found in the element. */ -static int marker_set_format(struct marker_entry **entry, const char *format) +static int marker_set_format(struct marker_entry *entry, const char *format) { - struct marker_entry *e; - size_t name_len = strlen((*entry)->name) + 1; - size_t format_len = strlen(format) + 1; - - - e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, - GFP_KERNEL); - if (!e) + entry->format = kstrdup(format, GFP_KERNEL); + if (!entry->format) return -ENOMEM; - memcpy(&e->name[0], (*entry)->name, name_len); - e->format = &e->name[name_len]; - memcpy(e->format, format, format_len); - if (strcmp(e->format, MARK_NOARGS) == 0) - e->call = marker_probe_cb_noarg; - else - e->call = marker_probe_cb; - e->single = (*entry)->single; - e->multi = (*entry)->multi; - e->ptype = (*entry)->ptype; - e->refcount = (*entry)->refcount; - e->rcu_pending = 0; - hlist_add_before(&e->hlist, &(*entry)->hlist); - hlist_del(&(*entry)->hlist); - /* Make sure the call_rcu has been executed */ - if ((*entry)->rcu_pending) - rcu_barrier_sched(); - kfree(*entry); - *entry = e; + entry->format_allocated = 1; + trace_mark(core_marker_format, "name %s format %s", - e->name, e->format); + entry->name, entry->format); return 0; } /* * Sets the probe callback corresponding to one marker. */ -static int set_marker(struct marker_entry **entry, struct marker *elem, +static int set_marker(struct marker_entry *entry, struct marker *elem, int active) { int ret; - WARN_ON(strcmp((*entry)->name, elem->name) != 0); + WARN_ON(strcmp(entry->name, elem->name) != 0); - if ((*entry)->format) { - if (strcmp((*entry)->format, elem->format) != 0) { + if (entry->format) { + if (strcmp(entry->format, elem->format) != 0) { printk(KERN_NOTICE "Format mismatch for probe %s " "(%s), marker (%s)\n", - (*entry)->name, - (*entry)->format, + entry->name, + entry->format, elem->format); return -EPERM; } @@ -523,34 +502,33 @@ static int set_marker(struct marker_entry **entry, struct marker *elem, * pass from a "safe" callback (with argument) to an "unsafe" * callback (does not set arguments). */ - elem->call = (*entry)->call; + elem->call = entry->call; /* * Sanity check : * We only update the single probe private data when the ptr is * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) */ WARN_ON(elem->single.func != __mark_empty_function - && elem->single.probe_private - != (*entry)->single.probe_private && - !elem->ptype); - elem->single.probe_private = (*entry)->single.probe_private; + && elem->single.probe_private != entry->single.probe_private + && !elem->ptype); + elem->single.probe_private = entry->single.probe_private; /* * Make sure the private data is valid when we update the * single probe ptr. */ smp_wmb(); - elem->single.func = (*entry)->single.func; + elem->single.func = entry->single.func; /* * We also make sure that the new probe callbacks array is consistent * before setting a pointer to it. */ - rcu_assign_pointer(elem->multi, (*entry)->multi); + rcu_assign_pointer(elem->multi, entry->multi); /* * Update the function or multi probe array pointer before setting the * ptype. */ smp_wmb(); - elem->ptype = (*entry)->ptype; + elem->ptype = entry->ptype; elem->state = active; return 0; @@ -594,8 +572,7 @@ void marker_update_probe_range(struct marker *begin, for (iter = begin; iter < end; iter++) { mark_entry = get_marker(iter->name); if (mark_entry) { - set_marker(&mark_entry, iter, - !!mark_entry->refcount); + set_marker(mark_entry, iter, !!mark_entry->refcount); /* * ignore error, continue */ @@ -657,7 +634,7 @@ int marker_probe_register(const char *name, const char *format, ret = PTR_ERR(entry); } else if (format) { if (!entry->format) - ret = marker_set_format(&entry, format); + ret = marker_set_format(entry, format); else if (strcmp(entry->format, format)) ret = -EPERM; } @@ -848,8 +825,6 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe, if (!e->ptype) { if (num == 0 && e->single.func == probe) return e->single.probe_private; - else - break; } else { struct marker_probe_closure *closure; int match = 0; @@ -861,6 +836,7 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe, return closure[i].probe_private; } } + break; } } return ERR_PTR(-ENOENT); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9d048fa2d902..65d4a9ba79e4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -484,6 +484,16 @@ static struct ctl_table kern_table[] = { .proc_handler = &ftrace_enable_sysctl, }, #endif +#ifdef CONFIG_TRACING + { + .ctl_name = CTL_UNNUMBERED, + .procname = "ftrace_dump_on_oops", + .data = &ftrace_dump_on_oops, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif #ifdef CONFIG_MODULES { .ctl_name = KERN_MODPROBE, diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 33dbefd471e8..9c89526b6b7c 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -9,6 +9,16 @@ config NOP_TRACER config HAVE_FUNCTION_TRACER bool +config HAVE_FUNCTION_RET_TRACER + bool + +config HAVE_FUNCTION_TRACE_MCOUNT_TEST + bool + help + This gets selected when the arch tests the function_trace_stop + variable at the mcount call site. Otherwise, this variable + is tested by the called function. + config HAVE_DYNAMIC_FTRACE bool @@ -47,6 +57,17 @@ config FUNCTION_TRACER (the bootup default), then the overhead of the instructions is very small and not measurable even in micro-benchmarks. +config FUNCTION_RET_TRACER + bool "Kernel Function return Tracer" + depends on !DYNAMIC_FTRACE + depends on HAVE_FUNCTION_RET_TRACER + depends on FUNCTION_TRACER + help + Enable the kernel to trace a function at its return. + It's first purpose is to trace the duration of functions. + This is done by setting the current return address on the thread + info structure of the current task. + config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" default n @@ -138,6 +159,44 @@ config BOOT_TRACER selected, because the self-tests are an initcall as well and that would invalidate the boot trace. ) +config TRACE_BRANCH_PROFILING + bool "Trace likely/unlikely profiler" + depends on DEBUG_KERNEL + select TRACING + help + This tracer profiles all the the likely and unlikely macros + in the kernel. It will display the results in: + + /debugfs/tracing/profile_likely + /debugfs/tracing/profile_unlikely + + Note: this will add a significant overhead, only turn this + on if you need to profile the system's use of these macros. + + Say N if unsure. + +config TRACING_BRANCHES + bool + help + Selected by tracers that will trace the likely and unlikely + conditions. This prevents the tracers themselves from being + profiled. Profiling the tracing infrastructure can only happen + when the likelys and unlikelys are not being traced. + +config BRANCH_TRACER + bool "Trace likely/unlikely instances" + depends on TRACE_BRANCH_PROFILING + select TRACING_BRANCHES + help + This traces the events of likely and unlikely condition + calls in the kernel. The difference between this and the + "Trace likely/unlikely profiler" is that this is not a + histogram of the callers, but actually places the calling + events into a running trace buffer to see when and where the + events happened, as well as their results. + + Say N if unsure. + config STACK_TRACER bool "Trace max stack" depends on HAVE_FUNCTION_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c8228b1a49e9..1a8c9259dc69 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -10,6 +10,11 @@ CFLAGS_trace_selftest_dynamic.o = -pg obj-y += trace_selftest_dynamic.o endif +# If unlikely tracing is enabled, do not trace these files +ifdef CONFIG_TRACING_BRANCHES +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +endif + obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o obj-$(CONFIG_RING_BUFFER) += ring_buffer.o @@ -24,5 +29,7 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o obj-$(CONFIG_STACK_TRACER) += trace_stack.o obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o obj-$(CONFIG_BOOT_TRACER) += trace_boot.o +obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o +obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o libftrace-y := ftrace.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 14fa52297b28..54cb9a7d15e5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -47,6 +47,9 @@ int ftrace_enabled __read_mostly; static int last_ftrace_enabled; +/* Quick disabling of function tracer. */ +int function_trace_stop; + /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -63,6 +66,7 @@ static struct ftrace_ops ftrace_list_end __read_mostly = static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; +ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) { @@ -88,7 +92,22 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) void clear_ftrace_function(void) { ftrace_trace_function = ftrace_stub; + __ftrace_trace_function = ftrace_stub; +} + +#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST +/* + * For those archs that do not test ftrace_trace_stop in their + * mcount call site, we need to do it from C. + */ +static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip) +{ + if (function_trace_stop) + return; + + __ftrace_trace_function(ip, parent_ip); } +#endif static int __register_ftrace_function(struct ftrace_ops *ops) { @@ -110,10 +129,18 @@ static int __register_ftrace_function(struct ftrace_ops *ops) * For one func, simply call it directly. * For more than one func, call the chain. */ +#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST if (ops->next == &ftrace_list_end) ftrace_trace_function = ops->func; else ftrace_trace_function = ftrace_list_func; +#else + if (ops->next == &ftrace_list_end) + __ftrace_trace_function = ops->func; + else + __ftrace_trace_function = ftrace_list_func; + ftrace_trace_function = ftrace_test_stop_func; +#endif } spin_unlock(&ftrace_lock); @@ -152,8 +179,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_enabled) { /* If we only have one func left, then call that directly */ - if (ftrace_list == &ftrace_list_end || - ftrace_list->next == &ftrace_list_end) + if (ftrace_list->next == &ftrace_list_end) ftrace_trace_function = ftrace_list->func; } @@ -522,7 +548,7 @@ static void ftrace_run_update_code(int command) } static ftrace_func_t saved_ftrace_func; -static int ftrace_start; +static int ftrace_start_up; static DEFINE_MUTEX(ftrace_start_lock); static void ftrace_startup(void) @@ -533,8 +559,8 @@ static void ftrace_startup(void) return; mutex_lock(&ftrace_start_lock); - ftrace_start++; - if (ftrace_start == 1) + ftrace_start_up++; + if (ftrace_start_up == 1) command |= FTRACE_ENABLE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { @@ -558,8 +584,8 @@ static void ftrace_shutdown(void) return; mutex_lock(&ftrace_start_lock); - ftrace_start--; - if (!ftrace_start) + ftrace_start_up--; + if (!ftrace_start_up) command |= FTRACE_DISABLE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { @@ -585,8 +611,8 @@ static void ftrace_startup_sysctl(void) mutex_lock(&ftrace_start_lock); /* Force update next time */ saved_ftrace_func = NULL; - /* ftrace_start is true if we want ftrace running */ - if (ftrace_start) + /* ftrace_start_up is true if we want ftrace running */ + if (ftrace_start_up) command |= FTRACE_ENABLE_CALLS; ftrace_run_update_code(command); @@ -601,8 +627,8 @@ static void ftrace_shutdown_sysctl(void) return; mutex_lock(&ftrace_start_lock); - /* ftrace_start is true if ftrace is running */ - if (ftrace_start) + /* ftrace_start_up is true if ftrace is running */ + if (ftrace_start_up) command |= FTRACE_DISABLE_CALLS; ftrace_run_update_code(command); @@ -734,6 +760,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos) ((iter->flags & FTRACE_ITER_FAILURES) && !(rec->flags & FTRACE_FL_FAILED)) || + ((iter->flags & FTRACE_ITER_FILTER) && + !(rec->flags & FTRACE_FL_FILTER)) || + ((iter->flags & FTRACE_ITER_NOTRACE) && !(rec->flags & FTRACE_FL_NOTRACE))) { rec = NULL; @@ -1182,7 +1211,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftrace_start_lock); - if (iter->filtered && ftrace_start && ftrace_enabled) + if (iter->filtered && ftrace_start_up && ftrace_enabled) ftrace_run_update_code(FTRACE_ENABLE_CALLS); mutex_unlock(&ftrace_start_lock); mutex_unlock(&ftrace_sysctl_lock); @@ -1450,3 +1479,19 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, return ret; } +#ifdef CONFIG_FUNCTION_RET_TRACER +trace_function_return_t ftrace_function_return = + (trace_function_return_t)ftrace_stub; +void register_ftrace_return(trace_function_return_t func) +{ + ftrace_function_return = func; +} + +void unregister_ftrace_return(void) +{ + ftrace_function_return = (trace_function_return_t)ftrace_stub; +} +#endif + + + diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 036456cbb4f7..caa4fda50f8a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -45,6 +45,8 @@ void tracing_off(void) ring_buffers_off = 1; } +#include "trace.h" + /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 @@ -187,7 +189,8 @@ static inline int test_time_stamp(u64 delta) struct ring_buffer_per_cpu { int cpu; struct ring_buffer *buffer; - spinlock_t lock; + spinlock_t reader_lock; /* serialize readers */ + raw_spinlock_t lock; struct lock_class_key lock_key; struct list_head pages; struct buffer_page *head_page; /* read from head */ @@ -221,32 +224,16 @@ struct ring_buffer_iter { u64 read_stamp; }; +/* buffer may be either ring_buffer or ring_buffer_per_cpu */ #define RB_WARN_ON(buffer, cond) \ - do { \ - if (unlikely(cond)) { \ - atomic_inc(&buffer->record_disabled); \ - WARN_ON(1); \ - } \ - } while (0) - -#define RB_WARN_ON_RET(buffer, cond) \ - do { \ - if (unlikely(cond)) { \ - atomic_inc(&buffer->record_disabled); \ - WARN_ON(1); \ - return -1; \ - } \ - } while (0) - -#define RB_WARN_ON_ONCE(buffer, cond) \ - do { \ - static int once; \ - if (unlikely(cond) && !once) { \ - once++; \ + ({ \ + int _____ret = unlikely(cond); \ + if (_____ret) { \ atomic_inc(&buffer->record_disabled); \ WARN_ON(1); \ } \ - } while (0) + _____ret; \ + }) /** * check_pages - integrity check of buffer pages @@ -260,14 +247,18 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) struct list_head *head = &cpu_buffer->pages; struct buffer_page *page, *tmp; - RB_WARN_ON_RET(cpu_buffer, head->next->prev != head); - RB_WARN_ON_RET(cpu_buffer, head->prev->next != head); + if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) + return -1; + if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) + return -1; list_for_each_entry_safe(page, tmp, head, list) { - RB_WARN_ON_RET(cpu_buffer, - page->list.next->prev != &page->list); - RB_WARN_ON_RET(cpu_buffer, - page->list.prev->next != &page->list); + if (RB_WARN_ON(cpu_buffer, + page->list.next->prev != &page->list)) + return -1; + if (RB_WARN_ON(cpu_buffer, + page->list.prev->next != &page->list)) + return -1; } return 0; @@ -324,7 +315,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) cpu_buffer->cpu = cpu; cpu_buffer->buffer = buffer; - spin_lock_init(&cpu_buffer->lock); + spin_lock_init(&cpu_buffer->reader_lock); + cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; INIT_LIST_HEAD(&cpu_buffer->pages); page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), @@ -473,13 +465,15 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) synchronize_sched(); for (i = 0; i < nr_pages; i++) { - BUG_ON(list_empty(&cpu_buffer->pages)); + if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) + return; p = cpu_buffer->pages.next; page = list_entry(p, struct buffer_page, list); list_del_init(&page->list); free_buffer_page(page); } - BUG_ON(list_empty(&cpu_buffer->pages)); + if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) + return; rb_reset_cpu(cpu_buffer); @@ -501,7 +495,8 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, synchronize_sched(); for (i = 0; i < nr_pages; i++) { - BUG_ON(list_empty(pages)); + if (RB_WARN_ON(cpu_buffer, list_empty(pages))) + return; p = pages->next; page = list_entry(p, struct buffer_page, list); list_del_init(&page->list); @@ -562,7 +557,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) if (size < buffer_size) { /* easy case, just free pages */ - BUG_ON(nr_pages >= buffer->pages); + if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) { + mutex_unlock(&buffer->mutex); + return -1; + } rm_pages = buffer->pages - nr_pages; @@ -581,7 +579,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) * add these pages to the cpu_buffers. Otherwise we just free * them all and return -ENOMEM; */ - BUG_ON(nr_pages <= buffer->pages); + if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) { + mutex_unlock(&buffer->mutex); + return -1; + } + new_pages = nr_pages - buffer->pages; for_each_buffer_cpu(buffer, cpu) { @@ -604,7 +606,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) rb_insert_pages(cpu_buffer, &pages, new_pages); } - BUG_ON(!list_empty(&pages)); + if (RB_WARN_ON(buffer, !list_empty(&pages))) { + mutex_unlock(&buffer->mutex); + return -1; + } out: buffer->pages = nr_pages; @@ -692,7 +697,8 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer) head += rb_event_length(event)) { event = __rb_page_index(cpu_buffer->head_page, head); - BUG_ON(rb_null_event(event)); + if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) + return; /* Only count data entries */ if (event->type != RINGBUF_TYPE_DATA) continue; @@ -745,8 +751,9 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, addr &= PAGE_MASK; while (cpu_buffer->commit_page->page != (void *)addr) { - RB_WARN_ON(cpu_buffer, - cpu_buffer->commit_page == cpu_buffer->tail_page); + if (RB_WARN_ON(cpu_buffer, + cpu_buffer->commit_page == cpu_buffer->tail_page)) + return; cpu_buffer->commit_page->commit = cpu_buffer->commit_page->write; rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); @@ -893,7 +900,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, if (write > BUF_PAGE_SIZE) { struct buffer_page *next_page = tail_page; - spin_lock_irqsave(&cpu_buffer->lock, flags); + local_irq_save(flags); + __raw_spin_lock(&cpu_buffer->lock); rb_inc_page(cpu_buffer, &next_page); @@ -901,7 +909,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, reader_page = cpu_buffer->reader_page; /* we grabbed the lock before incrementing */ - RB_WARN_ON(cpu_buffer, next_page == reader_page); + if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) + goto out_unlock; /* * If for some reason, we had an interrupt storm that made @@ -969,7 +978,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, rb_set_commit_to_write(cpu_buffer); } - spin_unlock_irqrestore(&cpu_buffer->lock, flags); + __raw_spin_unlock(&cpu_buffer->lock); + local_irq_restore(flags); /* fail and let the caller try again */ return ERR_PTR(-EAGAIN); @@ -977,7 +987,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* We reserved something on the buffer */ - BUG_ON(write > BUF_PAGE_SIZE); + if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE)) + return NULL; event = __rb_page_index(tail_page, tail); rb_update_event(event, type, length); @@ -992,7 +1003,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, return event; out_unlock: - spin_unlock_irqrestore(&cpu_buffer->lock, flags); + __raw_spin_unlock(&cpu_buffer->lock); + local_irq_restore(flags); return NULL; } @@ -1075,10 +1087,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, * storm or we have something buggy. * Bail! */ - if (unlikely(++nr_loops > 1000)) { - RB_WARN_ON(cpu_buffer, 1); + if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) return NULL; - } ts = ring_buffer_time_stamp(cpu_buffer->cpu); @@ -1181,8 +1191,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, return NULL; /* If we are tracing schedule, we don't want to recurse */ - resched = need_resched(); - preempt_disable_notrace(); + resched = ftrace_preempt_disable(); cpu = raw_smp_processor_id(); @@ -1213,10 +1222,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, return event; out: - if (resched) - preempt_enable_notrace(); - else - preempt_enable_notrace(); + ftrace_preempt_enable(resched); return NULL; } @@ -1258,12 +1264,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, /* * Only the last preempt count needs to restore preemption. */ - if (preempt_count() == 1) { - if (per_cpu(rb_need_resched, cpu)) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); - } else + if (preempt_count() == 1) + ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); + else preempt_enable_no_resched_notrace(); return 0; @@ -1299,8 +1302,7 @@ int ring_buffer_write(struct ring_buffer *buffer, if (atomic_read(&buffer->record_disabled)) return -EBUSY; - resched = need_resched(); - preempt_disable_notrace(); + resched = ftrace_preempt_disable(); cpu = raw_smp_processor_id(); @@ -1326,10 +1328,7 @@ int ring_buffer_write(struct ring_buffer *buffer, ret = 0; out: - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); + ftrace_preempt_enable(resched); return ret; } @@ -1488,14 +1487,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer) return overruns; } -/** - * ring_buffer_iter_reset - reset an iterator - * @iter: The iterator to reset - * - * Resets the iterator, so that it will start from the beginning - * again. - */ -void ring_buffer_iter_reset(struct ring_buffer_iter *iter) +static void rb_iter_reset(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; @@ -1514,6 +1506,23 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter) } /** + * ring_buffer_iter_reset - reset an iterator + * @iter: The iterator to reset + * + * Resets the iterator, so that it will start from the beginning + * again. + */ +void ring_buffer_iter_reset(struct ring_buffer_iter *iter) +{ + struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; + unsigned long flags; + + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + rb_iter_reset(iter); + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); +} + +/** * ring_buffer_iter_empty - check if an iterator has no more to read * @iter: The iterator to check */ @@ -1596,7 +1605,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) unsigned long flags; int nr_loops = 0; - spin_lock_irqsave(&cpu_buffer->lock, flags); + local_irq_save(flags); + __raw_spin_lock(&cpu_buffer->lock); again: /* @@ -1605,8 +1615,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * a case where we will loop three times. There should be no * reason to loop four times (that I know of). */ - if (unlikely(++nr_loops > 3)) { - RB_WARN_ON(cpu_buffer, 1); + if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) { reader = NULL; goto out; } @@ -1618,8 +1627,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) goto out; /* Never should we have an index greater than the size */ - RB_WARN_ON(cpu_buffer, - cpu_buffer->reader_page->read > rb_page_size(reader)); + if (RB_WARN_ON(cpu_buffer, + cpu_buffer->reader_page->read > rb_page_size(reader))) + goto out; /* check if we caught up to the tail */ reader = NULL; @@ -1658,7 +1668,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) goto again; out: - spin_unlock_irqrestore(&cpu_buffer->lock, flags); + __raw_spin_unlock(&cpu_buffer->lock); + local_irq_restore(flags); return reader; } @@ -1672,7 +1683,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) reader = rb_get_reader_page(cpu_buffer); /* This function should not be called when buffer is empty */ - BUG_ON(!reader); + if (RB_WARN_ON(cpu_buffer, !reader)) + return; event = rb_reader_event(cpu_buffer); @@ -1699,7 +1711,9 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) * Check if we are at the end of the buffer. */ if (iter->head >= rb_page_size(iter->head_page)) { - BUG_ON(iter->head_page == cpu_buffer->commit_page); + if (RB_WARN_ON(buffer, + iter->head_page == cpu_buffer->commit_page)) + return; rb_inc_iter(iter); return; } @@ -1712,8 +1726,10 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) * This should not be called to advance the header if we are * at the tail of the buffer. */ - BUG_ON((iter->head_page == cpu_buffer->commit_page) && - (iter->head + length > rb_commit_index(cpu_buffer))); + if (RB_WARN_ON(cpu_buffer, + (iter->head_page == cpu_buffer->commit_page) && + (iter->head + length > rb_commit_index(cpu_buffer)))) + return; rb_update_iter_read_stamp(iter, event); @@ -1725,17 +1741,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) rb_advance_iter(iter); } -/** - * ring_buffer_peek - peek at the next event to be read - * @buffer: The ring buffer to read - * @cpu: The cpu to peak at - * @ts: The timestamp counter of this event. - * - * This will return the event that will be read next, but does - * not consume the data. - */ -struct ring_buffer_event * -ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) +static struct ring_buffer_event * +rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; @@ -1756,10 +1763,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) * can have. Nesting 10 deep of interrupts is clearly * an anomaly. */ - if (unlikely(++nr_loops > 10)) { - RB_WARN_ON(cpu_buffer, 1); + if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) return NULL; - } reader = rb_get_reader_page(cpu_buffer); if (!reader) @@ -1797,16 +1802,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) return NULL; } -/** - * ring_buffer_iter_peek - peek at the next event to be read - * @iter: The ring buffer iterator - * @ts: The timestamp counter of this event. - * - * This will return the event that will be read next, but does - * not increment the iterator. - */ -struct ring_buffer_event * -ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) +static struct ring_buffer_event * +rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) { struct ring_buffer *buffer; struct ring_buffer_per_cpu *cpu_buffer; @@ -1828,10 +1825,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) * can have. Nesting 10 deep of interrupts is clearly * an anomaly. */ - if (unlikely(++nr_loops > 10)) { - RB_WARN_ON(cpu_buffer, 1); + if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) return NULL; - } if (rb_per_cpu_empty(cpu_buffer)) return NULL; @@ -1868,6 +1863,51 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) } /** + * ring_buffer_peek - peek at the next event to be read + * @buffer: The ring buffer to read + * @cpu: The cpu to peak at + * @ts: The timestamp counter of this event. + * + * This will return the event that will be read next, but does + * not consume the data. + */ +struct ring_buffer_event * +ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) +{ + struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; + struct ring_buffer_event *event; + unsigned long flags; + + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + event = rb_buffer_peek(buffer, cpu, ts); + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + + return event; +} + +/** + * ring_buffer_iter_peek - peek at the next event to be read + * @iter: The ring buffer iterator + * @ts: The timestamp counter of this event. + * + * This will return the event that will be read next, but does + * not increment the iterator. + */ +struct ring_buffer_event * +ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) +{ + struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; + struct ring_buffer_event *event; + unsigned long flags; + + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + event = rb_iter_peek(iter, ts); + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + + return event; +} + +/** * ring_buffer_consume - return an event and consume it * @buffer: The ring buffer to get the next event from * @@ -1878,19 +1918,24 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) struct ring_buffer_event * ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) { - struct ring_buffer_per_cpu *cpu_buffer; + struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct ring_buffer_event *event; + unsigned long flags; if (!cpu_isset(cpu, buffer->cpumask)) return NULL; - event = ring_buffer_peek(buffer, cpu, ts); + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + + event = rb_buffer_peek(buffer, cpu, ts); if (!event) - return NULL; + goto out; - cpu_buffer = buffer->buffers[cpu]; rb_advance_reader(cpu_buffer); + out: + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + return event; } @@ -1927,9 +1972,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu) atomic_inc(&cpu_buffer->record_disabled); synchronize_sched(); - spin_lock_irqsave(&cpu_buffer->lock, flags); - ring_buffer_iter_reset(iter); - spin_unlock_irqrestore(&cpu_buffer->lock, flags); + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + __raw_spin_lock(&cpu_buffer->lock); + rb_iter_reset(iter); + __raw_spin_unlock(&cpu_buffer->lock); + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); return iter; } @@ -1961,12 +2008,17 @@ struct ring_buffer_event * ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) { struct ring_buffer_event *event; + struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; + unsigned long flags; - event = ring_buffer_iter_peek(iter, ts); + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + event = rb_iter_peek(iter, ts); if (!event) - return NULL; + goto out; rb_advance_iter(iter); + out: + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); return event; } @@ -2015,11 +2067,15 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) if (!cpu_isset(cpu, buffer->cpumask)) return; - spin_lock_irqsave(&cpu_buffer->lock, flags); + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + + __raw_spin_lock(&cpu_buffer->lock); rb_reset_cpu(cpu_buffer); - spin_unlock_irqrestore(&cpu_buffer->lock, flags); + __raw_spin_unlock(&cpu_buffer->lock); + + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } /** diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 697eda36b86a..4a904623e05d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -43,6 +43,15 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; + +/* + * Kill all tracing for good (never come back). + * It is initialized to 1 but will turn to zero if the initialization + * of the tracer is successful. But that is the only place that sets + * this back to zero. + */ +int tracing_disabled = 1; + static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); static inline void ftrace_disable_cpu(void) @@ -62,7 +71,36 @@ static cpumask_t __read_mostly tracing_buffer_mask; #define for_each_tracing_cpu(cpu) \ for_each_cpu_mask(cpu, tracing_buffer_mask) -static int tracing_disabled = 1; +/* + * ftrace_dump_on_oops - variable to dump ftrace buffer on oops + * + * If there is an oops (or kernel panic) and the ftrace_dump_on_oops + * is set, then ftrace_dump is called. This will output the contents + * of the ftrace buffers to the console. This is very useful for + * capturing traces that lead to crashes and outputing it to a + * serial console. + * + * It is default off, but you can enable it with either specifying + * "ftrace_dump_on_oops" in the kernel command line, or setting + * /proc/sys/kernel/ftrace_dump_on_oops to true. + */ +int ftrace_dump_on_oops; + +static int tracing_set_tracer(char *buf); + +static int __init set_ftrace(char *str) +{ + tracing_set_tracer(str); + return 1; +} +__setup("ftrace", set_ftrace); + +static int __init set_ftrace_dump_on_oops(char *str) +{ + ftrace_dump_on_oops = 1; + return 1; +} +__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); long ns2usecs(cycle_t nsec) @@ -112,6 +150,19 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data); /* tracer_enabled is used to toggle activation of a tracer */ static int tracer_enabled = 1; +/** + * tracing_is_enabled - return tracer_enabled status + * + * This function is used by other tracers to know the status + * of the tracer_enabled flag. Tracers may use this function + * to know if it should enable their features when starting + * up. See irqsoff tracer for an example (start_irqsoff_tracer). + */ +int tracing_is_enabled(void) +{ + return tracer_enabled; +} + /* function tracing enabled */ int ftrace_function_enabled; @@ -153,8 +204,9 @@ static DEFINE_MUTEX(trace_types_lock); /* trace_wait is a waitqueue for tasks blocked on trace_poll */ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); -/* trace_flags holds iter_ctrl options */ -unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; +/* trace_flags holds trace_options default values */ +unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | + TRACE_ITER_ANNOTATE; /** * trace_wake_up - wake up tasks waiting for trace input @@ -193,13 +245,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs) return nsecs / 1000; } -/* - * TRACE_ITER_SYM_MASK masks the options in trace_flags that - * control the output of kernel symbols. - */ -#define TRACE_ITER_SYM_MASK \ - (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) - /* These must match the bit postions in trace_iterator_flags */ static const char *trace_options[] = { "print-parent", @@ -213,6 +258,11 @@ static const char *trace_options[] = { "stacktrace", "sched-tree", "ftrace_printk", + "ftrace_preempt", +#ifdef CONFIG_BRANCH_TRACER + "branch", +#endif + "annotate", NULL }; @@ -485,7 +535,6 @@ int register_tracer(struct tracer *type) if (type->selftest) { struct tracer *saved_tracer = current_trace; struct trace_array *tr = &global_trace; - int saved_ctrl = tr->ctrl; int i; /* * Run a selftest on this tracer. @@ -498,13 +547,11 @@ int register_tracer(struct tracer *type) tracing_reset(tr, i); } current_trace = type; - tr->ctrl = 0; /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); ret = type->selftest(type, tr); /* the test is responsible for resetting too */ current_trace = saved_tracer; - tr->ctrl = saved_ctrl; if (ret) { printk(KERN_CONT "FAILED!\n"); goto out; @@ -581,6 +628,76 @@ static void trace_init_cmdlines(void) cmdline_idx = 0; } +static int trace_stop_count; +static DEFINE_SPINLOCK(tracing_start_lock); + +/** + * tracing_start - quick start of the tracer + * + * If tracing is enabled but was stopped by tracing_stop, + * this will start the tracer back up. + */ +void tracing_start(void) +{ + struct ring_buffer *buffer; + unsigned long flags; + + if (tracing_disabled) + return; + + spin_lock_irqsave(&tracing_start_lock, flags); + if (--trace_stop_count) + goto out; + + if (trace_stop_count < 0) { + /* Someone screwed up their debugging */ + WARN_ON_ONCE(1); + trace_stop_count = 0; + goto out; + } + + + buffer = global_trace.buffer; + if (buffer) + ring_buffer_record_enable(buffer); + + buffer = max_tr.buffer; + if (buffer) + ring_buffer_record_enable(buffer); + + ftrace_start(); + out: + spin_unlock_irqrestore(&tracing_start_lock, flags); +} + +/** + * tracing_stop - quick stop of the tracer + * + * Light weight way to stop tracing. Use in conjunction with + * tracing_start. + */ +void tracing_stop(void) +{ + struct ring_buffer *buffer; + unsigned long flags; + + ftrace_stop(); + spin_lock_irqsave(&tracing_start_lock, flags); + if (trace_stop_count++) + goto out; + + buffer = global_trace.buffer; + if (buffer) + ring_buffer_record_disable(buffer); + + buffer = max_tr.buffer; + if (buffer) + ring_buffer_record_disable(buffer); + + out: + spin_unlock_irqrestore(&tracing_start_lock, flags); +} + void trace_stop_cmdline_recording(void); static void trace_save_cmdline(struct task_struct *tsk) @@ -691,6 +808,35 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } +#ifdef CONFIG_FUNCTION_RET_TRACER +static void __trace_function_return(struct trace_array *tr, + struct trace_array_cpu *data, + struct ftrace_retfunc *trace, + unsigned long flags, + int pc) +{ + struct ring_buffer_event *event; + struct ftrace_ret_entry *entry; + unsigned long irq_flags; + + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) + return; + + event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_FN_RET; + entry->ip = trace->func; + entry->parent_ip = trace->ret; + entry->rettime = trace->rettime; + entry->calltime = trace->calltime; + ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); +} +#endif + void ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags, @@ -841,26 +987,28 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; + unsigned long flags; int cpu; int pc; - if (tracing_disabled || !tr->ctrl) + if (tracing_disabled) return; pc = preempt_count(); - preempt_disable_notrace(); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (likely(!atomic_read(&data->disabled))) + if (likely(atomic_inc_return(&data->disabled) == 1)) ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); - preempt_enable_notrace(); + atomic_dec(&data->disabled); + local_irq_restore(flags); } #ifdef CONFIG_FUNCTION_TRACER static void -function_trace_call(unsigned long ip, unsigned long parent_ip) +function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -873,8 +1021,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) return; pc = preempt_count(); - resched = need_resched(); - preempt_disable_notrace(); + resched = ftrace_preempt_disable(); local_save_flags(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -884,11 +1031,62 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) trace_function(tr, data, ip, parent_ip, flags, pc); atomic_dec(&data->disabled); - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); + ftrace_preempt_enable(resched); +} + +static void +function_trace_call(unsigned long ip, unsigned long parent_ip) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + if (unlikely(!ftrace_function_enabled)) + return; + + /* + * Need to use raw, since this must be called before the + * recursive protection is performed. + */ + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { + pc = preempt_count(); + trace_function(tr, data, ip, parent_ip, flags, pc); + } + + atomic_dec(&data->disabled); + raw_local_irq_restore(flags); +} + +#ifdef CONFIG_FUNCTION_RET_TRACER +void trace_function_return(struct ftrace_retfunc *trace) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { + pc = preempt_count(); + __trace_function_return(tr, data, trace, flags, pc); + } + atomic_dec(&data->disabled); + raw_local_irq_restore(flags); } +#endif /* CONFIG_FUNCTION_RET_TRACER */ static struct ftrace_ops trace_ops __read_mostly = { @@ -898,9 +1096,14 @@ static struct ftrace_ops trace_ops __read_mostly = void tracing_start_function_trace(void) { ftrace_function_enabled = 0; + + if (trace_flags & TRACE_ITER_PREEMPTONLY) + trace_ops.func = function_trace_call_preempt_only; + else + trace_ops.func = function_trace_call; + register_ftrace_function(&trace_ops); - if (tracer_enabled) - ftrace_function_enabled = 1; + ftrace_function_enabled = 1; } void tracing_stop_function_trace(void) @@ -912,6 +1115,7 @@ void tracing_stop_function_trace(void) enum trace_file_type { TRACE_FILE_LAT_FMT = 1, + TRACE_FILE_ANNOTATE = 2, }; static void trace_iterator_increment(struct trace_iterator *iter, int cpu) @@ -1047,10 +1251,6 @@ static void *s_start(struct seq_file *m, loff_t *pos) atomic_inc(&trace_record_cmdline_disabled); - /* let the tracer grab locks here if needed */ - if (current_trace->start) - current_trace->start(iter); - if (*pos != iter->pos) { iter->ent = NULL; iter->cpu = 0; @@ -1077,14 +1277,7 @@ static void *s_start(struct seq_file *m, loff_t *pos) static void s_stop(struct seq_file *m, void *p) { - struct trace_iterator *iter = m->private; - atomic_dec(&trace_record_cmdline_disabled); - - /* let the tracer release locks here if needed */ - if (current_trace && current_trace == iter->trace && iter->trace->stop) - iter->trace->stop(iter); - mutex_unlock(&trace_types_lock); } @@ -1143,7 +1336,7 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, # define IP_FMT "%016lx" #endif -static int +int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) { int ret; @@ -1338,6 +1531,23 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) trace_seq_putc(s, '\n'); } +static void test_cpu_buff_start(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + + if (!(trace_flags & TRACE_ITER_ANNOTATE)) + return; + + if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) + return; + + if (cpu_isset(iter->cpu, iter->started)) + return; + + cpu_set(iter->cpu, iter->started); + trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); +} + static enum print_line_t print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) { @@ -1357,6 +1567,8 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) if (entry->type == TRACE_CONT) return TRACE_TYPE_HANDLED; + test_cpu_buff_start(iter); + next_entry = find_next_entry(iter, NULL, &next_ts); if (!next_entry) next_ts = iter->ts; @@ -1448,6 +1660,18 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_seq_print_cont(s, iter); break; } + case TRACE_BRANCH: { + struct trace_branch *field; + + trace_assign_type(field, entry); + + trace_seq_printf(s, "[%s] %s:%s:%d\n", + field->correct ? " ok " : " MISS ", + field->func, + field->file, + field->line); + break; + } default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1472,6 +1696,8 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) if (entry->type == TRACE_CONT) return TRACE_TYPE_HANDLED; + test_cpu_buff_start(iter); + comm = trace_find_cmdline(iter->ent->pid); t = ns2usecs(iter->ts); @@ -1581,6 +1807,22 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) trace_seq_print_cont(s, iter); break; } + case TRACE_FN_RET: { + return print_return_function(iter); + break; + } + case TRACE_BRANCH: { + struct trace_branch *field; + + trace_assign_type(field, entry); + + trace_seq_printf(s, "[%s] %s:%s:%d\n", + field->correct ? " ok " : " MISS ", + field->func, + field->file, + field->line); + break; + } } return TRACE_TYPE_HANDLED; } @@ -1899,6 +2141,11 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) iter->trace = current_trace; iter->pos = -1; + /* Annotate start of buffers if we had overruns */ + if (ring_buffer_overruns(iter->tr->buffer)) + iter->iter_flags |= TRACE_FILE_ANNOTATE; + + for_each_tracing_cpu(cpu) { iter->buffer_iter[cpu] = @@ -1917,10 +2164,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) m->private = iter; /* stop the trace while dumping */ - if (iter->tr->ctrl) { - tracer_enabled = 0; - ftrace_function_enabled = 0; - } + tracing_stop(); if (iter->trace && iter->trace->open) iter->trace->open(iter); @@ -1965,14 +2209,7 @@ int tracing_release(struct inode *inode, struct file *file) iter->trace->close(iter); /* reenable tracing if it was previously enabled */ - if (iter->tr->ctrl) { - tracer_enabled = 1; - /* - * It is safe to enable function tracing even if it - * isn't used - */ - ftrace_function_enabled = 1; - } + tracing_start(); mutex_unlock(&trace_types_lock); seq_release(inode, file); @@ -2188,7 +2425,7 @@ static struct file_operations tracing_cpumask_fops = { }; static ssize_t -tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, +tracing_trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { char *buf; @@ -2225,7 +2462,7 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, } static ssize_t -tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, +tracing_trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { char buf[64]; @@ -2270,8 +2507,8 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, static struct file_operations tracing_iter_fops = { .open = tracing_open_generic, - .read = tracing_iter_ctrl_read, - .write = tracing_iter_ctrl_write, + .read = tracing_trace_options_read, + .write = tracing_trace_options_write, }; static const char readme_msg[] = @@ -2285,9 +2522,9 @@ static const char readme_msg[] = "# echo sched_switch > /debug/tracing/current_tracer\n" "# cat /debug/tracing/current_tracer\n" "sched_switch\n" - "# cat /debug/tracing/iter_ctrl\n" + "# cat /debug/tracing/trace_options\n" "noprint-parent nosym-offset nosym-addr noverbose\n" - "# echo print-parent > /debug/tracing/iter_ctrl\n" + "# echo print-parent > /debug/tracing/trace_options\n" "# echo 1 > /debug/tracing/tracing_enabled\n" "# cat /debug/tracing/trace > /tmp/trace.txt\n" "echo 0 > /debug/tracing/tracing_enabled\n" @@ -2310,11 +2547,10 @@ static ssize_t tracing_ctrl_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct trace_array *tr = filp->private_data; char buf[64]; int r; - r = sprintf(buf, "%ld\n", tr->ctrl); + r = sprintf(buf, "%u\n", tracer_enabled); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -2342,16 +2578,18 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, val = !!val; mutex_lock(&trace_types_lock); - if (tr->ctrl ^ val) { - if (val) + if (tracer_enabled ^ val) { + if (val) { tracer_enabled = 1; - else + if (current_trace->start) + current_trace->start(tr); + tracing_start(); + } else { tracer_enabled = 0; - - tr->ctrl = val; - - if (current_trace && current_trace->ctrl_update) - current_trace->ctrl_update(tr); + tracing_stop(); + if (current_trace->stop) + current_trace->stop(tr); + } } mutex_unlock(&trace_types_lock); @@ -2377,29 +2615,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static ssize_t -tracing_set_trace_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) +static int tracing_set_tracer(char *buf) { struct trace_array *tr = &global_trace; struct tracer *t; - char buf[max_tracer_type_len+1]; - int i; - size_t ret; - - ret = cnt; - - if (cnt > max_tracer_type_len) - cnt = max_tracer_type_len; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - - /* strip ending whitespace. */ - for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) - buf[i] = 0; + int ret = 0; mutex_lock(&trace_types_lock); for (t = trace_types; t; t = t->next) { @@ -2413,6 +2633,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, if (t == current_trace) goto out; + trace_branch_disable(); if (current_trace && current_trace->reset) current_trace->reset(tr); @@ -2420,9 +2641,37 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, if (t->init) t->init(tr); + trace_branch_enable(tr); out: mutex_unlock(&trace_types_lock); + return ret; +} + +static ssize_t +tracing_set_trace_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[max_tracer_type_len+1]; + int i; + size_t ret; + + if (cnt > max_tracer_type_len) + cnt = max_tracer_type_len; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + /* strip ending whitespace. */ + for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) + buf[i] = 0; + + ret = tracing_set_tracer(buf); + if (!ret) + ret = cnt; + if (ret > 0) filp->f_pos += ret; @@ -2491,6 +2740,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) return -ENOMEM; mutex_lock(&trace_types_lock); + + /* trace pipe does not show start of buffer */ + cpus_setall(iter->started); + iter->tr = &global_trace; iter->trace = current_trace; filp->private_data = iter; @@ -2666,7 +2919,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf, char buf[64]; int r; - r = sprintf(buf, "%lu\n", tr->entries); + r = sprintf(buf, "%lu\n", tr->entries >> 10); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -2677,7 +2930,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, unsigned long val; char buf[64]; int ret, cpu; - struct trace_array *tr = filp->private_data; if (cnt >= sizeof(buf)) return -EINVAL; @@ -2697,12 +2949,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, mutex_lock(&trace_types_lock); - if (tr->ctrl) { - cnt = -EBUSY; - pr_info("ftrace: please disable tracing" - " before modifying buffer size\n"); - goto out; - } + tracing_stop(); /* disable all cpu buffers */ for_each_tracing_cpu(cpu) { @@ -2712,6 +2959,9 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, atomic_inc(&max_tr.data[cpu]->disabled); } + /* value is in KB */ + val <<= 10; + if (val != global_trace.entries) { ret = ring_buffer_resize(global_trace.buffer, val); if (ret < 0) { @@ -2750,6 +3000,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, atomic_dec(&max_tr.data[cpu]->disabled); } + tracing_start(); max_tr.entries = global_trace.entries; mutex_unlock(&trace_types_lock); @@ -2772,9 +3023,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, { char *buf; char *end; - struct trace_array *tr = &global_trace; - if (!tr->ctrl || tracing_disabled) + if (tracing_disabled) return -EINVAL; if (cnt > TRACE_BUF_SIZE) @@ -2840,22 +3090,38 @@ static struct file_operations tracing_mark_fops = { #ifdef CONFIG_DYNAMIC_FTRACE +int __weak ftrace_arch_read_dyn_info(char *buf, int size) +{ + return 0; +} + static ssize_t -tracing_read_long(struct file *filp, char __user *ubuf, +tracing_read_dyn_info(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { + static char ftrace_dyn_info_buffer[1024]; + static DEFINE_MUTEX(dyn_info_mutex); unsigned long *p = filp->private_data; - char buf[64]; + char *buf = ftrace_dyn_info_buffer; + int size = ARRAY_SIZE(ftrace_dyn_info_buffer); int r; - r = sprintf(buf, "%ld\n", *p); + mutex_lock(&dyn_info_mutex); + r = sprintf(buf, "%ld ", *p); - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); + r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r); + buf[r++] = '\n'; + + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); + + mutex_unlock(&dyn_info_mutex); + + return r; } -static struct file_operations tracing_read_long_fops = { +static struct file_operations tracing_dyn_info_fops = { .open = tracing_open_generic, - .read = tracing_read_long, + .read = tracing_read_dyn_info, }; #endif @@ -2896,10 +3162,10 @@ static __init int tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); - entry = debugfs_create_file("iter_ctrl", 0644, d_tracer, + entry = debugfs_create_file("trace_options", 0644, d_tracer, NULL, &tracing_iter_fops); if (!entry) - pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); + pr_warning("Could not create debugfs 'trace_options' entry\n"); entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, NULL, &tracing_cpumask_fops); @@ -2949,11 +3215,11 @@ static __init int tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'trace_pipe' entry\n"); - entry = debugfs_create_file("trace_entries", 0644, d_tracer, + entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer, &global_trace, &tracing_entries_fops); if (!entry) pr_warning("Could not create debugfs " - "'trace_entries' entry\n"); + "'buffer_size_kb' entry\n"); entry = debugfs_create_file("trace_marker", 0220, d_tracer, NULL, &tracing_mark_fops); @@ -2964,7 +3230,7 @@ static __init int tracer_init_debugfs(void) #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, - &tracing_read_long_fops); + &tracing_dyn_info_fops); if (!entry) pr_warning("Could not create debugfs " "'dyn_ftrace_total_info' entry\n"); @@ -2987,7 +3253,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) unsigned long flags, irq_flags; int cpu, len = 0, size, pc; - if (!tr->ctrl || tracing_disabled) + if (tracing_disabled) return 0; pc = preempt_count(); @@ -3045,7 +3311,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk); static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { - ftrace_dump(); + if (ftrace_dump_on_oops) + ftrace_dump(); return NOTIFY_OK; } @@ -3061,7 +3328,8 @@ static int trace_die_handler(struct notifier_block *self, { switch (val) { case DIE_OOPS: - ftrace_dump(); + if (ftrace_dump_on_oops) + ftrace_dump(); break; default: break; @@ -3102,7 +3370,6 @@ trace_printk_seq(struct trace_seq *s) trace_seq_reset(s); } - void ftrace_dump(void) { static DEFINE_SPINLOCK(ftrace_dump_lock); @@ -3220,7 +3487,6 @@ __init static int tracer_alloc_buffers(void) #endif /* All seems OK, enable tracing */ - global_trace.ctrl = tracer_enabled; tracing_disabled = 0; atomic_notifier_chain_register(&panic_notifier_list, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8465ad052707..790ea8c0e1f3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -8,6 +8,7 @@ #include <linux/ring_buffer.h> #include <linux/mmiotrace.h> #include <linux/ftrace.h> +#include <trace/boot.h> enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -21,7 +22,10 @@ enum trace_type { TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, - TRACE_BOOT, + TRACE_BRANCH, + TRACE_BOOT_CALL, + TRACE_BOOT_RET, + TRACE_FN_RET, __TRACE_LAST_TYPE }; @@ -48,6 +52,15 @@ struct ftrace_entry { unsigned long ip; unsigned long parent_ip; }; + +/* Function return entry */ +struct ftrace_ret_entry { + struct trace_entry ent; + unsigned long ip; + unsigned long parent_ip; + unsigned long long calltime; + unsigned long long rettime; +}; extern struct tracer boot_tracer; /* @@ -112,9 +125,24 @@ struct trace_mmiotrace_map { struct mmiotrace_map map; }; -struct trace_boot { +struct trace_boot_call { + struct trace_entry ent; + struct boot_trace_call boot_call; +}; + +struct trace_boot_ret { struct trace_entry ent; - struct boot_trace initcall; + struct boot_trace_ret boot_ret; +}; + +#define TRACE_FUNC_SIZE 30 +#define TRACE_FILE_SIZE 20 +struct trace_branch { + struct trace_entry ent; + unsigned line; + char func[TRACE_FUNC_SIZE+1]; + char file[TRACE_FILE_SIZE+1]; + char correct; }; /* @@ -172,7 +200,6 @@ struct trace_iterator; struct trace_array { struct ring_buffer *buffer; unsigned long entries; - long ctrl; int cpu; cycle_t time_start; struct task_struct *waiter; @@ -218,7 +245,10 @@ extern void __ftrace_bad_type(void); TRACE_MMIO_RW); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ TRACE_MMIO_MAP); \ - IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \ + IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ + IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ + IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ + IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\ __ftrace_bad_type(); \ } while (0) @@ -236,15 +266,14 @@ struct tracer { const char *name; void (*init)(struct trace_array *tr); void (*reset)(struct trace_array *tr); + void (*start)(struct trace_array *tr); + void (*stop)(struct trace_array *tr); void (*open)(struct trace_iterator *iter); void (*pipe_open)(struct trace_iterator *iter); void (*close)(struct trace_iterator *iter); - void (*start)(struct trace_iterator *iter); - void (*stop)(struct trace_iterator *iter); ssize_t (*read)(struct trace_iterator *iter, struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos); - void (*ctrl_update)(struct trace_array *tr); #ifdef CONFIG_FTRACE_STARTUP_TEST int (*selftest)(struct tracer *trace, struct trace_array *tr); @@ -279,8 +308,11 @@ struct trace_iterator { unsigned long iter_flags; loff_t pos; long idx; + + cpumask_t started; }; +int tracing_is_enabled(void); void trace_wake_up(void); void tracing_reset(struct trace_array *tr, int cpu); int tracing_open_generic(struct inode *inode, struct file *filp); @@ -320,9 +352,14 @@ void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); +void +trace_function_return(struct ftrace_retfunc *trace); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); +void tracing_sched_switch_assign_trace(struct trace_array *tr); +void tracing_stop_sched_switch_record(void); +void tracing_start_sched_switch_record(void); int register_tracer(struct tracer *type); void unregister_tracer(struct tracer *type); @@ -383,12 +420,18 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr); +extern int trace_selftest_startup_branch(struct tracer *trace, + struct trace_array *tr); #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); extern void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter); + +extern int +seq_print_ip_sym(struct trace_seq *s, unsigned long ip, + unsigned long sym_flags); extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt); extern long ns2usecs(cycle_t nsec); @@ -396,6 +439,17 @@ extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern unsigned long trace_flags; +/* Standard output formatting function used for function return traces */ +#ifdef CONFIG_FUNCTION_RET_TRACER +extern enum print_line_t print_return_function(struct trace_iterator *iter); +#else +static inline enum print_line_t +print_return_function(struct trace_iterator *iter) +{ + return TRACE_TYPE_UNHANDLED; +} +#endif + /* * trace_iterator_flags is an enumeration that defines bit * positions into trace_flags that controls the output. @@ -415,8 +469,92 @@ enum trace_iterator_flags { TRACE_ITER_STACKTRACE = 0x100, TRACE_ITER_SCHED_TREE = 0x200, TRACE_ITER_PRINTK = 0x400, + TRACE_ITER_PREEMPTONLY = 0x800, +#ifdef CONFIG_BRANCH_TRACER + TRACE_ITER_BRANCH = 0x1000, +#endif + TRACE_ITER_ANNOTATE = 0x2000, }; +/* + * TRACE_ITER_SYM_MASK masks the options in trace_flags that + * control the output of kernel symbols. + */ +#define TRACE_ITER_SYM_MASK \ + (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) + extern struct tracer nop_trace; +/** + * ftrace_preempt_disable - disable preemption scheduler safe + * + * When tracing can happen inside the scheduler, there exists + * cases that the tracing might happen before the need_resched + * flag is checked. If this happens and the tracer calls + * preempt_enable (after a disable), a schedule might take place + * causing an infinite recursion. + * + * To prevent this, we read the need_recshed flag before + * disabling preemption. When we want to enable preemption we + * check the flag, if it is set, then we call preempt_enable_no_resched. + * Otherwise, we call preempt_enable. + * + * The rational for doing the above is that if need resched is set + * and we have yet to reschedule, we are either in an atomic location + * (where we do not need to check for scheduling) or we are inside + * the scheduler and do not want to resched. + */ +static inline int ftrace_preempt_disable(void) +{ + int resched; + + resched = need_resched(); + preempt_disable_notrace(); + + return resched; +} + +/** + * ftrace_preempt_enable - enable preemption scheduler safe + * @resched: the return value from ftrace_preempt_disable + * + * This is a scheduler safe way to enable preemption and not miss + * any preemption checks. The disabled saved the state of preemption. + * If resched is set, then we were either inside an atomic or + * are inside the scheduler (we would have already scheduled + * otherwise). In this case, we do not want to call normal + * preempt_enable, but preempt_enable_no_resched instead. + */ +static inline void ftrace_preempt_enable(int resched) +{ + if (resched) + preempt_enable_no_resched_notrace(); + else + preempt_enable_notrace(); +} + +#ifdef CONFIG_BRANCH_TRACER +extern int enable_branch_tracing(struct trace_array *tr); +extern void disable_branch_tracing(void); +static inline int trace_branch_enable(struct trace_array *tr) +{ + if (trace_flags & TRACE_ITER_BRANCH) + return enable_branch_tracing(tr); + return 0; +} +static inline void trace_branch_disable(void) +{ + /* due to races, always disable */ + disable_branch_tracing(); +} +#else +static inline int trace_branch_enable(struct trace_array *tr) +{ + return 0; +} +static inline void trace_branch_disable(void) +{ +} +#endif /* CONFIG_BRANCH_TRACER */ + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index d0a5e50eeff2..cb333b7fd113 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -13,23 +13,38 @@ #include "trace.h" static struct trace_array *boot_trace; -static int trace_boot_enabled; +static bool pre_initcalls_finished; - -/* Should be started after do_pre_smp_initcalls() in init/main.c */ +/* Tells the boot tracer that the pre_smp_initcalls are finished. + * So we are ready . + * It doesn't enable sched events tracing however. + * You have to call enable_boot_trace to do so. + */ void start_boot_trace(void) { - trace_boot_enabled = 1; + pre_initcalls_finished = true; } -void stop_boot_trace(void) +void enable_boot_trace(void) { - trace_boot_enabled = 0; + if (pre_initcalls_finished) + tracing_start_sched_switch_record(); } -void reset_boot_trace(struct trace_array *tr) +void disable_boot_trace(void) { - stop_boot_trace(); + if (pre_initcalls_finished) + tracing_stop_sched_switch_record(); +} + +static void reset_boot_trace(struct trace_array *tr) +{ + int cpu; + + tr->time_start = ftrace_now(tr->cpu); + + for_each_online_cpu(cpu) + tracing_reset(tr, cpu); } static void boot_trace_init(struct trace_array *tr) @@ -37,49 +52,77 @@ static void boot_trace_init(struct trace_array *tr) int cpu; boot_trace = tr; - trace_boot_enabled = 0; - for_each_cpu_mask(cpu, cpu_possible_map) tracing_reset(tr, cpu); + + tracing_sched_switch_assign_trace(tr); } -static void boot_trace_ctrl_update(struct trace_array *tr) +static enum print_line_t +initcall_call_print_line(struct trace_iterator *iter) { - if (tr->ctrl) - start_boot_trace(); + struct trace_entry *entry = iter->ent; + struct trace_seq *s = &iter->seq; + struct trace_boot_call *field; + struct boot_trace_call *call; + u64 ts; + unsigned long nsec_rem; + int ret; + + trace_assign_type(field, entry); + call = &field->boot_call; + ts = iter->ts; + nsec_rem = do_div(ts, 1000000000); + + ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", + (unsigned long)ts, nsec_rem, call->func, call->caller); + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; else - stop_boot_trace(); + return TRACE_TYPE_HANDLED; } -static enum print_line_t initcall_print_line(struct trace_iterator *iter) +static enum print_line_t +initcall_ret_print_line(struct trace_iterator *iter) { - int ret; struct trace_entry *entry = iter->ent; - struct trace_boot *field = (struct trace_boot *)entry; - struct boot_trace *it = &field->initcall; struct trace_seq *s = &iter->seq; - struct timespec calltime = ktime_to_timespec(it->calltime); - struct timespec rettime = ktime_to_timespec(it->rettime); - - if (entry->type == TRACE_BOOT) { - ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", - calltime.tv_sec, - calltime.tv_nsec, - it->func, it->caller); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " - "returned %d after %lld msecs\n", - rettime.tv_sec, - rettime.tv_nsec, - it->func, it->result, it->duration); - - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + struct trace_boot_ret *field; + struct boot_trace_ret *init_ret; + u64 ts; + unsigned long nsec_rem; + int ret; + + trace_assign_type(field, entry); + init_ret = &field->boot_ret; + ts = iter->ts; + nsec_rem = do_div(ts, 1000000000); + + ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " + "returned %d after %llu msecs\n", + (unsigned long) ts, + nsec_rem, + init_ret->func, init_ret->result, init_ret->duration); + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + else return TRACE_TYPE_HANDLED; +} + +static enum print_line_t initcall_print_line(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + + switch (entry->type) { + case TRACE_BOOT_CALL: + return initcall_call_print_line(iter); + case TRACE_BOOT_RET: + return initcall_ret_print_line(iter); + default: + return TRACE_TYPE_UNHANDLED; } - return TRACE_TYPE_UNHANDLED; } struct tracer boot_tracer __read_mostly = @@ -87,27 +130,53 @@ struct tracer boot_tracer __read_mostly = .name = "initcall", .init = boot_trace_init, .reset = reset_boot_trace, - .ctrl_update = boot_trace_ctrl_update, .print_line = initcall_print_line, }; -void trace_boot(struct boot_trace *it, initcall_t fn) +void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { struct ring_buffer_event *event; - struct trace_boot *entry; - struct trace_array_cpu *data; + struct trace_boot_call *entry; unsigned long irq_flags; struct trace_array *tr = boot_trace; - if (!trace_boot_enabled) + if (!pre_initcalls_finished) return; /* Get its name now since this function could * disappear because it is in the .init section. */ - sprint_symbol(it->func, (unsigned long)fn); + sprint_symbol(bt->func, (unsigned long)fn); + preempt_disable(); + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + entry->ent.type = TRACE_BOOT_CALL; + entry->boot_call = *bt; + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); + + out: + preempt_enable(); +} + +void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) +{ + struct ring_buffer_event *event; + struct trace_boot_ret *entry; + unsigned long irq_flags; + struct trace_array *tr = boot_trace; + + if (!pre_initcalls_finished) + return; + + sprint_symbol(bt->func, (unsigned long)fn); preempt_disable(); - data = tr->data[smp_processor_id()]; event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq_flags); @@ -115,8 +184,8 @@ void trace_boot(struct boot_trace *it, initcall_t fn) goto out; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_BOOT; - entry->initcall = *it; + entry->ent.type = TRACE_BOOT_RET; + entry->boot_ret = *bt; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); trace_wake_up(); diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c new file mode 100644 index 000000000000..85265553918f --- /dev/null +++ b/kernel/trace/trace_branch.c @@ -0,0 +1,320 @@ +/* + * unlikely profiler + * + * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> + */ +#include <linux/kallsyms.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/ftrace.h> +#include <linux/hash.h> +#include <linux/fs.h> +#include <asm/local.h> +#include "trace.h" + +#ifdef CONFIG_BRANCH_TRACER + +static int branch_tracing_enabled __read_mostly; +static DEFINE_MUTEX(branch_tracing_mutex); +static struct trace_array *branch_tracer; + +static void +probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) +{ + struct trace_array *tr = branch_tracer; + struct ring_buffer_event *event; + struct trace_branch *entry; + unsigned long flags, irq_flags; + int cpu, pc; + const char *p; + + /* + * I would love to save just the ftrace_likely_data pointer, but + * this code can also be used by modules. Ugly things can happen + * if the module is unloaded, and then we go and read the + * pointer. This is slower, but much safer. + */ + + if (unlikely(!tr)) + return; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) + goto out; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out; + + pc = preempt_count(); + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_BRANCH; + + /* Strip off the path, only save the file */ + p = f->file + strlen(f->file); + while (p >= f->file && *p != '/') + p--; + p++; + + strncpy(entry->func, f->func, TRACE_FUNC_SIZE); + strncpy(entry->file, p, TRACE_FILE_SIZE); + entry->func[TRACE_FUNC_SIZE] = 0; + entry->file[TRACE_FILE_SIZE] = 0; + entry->line = f->line; + entry->correct = val == expect; + + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + out: + atomic_dec(&tr->data[cpu]->disabled); + local_irq_restore(flags); +} + +static inline +void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect) +{ + if (!branch_tracing_enabled) + return; + + probe_likely_condition(f, val, expect); +} + +int enable_branch_tracing(struct trace_array *tr) +{ + int ret = 0; + + mutex_lock(&branch_tracing_mutex); + branch_tracer = tr; + /* + * Must be seen before enabling. The reader is a condition + * where we do not need a matching rmb() + */ + smp_wmb(); + branch_tracing_enabled++; + mutex_unlock(&branch_tracing_mutex); + + return ret; +} + +void disable_branch_tracing(void) +{ + mutex_lock(&branch_tracing_mutex); + + if (!branch_tracing_enabled) + goto out_unlock; + + branch_tracing_enabled--; + + out_unlock: + mutex_unlock(&branch_tracing_mutex); +} + +static void start_branch_trace(struct trace_array *tr) +{ + enable_branch_tracing(tr); +} + +static void stop_branch_trace(struct trace_array *tr) +{ + disable_branch_tracing(); +} + +static void branch_trace_init(struct trace_array *tr) +{ + int cpu; + + for_each_online_cpu(cpu) + tracing_reset(tr, cpu); + + start_branch_trace(tr); +} + +static void branch_trace_reset(struct trace_array *tr) +{ + stop_branch_trace(tr); +} + +struct tracer branch_trace __read_mostly = +{ + .name = "branch", + .init = branch_trace_init, + .reset = branch_trace_reset, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_branch, +#endif +}; + +__init static int init_branch_trace(void) +{ + return register_tracer(&branch_trace); +} + +device_initcall(init_branch_trace); +#else +static inline +void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect) +{ +} +#endif /* CONFIG_BRANCH_TRACER */ + +void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect) +{ + /* + * I would love to have a trace point here instead, but the + * trace point code is so inundated with unlikely and likely + * conditions that the recursive nightmare that exists is too + * much to try to get working. At least for now. + */ + trace_likely_condition(f, val, expect); + + /* FIXME: Make this atomic! */ + if (val == expect) + f->correct++; + else + f->incorrect++; +} +EXPORT_SYMBOL(ftrace_likely_update); + +struct ftrace_pointer { + void *start; + void *stop; +}; + +static void * +t_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct ftrace_pointer *f = m->private; + struct ftrace_branch_data *p = v; + + (*pos)++; + + if (v == (void *)1) + return f->start; + + ++p; + + if ((void *)p >= (void *)f->stop) + return NULL; + + return p; +} + +static void *t_start(struct seq_file *m, loff_t *pos) +{ + void *t = (void *)1; + loff_t l = 0; + + for (; t && l < *pos; t = t_next(m, t, &l)) + ; + + return t; +} + +static void t_stop(struct seq_file *m, void *p) +{ +} + +static int t_show(struct seq_file *m, void *v) +{ + struct ftrace_branch_data *p = v; + const char *f; + unsigned long percent; + + if (v == (void *)1) { + seq_printf(m, " correct incorrect %% " + " Function " + " File Line\n" + " ------- --------- - " + " -------- " + " ---- ----\n"); + return 0; + } + + /* Only print the file, not the path */ + f = p->file + strlen(p->file); + while (f >= p->file && *f != '/') + f--; + f++; + + if (p->correct) { + percent = p->incorrect * 100; + percent /= p->correct + p->incorrect; + } else + percent = p->incorrect ? 100 : 0; + + seq_printf(m, "%8lu %8lu %3lu ", p->correct, p->incorrect, percent); + seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line); + return 0; +} + +static struct seq_operations tracing_likely_seq_ops = { + .start = t_start, + .next = t_next, + .stop = t_stop, + .show = t_show, +}; + +static int tracing_likely_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = seq_open(file, &tracing_likely_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = (void *)inode->i_private; + } + + return ret; +} + +static struct file_operations tracing_likely_fops = { + .open = tracing_likely_open, + .read = seq_read, + .llseek = seq_lseek, +}; + +extern unsigned long __start_likely_profile[]; +extern unsigned long __stop_likely_profile[]; +extern unsigned long __start_unlikely_profile[]; +extern unsigned long __stop_unlikely_profile[]; + +static struct ftrace_pointer ftrace_likely_pos = { + .start = __start_likely_profile, + .stop = __stop_likely_profile, +}; + +static struct ftrace_pointer ftrace_unlikely_pos = { + .start = __start_unlikely_profile, + .stop = __stop_unlikely_profile, +}; + +static __init int ftrace_branch_init(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + + entry = debugfs_create_file("profile_likely", 0444, d_tracer, + &ftrace_likely_pos, + &tracing_likely_fops); + if (!entry) + pr_warning("Could not create debugfs 'profile_likely' entry\n"); + + entry = debugfs_create_file("profile_unlikely", 0444, d_tracer, + &ftrace_unlikely_pos, + &tracing_likely_fops); + if (!entry) + pr_warning("Could not create debugfs" + " 'profile_unlikely' entry\n"); + + return 0; +} + +device_initcall(ftrace_branch_init); diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 0f85a64003d3..8693b7a0a5b2 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -44,22 +44,17 @@ static void stop_function_trace(struct trace_array *tr) static void function_trace_init(struct trace_array *tr) { - if (tr->ctrl) - start_function_trace(tr); + start_function_trace(tr); } static void function_trace_reset(struct trace_array *tr) { - if (tr->ctrl) - stop_function_trace(tr); + stop_function_trace(tr); } -static void function_trace_ctrl_update(struct trace_array *tr) +static void function_trace_start(struct trace_array *tr) { - if (tr->ctrl) - start_function_trace(tr); - else - stop_function_trace(tr); + function_reset(tr); } static struct tracer function_trace __read_mostly = @@ -67,7 +62,7 @@ static struct tracer function_trace __read_mostly = .name = "function", .init = function_trace_init, .reset = function_trace_reset, - .ctrl_update = function_trace_ctrl_update, + .start = function_trace_start, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_function, #endif diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c new file mode 100644 index 000000000000..7680b21537dd --- /dev/null +++ b/kernel/trace/trace_functions_return.c @@ -0,0 +1,82 @@ +/* + * + * Function return tracer. + * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com> + * Mostly borrowed from function tracer which + * is Copyright (c) Steven Rostedt <srostedt@redhat.com> + * + */ +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/ftrace.h> +#include <linux/fs.h> + +#include "trace.h" + + +static void start_return_trace(struct trace_array *tr) +{ + register_ftrace_return(&trace_function_return); +} + +static void stop_return_trace(struct trace_array *tr) +{ + unregister_ftrace_return(); +} + +static void return_trace_init(struct trace_array *tr) +{ + int cpu; + for_each_online_cpu(cpu) + tracing_reset(tr, cpu); + + start_return_trace(tr); +} + +static void return_trace_reset(struct trace_array *tr) +{ + stop_return_trace(tr); +} + + +enum print_line_t +print_return_function(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + struct ftrace_ret_entry *field; + int ret; + + if (entry->type == TRACE_FN_RET) { + trace_assign_type(field, entry); + ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + ret = seq_print_ip_sym(s, field->ip, + trace_flags & TRACE_ITER_SYM_MASK); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + ret = trace_seq_printf(s, " (%llu ns)\n", + field->rettime - field->calltime); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + else + return TRACE_TYPE_HANDLED; + } + return TRACE_TYPE_UNHANDLED; +} + +static struct tracer return_trace __read_mostly = +{ + .name = "return", + .init = return_trace_init, + .reset = return_trace_reset, + .print_line = print_return_function +}; + +static __init int init_return_trace(void) +{ + return register_tracer(&return_trace); +} + +device_initcall(init_return_trace); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 9c74071c10e0..d919d4eaa7cc 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -353,15 +353,28 @@ void trace_preempt_off(unsigned long a0, unsigned long a1) } #endif /* CONFIG_PREEMPT_TRACER */ +/* + * save_tracer_enabled is used to save the state of the tracer_enabled + * variable when we disable it when we open a trace output file. + */ +static int save_tracer_enabled; + static void start_irqsoff_tracer(struct trace_array *tr) { register_ftrace_function(&trace_ops); - tracer_enabled = 1; + if (tracing_is_enabled()) { + tracer_enabled = 1; + save_tracer_enabled = 1; + } else { + tracer_enabled = 0; + save_tracer_enabled = 0; + } } static void stop_irqsoff_tracer(struct trace_array *tr) { tracer_enabled = 0; + save_tracer_enabled = 0; unregister_ftrace_function(&trace_ops); } @@ -370,36 +383,36 @@ static void __irqsoff_tracer_init(struct trace_array *tr) irqsoff_trace = tr; /* make sure that the tracer is visible */ smp_wmb(); - - if (tr->ctrl) - start_irqsoff_tracer(tr); + start_irqsoff_tracer(tr); } static void irqsoff_tracer_reset(struct trace_array *tr) { - if (tr->ctrl) - stop_irqsoff_tracer(tr); + stop_irqsoff_tracer(tr); } -static void irqsoff_tracer_ctrl_update(struct trace_array *tr) +static void irqsoff_tracer_start(struct trace_array *tr) { - if (tr->ctrl) - start_irqsoff_tracer(tr); - else - stop_irqsoff_tracer(tr); + tracer_enabled = 1; + save_tracer_enabled = 1; +} + +static void irqsoff_tracer_stop(struct trace_array *tr) +{ + tracer_enabled = 0; + save_tracer_enabled = 0; } static void irqsoff_tracer_open(struct trace_iterator *iter) { /* stop the trace while dumping */ - if (iter->tr->ctrl) - stop_irqsoff_tracer(iter->tr); + tracer_enabled = 0; } static void irqsoff_tracer_close(struct trace_iterator *iter) { - if (iter->tr->ctrl) - start_irqsoff_tracer(iter->tr); + /* restart tracing */ + tracer_enabled = save_tracer_enabled; } #ifdef CONFIG_IRQSOFF_TRACER @@ -414,9 +427,10 @@ static struct tracer irqsoff_tracer __read_mostly = .name = "irqsoff", .init = irqsoff_tracer_init, .reset = irqsoff_tracer_reset, + .start = irqsoff_tracer_start, + .stop = irqsoff_tracer_stop, .open = irqsoff_tracer_open, .close = irqsoff_tracer_close, - .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_irqsoff, @@ -440,9 +454,10 @@ static struct tracer preemptoff_tracer __read_mostly = .name = "preemptoff", .init = preemptoff_tracer_init, .reset = irqsoff_tracer_reset, + .start = irqsoff_tracer_start, + .stop = irqsoff_tracer_stop, .open = irqsoff_tracer_open, .close = irqsoff_tracer_close, - .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptoff, @@ -468,9 +483,10 @@ static struct tracer preemptirqsoff_tracer __read_mostly = .name = "preemptirqsoff", .init = preemptirqsoff_tracer_init, .reset = irqsoff_tracer_reset, + .start = irqsoff_tracer_start, + .stop = irqsoff_tracer_stop, .open = irqsoff_tracer_open, .close = irqsoff_tracer_close, - .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptirqsoff, diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index f28484618ff0..51bcf370215e 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -34,30 +34,24 @@ static void mmio_trace_init(struct trace_array *tr) { pr_debug("in %s\n", __func__); mmio_trace_array = tr; - if (tr->ctrl) { - mmio_reset_data(tr); - enable_mmiotrace(); - } + + mmio_reset_data(tr); + enable_mmiotrace(); } static void mmio_trace_reset(struct trace_array *tr) { pr_debug("in %s\n", __func__); - if (tr->ctrl) - disable_mmiotrace(); + + disable_mmiotrace(); mmio_reset_data(tr); mmio_trace_array = NULL; } -static void mmio_trace_ctrl_update(struct trace_array *tr) +static void mmio_trace_start(struct trace_array *tr) { pr_debug("in %s\n", __func__); - if (tr->ctrl) { - mmio_reset_data(tr); - enable_mmiotrace(); - } else { - disable_mmiotrace(); - } + mmio_reset_data(tr); } static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) @@ -298,10 +292,10 @@ static struct tracer mmio_tracer __read_mostly = .name = "mmiotrace", .init = mmio_trace_init, .reset = mmio_trace_reset, + .start = mmio_trace_start, .pipe_open = mmio_pipe_open, .close = mmio_close, .read = mmio_read, - .ctrl_update = mmio_trace_ctrl_update, .print_line = mmio_print_line, }; diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index 4592b4862515..2ef1d227e7d8 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c @@ -32,23 +32,12 @@ static void nop_trace_init(struct trace_array *tr) for_each_online_cpu(cpu) tracing_reset(tr, cpu); - if (tr->ctrl) - start_nop_trace(tr); + start_nop_trace(tr); } static void nop_trace_reset(struct trace_array *tr) { - if (tr->ctrl) - stop_nop_trace(tr); -} - -static void nop_trace_ctrl_update(struct trace_array *tr) -{ - /* When starting a new trace, reset the buffers */ - if (tr->ctrl) - start_nop_trace(tr); - else - stop_nop_trace(tr); + stop_nop_trace(tr); } struct tracer nop_trace __read_mostly = @@ -56,7 +45,6 @@ struct tracer nop_trace __read_mostly = .name = "nop", .init = nop_trace_init, .reset = nop_trace_reset, - .ctrl_update = nop_trace_ctrl_update, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_nop, #endif diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index b8f56beb1a62..be35bdfe2e38 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -16,7 +16,8 @@ static struct trace_array *ctx_trace; static int __read_mostly tracer_enabled; -static atomic_t sched_ref; +static int sched_ref; +static DEFINE_MUTEX(sched_register_mutex); static void probe_sched_switch(struct rq *__rq, struct task_struct *prev, @@ -27,7 +28,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev, int cpu; int pc; - if (!atomic_read(&sched_ref)) + if (!sched_ref) return; tracing_record_cmdline(prev); @@ -123,20 +124,18 @@ static void tracing_sched_unregister(void) static void tracing_start_sched_switch(void) { - long ref; - - ref = atomic_inc_return(&sched_ref); - if (ref == 1) + mutex_lock(&sched_register_mutex); + if (!(sched_ref++)) tracing_sched_register(); + mutex_unlock(&sched_register_mutex); } static void tracing_stop_sched_switch(void) { - long ref; - - ref = atomic_dec_and_test(&sched_ref); - if (ref) + mutex_lock(&sched_register_mutex); + if (!(--sched_ref)) tracing_sched_unregister(); + mutex_unlock(&sched_register_mutex); } void tracing_start_cmdline_record(void) @@ -149,40 +148,85 @@ void tracing_stop_cmdline_record(void) tracing_stop_sched_switch(); } +/** + * tracing_start_sched_switch_record - start tracing context switches + * + * Turns on context switch tracing for a tracer. + */ +void tracing_start_sched_switch_record(void) +{ + if (unlikely(!ctx_trace)) { + WARN_ON(1); + return; + } + + tracing_start_sched_switch(); + + mutex_lock(&sched_register_mutex); + tracer_enabled++; + mutex_unlock(&sched_register_mutex); +} + +/** + * tracing_stop_sched_switch_record - start tracing context switches + * + * Turns off context switch tracing for a tracer. + */ +void tracing_stop_sched_switch_record(void) +{ + mutex_lock(&sched_register_mutex); + tracer_enabled--; + WARN_ON(tracer_enabled < 0); + mutex_unlock(&sched_register_mutex); + + tracing_stop_sched_switch(); +} + +/** + * tracing_sched_switch_assign_trace - assign a trace array for ctx switch + * @tr: trace array pointer to assign + * + * Some tracers might want to record the context switches in their + * trace. This function lets those tracers assign the trace array + * to use. + */ +void tracing_sched_switch_assign_trace(struct trace_array *tr) +{ + ctx_trace = tr; +} + static void start_sched_trace(struct trace_array *tr) { sched_switch_reset(tr); - tracing_start_cmdline_record(); - tracer_enabled = 1; + tracing_start_sched_switch_record(); } static void stop_sched_trace(struct trace_array *tr) { - tracer_enabled = 0; - tracing_stop_cmdline_record(); + tracing_stop_sched_switch_record(); } static void sched_switch_trace_init(struct trace_array *tr) { ctx_trace = tr; - - if (tr->ctrl) - start_sched_trace(tr); + start_sched_trace(tr); } static void sched_switch_trace_reset(struct trace_array *tr) { - if (tr->ctrl) + if (sched_ref) stop_sched_trace(tr); } -static void sched_switch_trace_ctrl_update(struct trace_array *tr) +static void sched_switch_trace_start(struct trace_array *tr) { - /* When starting a new trace, reset the buffers */ - if (tr->ctrl) - start_sched_trace(tr); - else - stop_sched_trace(tr); + sched_switch_reset(tr); + tracing_start_sched_switch(); +} + +static void sched_switch_trace_stop(struct trace_array *tr) +{ + tracing_stop_sched_switch(); } static struct tracer sched_switch_trace __read_mostly = @@ -190,7 +234,8 @@ static struct tracer sched_switch_trace __read_mostly = .name = "sched_switch", .init = sched_switch_trace_init, .reset = sched_switch_trace_reset, - .ctrl_update = sched_switch_trace_ctrl_update, + .start = sched_switch_trace_start, + .stop = sched_switch_trace_stop, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_sched_switch, #endif @@ -198,14 +243,6 @@ static struct tracer sched_switch_trace __read_mostly = __init static int init_sched_switch_trace(void) { - int ret = 0; - - if (atomic_read(&sched_ref)) - ret = tracing_sched_register(); - if (ret) { - pr_info("error registering scheduler trace\n"); - return ret; - } return register_tracer(&sched_switch_trace); } device_initcall(init_sched_switch_trace); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 3ae93f16b565..983f2b1478c9 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -50,8 +50,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) return; pc = preempt_count(); - resched = need_resched(); - preempt_disable_notrace(); + resched = ftrace_preempt_disable(); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -81,15 +80,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) out: atomic_dec(&data->disabled); - /* - * To prevent recursion from the scheduler, if the - * resched flag was set before we entered, then - * don't reschedule. - */ - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); + ftrace_preempt_enable(resched); } static struct ftrace_ops trace_ops __read_mostly = @@ -271,6 +262,12 @@ out: atomic_dec(&wakeup_trace->data[cpu]->disabled); } +/* + * save_tracer_enabled is used to save the state of the tracer_enabled + * variable when we disable it when we open a trace output file. + */ +static int save_tracer_enabled; + static void start_wakeup_tracer(struct trace_array *tr) { int ret; @@ -309,7 +306,13 @@ static void start_wakeup_tracer(struct trace_array *tr) register_ftrace_function(&trace_ops); - tracer_enabled = 1; + if (tracing_is_enabled()) { + tracer_enabled = 1; + save_tracer_enabled = 1; + } else { + tracer_enabled = 0; + save_tracer_enabled = 0; + } return; fail_deprobe_wake_new: @@ -321,6 +324,7 @@ fail_deprobe: static void stop_wakeup_tracer(struct trace_array *tr) { tracer_enabled = 0; + save_tracer_enabled = 0; unregister_ftrace_function(&trace_ops); unregister_trace_sched_switch(probe_wakeup_sched_switch); unregister_trace_sched_wakeup_new(probe_wakeup); @@ -330,40 +334,42 @@ static void stop_wakeup_tracer(struct trace_array *tr) static void wakeup_tracer_init(struct trace_array *tr) { wakeup_trace = tr; - - if (tr->ctrl) - start_wakeup_tracer(tr); + start_wakeup_tracer(tr); } static void wakeup_tracer_reset(struct trace_array *tr) { - if (tr->ctrl) { - stop_wakeup_tracer(tr); - /* make sure we put back any tasks we are tracing */ - wakeup_reset(tr); - } + stop_wakeup_tracer(tr); + /* make sure we put back any tasks we are tracing */ + wakeup_reset(tr); } -static void wakeup_tracer_ctrl_update(struct trace_array *tr) +static void wakeup_tracer_start(struct trace_array *tr) { - if (tr->ctrl) - start_wakeup_tracer(tr); - else - stop_wakeup_tracer(tr); + wakeup_reset(tr); + tracer_enabled = 1; + save_tracer_enabled = 1; +} + +static void wakeup_tracer_stop(struct trace_array *tr) +{ + tracer_enabled = 0; + save_tracer_enabled = 0; } static void wakeup_tracer_open(struct trace_iterator *iter) { /* stop the trace while dumping */ - if (iter->tr->ctrl) - stop_wakeup_tracer(iter->tr); + tracer_enabled = 0; } static void wakeup_tracer_close(struct trace_iterator *iter) { /* forget about any processes we were recording */ - if (iter->tr->ctrl) - start_wakeup_tracer(iter->tr); + if (save_tracer_enabled) { + wakeup_reset(iter->tr); + tracer_enabled = 1; + } } static struct tracer wakeup_tracer __read_mostly = @@ -371,9 +377,10 @@ static struct tracer wakeup_tracer __read_mostly = .name = "wakeup", .init = wakeup_tracer_init, .reset = wakeup_tracer_reset, + .start = wakeup_tracer_start, + .stop = wakeup_tracer_stop, .open = wakeup_tracer_open, .close = wakeup_tracer_close, - .ctrl_update = wakeup_tracer_ctrl_update, .print_max = 1, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_wakeup, diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 90bc752a7580..24e6e075e6d6 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -13,6 +13,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_STACK: case TRACE_PRINT: case TRACE_SPECIAL: + case TRACE_BRANCH: return 1; } return 0; @@ -110,7 +111,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ftrace_set_filter(func_name, strlen(func_name), 1); /* enable tracing */ - tr->ctrl = 1; trace->init(tr); /* Sleep for a 1/10 of a second */ @@ -134,13 +134,13 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, msleep(100); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); ftrace_enabled = 0; /* check the trace buffer */ ret = trace_test_buffer(tr, &count); trace->reset(tr); + tracing_start(); /* we should only have one item */ if (!ret && count != 1) { @@ -148,6 +148,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ret = -1; goto out; } + out: ftrace_enabled = save_ftrace_enabled; tracer_enabled = save_tracer_enabled; @@ -180,18 +181,17 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ftrace_enabled = 1; tracer_enabled = 1; - tr->ctrl = 1; trace->init(tr); /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); ftrace_enabled = 0; /* check the trace buffer */ ret = trace_test_buffer(tr, &count); trace->reset(tr); + tracing_start(); if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); @@ -223,7 +223,6 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - tr->ctrl = 1; trace->init(tr); /* reset the max latency */ tracing_max_latency = 0; @@ -232,13 +231,13 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) udelay(100); local_irq_enable(); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); if (!ret) ret = trace_test_buffer(&max_tr, &count); trace->reset(tr); + tracing_start(); if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); @@ -259,8 +258,20 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) unsigned long count; int ret; + /* + * Now that the big kernel lock is no longer preemptable, + * and this is called with the BKL held, it will always + * fail. If preemption is already disabled, simply + * pass the test. When the BKL is removed, or becomes + * preemptible again, we will once again test this, + * so keep it in. + */ + if (preempt_count()) { + printk(KERN_CONT "can not test ... force "); + return 0; + } + /* start the tracing */ - tr->ctrl = 1; trace->init(tr); /* reset the max latency */ tracing_max_latency = 0; @@ -269,13 +280,13 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) udelay(100); preempt_enable(); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); if (!ret) ret = trace_test_buffer(&max_tr, &count); trace->reset(tr); + tracing_start(); if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); @@ -296,8 +307,20 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * unsigned long count; int ret; + /* + * Now that the big kernel lock is no longer preemptable, + * and this is called with the BKL held, it will always + * fail. If preemption is already disabled, simply + * pass the test. When the BKL is removed, or becomes + * preemptible again, we will once again test this, + * so keep it in. + */ + if (preempt_count()) { + printk(KERN_CONT "can not test ... force "); + return 0; + } + /* start the tracing */ - tr->ctrl = 1; trace->init(tr); /* reset the max latency */ @@ -312,27 +335,30 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * local_irq_enable(); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); - if (ret) + if (ret) { + tracing_start(); goto out; + } ret = trace_test_buffer(&max_tr, &count); - if (ret) + if (ret) { + tracing_start(); goto out; + } if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); ret = -1; + tracing_start(); goto out; } /* do the test by disabling interrupts first this time */ tracing_max_latency = 0; - tr->ctrl = 1; - trace->ctrl_update(tr); + tracing_start(); preempt_disable(); local_irq_disable(); udelay(100); @@ -341,8 +367,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * local_irq_enable(); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); if (ret) @@ -358,6 +383,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * out: trace->reset(tr); + tracing_start(); tracing_max_latency = save_max; return ret; @@ -423,7 +449,6 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) wait_for_completion(&isrt); /* start the tracing */ - tr->ctrl = 1; trace->init(tr); /* reset the max latency */ tracing_max_latency = 0; @@ -448,8 +473,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) msleep(100); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); if (!ret) @@ -457,6 +481,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) trace->reset(tr); + tracing_start(); tracing_max_latency = save_max; @@ -480,16 +505,15 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr int ret; /* start the tracing */ - tr->ctrl = 1; trace->init(tr); /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); /* check the trace buffer */ ret = trace_test_buffer(tr, &count); trace->reset(tr); + tracing_start(); if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); @@ -508,17 +532,38 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - tr->ctrl = 1; trace->init(tr); /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); /* check the trace buffer */ ret = trace_test_buffer(tr, &count); trace->reset(tr); + tracing_start(); return ret; } #endif /* CONFIG_SYSPROF_TRACER */ + +#ifdef CONFIG_BRANCH_TRACER +int +trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) +{ + unsigned long count; + int ret; + + /* start the tracing */ + trace->init(tr); + /* Sleep for a 1/10 of a second */ + msleep(100); + /* stop the tracing. */ + tracing_stop(); + /* check the trace buffer */ + ret = trace_test_buffer(tr, &count); + trace->reset(tr); + tracing_start(); + + return ret; +} +#endif /* CONFIG_BRANCH_TRACER */ diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index be682b62fe58..d39e8b7de6a2 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -107,8 +107,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) if (unlikely(!ftrace_enabled || stack_trace_disabled)) return; - resched = need_resched(); - preempt_disable_notrace(); + resched = ftrace_preempt_disable(); cpu = raw_smp_processor_id(); /* no atomic needed, we only modify this variable by this cpu */ @@ -120,10 +119,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) out: per_cpu(trace_active, cpu)--; /* prevent recursion in schedule */ - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); + ftrace_preempt_enable(resched); } static struct ftrace_ops trace_ops __read_mostly = diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 9587d3bcba55..05f753422aea 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -265,23 +265,12 @@ static void stack_trace_init(struct trace_array *tr) { sysprof_trace = tr; - if (tr->ctrl) - start_stack_trace(tr); + start_stack_trace(tr); } static void stack_trace_reset(struct trace_array *tr) { - if (tr->ctrl) - stop_stack_trace(tr); -} - -static void stack_trace_ctrl_update(struct trace_array *tr) -{ - /* When starting a new trace, reset the buffers */ - if (tr->ctrl) - start_stack_trace(tr); - else - stop_stack_trace(tr); + stop_stack_trace(tr); } static struct tracer stack_trace __read_mostly = @@ -289,7 +278,6 @@ static struct tracer stack_trace __read_mostly = .name = "sysprof", .init = stack_trace_init, .reset = stack_trace_reset, - .ctrl_update = stack_trace_ctrl_update, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_sysprof, #endif diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index af8c85664882..e96590f17de1 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -43,6 +43,7 @@ static DEFINE_MUTEX(tracepoints_mutex); */ #define TRACEPOINT_HASH_BITS 6 #define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) +static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; /* * Note about RCU : @@ -54,40 +55,43 @@ struct tracepoint_entry { struct hlist_node hlist; void **funcs; int refcount; /* Number of times armed. 0 if disarmed. */ - struct rcu_head rcu; - void *oldptr; - unsigned char rcu_pending:1; char name[0]; }; -static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; +struct tp_probes { + union { + struct rcu_head rcu; + struct list_head list; + } u; + void *probes[0]; +}; -static void free_old_closure(struct rcu_head *head) +static inline void *allocate_probes(int count) { - struct tracepoint_entry *entry = container_of(head, - struct tracepoint_entry, rcu); - kfree(entry->oldptr); - /* Make sure we free the data before setting the pending flag to 0 */ - smp_wmb(); - entry->rcu_pending = 0; + struct tp_probes *p = kmalloc(count * sizeof(void *) + + sizeof(struct tp_probes), GFP_KERNEL); + return p == NULL ? NULL : p->probes; } -static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old) +static void rcu_free_old_probes(struct rcu_head *head) { - if (!old) - return; - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); + kfree(container_of(head, struct tp_probes, u.rcu)); +} + +static inline void release_probes(void *old) +{ + if (old) { + struct tp_probes *tp_probes = container_of(old, + struct tp_probes, probes[0]); + call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes); + } } static void debug_print_probes(struct tracepoint_entry *entry) { int i; - if (!tracepoint_debug) + if (!tracepoint_debug || !entry->funcs) return; for (i = 0; entry->funcs[i]; i++) @@ -111,12 +115,13 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe) return ERR_PTR(-EEXIST); } /* + 2 : one for new probe, one for NULL func */ - new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL); + new = allocate_probes(nr_probes + 2); if (new == NULL) return ERR_PTR(-ENOMEM); if (old) memcpy(new, old, nr_probes * sizeof(void *)); new[nr_probes] = probe; + new[nr_probes + 1] = NULL; entry->refcount = nr_probes + 1; entry->funcs = new; debug_print_probes(entry); @@ -132,7 +137,7 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe) old = entry->funcs; if (!old) - return NULL; + return ERR_PTR(-ENOENT); debug_print_probes(entry); /* (N -> M), (N > 1, M >= 0) probes */ @@ -151,13 +156,13 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe) int j = 0; /* N -> M, (N > 1, M > 0) */ /* + 1 for NULL */ - new = kzalloc((nr_probes - nr_del + 1) - * sizeof(void *), GFP_KERNEL); + new = allocate_probes(nr_probes - nr_del + 1); if (new == NULL) return ERR_PTR(-ENOMEM); for (i = 0; old[i]; i++) if ((probe && old[i] != probe)) new[j++] = old[i]; + new[nr_probes - nr_del] = NULL; entry->refcount = nr_probes - nr_del; entry->funcs = new; } @@ -215,7 +220,6 @@ static struct tracepoint_entry *add_tracepoint(const char *name) memcpy(&e->name[0], name, name_len); e->funcs = NULL; e->refcount = 0; - e->rcu_pending = 0; hlist_add_head(&e->hlist, head); return e; } @@ -224,32 +228,10 @@ static struct tracepoint_entry *add_tracepoint(const char *name) * Remove the tracepoint from the tracepoint hash table. Must be called with * mutex_lock held. */ -static int remove_tracepoint(const char *name) +static inline void remove_tracepoint(struct tracepoint_entry *e) { - struct hlist_head *head; - struct hlist_node *node; - struct tracepoint_entry *e; - int found = 0; - size_t len = strlen(name) + 1; - u32 hash = jhash(name, len-1, 0); - - head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - found = 1; - break; - } - } - if (!found) - return -ENOENT; - if (e->refcount) - return -EBUSY; hlist_del(&e->hlist); - /* Make sure the call_rcu_sched has been executed */ - if (e->rcu_pending) - rcu_barrier_sched(); kfree(e); - return 0; } /* @@ -320,6 +302,23 @@ static void tracepoint_update_probes(void) module_update_tracepoints(); } +static void *tracepoint_add_probe(const char *name, void *probe) +{ + struct tracepoint_entry *entry; + void *old; + + entry = get_tracepoint(name); + if (!entry) { + entry = add_tracepoint(name); + if (IS_ERR(entry)) + return entry; + } + old = tracepoint_entry_add_probe(entry, probe); + if (IS_ERR(old) && !entry->refcount) + remove_tracepoint(entry); + return old; +} + /** * tracepoint_probe_register - Connect a probe to a tracepoint * @name: tracepoint name @@ -330,44 +329,36 @@ static void tracepoint_update_probes(void) */ int tracepoint_probe_register(const char *name, void *probe) { - struct tracepoint_entry *entry; - int ret = 0; void *old; mutex_lock(&tracepoints_mutex); - entry = get_tracepoint(name); - if (!entry) { - entry = add_tracepoint(name); - if (IS_ERR(entry)) { - ret = PTR_ERR(entry); - goto end; - } - } - /* - * If we detect that a call_rcu_sched is pending for this tracepoint, - * make sure it's executed now. - */ - if (entry->rcu_pending) - rcu_barrier_sched(); - old = tracepoint_entry_add_probe(entry, probe); - if (IS_ERR(old)) { - ret = PTR_ERR(old); - goto end; - } + old = tracepoint_add_probe(name, probe); mutex_unlock(&tracepoints_mutex); + if (IS_ERR(old)) + return PTR_ERR(old); + tracepoint_update_probes(); /* may update entry */ - mutex_lock(&tracepoints_mutex); - entry = get_tracepoint(name); - WARN_ON(!entry); - if (entry->rcu_pending) - rcu_barrier_sched(); - tracepoint_entry_free_old(entry, old); -end: - mutex_unlock(&tracepoints_mutex); - return ret; + release_probes(old); + return 0; } EXPORT_SYMBOL_GPL(tracepoint_probe_register); +static void *tracepoint_remove_probe(const char *name, void *probe) +{ + struct tracepoint_entry *entry; + void *old; + + entry = get_tracepoint(name); + if (!entry) + return ERR_PTR(-ENOENT); + old = tracepoint_entry_remove_probe(entry, probe); + if (IS_ERR(old)) + return old; + if (!entry->refcount) + remove_tracepoint(entry); + return old; +} + /** * tracepoint_probe_unregister - Disconnect a probe from a tracepoint * @name: tracepoint name @@ -380,38 +371,104 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register); */ int tracepoint_probe_unregister(const char *name, void *probe) { - struct tracepoint_entry *entry; void *old; - int ret = -ENOENT; mutex_lock(&tracepoints_mutex); - entry = get_tracepoint(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - old = tracepoint_entry_remove_probe(entry, probe); - if (!old) { - printk(KERN_WARNING "Warning: Trying to unregister a probe" - "that doesn't exist\n"); - goto end; - } + old = tracepoint_remove_probe(name, probe); mutex_unlock(&tracepoints_mutex); + if (IS_ERR(old)) + return PTR_ERR(old); + tracepoint_update_probes(); /* may update entry */ + release_probes(old); + return 0; +} +EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); + +static LIST_HEAD(old_probes); +static int need_update; + +static void tracepoint_add_old_probes(void *old) +{ + need_update = 1; + if (old) { + struct tp_probes *tp_probes = container_of(old, + struct tp_probes, probes[0]); + list_add(&tp_probes->u.list, &old_probes); + } +} + +/** + * tracepoint_probe_register_noupdate - register a probe but not connect + * @name: tracepoint name + * @probe: probe handler + * + * caller must call tracepoint_probe_update_all() + */ +int tracepoint_probe_register_noupdate(const char *name, void *probe) +{ + void *old; + mutex_lock(&tracepoints_mutex); - entry = get_tracepoint(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - tracepoint_entry_free_old(entry, old); - remove_tracepoint(name); /* Ignore busy error message */ - ret = 0; -end: + old = tracepoint_add_probe(name, probe); + if (IS_ERR(old)) { + mutex_unlock(&tracepoints_mutex); + return PTR_ERR(old); + } + tracepoint_add_old_probes(old); mutex_unlock(&tracepoints_mutex); - return ret; + return 0; } -EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); +EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate); + +/** + * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect + * @name: tracepoint name + * @probe: probe function pointer + * + * caller must call tracepoint_probe_update_all() + */ +int tracepoint_probe_unregister_noupdate(const char *name, void *probe) +{ + void *old; + + mutex_lock(&tracepoints_mutex); + old = tracepoint_remove_probe(name, probe); + if (IS_ERR(old)) { + mutex_unlock(&tracepoints_mutex); + return PTR_ERR(old); + } + tracepoint_add_old_probes(old); + mutex_unlock(&tracepoints_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate); + +/** + * tracepoint_probe_update_all - update tracepoints + */ +void tracepoint_probe_update_all(void) +{ + LIST_HEAD(release_probes); + struct tp_probes *pos, *next; + + mutex_lock(&tracepoints_mutex); + if (!need_update) { + mutex_unlock(&tracepoints_mutex); + return; + } + if (!list_empty(&old_probes)) + list_replace_init(&old_probes, &release_probes); + need_update = 0; + mutex_unlock(&tracepoints_mutex); + + tracepoint_update_probes(); + list_for_each_entry_safe(pos, next, &release_probes, u.list) { + list_del(&pos->u.list); + call_rcu_sched(&pos->u.rcu, rcu_free_old_probes); + } +} +EXPORT_SYMBOL_GPL(tracepoint_probe_update_all); /** * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 468fbc9016c7..7a176773af85 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -198,16 +198,10 @@ cmd_modversions = \ fi; endif -ifdef CONFIG_64BIT -arch_bits = 64 -else -arch_bits = 32 -endif - ifdef CONFIG_FTRACE_MCOUNT_RECORD -cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl \ - "$(ARCH)" "$(arch_bits)" "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" \ - "$(NM)" "$(RM)" "$(MV)" "$(@)"; +cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ + "$(if $(CONFIG_64BIT),64,32)" \ + "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)"; endif define rule_cc_o_c diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl index d2c61efc216f..f0af9aa9b243 100644 --- a/scripts/bootgraph.pl +++ b/scripts/bootgraph.pl @@ -78,11 +78,13 @@ while (<>) { } if ($count == 0) { - print "No data found in the dmesg. Make sure that 'printk.time=1' and\n"; - print "'initcall_debug' are passed on the kernel command line.\n\n"; - print "Usage: \n"; - print " dmesg | perl scripts/bootgraph.pl > output.svg\n\n"; - exit; + print STDERR <<END; +No data found in the dmesg. Make sure that 'printk.time=1' and +'initcall_debug' are passed on the kernel command line. +Usage: + dmesg | perl scripts/bootgraph.pl > output.svg +END + exit 1; } print "<?xml version=\"1.0\" standalone=\"no\"?> \n"; @@ -109,8 +111,8 @@ my $stylecounter = 0; my %rows; my $rowscount = 1; my @initcalls = sort { $start{$a} <=> $start{$b} } keys(%start); -my $key; -foreach $key (@initcalls) { + +foreach my $key (@initcalls) { my $duration = $end{$key} - $start{$key}; if ($duration >= $threshold) { diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 6b9fe3eb8360..eeac71c87c66 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -134,6 +134,7 @@ my $section_regex; # Find the start of a section my $function_regex; # Find the name of a function # (return offset and func name) my $mcount_regex; # Find the call site to mcount (return offset) +my $alignment; # The .align value to use for $mcount_section if ($arch eq "x86") { if ($bits == 64) { @@ -148,6 +149,7 @@ if ($arch eq "x86_64") { $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:"; $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$"; $type = ".quad"; + $alignment = 8; # force flags for this arch $ld .= " -m elf_x86_64"; @@ -160,6 +162,7 @@ if ($arch eq "x86_64") { $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:"; $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$"; $type = ".long"; + $alignment = 4; # force flags for this arch $ld .= " -m elf_i386"; @@ -288,6 +291,7 @@ sub update_funcs open(FILE, ">$mcount_s") || die "can't create $mcount_s\n"; $opened = 1; print FILE "\t.section $mcount_section,\"a\",\@progbits\n"; + print FILE "\t.align $alignment\n"; } printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset; } diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py new file mode 100644 index 000000000000..902f9a992620 --- /dev/null +++ b/scripts/tracing/draw_functrace.py @@ -0,0 +1,130 @@ +#!/usr/bin/python + +""" +Copyright 2008 (c) Frederic Weisbecker <fweisbec@gmail.com> +Licensed under the terms of the GNU GPL License version 2 + +This script parses a trace provided by the function tracer in +kernel/trace/trace_functions.c +The resulted trace is processed into a tree to produce a more human +view of the call stack by drawing textual but hierarchical tree of +calls. Only the functions's names and the the call time are provided. + +Usage: + Be sure that you have CONFIG_FUNCTION_TRACER + # mkdir /debugfs + # mount -t debug debug /debug + # echo function > /debug/tracing/current_tracer + $ cat /debug/tracing/trace_pipe > ~/raw_trace_func + Wait some times but not too much, the script is a bit slow. + Break the pipe (Ctrl + Z) + $ scripts/draw_functrace.py < raw_trace_func > draw_functrace + Then you have your drawn trace in draw_functrace +""" + + +import sys, re + +class CallTree: + """ This class provides a tree representation of the functions + call stack. If a function has no parent in the kernel (interrupt, + syscall, kernel thread...) then it is attached to a virtual parent + called ROOT. + """ + ROOT = None + + def __init__(self, func, time = None, parent = None): + self._func = func + self._time = time + if parent is None: + self._parent = CallTree.ROOT + else: + self._parent = parent + self._children = [] + + def calls(self, func, calltime): + """ If a function calls another one, call this method to insert it + into the tree at the appropriate place. + @return: A reference to the newly created child node. + """ + child = CallTree(func, calltime, self) + self._children.append(child) + return child + + def getParent(self, func): + """ Retrieve the last parent of the current node that + has the name given by func. If this function is not + on a parent, then create it as new child of root + @return: A reference to the parent. + """ + tree = self + while tree != CallTree.ROOT and tree._func != func: + tree = tree._parent + if tree == CallTree.ROOT: + child = CallTree.ROOT.calls(func, None) + return child + return tree + + def __repr__(self): + return self.__toString("", True) + + def __toString(self, branch, lastChild): + if self._time is not None: + s = "%s----%s (%s)\n" % (branch, self._func, self._time) + else: + s = "%s----%s\n" % (branch, self._func) + + i = 0 + if lastChild: + branch = branch[:-1] + " " + while i < len(self._children): + if i != len(self._children) - 1: + s += "%s" % self._children[i].__toString(branch +\ + " |", False) + else: + s += "%s" % self._children[i].__toString(branch +\ + " |", True) + i += 1 + return s + +class BrokenLineException(Exception): + """If the last line is not complete because of the pipe breakage, + we want to stop the processing and ignore this line. + """ + pass + +class CommentLineException(Exception): + """ If the line is a comment (as in the beginning of the trace file), + just ignore it. + """ + pass + + +def parseLine(line): + line = line.strip() + if line.startswith("#"): + raise CommentLineException + m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line) + if m is None: + raise BrokenLineException + return (m.group(1), m.group(2), m.group(3)) + + +def main(): + CallTree.ROOT = CallTree("Root (Nowhere)", None, None) + tree = CallTree.ROOT + + for line in sys.stdin: + try: + calltime, callee, caller = parseLine(line) + except BrokenLineException: + break + except CommentLineException: + continue + tree = tree.getParent(caller) + tree = tree.calls(callee, calltime) + + print CallTree.ROOT + +if __name__ == "__main__": + main() |