diff options
47 files changed, 1577 insertions, 273 deletions
diff --git a/Documentation/trace/coresight.txt b/Documentation/trace/coresight.txt index a33c88cd5d1d..6f0120c3a4f1 100644 --- a/Documentation/trace/coresight.txt +++ b/Documentation/trace/coresight.txt @@ -330,3 +330,54 @@ Details on how to use the generic STM API can be found here [2]. [1]. Documentation/ABI/testing/sysfs-bus-coresight-devices-stm [2]. Documentation/trace/stm.txt + + +Using perf tools +---------------- + +perf can be used to record and analyze trace of programs. + +Execution can be recorded using 'perf record' with the cs_etm event, +specifying the name of the sink to record to, e.g: + + perf record -e cs_etm/@20070000.etr/u --per-thread + +The 'perf report' and 'perf script' commands can be used to analyze execution, +synthesizing instruction and branch events from the instruction trace. +'perf inject' can be used to replace the trace data with the synthesized events. +The --itrace option controls the type and frequency of synthesized events +(see perf documentation). + +Note that only 64-bit programs are currently supported - further work is +required to support instruction decode of 32-bit Arm programs. + + +Generating coverage files for Feedback Directed Optimization: AutoFDO +--------------------------------------------------------------------- + +'perf inject' accepts the --itrace option in which case tracing data is +removed and replaced with the synthesized events. e.g. + + perf inject --itrace --strip -i perf.data -o perf.data.new + +Below is an example of using ARM ETM for autoFDO. It requires autofdo +(https://github.com/google/autofdo) and gcc version 5. The bubble +sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial). + + $ gcc-5 -O3 sort.c -o sort + $ taskset -c 2 ./sort + Bubble sorting array of 30000 elements + 5910 ms + + $ perf record -e cs_etm/@20070000.etr/u --per-thread taskset -c 2 ./sort + Bubble sorting array of 30000 elements + 12543 ms + [ perf record: Woken up 35 times to write data ] + [ perf record: Captured and wrote 69.640 MB perf.data ] + + $ perf inject -i perf.data -o inj.data --itrace=il64 --strip + $ create_gcov --binary=./sort --profile=inj.data --gcov=sort.gcov -gcov_version=1 + $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo + $ taskset -c 2 ./sort_autofdo + Bubble sorting array of 30000 elements + 5806 ms diff --git a/tools/arch/powerpc/include/uapi/asm/unistd.h b/tools/arch/powerpc/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..389c36fd8299 --- /dev/null +++ b/tools/arch/powerpc/include/uapi/asm/unistd.h @@ -0,0 +1,402 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * This file contains the system call numbers. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _UAPI_ASM_POWERPC_UNISTD_H_ +#define _UAPI_ASM_POWERPC_UNISTD_H_ + + +#define __NR_restart_syscall 0 +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_waitpid 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lchown 16 +#define __NR_break 17 +#define __NR_oldstat 18 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_oldfstat 28 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_stty 31 +#define __NR_gtty 32 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_ftime 35 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_prof 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_signal 48 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_lock 53 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_mpx 56 +#define __NR_setpgid 57 +#define __NR_ulimit 58 +#define __NR_oldolduname 59 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sgetmask 68 +#define __NR_ssetmask 69 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_oldlstat 84 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_profil 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_ioperm 101 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_olduname 109 +#define __NR_iopl 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_vm86 113 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_modify_ldt 123 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR_getdents 141 +#define __NR__newselect 142 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_query_module 166 +#define __NR_poll 167 +#define __NR_nfsservctl 168 +#define __NR_setresgid 169 +#define __NR_getresgid 170 +#define __NR_prctl 171 +#define __NR_rt_sigreturn 172 +#define __NR_rt_sigaction 173 +#define __NR_rt_sigprocmask 174 +#define __NR_rt_sigpending 175 +#define __NR_rt_sigtimedwait 176 +#define __NR_rt_sigqueueinfo 177 +#define __NR_rt_sigsuspend 178 +#define __NR_pread64 179 +#define __NR_pwrite64 180 +#define __NR_chown 181 +#define __NR_getcwd 182 +#define __NR_capget 183 +#define __NR_capset 184 +#define __NR_sigaltstack 185 +#define __NR_sendfile 186 +#define __NR_getpmsg 187 /* some people actually want streams */ +#define __NR_putpmsg 188 /* some people actually want streams */ +#define __NR_vfork 189 +#define __NR_ugetrlimit 190 /* SuS compliant getrlimit */ +#define __NR_readahead 191 +#ifndef __powerpc64__ /* these are 32-bit only */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#endif +#define __NR_pciconfig_read 198 +#define __NR_pciconfig_write 199 +#define __NR_pciconfig_iobase 200 +#define __NR_multiplexer 201 +#define __NR_getdents64 202 +#define __NR_pivot_root 203 +#ifndef __powerpc64__ +#define __NR_fcntl64 204 +#endif +#define __NR_madvise 205 +#define __NR_mincore 206 +#define __NR_gettid 207 +#define __NR_tkill 208 +#define __NR_setxattr 209 +#define __NR_lsetxattr 210 +#define __NR_fsetxattr 211 +#define __NR_getxattr 212 +#define __NR_lgetxattr 213 +#define __NR_fgetxattr 214 +#define __NR_listxattr 215 +#define __NR_llistxattr 216 +#define __NR_flistxattr 217 +#define __NR_removexattr 218 +#define __NR_lremovexattr 219 +#define __NR_fremovexattr 220 +#define __NR_futex 221 +#define __NR_sched_setaffinity 222 +#define __NR_sched_getaffinity 223 +/* 224 currently unused */ +#define __NR_tuxcall 225 +#ifndef __powerpc64__ +#define __NR_sendfile64 226 +#endif +#define __NR_io_setup 227 +#define __NR_io_destroy 228 +#define __NR_io_getevents 229 +#define __NR_io_submit 230 +#define __NR_io_cancel 231 +#define __NR_set_tid_address 232 +#define __NR_fadvise64 233 +#define __NR_exit_group 234 +#define __NR_lookup_dcookie 235 +#define __NR_epoll_create 236 +#define __NR_epoll_ctl 237 +#define __NR_epoll_wait 238 +#define __NR_remap_file_pages 239 +#define __NR_timer_create 240 +#define __NR_timer_settime 241 +#define __NR_timer_gettime 242 +#define __NR_timer_getoverrun 243 +#define __NR_timer_delete 244 +#define __NR_clock_settime 245 +#define __NR_clock_gettime 246 +#define __NR_clock_getres 247 +#define __NR_clock_nanosleep 248 +#define __NR_swapcontext 249 +#define __NR_tgkill 250 +#define __NR_utimes 251 +#define __NR_statfs64 252 +#define __NR_fstatfs64 253 +#ifndef __powerpc64__ +#define __NR_fadvise64_64 254 +#endif +#define __NR_rtas 255 +#define __NR_sys_debug_setcontext 256 +/* Number 257 is reserved for vserver */ +#define __NR_migrate_pages 258 +#define __NR_mbind 259 +#define __NR_get_mempolicy 260 +#define __NR_set_mempolicy 261 +#define __NR_mq_open 262 +#define __NR_mq_unlink 263 +#define __NR_mq_timedsend 264 +#define __NR_mq_timedreceive 265 +#define __NR_mq_notify 266 +#define __NR_mq_getsetattr 267 +#define __NR_kexec_load 268 +#define __NR_add_key 269 +#define __NR_request_key 270 +#define __NR_keyctl 271 +#define __NR_waitid 272 +#define __NR_ioprio_set 273 +#define __NR_ioprio_get 274 +#define __NR_inotify_init 275 +#define __NR_inotify_add_watch 276 +#define __NR_inotify_rm_watch 277 +#define __NR_spu_run 278 +#define __NR_spu_create 279 +#define __NR_pselect6 280 +#define __NR_ppoll 281 +#define __NR_unshare 282 +#define __NR_splice 283 +#define __NR_tee 284 +#define __NR_vmsplice 285 +#define __NR_openat 286 +#define __NR_mkdirat 287 +#define __NR_mknodat 288 +#define __NR_fchownat 289 +#define __NR_futimesat 290 +#ifdef __powerpc64__ +#define __NR_newfstatat 291 +#else +#define __NR_fstatat64 291 +#endif +#define __NR_unlinkat 292 +#define __NR_renameat 293 +#define __NR_linkat 294 +#define __NR_symlinkat 295 +#define __NR_readlinkat 296 +#define __NR_fchmodat 297 +#define __NR_faccessat 298 +#define __NR_get_robust_list 299 +#define __NR_set_robust_list 300 +#define __NR_move_pages 301 +#define __NR_getcpu 302 +#define __NR_epoll_pwait 303 +#define __NR_utimensat 304 +#define __NR_signalfd 305 +#define __NR_timerfd_create 306 +#define __NR_eventfd 307 +#define __NR_sync_file_range2 308 +#define __NR_fallocate 309 +#define __NR_subpage_prot 310 +#define __NR_timerfd_settime 311 +#define __NR_timerfd_gettime 312 +#define __NR_signalfd4 313 +#define __NR_eventfd2 314 +#define __NR_epoll_create1 315 +#define __NR_dup3 316 +#define __NR_pipe2 317 +#define __NR_inotify_init1 318 +#define __NR_perf_event_open 319 +#define __NR_preadv 320 +#define __NR_pwritev 321 +#define __NR_rt_tgsigqueueinfo 322 +#define __NR_fanotify_init 323 +#define __NR_fanotify_mark 324 +#define __NR_prlimit64 325 +#define __NR_socket 326 +#define __NR_bind 327 +#define __NR_connect 328 +#define __NR_listen 329 +#define __NR_accept 330 +#define __NR_getsockname 331 +#define __NR_getpeername 332 +#define __NR_socketpair 333 +#define __NR_send 334 +#define __NR_sendto 335 +#define __NR_recv 336 +#define __NR_recvfrom 337 +#define __NR_shutdown 338 +#define __NR_setsockopt 339 +#define __NR_getsockopt 340 +#define __NR_sendmsg 341 +#define __NR_recvmsg 342 +#define __NR_recvmmsg 343 +#define __NR_accept4 344 +#define __NR_name_to_handle_at 345 +#define __NR_open_by_handle_at 346 +#define __NR_clock_adjtime 347 +#define __NR_syncfs 348 +#define __NR_sendmmsg 349 +#define __NR_setns 350 +#define __NR_process_vm_readv 351 +#define __NR_process_vm_writev 352 +#define __NR_finit_module 353 +#define __NR_kcmp 354 +#define __NR_sched_setattr 355 +#define __NR_sched_getattr 356 +#define __NR_renameat2 357 +#define __NR_seccomp 358 +#define __NR_getrandom 359 +#define __NR_memfd_create 360 +#define __NR_bpf 361 +#define __NR_execveat 362 +#define __NR_switch_endian 363 +#define __NR_userfaultfd 364 +#define __NR_membarrier 365 +#define __NR_mlock2 378 +#define __NR_copy_file_range 379 +#define __NR_preadv2 380 +#define __NR_pwritev2 381 +#define __NR_kexec_file_load 382 +#define __NR_statx 383 +#define __NR_pkey_alloc 384 +#define __NR_pkey_free 385 +#define __NR_pkey_mprotect 386 + +#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index b24afc0e6e81..6a12bbf39f7b 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -315,12 +315,8 @@ int filename__read_int(const char *filename, int *value) return err; } -/* - * Parses @value out of @filename with strtoull. - * By using 0 for base, the strtoull detects the - * base automatically (see man strtoull). - */ -int filename__read_ull(const char *filename, unsigned long long *value) +static int filename__read_ull_base(const char *filename, + unsigned long long *value, int base) { char line[64]; int fd = open(filename, O_RDONLY), err = -1; @@ -329,7 +325,7 @@ int filename__read_ull(const char *filename, unsigned long long *value) return -1; if (read(fd, line, sizeof(line)) > 0) { - *value = strtoull(line, NULL, 0); + *value = strtoull(line, NULL, base); if (*value != ULLONG_MAX) err = 0; } @@ -338,6 +334,25 @@ int filename__read_ull(const char *filename, unsigned long long *value) return err; } +/* + * Parses @value out of @filename with strtoull. + * By using 16 for base to treat the number as hex. + */ +int filename__read_xll(const char *filename, unsigned long long *value) +{ + return filename__read_ull_base(filename, value, 16); +} + +/* + * Parses @value out of @filename with strtoull. + * By using 0 for base, the strtoull detects the + * base automatically (see man strtoull). + */ +int filename__read_ull(const char *filename, unsigned long long *value) +{ + return filename__read_ull_base(filename, value, 0); +} + #define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */ int filename__read_str(const char *filename, char **buf, size_t *sizep) @@ -417,7 +432,8 @@ int procfs__read_str(const char *entry, char **buf, size_t *sizep) return filename__read_str(path, buf, sizep); } -int sysfs__read_ull(const char *entry, unsigned long long *value) +static int sysfs__read_ull_base(const char *entry, + unsigned long long *value, int base) { char path[PATH_MAX]; const char *sysfs = sysfs__mountpoint(); @@ -427,7 +443,17 @@ int sysfs__read_ull(const char *entry, unsigned long long *value) snprintf(path, sizeof(path), "%s/%s", sysfs, entry); - return filename__read_ull(path, value); + return filename__read_ull_base(path, value, base); +} + +int sysfs__read_xll(const char *entry, unsigned long long *value) +{ + return sysfs__read_ull_base(entry, value, 16); +} + +int sysfs__read_ull(const char *entry, unsigned long long *value) +{ + return sysfs__read_ull_base(entry, value, 0); } int sysfs__read_int(const char *entry, int *value) diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index dda49deefb52..92d03b8396b1 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -30,6 +30,7 @@ FS(bpf_fs) int filename__read_int(const char *filename, int *value); int filename__read_ull(const char *filename, unsigned long long *value); +int filename__read_xll(const char *filename, unsigned long long *value); int filename__read_str(const char *filename, char **buf, size_t *sizep); int filename__write_int(const char *filename, int value); @@ -39,6 +40,7 @@ int procfs__read_str(const char *entry, char **buf, size_t *sizep); int sysctl__read_int(const char *sysctl, int *value); int sysfs__read_int(const char *entry, int *value); int sysfs__read_ull(const char *entry, unsigned long long *value); +int sysfs__read_xll(const char *entry, unsigned long long *value); int sysfs__read_str(const char *entry, char **buf, size_t *sizep); int sysfs__read_bool(const char *entry, bool *value); diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c index 914cb8e3d40b..689b6a130dd7 100644 --- a/tools/lib/symbol/kallsyms.c +++ b/tools/lib/symbol/kallsyms.c @@ -38,6 +38,10 @@ int kallsyms__parse(const char *filename, void *arg, len = hex2u64(line, &start); + /* Skip the line if we failed to parse the address. */ + if (!len) + continue; + len++; if (len + 2 >= line_len) continue; diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index c635eab6af54..292809c3c0ca 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -21,7 +21,7 @@ If there is no debug info in the object, then annotated assembly is displayed. OPTIONS ------- -i:: ---input=:: +--input=<file>:: Input file name. (default: perf.data unless stdin is a fifo) -d:: @@ -69,7 +69,7 @@ OPTIONS --stdio:: Use the stdio interface. ---stdio-color:: +--stdio-color=<mode>:: 'always', 'never' or 'auto', allowing configuring color output via the command line, in addition to via "color.ui" .perfconfig. Use '--stdio-color always' to generate color even when redirecting @@ -84,7 +84,7 @@ OPTIONS --gtk:: Use the GTK interface. -C:: ---cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can +--cpu=<cpu>:: Only report samples for the list of CPUs provided. Multiple CPUs can be provided as a comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. Default is to report samples on all CPUs. diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt index 479fc3261a50..85b8ac695c87 100644 --- a/tools/perf/Documentation/perf-kmem.txt +++ b/tools/perf/Documentation/perf-kmem.txt @@ -25,6 +25,10 @@ OPTIONS --input=<file>:: Select the input file (default: perf.data unless stdin is a fifo) +-f:: +--force:: + Don't do ownership validation + -v:: --verbose:: Be more verbose. (show symbol address, etc) @@ -61,7 +65,7 @@ OPTIONS default, but this option shows live (currently allocated) pages instead. (This option works with --page option only) ---time:: +--time=<start>,<stop>:: Only analyze samples within given time window: <start>,<stop>. Times have the format seconds.microseconds. If start is not given (i.e., time string is ',x.y') then analysis starts at the beginning of the file. If diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 4be08a1e3f8d..b0211410969b 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -28,6 +28,10 @@ OPTIONS <command>...:: Any command you can specify in a shell. +-f:: +--force:: + Don't do ownership validation + -t:: --type=:: Select the memory operation type: load or store (default: load,store) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 907e505b6309..cba16d8a970e 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -354,7 +354,8 @@ OPTIONS Path to objdump binary. --group:: - Show event group information together. + Show event group information together. It forces group output also + if there are no groups defined in data file. --demangle:: Demangle symbol names to human readable form. It's enabled by default, @@ -367,7 +368,7 @@ OPTIONS Use the data addresses of samples in addition to instruction addresses to build the histograms. To generate meaningful output, the perf.data file must have been obtained using perf record -d -W and using a - special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See + special event -e cpu/mem-loads/p or -e cpu/mem-stores/p. See 'perf mem' for simpler access. --percent-limit:: diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 7730c1d2b5d3..36ec0257f8d3 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -303,6 +303,9 @@ OPTIONS --show-lost-events Display lost events i.e. events of type PERF_RECORD_LOST. +--show-round-events + Display finished round events i.e. events of type PERF_RECORD_FINISHED_ROUND. + --demangle:: Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 823fce7674bb..2bbe79a50d3c 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -146,6 +146,16 @@ Print count deltas every N milliseconds (minimum: 10ms) The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution. example: 'perf stat -I 1000 -e cycles -a sleep 5' +--interval-count times:: +Print count deltas for fixed number of times. +This option should be used together with "-I" option. + example: 'perf stat -I 1000 --interval-count 2 -e cycles -a' + +--timeout msecs:: +Stop the 'perf stat' session and print count deltas after N milliseconds (minimum: 10 ms). +This option is not supported with the "-I" option. + example: 'perf stat --time 2000 -e cycles -a' + --metric-only:: Only print computed metrics. Print them in a single line. Don't show any raw values. Not supported with --per-thread. diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 0dfdaa9fa81e..577a5d2988fe 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -27,6 +27,8 @@ NO_SYSCALL_TABLE := 1 # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) NO_PERF_REGS := 0 + NO_SYSCALL_TABLE := 0 + CFLAGS += -I$(OUTPUT)arch/powerpc/include/generated LIBUNWIND_LIBS := -lunwind -lunwind-ppc64 endif diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c index 2323581b157d..fa639e3e52ac 100644 --- a/tools/perf/arch/arm/util/auxtrace.c +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -68,7 +68,7 @@ struct auxtrace_record bool found_spe = false; static struct perf_pmu **arm_spe_pmus = NULL; static int nr_spes = 0; - int i; + int i = 0; if (!evlist) return NULL; diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index fbfc055d3f4d..5c655ad4621e 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -298,12 +298,17 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, { int i; int etmv3 = 0, etmv4 = 0; - const struct cpu_map *cpus = evlist->cpus; + struct cpu_map *event_cpus = evlist->cpus; + struct cpu_map *online_cpus = cpu_map__new(NULL); /* cpu map is not empty, we have specific CPUs to work with */ - if (!cpu_map__empty(cpus)) { - for (i = 0; i < cpu_map__nr(cpus); i++) { - if (cs_etm_is_etmv4(itr, cpus->map[i])) + if (!cpu_map__empty(event_cpus)) { + for (i = 0; i < cpu__max_cpu(); i++) { + if (!cpu_map__has(event_cpus, i) || + !cpu_map__has(online_cpus, i)) + continue; + + if (cs_etm_is_etmv4(itr, i)) etmv4++; else etmv3++; @@ -311,6 +316,9 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, } else { /* get configuration for all CPUs in the system */ for (i = 0; i < cpu__max_cpu(); i++) { + if (!cpu_map__has(online_cpus, i)) + continue; + if (cs_etm_is_etmv4(itr, i)) etmv4++; else @@ -318,6 +326,8 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, } } + cpu_map__put(online_cpus); + return (CS_ETM_HEADER_SIZE + (etmv4 * CS_ETMV4_PRIV_SIZE) + (etmv3 * CS_ETMV3_PRIV_SIZE)); @@ -447,7 +457,9 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, int i; u32 offset; u64 nr_cpu, type; - const struct cpu_map *cpus = session->evlist->cpus; + struct cpu_map *cpu_map; + struct cpu_map *event_cpus = session->evlist->cpus; + struct cpu_map *online_cpus = cpu_map__new(NULL); struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; @@ -458,8 +470,21 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, if (!session->evlist->nr_mmaps) return -EINVAL; - /* If the cpu_map is empty all CPUs are involved */ - nr_cpu = cpu_map__empty(cpus) ? cpu__max_cpu() : cpu_map__nr(cpus); + /* If the cpu_map is empty all online CPUs are involved */ + if (cpu_map__empty(event_cpus)) { + cpu_map = online_cpus; + } else { + /* Make sure all specified CPUs are online */ + for (i = 0; i < cpu_map__nr(event_cpus); i++) { + if (cpu_map__has(event_cpus, i) && + !cpu_map__has(online_cpus, i)) + return -EINVAL; + } + + cpu_map = event_cpus; + } + + nr_cpu = cpu_map__nr(cpu_map); /* Get PMU type as dynamically assigned by the core */ type = cs_etm_pmu->type; @@ -472,15 +497,11 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, offset = CS_ETM_SNAPSHOT + 1; - /* cpu map is not empty, we have specific CPUs to work with */ - if (!cpu_map__empty(cpus)) { - for (i = 0; i < cpu_map__nr(cpus) && offset < priv_size; i++) - cs_etm_get_metadata(cpus->map[i], &offset, itr, info); - } else { - /* get configuration for all CPUs in the system */ - for (i = 0; i < cpu__max_cpu(); i++) + for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++) + if (cpu_map__has(cpu_map, i)) cs_etm_get_metadata(i, &offset, itr, info); - } + + cpu_map__put(online_cpus); return 0; } diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 42dab7c8f508..a111239df182 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -6,3 +6,28 @@ endif HAVE_KVM_STAT_SUPPORT := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 + +# +# Syscall table generation for perf +# + +out := $(OUTPUT)arch/powerpc/include/generated/asm +header32 := $(out)/syscalls_32.c +header64 := $(out)/syscalls_64.c +sysdef := $(srctree)/tools/arch/powerpc/include/uapi/asm/unistd.h +sysprf := $(srctree)/tools/perf/arch/powerpc/entry/syscalls/ +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header64): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '64' '$(CC)' $(sysdef) > $@ + +$(header32): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '32' '$(CC)' $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, powerpc) $(RM) $(header32) $(header64) + +archheaders: $(header32) $(header64) diff --git a/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl b/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl new file mode 100755 index 000000000000..ef52e1dd694b --- /dev/null +++ b/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl @@ -0,0 +1,37 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf. Derived from +# s390 script. +# +# Copyright IBM Corp. 2017 +# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> +# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com> + +wordsize=$1 +gcc=$2 +input=$3 + +if ! test -r $input; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_table() +{ + local wordsize=$1 + local max_nr + + echo "static const char *syscalltbl_powerpc_${wordsize}[] = {" + while read sc nr; do + printf '\t[%d] = "%s",\n' $nr $sc + max_nr=$nr + done + echo '};' + echo "#define SYSCALLTBL_POWERPC_${wordsize}_MAX_ID $max_nr" +} + +$gcc -m${wordsize} -E -dM -x c $input \ + |sed -ne 's/^#define __NR_//p' \ + |sort -t' ' -k2 -nu \ + |create_table ${wordsize} diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c index 8c72b44444cb..01df9d8303e1 100644 --- a/tools/perf/arch/s390/annotate/instructions.c +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -23,12 +23,37 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na return ops; } +static int s390__cpuid_parse(struct arch *arch, char *cpuid) +{ + unsigned int family; + char model[16], model_c[16], cpumf_v[16], cpumf_a[16]; + int ret; + + /* + * cpuid string format: + * "IBM,family,model-capacity,model[,cpum_cf-version,cpum_cf-authorization]" + */ + ret = sscanf(cpuid, "%*[^,],%u,%[^,],%[^,],%[^,],%s", &family, model_c, + model, cpumf_v, cpumf_a); + if (ret >= 2) { + arch->family = family; + arch->model = 0; + return 0; + } + + return -1; +} + static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused) { + int err = 0; + if (!arch->initialized) { arch->initialized = true; arch->associate_instruction_ops = s390__associate_ins_ops; + if (cpuid) + err = s390__cpuid_parse(arch, cpuid); } - return 0; + return err; } diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c index 9fa6c3e5782c..231294b80dc4 100644 --- a/tools/perf/arch/s390/util/header.c +++ b/tools/perf/arch/s390/util/header.c @@ -1,8 +1,9 @@ /* * Implementation of get_cpuid(). * - * Copyright 2014 IBM Corp. + * Copyright IBM Corp. 2014, 2018 * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com> + * Thomas Richter <tmricht@linux.vnet.ibm.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -13,16 +14,153 @@ #include <unistd.h> #include <stdio.h> #include <string.h> +#include <ctype.h> #include "../../util/header.h" +#include "../../util/util.h" + +#define SYSINFO_MANU "Manufacturer:" +#define SYSINFO_TYPE "Type:" +#define SYSINFO_MODEL "Model:" +#define SRVLVL_CPUMF "CPU-MF:" +#define SRVLVL_VERSION "version=" +#define SRVLVL_AUTHORIZATION "authorization=" +#define SYSINFO "/proc/sysinfo" +#define SRVLVL "/proc/service_levels" int get_cpuid(char *buffer, size_t sz) { - const char *cpuid = "IBM/S390"; + char *cp, *line = NULL, *line2; + char type[8], model[33], version[8], manufacturer[32], authorization[8]; + int tpsize = 0, mdsize = 0, vssize = 0, mfsize = 0, atsize = 0; + int read; + unsigned long line_sz; + size_t nbytes; + FILE *sysinfo; + + /* + * Scan /proc/sysinfo line by line and read out values for + * Manufacturer:, Type: and Model:, for example: + * Manufacturer: IBM + * Type: 2964 + * Model: 702 N96 + * The first word is the Model Capacity and the second word is + * Model (can be omitted). Both words have a maximum size of 16 + * bytes. + */ + memset(manufacturer, 0, sizeof(manufacturer)); + memset(type, 0, sizeof(type)); + memset(model, 0, sizeof(model)); + memset(version, 0, sizeof(version)); + memset(authorization, 0, sizeof(authorization)); + + sysinfo = fopen(SYSINFO, "r"); + if (sysinfo == NULL) + return -1; + + while ((read = getline(&line, &line_sz, sysinfo)) != -1) { + if (!strncmp(line, SYSINFO_MANU, strlen(SYSINFO_MANU))) { + line2 = line + strlen(SYSINFO_MANU); + + while ((cp = strtok_r(line2, "\n ", &line2))) { + mfsize += scnprintf(manufacturer + mfsize, + sizeof(manufacturer) - mfsize, "%s", cp); + } + } + + if (!strncmp(line, SYSINFO_TYPE, strlen(SYSINFO_TYPE))) { + line2 = line + strlen(SYSINFO_TYPE); - if (strlen(cpuid) + 1 > sz) + while ((cp = strtok_r(line2, "\n ", &line2))) { + tpsize += scnprintf(type + tpsize, + sizeof(type) - tpsize, "%s", cp); + } + } + + if (!strncmp(line, SYSINFO_MODEL, strlen(SYSINFO_MODEL))) { + line2 = line + strlen(SYSINFO_MODEL); + + while ((cp = strtok_r(line2, "\n ", &line2))) { + mdsize += scnprintf(model + mdsize, sizeof(type) - mdsize, + "%s%s", model[0] ? "," : "", cp); + } + break; + } + } + fclose(sysinfo); + + /* Missing manufacturer, type or model information should not happen */ + if (!manufacturer[0] || !type[0] || !model[0]) return -1; - strcpy(buffer, cpuid); - return 0; + /* + * Scan /proc/service_levels and return the CPU-MF counter facility + * version number and authorization level. + * Optional, does not exist on z/VM guests. + */ + sysinfo = fopen(SRVLVL, "r"); + if (sysinfo == NULL) + goto skip_sysinfo; + while ((read = getline(&line, &line_sz, sysinfo)) != -1) { + if (strncmp(line, SRVLVL_CPUMF, strlen(SRVLVL_CPUMF))) + continue; + + line2 = line + strlen(SRVLVL_CPUMF); + while ((cp = strtok_r(line2, "\n ", &line2))) { + if (!strncmp(cp, SRVLVL_VERSION, + strlen(SRVLVL_VERSION))) { + char *sep = strchr(cp, '='); + + vssize += scnprintf(version + vssize, + sizeof(version) - vssize, "%s", sep + 1); + } + if (!strncmp(cp, SRVLVL_AUTHORIZATION, + strlen(SRVLVL_AUTHORIZATION))) { + char *sep = strchr(cp, '='); + + atsize += scnprintf(authorization + atsize, + sizeof(authorization) - atsize, "%s", sep + 1); + } + } + } + fclose(sysinfo); + +skip_sysinfo: + free(line); + + if (version[0] && authorization[0] ) + nbytes = snprintf(buffer, sz, "%s,%s,%s,%s,%s", + manufacturer, type, model, version, + authorization); + else + nbytes = snprintf(buffer, sz, "%s,%s,%s", manufacturer, type, + model); + return (nbytes >= sz) ? -1 : 0; +} + +char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +{ + char *buf = malloc(128); + + if (buf && get_cpuid(buf, 128) < 0) + zfree(&buf); + return buf; +} + +/* + * Compare the cpuid string returned by get_cpuid() function + * with the name generated by the jevents file read from + * pmu-events/arch/s390/mapfile.csv. + * + * Parameter mapcpuid is the cpuid as stored in the + * pmu-events/arch/s390/mapfile.csv. This is just the type number. + * Parameter cpuid is the cpuid returned by function get_cpuid(). + */ +int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) +{ + char *cp = strchr(cpuid, ','); + + if (cp == NULL) + return -1; + return strncmp(cp + 1, mapcpuid, strlen(mapcpuid)); } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index bf4ca749d1ac..907267206973 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1803,7 +1803,7 @@ int cmd_record(int argc, const char **argv) err = target__validate(&rec->opts.target); if (err) { target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); - ui__warning("%s", errbuf); + ui__warning("%s\n", errbuf); } err = target__parse_uid(&rec->opts.target); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 4ad5dc649716..1eedb1815c4c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -614,6 +614,7 @@ static int stats_print(struct report *rep) static void tasks_setup(struct report *rep) { memset(&rep->tool, 0, sizeof(rep->tool)); + rep->tool.ordered_events = true; if (rep->mmaps_mode) { rep->tool.mmap = perf_event__process_mmap; rep->tool.mmap2 = perf_event__process_mmap2; @@ -937,6 +938,7 @@ int cmd_report(int argc, const char **argv) "perf report [<options>]", NULL }; + bool group_set = false; struct report report = { .tool = { .sample = process_sample_event, @@ -1056,7 +1058,7 @@ int cmd_report(int argc, const char **argv) "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), - OPT_BOOLEAN(0, "group", &symbol_conf.event_group, + OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &group_set, "Show event group information together"), OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "", "use branch records for per branch histogram filling", @@ -1173,6 +1175,9 @@ repeat: has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); + if (group_set && !session->evlist->nr_groups) + perf_evlist__set_leader(session->evlist); + if (itrace_synth_opts.last_branch) has_br_stack = true; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ab19a6ee4093..cce926aeb0c0 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1489,6 +1489,7 @@ struct perf_script { bool show_switch_events; bool show_namespace_events; bool show_lost_events; + bool show_round_events; bool allocated; bool per_event_dump; struct cpu_map *cpus; @@ -2104,6 +2105,16 @@ process_lost_event(struct perf_tool *tool, return 0; } +static int +process_finished_round_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct ordered_events *oe __maybe_unused) + +{ + perf_event__fprintf(event, stdout); + return 0; +} + static void sig_handler(int sig __maybe_unused) { session_done = 1; @@ -2200,6 +2211,10 @@ static int __cmd_script(struct perf_script *script) script->tool.namespaces = process_namespaces_event; if (script->show_lost_events) script->tool.lost = process_lost_event; + if (script->show_round_events) { + script->tool.ordered_events = false; + script->tool.finished_round = process_finished_round_event; + } if (perf_script__setup_per_event_dump(script)) { pr_err("Couldn't create the per event dump files\n"); @@ -3139,6 +3154,8 @@ int cmd_script(int argc, const char **argv) "Show namespace events (if recorded)"), OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events, "Show lost events (if recorded)"), + OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events, + "Show round events (if recorded)"), OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump, "Dump trace output to files named by the monitored events"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 98bf9d32f222..2d49eccf98f2 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -168,6 +168,7 @@ static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; static bool append_file; +static bool interval_count; static const char *output_name; static int output_fd; static int print_free_counters_hint; @@ -571,6 +572,8 @@ static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel) static int __run_perf_stat(int argc, const char **argv) { int interval = stat_config.interval; + int times = stat_config.times; + int timeout = stat_config.timeout; char msg[BUFSIZ]; unsigned long long t0, t1; struct perf_evsel *counter; @@ -584,6 +587,9 @@ static int __run_perf_stat(int argc, const char **argv) if (interval) { ts.tv_sec = interval / USEC_PER_MSEC; ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC; + } else if (timeout) { + ts.tv_sec = timeout / USEC_PER_MSEC; + ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC; } else { ts.tv_sec = 1; ts.tv_nsec = 0; @@ -696,10 +702,14 @@ try_again: perf_evlist__start_workload(evsel_list); enable_counters(); - if (interval) { + if (interval || timeout) { while (!waitpid(child_pid, &status, WNOHANG)) { nanosleep(&ts, NULL); + if (timeout) + break; process_interval(); + if (interval_count && !(--times)) + break; } } waitpid(child_pid, &status, 0); @@ -716,8 +726,13 @@ try_again: enable_counters(); while (!done) { nanosleep(&ts, NULL); - if (interval) + if (timeout) + break; + if (interval) { process_interval(); + if (interval_count && !(--times)) + break; + } } } @@ -1891,6 +1906,10 @@ static const struct option stat_options[] = { "command to run after to the measured command"), OPT_UINTEGER('I', "interval-print", &stat_config.interval, "print counts at regular interval in ms (>= 10)"), + OPT_INTEGER(0, "interval-count", &stat_config.times, + "print counts for fixed number of times"), + OPT_UINTEGER(0, "timeout", &stat_config.timeout, + "stop workload and print counts after a timeout period in ms (>= 10ms)"), OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, @@ -2688,7 +2707,7 @@ int cmd_stat(int argc, const char **argv) int status = -EINVAL, run_idx; const char *mode; FILE *output = stderr; - unsigned int interval; + unsigned int interval, timeout; const char * const stat_subcommands[] = { "record", "report" }; setlocale(LC_ALL, ""); @@ -2719,6 +2738,7 @@ int cmd_stat(int argc, const char **argv) return __cmd_report(argc, argv); interval = stat_config.interval; + timeout = stat_config.timeout; /* * For record command the -o is already taken care of. @@ -2871,6 +2891,33 @@ int cmd_stat(int argc, const char **argv) "Please proceed with caution.\n"); } + if (stat_config.times && interval) + interval_count = true; + else if (stat_config.times && !interval) { + pr_err("interval-count option should be used together with " + "interval-print.\n"); + parse_options_usage(stat_usage, stat_options, "interval-count", 0); + parse_options_usage(stat_usage, stat_options, "I", 1); + goto out; + } + + if (timeout && timeout < 100) { + if (timeout < 10) { + pr_err("timeout must be >= 10ms.\n"); + parse_options_usage(stat_usage, stat_options, "timeout", 0); + goto out; + } else + pr_warning("timeout < 100ms. " + "The overhead percentage could be high in some cases. " + "Please proceed with caution.\n"); + } + if (timeout && interval) { + pr_err("timeout option is not supported with interval-print.\n"); + parse_options_usage(stat_usage, stat_options, "timeout", 0); + parse_options_usage(stat_usage, stat_options, "I", 1); + goto out; + } + if (perf_evlist__alloc_stats(evsel_list, interval)) goto out; diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 790ec25919a0..bf206ffe5c45 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -42,6 +42,7 @@ arch/parisc/include/uapi/asm/errno.h arch/powerpc/include/uapi/asm/errno.h arch/sparc/include/uapi/asm/errno.h arch/x86/include/uapi/asm/errno.h +arch/powerpc/include/uapi/asm/unistd.h include/asm-generic/bitops/arch_hweight.h include/asm-generic/bitops/const_hweight.h include/asm-generic/bitops/__fls.h diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 3bf7b145b826..c7115d369511 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -482,6 +482,34 @@ static void fs_something(void) } } +static const char *do_determine_event(bool excl_kernel) +{ + const char *event = excl_kernel ? "cycles:u" : "cycles"; + +#ifdef __s390x__ + char cpuid[128], model[16], model_c[16], cpum_cf_v[16]; + unsigned int family; + int ret, cpum_cf_a; + + if (get_cpuid(cpuid, sizeof(cpuid))) + goto out_clocks; + ret = sscanf(cpuid, "%*[^,],%u,%[^,],%[^,],%[^,],%x", &family, model_c, + model, cpum_cf_v, &cpum_cf_a); + if (ret != 5) /* Not available */ + goto out_clocks; + if (excl_kernel && (cpum_cf_a & 4)) + return event; + if (!excl_kernel && (cpum_cf_a & 2)) + return event; + + /* Fall through: missing authorization */ +out_clocks: + event = excl_kernel ? "cpu-clock:u" : "cpu-clock"; + +#endif + return event; +} + static void do_something(void) { fs_something(); @@ -592,10 +620,7 @@ static int do_test_code_reading(bool try_kcore) perf_evlist__set_maps(evlist, cpus, threads); - if (excl_kernel) - str = "cycles:u"; - else - str = "cycles"; + str = do_determine_event(excl_kernel); pr_debug("Parsing event '%s'\n", str); ret = parse_events(evlist, str, NULL); if (ret < 0) { diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 260418969120..2f008067d989 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -37,6 +37,19 @@ static int init_live_machine(struct machine *machine) mmap_handler, machine, true, 500); } +/* + * We need to keep these functions global, despite the + * fact that they are used only locally in this object, + * in order to keep them around even if the binary is + * stripped. If they are gone, the unwind check for + * symbol fails. + */ +int test_dwarf_unwind__thread(struct thread *thread); +int test_dwarf_unwind__compare(void *p1, void *p2); +int test_dwarf_unwind__krava_3(struct thread *thread); +int test_dwarf_unwind__krava_2(struct thread *thread); +int test_dwarf_unwind__krava_1(struct thread *thread); + #define MAX_STACK 8 static int unwind_entry(struct unwind_entry *entry, void *arg) @@ -45,12 +58,12 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) char *symbol = entry->sym ? entry->sym->name : NULL; static const char *funcs[MAX_STACK] = { "test__arch_unwind_sample", - "unwind_thread", - "compare", + "test_dwarf_unwind__thread", + "test_dwarf_unwind__compare", "bsearch", - "krava_3", - "krava_2", - "krava_1", + "test_dwarf_unwind__krava_3", + "test_dwarf_unwind__krava_2", + "test_dwarf_unwind__krava_1", "test__dwarf_unwind" }; /* @@ -77,7 +90,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) return strcmp((const char *) symbol, funcs[idx]); } -static noinline int unwind_thread(struct thread *thread) +noinline int test_dwarf_unwind__thread(struct thread *thread) { struct perf_sample sample; unsigned long cnt = 0; @@ -108,7 +121,7 @@ static noinline int unwind_thread(struct thread *thread) static int global_unwind_retval = -INT_MAX; -static noinline int compare(void *p1, void *p2) +noinline int test_dwarf_unwind__compare(void *p1, void *p2) { /* Any possible value should be 'thread' */ struct thread *thread = *(struct thread **)p1; @@ -117,17 +130,17 @@ static noinline int compare(void *p1, void *p2) /* Call unwinder twice for both callchain orders. */ callchain_param.order = ORDER_CALLER; - global_unwind_retval = unwind_thread(thread); + global_unwind_retval = test_dwarf_unwind__thread(thread); if (!global_unwind_retval) { callchain_param.order = ORDER_CALLEE; - global_unwind_retval = unwind_thread(thread); + global_unwind_retval = test_dwarf_unwind__thread(thread); } } return p1 - p2; } -static noinline int krava_3(struct thread *thread) +noinline int test_dwarf_unwind__krava_3(struct thread *thread) { struct thread *array[2] = {thread, thread}; void *fp = &bsearch; @@ -141,18 +154,19 @@ static noinline int krava_3(struct thread *thread) size_t, int (*)(void *, void *)); _bsearch = fp; - _bsearch(array, &thread, 2, sizeof(struct thread **), compare); + _bsearch(array, &thread, 2, sizeof(struct thread **), + test_dwarf_unwind__compare); return global_unwind_retval; } -static noinline int krava_2(struct thread *thread) +noinline int test_dwarf_unwind__krava_2(struct thread *thread) { - return krava_3(thread); + return test_dwarf_unwind__krava_3(thread); } -static noinline int krava_1(struct thread *thread) +noinline int test_dwarf_unwind__krava_1(struct thread *thread) { - return krava_2(thread); + return test_dwarf_unwind__krava_2(thread); } int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unused) @@ -189,7 +203,7 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu goto out; } - err = krava_1(thread); + err = test_dwarf_unwind__krava_1(thread); thread__put(thread); out: diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh index 30a950c9d407..1c16e56cd93e 100644 --- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh @@ -5,7 +5,7 @@ had_vfs_getname=$? cleanup_probe_vfs_getname() { if [ $had_vfs_getname -eq 1 ] ; then - perf probe -q -d probe:vfs_getname + perf probe -q -d probe:vfs_getname* fi } diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh index c446c894b297..8c4ab0b390c0 100755 --- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh @@ -21,12 +21,12 @@ trace_libc_inet_pton_backtrace() { expected[3]=".*packets transmitted.*" expected[4]="rtt min.*" expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)" - expected[6]=".*inet_pton[[:space:]]\($libc\)$" + expected[6]=".*inet_pton[[:space:]]\($libc|inlined\)$" case "$(uname -m)" in s390x) eventattr='call-graph=dwarf' - expected[7]="gaih_inet[[:space:]]\(inlined\)$" - expected[8]="__GI_getaddrinfo[[:space:]]\(inlined\)$" + expected[7]="gaih_inet.*[[:space:]]\($libc|inlined\)$" + expected[8]="__GI_getaddrinfo[[:space:]]\($libc|inlined\)$" expected[9]="main[[:space:]]\(.*/bin/ping.*\)$" expected[10]="__libc_start_main[[:space:]]\($libc\)$" expected[11]="_start[[:space:]]\(.*/bin/ping.*\)$" diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index f6789fb029d6..1e5adb65632a 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -56,7 +56,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest * be compacted against the list of modules found in the "vmlinux" * code and with the one got from /proc/modules from the "kallsyms" code. */ - if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true) <= 0) { + if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type) <= 0) { pr_debug("dso__load_kallsyms "); goto out; } @@ -125,7 +125,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest if (pair && UM(pair->start) == mem_start) { next_pair: - if (strcmp(sym->name, pair->name) == 0) { + if (arch__compare_symbol_names(sym->name, pair->name) == 0) { /* * kallsyms don't have the symbol end, so we * set that by using the next symbol start - 1, diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 286427975112..e2f666391ac4 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -319,6 +319,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) struct map_symbol *ms = ab->b.priv; struct symbol *sym = ms->sym; u8 pcnt_width = annotate_browser__pcnt_width(ab); + int width = 0; /* PLT symbols contain external offsets */ if (strstr(sym->name, "@plt")) @@ -340,13 +341,17 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) to = (u64)btarget->idx; } + if (ab->have_cycles) + width = IPC_WIDTH + CYCLES_WIDTH; + ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS); - __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, + __ui_browser__line_arrow(browser, + pcnt_width + 2 + ab->addr_width + width, from, to); if (is_fused(ab, cursor)) { ui_browser__mark_fused(browser, - pcnt_width + 3 + ab->addr_width, + pcnt_width + 3 + ab->addr_width + width, from - 1, to > from ? true : false); } diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 7f8553630c4d..537eadd81914 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -316,7 +316,6 @@ static int machine__write_buildid_table(struct machine *machine, struct feat_fd *fd) { int err = 0; - char nm[PATH_MAX]; struct dso *pos; u16 kmisc = PERF_RECORD_MISC_KERNEL, umisc = PERF_RECORD_MISC_USER; @@ -338,9 +337,8 @@ static int machine__write_buildid_table(struct machine *machine, name = pos->short_name; name_len = pos->short_name_len; } else if (dso__is_kcore(pos)) { - machine__mmap_name(machine, nm, sizeof(nm)); - name = nm; - name_len = strlen(nm); + name = machine->mmap_name; + name_len = strlen(name); } else { name = pos->long_name; name_len = pos->long_name_len; @@ -813,12 +811,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine) bool is_kallsyms = dso__is_kallsyms(dso); bool is_vdso = dso__is_vdso(dso); const char *name = dso->long_name; - char nm[PATH_MAX]; if (dso__is_kcore(dso)) { is_kallsyms = true; - machine__mmap_name(machine, nm, sizeof(nm)); - name = nm; + name = machine->mmap_name; } return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, dso->nsinfo, is_kallsyms, is_vdso); diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 1fb01849f1c7..640af88331b4 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -78,6 +78,8 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) { ocsd_datapath_resp_t dp_ret; + decoder->prev_return = OCSD_RESP_CONT; + dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, 0, 0, NULL, NULL); if (OCSD_DATA_RESP_IS_FATAL(dp_ret)) @@ -253,16 +255,16 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) decoder->packet_count = 0; for (i = 0; i < MAX_BUFFER; i++) { decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL; - decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; - decoder->packet_buffer[i].exc = false; - decoder->packet_buffer[i].exc_ret = false; - decoder->packet_buffer[i].cpu = INT_MIN; + decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[i].last_instr_taken_branch = false; + decoder->packet_buffer[i].exc = false; + decoder->packet_buffer[i].exc_ret = false; + decoder->packet_buffer[i].cpu = INT_MIN; } } static ocsd_datapath_resp_t cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, - const ocsd_generic_trace_elem *elem, const u8 trace_chan_id, enum cs_etm_sample_type sample_type) { @@ -278,18 +280,16 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, return OCSD_RESP_FATAL_SYS_ERR; et = decoder->tail; + et = (et + 1) & (MAX_BUFFER - 1); + decoder->tail = et; + decoder->packet_count++; + decoder->packet_buffer[et].sample_type = sample_type; - decoder->packet_buffer[et].start_addr = elem->st_addr; - decoder->packet_buffer[et].end_addr = elem->en_addr; decoder->packet_buffer[et].exc = false; decoder->packet_buffer[et].exc_ret = false; decoder->packet_buffer[et].cpu = *((int *)inode->priv); - - /* Wrap around if need be */ - et = (et + 1) & (MAX_BUFFER - 1); - - decoder->tail = et; - decoder->packet_count++; + decoder->packet_buffer[et].start_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[et].end_addr = 0xdeadbeefdeadbeefUL; if (decoder->packet_count == MAX_BUFFER - 1) return OCSD_RESP_WAIT; @@ -297,6 +297,47 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, return OCSD_RESP_CONT; } +static ocsd_datapath_resp_t +cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, + const ocsd_generic_trace_elem *elem, + const uint8_t trace_chan_id) +{ + int ret = 0; + struct cs_etm_packet *packet; + + ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + CS_ETM_RANGE); + if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) + return ret; + + packet = &decoder->packet_buffer[decoder->tail]; + + packet->start_addr = elem->st_addr; + packet->end_addr = elem->en_addr; + switch (elem->last_i_type) { + case OCSD_INSTR_BR: + case OCSD_INSTR_BR_INDIRECT: + packet->last_instr_taken_branch = elem->last_instr_exec; + break; + case OCSD_INSTR_ISB: + case OCSD_INSTR_DSB_DMB: + case OCSD_INSTR_OTHER: + default: + packet->last_instr_taken_branch = false; + break; + } + + return ret; +} + +static ocsd_datapath_resp_t +cs_etm_decoder__buffer_trace_on(struct cs_etm_decoder *decoder, + const uint8_t trace_chan_id) +{ + return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + CS_ETM_TRACE_ON); +} + static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( const void *context, const ocsd_trc_index_t indx __maybe_unused, @@ -313,12 +354,13 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( decoder->trace_on = false; break; case OCSD_GEN_TRC_ELEM_TRACE_ON: + resp = cs_etm_decoder__buffer_trace_on(decoder, + trace_chan_id); decoder->trace_on = true; break; case OCSD_GEN_TRC_ELEM_INSTR_RANGE: - resp = cs_etm_decoder__buffer_packet(decoder, elem, - trace_chan_id, - CS_ETM_RANGE); + resp = cs_etm_decoder__buffer_range(decoder, elem, + trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION: decoder->packet_buffer[decoder->tail].exc = true; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 3d2e6205d186..743f5f444304 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -24,12 +24,14 @@ struct cs_etm_buffer { enum cs_etm_sample_type { CS_ETM_RANGE = 1 << 0, + CS_ETM_TRACE_ON = 1 << 1, }; struct cs_etm_packet { enum cs_etm_sample_type sample_type; u64 start_addr; u64 end_addr; + u8 last_instr_taken_branch; u8 exc; u8 exc_ret; int cpu; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index b9f0a53dfa65..1b0d422373be 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -32,6 +32,14 @@ #define MAX_TIMESTAMP (~0ULL) +/* + * A64 instructions are always 4 bytes + * + * Only A64 is supported, so can use this constant for converting between + * addresses and instruction counts, calculting offsets etc + */ +#define A64_INSTR_SIZE 4 + struct cs_etm_auxtrace { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -45,11 +53,15 @@ struct cs_etm_auxtrace { u8 snapshot_mode; u8 data_queued; u8 sample_branches; + u8 sample_instructions; int num_cpu; u32 auxtrace_type; u64 branches_sample_type; u64 branches_id; + u64 instructions_sample_type; + u64 instructions_sample_period; + u64 instructions_id; u64 **metadata; u64 kernel_start; unsigned int pmu_type; @@ -68,6 +80,12 @@ struct cs_etm_queue { u64 time; u64 timestamp; u64 offset; + u64 period_instructions; + struct branch_stack *last_branch; + struct branch_stack *last_branch_rb; + size_t last_branch_pos; + struct cs_etm_packet *prev_packet; + struct cs_etm_packet *packet; }; static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); @@ -174,6 +192,16 @@ static void cs_etm__free_queue(void *priv) { struct cs_etm_queue *etmq = priv; + if (!etmq) + return; + + thread__zput(etmq->thread); + cs_etm_decoder__free(etmq->decoder); + zfree(&etmq->event_buf); + zfree(&etmq->last_branch); + zfree(&etmq->last_branch_rb); + zfree(&etmq->prev_packet); + zfree(&etmq->packet); free(etmq); } @@ -270,11 +298,35 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params; struct cs_etm_queue *etmq; + size_t szp = sizeof(struct cs_etm_packet); etmq = zalloc(sizeof(*etmq)); if (!etmq) return NULL; + etmq->packet = zalloc(szp); + if (!etmq->packet) + goto out_free; + + if (etm->synth_opts.last_branch || etm->sample_branches) { + etmq->prev_packet = zalloc(szp); + if (!etmq->prev_packet) + goto out_free; + } + + if (etm->synth_opts.last_branch) { + size_t sz = sizeof(struct branch_stack); + + sz += etm->synth_opts.last_branch_sz * + sizeof(struct branch_entry); + etmq->last_branch = zalloc(sz); + if (!etmq->last_branch) + goto out_free; + etmq->last_branch_rb = zalloc(sz); + if (!etmq->last_branch_rb) + goto out_free; + } + etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); if (!etmq->event_buf) goto out_free; @@ -329,6 +381,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, goto out_free_decoder; etmq->offset = 0; + etmq->period_instructions = 0; return etmq; @@ -336,6 +389,10 @@ out_free_decoder: cs_etm_decoder__free(etmq->decoder); out_free: zfree(&etmq->event_buf); + zfree(&etmq->last_branch); + zfree(&etmq->last_branch_rb); + zfree(&etmq->prev_packet); + zfree(&etmq->packet); free(etmq); return NULL; @@ -389,6 +446,129 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) return 0; } +static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq) +{ + struct branch_stack *bs_src = etmq->last_branch_rb; + struct branch_stack *bs_dst = etmq->last_branch; + size_t nr = 0; + + /* + * Set the number of records before early exit: ->nr is used to + * determine how many branches to copy from ->entries. + */ + bs_dst->nr = bs_src->nr; + + /* + * Early exit when there is nothing to copy. + */ + if (!bs_src->nr) + return; + + /* + * As bs_src->entries is a circular buffer, we need to copy from it in + * two steps. First, copy the branches from the most recently inserted + * branch ->last_branch_pos until the end of bs_src->entries buffer. + */ + nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos; + memcpy(&bs_dst->entries[0], + &bs_src->entries[etmq->last_branch_pos], + sizeof(struct branch_entry) * nr); + + /* + * If we wrapped around at least once, the branches from the beginning + * of the bs_src->entries buffer and until the ->last_branch_pos element + * are older valid branches: copy them over. The total number of + * branches copied over will be equal to the number of branches asked by + * the user in last_branch_sz. + */ + if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { + memcpy(&bs_dst->entries[nr], + &bs_src->entries[0], + sizeof(struct branch_entry) * etmq->last_branch_pos); + } +} + +static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq) +{ + etmq->last_branch_pos = 0; + etmq->last_branch_rb->nr = 0; +} + +static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet) +{ + /* + * The packet records the execution range with an exclusive end address + * + * A64 instructions are constant size, so the last executed + * instruction is A64_INSTR_SIZE before the end address + * Will need to do instruction level decode for T32 instructions as + * they can be variable size (not yet supported). + */ + return packet->end_addr - A64_INSTR_SIZE; +} + +static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet) +{ + /* + * Only A64 instructions are currently supported, so can get + * instruction count by dividing. + * Will need to do instruction level decode for T32 instructions as + * they can be variable size (not yet supported). + */ + return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE; +} + +static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet, + u64 offset) +{ + /* + * Only A64 instructions are currently supported, so can get + * instruction address by muliplying. + * Will need to do instruction level decode for T32 instructions as + * they can be variable size (not yet supported). + */ + return packet->start_addr + offset * A64_INSTR_SIZE; +} + +static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq) +{ + struct branch_stack *bs = etmq->last_branch_rb; + struct branch_entry *be; + + /* + * The branches are recorded in a circular buffer in reverse + * chronological order: we start recording from the last element of the + * buffer down. After writing the first element of the stack, move the + * insert position back to the end of the buffer. + */ + if (!etmq->last_branch_pos) + etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; + + etmq->last_branch_pos -= 1; + + be = &bs->entries[etmq->last_branch_pos]; + be->from = cs_etm__last_executed_instr(etmq->prev_packet); + be->to = etmq->packet->start_addr; + /* No support for mispredict */ + be->flags.mispred = 0; + be->flags.predicted = 1; + + /* + * Increment bs->nr until reaching the number of last branches asked by + * the user on the command line. + */ + if (bs->nr < etmq->etm->synth_opts.last_branch_sz) + bs->nr += 1; +} + +static int cs_etm__inject_event(union perf_event *event, + struct perf_sample *sample, u64 type) +{ + event->header.size = perf_event__sample_event_size(sample, type, 0); + return perf_event__synthesize_sample(event, type, 0, sample); +} + + static int cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) { @@ -453,35 +633,105 @@ static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, } } +static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, + u64 addr, u64 period) +{ + int ret = 0; + struct cs_etm_auxtrace *etm = etmq->etm; + union perf_event *event = etmq->event_buf; + struct perf_sample sample = {.ip = 0,}; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + sample.ip = addr; + sample.pid = etmq->pid; + sample.tid = etmq->tid; + sample.id = etmq->etm->instructions_id; + sample.stream_id = etmq->etm->instructions_id; + sample.period = period; + sample.cpu = etmq->packet->cpu; + sample.flags = 0; + sample.insn_len = 1; + sample.cpumode = event->header.misc; + + if (etm->synth_opts.last_branch) { + cs_etm__copy_last_branch_rb(etmq); + sample.branch_stack = etmq->last_branch; + } + + if (etm->synth_opts.inject) { + ret = cs_etm__inject_event(event, &sample, + etm->instructions_sample_type); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(etm->session, event, &sample); + + if (ret) + pr_err( + "CS ETM Trace: failed to deliver instruction event, error %d\n", + ret); + + if (etm->synth_opts.last_branch) + cs_etm__reset_last_branch_rb(etmq); + + return ret; +} + /* * The cs etm packet encodes an instruction range between a branch target * and the next taken branch. Generate sample accordingly. */ -static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, - struct cs_etm_packet *packet) +static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) { int ret = 0; struct cs_etm_auxtrace *etm = etmq->etm; struct perf_sample sample = {.ip = 0,}; union perf_event *event = etmq->event_buf; - u64 start_addr = packet->start_addr; - u64 end_addr = packet->end_addr; + struct dummy_branch_stack { + u64 nr; + struct branch_entry entries; + } dummy_bs; event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; event->sample.header.size = sizeof(struct perf_event_header); - sample.ip = start_addr; + sample.ip = cs_etm__last_executed_instr(etmq->prev_packet); sample.pid = etmq->pid; sample.tid = etmq->tid; - sample.addr = end_addr; + sample.addr = etmq->packet->start_addr; sample.id = etmq->etm->branches_id; sample.stream_id = etmq->etm->branches_id; sample.period = 1; - sample.cpu = packet->cpu; + sample.cpu = etmq->packet->cpu; sample.flags = 0; sample.cpumode = PERF_RECORD_MISC_USER; + /* + * perf report cannot handle events without a branch stack + */ + if (etm->synth_opts.last_branch) { + dummy_bs = (struct dummy_branch_stack){ + .nr = 1, + .entries = { + .from = sample.ip, + .to = sample.addr, + }, + }; + sample.branch_stack = (struct branch_stack *)&dummy_bs; + } + + if (etm->synth_opts.inject) { + ret = cs_etm__inject_event(event, &sample, + etm->branches_sample_type); + if (ret) + return ret; + } + ret = perf_session__deliver_synth_event(etm->session, event, &sample); if (ret) @@ -578,6 +828,24 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, etm->sample_branches = true; etm->branches_sample_type = attr.sample_type; etm->branches_id = id; + id += 1; + attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; + } + + if (etm->synth_opts.last_branch) + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + + if (etm->synth_opts.instructions) { + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + attr.sample_period = etm->synth_opts.period; + etm->instructions_sample_period = attr.sample_period; + err = cs_etm__synth_event(session, &attr, id); + if (err) + return err; + etm->sample_instructions = true; + etm->instructions_sample_type = attr.sample_type; + etm->instructions_id = id; + id += 1; } return 0; @@ -585,25 +853,108 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, static int cs_etm__sample(struct cs_etm_queue *etmq) { + struct cs_etm_auxtrace *etm = etmq->etm; + struct cs_etm_packet *tmp; int ret; - struct cs_etm_packet packet; + u64 instrs_executed; - while (1) { - ret = cs_etm_decoder__get_packet(etmq->decoder, &packet); - if (ret <= 0) + instrs_executed = cs_etm__instr_count(etmq->packet); + etmq->period_instructions += instrs_executed; + + /* + * Record a branch when the last instruction in + * PREV_PACKET is a branch. + */ + if (etm->synth_opts.last_branch && + etmq->prev_packet && + etmq->prev_packet->sample_type == CS_ETM_RANGE && + etmq->prev_packet->last_instr_taken_branch) + cs_etm__update_last_branch_rb(etmq); + + if (etm->sample_instructions && + etmq->period_instructions >= etm->instructions_sample_period) { + /* + * Emit instruction sample periodically + * TODO: allow period to be defined in cycles and clock time + */ + + /* Get number of instructions executed after the sample point */ + u64 instrs_over = etmq->period_instructions - + etm->instructions_sample_period; + + /* + * Calculate the address of the sampled instruction (-1 as + * sample is reported as though instruction has just been + * executed, but PC has not advanced to next instruction) + */ + u64 offset = (instrs_executed - instrs_over - 1); + u64 addr = cs_etm__instr_addr(etmq->packet, offset); + + ret = cs_etm__synth_instruction_sample( + etmq, addr, etm->instructions_sample_period); + if (ret) + return ret; + + /* Carry remaining instructions into next sample period */ + etmq->period_instructions = instrs_over; + } + + if (etm->sample_branches && + etmq->prev_packet && + etmq->prev_packet->sample_type == CS_ETM_RANGE && + etmq->prev_packet->last_instr_taken_branch) { + ret = cs_etm__synth_branch_sample(etmq); + if (ret) return ret; + } + if (etm->sample_branches || etm->synth_opts.last_branch) { /* - * If the packet contains an instruction range, generate an - * instruction sequence event. + * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for + * the next incoming packet. */ - if (packet.sample_type & CS_ETM_RANGE) - cs_etm__synth_branch_sample(etmq, &packet); + tmp = etmq->packet; + etmq->packet = etmq->prev_packet; + etmq->prev_packet = tmp; } return 0; } +static int cs_etm__flush(struct cs_etm_queue *etmq) +{ + int err = 0; + struct cs_etm_packet *tmp; + + if (etmq->etm->synth_opts.last_branch && + etmq->prev_packet && + etmq->prev_packet->sample_type == CS_ETM_RANGE) { + /* + * Generate a last branch event for the branches left in the + * circular buffer at the end of the trace. + * + * Use the address of the end of the last reported execution + * range + */ + u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); + + err = cs_etm__synth_instruction_sample( + etmq, addr, + etmq->period_instructions); + etmq->period_instructions = 0; + + /* + * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for + * the next incoming packet. + */ + tmp = etmq->packet; + etmq->packet = etmq->prev_packet; + etmq->prev_packet = tmp; + } + + return err; +} + static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { struct cs_etm_auxtrace *etm = etmq->etm; @@ -615,45 +966,72 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) etm->kernel_start = machine__kernel_start(etm->machine); /* Go through each buffer in the queue and decode them one by one */ -more: - buffer_used = 0; - memset(&buffer, 0, sizeof(buffer)); - err = cs_etm__get_trace(&buffer, etmq); - if (err <= 0) - return err; - /* - * We cannot assume consecutive blocks in the data file are contiguous, - * reset the decoder to force re-sync. - */ - err = cs_etm_decoder__reset(etmq->decoder); - if (err != 0) - return err; - - /* Run trace decoder until buffer consumed or end of trace */ - do { - processed = 0; - - err = cs_etm_decoder__process_data_block( - etmq->decoder, - etmq->offset, - &buffer.buf[buffer_used], - buffer.len - buffer_used, - &processed); - - if (err) + while (1) { + buffer_used = 0; + memset(&buffer, 0, sizeof(buffer)); + err = cs_etm__get_trace(&buffer, etmq); + if (err <= 0) return err; - - etmq->offset += processed; - buffer_used += processed; - /* - * Nothing to do with an error condition, let's hope the next - * chunk will be better. + * We cannot assume consecutive blocks in the data file are + * contiguous, reset the decoder to force re-sync. */ - err = cs_etm__sample(etmq); - } while (buffer.len > buffer_used); + err = cs_etm_decoder__reset(etmq->decoder); + if (err != 0) + return err; + + /* Run trace decoder until buffer consumed or end of trace */ + do { + processed = 0; + err = cs_etm_decoder__process_data_block( + etmq->decoder, + etmq->offset, + &buffer.buf[buffer_used], + buffer.len - buffer_used, + &processed); + if (err) + return err; + + etmq->offset += processed; + buffer_used += processed; + + /* Process each packet in this chunk */ + while (1) { + err = cs_etm_decoder__get_packet(etmq->decoder, + etmq->packet); + if (err <= 0) + /* + * Stop processing this chunk on + * end of data or error + */ + break; + + switch (etmq->packet->sample_type) { + case CS_ETM_RANGE: + /* + * If the packet contains an instruction + * range, generate instruction sequence + * events. + */ + cs_etm__sample(etmq); + break; + case CS_ETM_TRACE_ON: + /* + * Discontinuity in trace, flush + * previous branch stack + */ + cs_etm__flush(etmq); + break; + default: + break; + } + } + } while (buffer.len > buffer_used); -goto more; + if (err == 0) + /* Flush any remaining branch stack entries */ + err = cs_etm__flush(etmq); + } return err; } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 44e603c27944..f0a6cbd033cc 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -894,8 +894,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, struct machine *machine) { size_t size; - const char *mmap_name; - char name_buff[PATH_MAX]; struct map *map = machine__kernel_map(machine); struct kmap *kmap; int err; @@ -918,7 +916,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, return -1; } - mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff)); if (machine__is_host(machine)) { /* * kernel uses PERF_RECORD_MISC_USER for user space maps, @@ -931,7 +928,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, kmap = map__kmap(map); size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), - "%s%s", mmap_name, kmap->ref_reloc_sym->name) + 1; + "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; size = PERF_ALIGN(size, sizeof(u64)); event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - @@ -1591,17 +1588,6 @@ int machine__resolve(struct machine *machine, struct addr_location *al, return -1; dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); - /* - * Have we already created the kernel maps for this machine? - * - * This should have happened earlier, when we processed the kernel MMAP - * events, but for older perf.data files there was no such thing, so do - * it now. - */ - if (sample->cpumode == PERF_RECORD_MISC_KERNEL && - machine__kernel_map(machine) == NULL) - machine__create_kernel_maps(machine); - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al); dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e5fc14e53c05..7b7d535396f7 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1086,11 +1086,30 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) { + bool all_threads = (target->per_thread && target->system_wide); struct cpu_map *cpus; struct thread_map *threads; + /* + * If specify '-a' and '--per-thread' to perf record, perf record + * will override '--per-thread'. target->per_thread = false and + * target->system_wide = true. + * + * If specify '--per-thread' only to perf record, + * target->per_thread = true and target->system_wide = false. + * + * So target->per_thread && target->system_wide is false. + * For perf record, thread_map__new_str doesn't call + * thread_map__new_all_cpus. That will keep perf record's + * current behavior. + * + * For perf stat, it allows the case that target->per_thread and + * target->system_wide are all true. It means to collect system-wide + * per-thread data. thread_map__new_str will call + * thread_map__new_all_cpus to enumerate all threads. + */ threads = thread_map__new_str(target->pid, target->tid, target->uid, - target->per_thread); + all_threads); if (!threads) return -1; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index f28aaaa3a440..942bdec6d70d 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -174,4 +174,5 @@ int write_padded(struct feat_fd *fd, const void *bf, int get_cpuid(char *buffer, size_t sz); char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused); +int strcmp_cpuid_str(const char *s1, const char *s2); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b6140950301e..44a8456cea10 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -879,7 +879,7 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter, * cumulated only one time to prevent entries more than 100% * overhead. */ - he_cache = malloc(sizeof(*he_cache) * (iter->max_stack + 1)); + he_cache = malloc(sizeof(*he_cache) * (callchain_cursor.nr + 1)); if (he_cache == NULL) return -ENOMEM; @@ -1045,8 +1045,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, if (err) return err; - iter->max_stack = max_stack_depth; - err = iter->ops->prepare_entry(iter, al); if (err) goto out; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 02721b579746..e869cad4d89f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -107,7 +107,6 @@ struct hist_entry_iter { int curr; bool hide_unresolved; - int max_stack; struct perf_evsel *evsel; struct perf_sample *sample; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b05a67464c03..fe27ef55cbb9 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -48,8 +48,31 @@ static void machine__threads_init(struct machine *machine) } } +static int machine__set_mmap_name(struct machine *machine) +{ + if (machine__is_host(machine)) { + if (symbol_conf.vmlinux_name) + machine->mmap_name = strdup(symbol_conf.vmlinux_name); + else + machine->mmap_name = strdup("[kernel.kallsyms]"); + } else if (machine__is_default_guest(machine)) { + if (symbol_conf.default_guest_vmlinux_name) + machine->mmap_name = strdup(symbol_conf.default_guest_vmlinux_name); + else + machine->mmap_name = strdup("[guest.kernel.kallsyms]"); + } else { + if (asprintf(&machine->mmap_name, "[guest.kernel.kallsyms.%d]", + machine->pid) < 0) + machine->mmap_name = NULL; + } + + return machine->mmap_name ? 0 : -ENOMEM; +} + int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { + int err = -ENOMEM; + memset(machine, 0, sizeof(*machine)); map_groups__init(&machine->kmaps, machine); RB_CLEAR_NODE(&machine->rb_node); @@ -73,13 +96,16 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) if (machine->root_dir == NULL) return -ENOMEM; + if (machine__set_mmap_name(machine)) + goto out; + if (pid != HOST_KERNEL_ID) { struct thread *thread = machine__findnew_thread(machine, -1, pid); char comm[64]; if (thread == NULL) - return -ENOMEM; + goto out; snprintf(comm, sizeof(comm), "[guest/%d]", pid); thread__set_comm(thread, comm, 0); @@ -87,7 +113,13 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) } machine->current_tid = NULL; + err = 0; +out: + if (err) { + zfree(&machine->root_dir); + zfree(&machine->mmap_name); + } return 0; } @@ -119,7 +151,7 @@ struct machine *machine__new_kallsyms(void) * ask for not using the kcore parsing code, once this one is fixed * to create a map per module. */ - if (machine && __machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION, true) <= 0) { + if (machine && machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION) <= 0) { machine__delete(machine); machine = NULL; } @@ -180,6 +212,7 @@ void machine__exit(struct machine *machine) dsos__exit(&machine->dsos); machine__exit_vdso(machine); zfree(&machine->root_dir); + zfree(&machine->mmap_name); zfree(&machine->current_tid); for (i = 0; i < THREADS__TABLE_SIZE; i++) { @@ -322,20 +355,6 @@ void machines__process_guests(struct machines *machines, } } -char *machine__mmap_name(struct machine *machine, char *bf, size_t size) -{ - if (machine__is_host(machine)) - snprintf(bf, size, "[%s]", "kernel.kallsyms"); - else if (machine__is_default_guest(machine)) - snprintf(bf, size, "[%s]", "guest.kernel.kallsyms"); - else { - snprintf(bf, size, "[%s.%d]", "guest.kernel.kallsyms", - machine->pid); - } - - return bf; -} - void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size) { struct rb_node *node; @@ -771,25 +790,13 @@ size_t machine__fprintf(struct machine *machine, FILE *fp) static struct dso *machine__get_kernel(struct machine *machine) { - const char *vmlinux_name = NULL; + const char *vmlinux_name = machine->mmap_name; struct dso *kernel; if (machine__is_host(machine)) { - vmlinux_name = symbol_conf.vmlinux_name; - if (!vmlinux_name) - vmlinux_name = DSO__NAME_KALLSYMS; - kernel = machine__findnew_kernel(machine, vmlinux_name, "[kernel]", DSO_TYPE_KERNEL); } else { - char bf[PATH_MAX]; - - if (machine__is_default_guest(machine)) - vmlinux_name = symbol_conf.default_guest_vmlinux_name; - if (!vmlinux_name) - vmlinux_name = machine__mmap_name(machine, bf, - sizeof(bf)); - kernel = machine__findnew_kernel(machine, vmlinux_name, "[guest.kernel]", DSO_TYPE_GUEST_KERNEL); @@ -849,13 +856,10 @@ static int machine__get_running_kernel_start(struct machine *machine, return 0; } -int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) +static int +__machine__create_kernel_maps(struct machine *machine, struct dso *kernel) { int type; - u64 start = 0; - - if (machine__get_running_kernel_start(machine, NULL, &start)) - return -1; /* In case of renewal the kernel map, destroy previous one */ machine__destroy_kernel_maps(machine); @@ -864,7 +868,7 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) struct kmap *kmap; struct map *map; - machine->vmlinux_maps[type] = map__new2(start, kernel, type); + machine->vmlinux_maps[type] = map__new2(0, kernel, type); if (machine->vmlinux_maps[type] == NULL) return -1; @@ -987,11 +991,11 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid) return machine__create_kernel_maps(machine); } -int __machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type, bool no_kcore) +int machine__load_kallsyms(struct machine *machine, const char *filename, + enum map_type type) { struct map *map = machine__kernel_map(machine); - int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore); + int ret = __dso__load_kallsyms(map->dso, filename, map, true); if (ret > 0) { dso__set_loaded(map->dso, type); @@ -1006,12 +1010,6 @@ int __machine__load_kallsyms(struct machine *machine, const char *filename, return ret; } -int machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type) -{ - return __machine__load_kallsyms(machine, filename, type, false); -} - int machine__load_vmlinux_path(struct machine *machine, enum map_type type) { struct map *map = machine__kernel_map(machine); @@ -1215,6 +1213,24 @@ static int machine__create_modules(struct machine *machine) return 0; } +static void machine__set_kernel_mmap(struct machine *machine, + u64 start, u64 end) +{ + int i; + + for (i = 0; i < MAP__NR_TYPES; i++) { + machine->vmlinux_maps[i]->start = start; + machine->vmlinux_maps[i]->end = end; + + /* + * Be a bit paranoid here, some perf.data file came with + * a zero sized synthesized MMAP event for the kernel. + */ + if (machine->vmlinux_maps[i]->end == 0) + machine->vmlinux_maps[i]->end = ~0ULL; + } +} + int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); @@ -1239,40 +1255,22 @@ int machine__create_kernel_maps(struct machine *machine) "continuing anyway...\n", machine->pid); } - /* - * Now that we have all the maps created, just set the ->end of them: - */ - map_groups__fixup_end(&machine->kmaps); - if (!machine__get_running_kernel_start(machine, &name, &addr)) { if (name && maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) { machine__destroy_kernel_maps(machine); return -1; } + machine__set_kernel_mmap(machine, addr, 0); } + /* + * Now that we have all the maps created, just set the ->end of them: + */ + map_groups__fixup_end(&machine->kmaps); return 0; } -static void machine__set_kernel_mmap_len(struct machine *machine, - union perf_event *event) -{ - int i; - - for (i = 0; i < MAP__NR_TYPES; i++) { - machine->vmlinux_maps[i]->start = event->mmap.start; - machine->vmlinux_maps[i]->end = (event->mmap.start + - event->mmap.len); - /* - * Be a bit paranoid here, some perf.data file came with - * a zero sized synthesized MMAP event for the kernel. - */ - if (machine->vmlinux_maps[i]->end == 0) - machine->vmlinux_maps[i]->end = ~0ULL; - } -} - static bool machine__uses_kcore(struct machine *machine) { struct dso *dso; @@ -1289,7 +1287,6 @@ static int machine__process_kernel_mmap_event(struct machine *machine, union perf_event *event) { struct map *map; - char kmmap_prefix[PATH_MAX]; enum dso_kernel_type kernel_type; bool is_kernel_mmap; @@ -1297,15 +1294,14 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (machine__uses_kcore(machine)) return 0; - machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix)); if (machine__is_host(machine)) kernel_type = DSO_TYPE_KERNEL; else kernel_type = DSO_TYPE_GUEST_KERNEL; is_kernel_mmap = memcmp(event->mmap.filename, - kmmap_prefix, - strlen(kmmap_prefix) - 1) == 0; + machine->mmap_name, + strlen(machine->mmap_name) - 1) == 0; if (event->mmap.filename[0] == '/' || (!is_kernel_mmap && event->mmap.filename[0] == '[')) { map = machine__findnew_module_map(machine, event->mmap.start, @@ -1316,7 +1312,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, map->end = map->start + event->mmap.len; } else if (is_kernel_mmap) { const char *symbol_name = (event->mmap.filename + - strlen(kmmap_prefix)); + strlen(machine->mmap_name)); /* * Should be there already, from the build-id table in * the header. @@ -1357,7 +1353,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, up_read(&machine->dsos.lock); if (kernel == NULL) - kernel = machine__findnew_dso(machine, kmmap_prefix); + kernel = machine__findnew_dso(machine, machine->mmap_name); if (kernel == NULL) goto out_problem; @@ -1370,7 +1366,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (strstr(kernel->long_name, "vmlinux")) dso__set_short_name(kernel, "[kernel.vmlinux]", false); - machine__set_kernel_mmap_len(machine, event); + machine__set_kernel_mmap(machine, event->mmap.start, + event->mmap.start + event->mmap.len); /* * Avoid using a zero address (kptr_restrict) for the ref reloc diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 5ce860b64c74..66cc200ef86f 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -43,6 +43,7 @@ struct machine { bool comm_exec; bool kptr_restrict_warned; char *root_dir; + char *mmap_name; struct threads threads[THREADS__TABLE_SIZE]; struct vdso_info *vdso_info; struct perf_env *env; @@ -142,8 +143,6 @@ struct machine *machines__find(struct machines *machines, pid_t pid); struct machine *machines__findnew(struct machines *machines, pid_t pid); void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size); -char *machine__mmap_name(struct machine *machine, char *bf, size_t size); - void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); @@ -226,8 +225,6 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start, const char *filename); int arch__fix_module_text_start(u64 *start, const char *name); -int __machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type, bool no_kcore); int machine__load_kallsyms(struct machine *machine, const char *filename, enum map_type type); int machine__load_vmlinux_path(struct machine *machine, enum map_type type); @@ -239,7 +236,6 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm); void machine__destroy_kernel_maps(struct machine *machine); -int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel); int machine__create_kernel_maps(struct machine *machine); int machines__create_kernel_maps(struct machines *machines, pid_t pid); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 57e38fdf0b34..1111d5bf15ca 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -576,6 +576,34 @@ char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) return NULL; } +/* Return zero when the cpuid from the mapfile.csv matches the + * cpuid string generated on this platform. + * Otherwise return non-zero. + */ +int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) +{ + regex_t re; + regmatch_t pmatch[1]; + int match; + + if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) { + /* Warn unable to generate match particular string. */ + pr_info("Invalid regular expression %s\n", mapcpuid); + return 1; + } + + match = !regexec(&re, cpuid, 1, pmatch, 0); + regfree(&re); + if (match) { + size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); + + /* Verify the entire string matched. */ + if (match_len == strlen(cpuid)) + return 0; + } + return 1; +} + static char *perf_pmu__getcpuid(struct perf_pmu *pmu) { char *cpuid; @@ -610,31 +638,14 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) i = 0; for (;;) { - regex_t re; - regmatch_t pmatch[1]; - int match; - map = &pmu_events_map[i++]; if (!map->table) { map = NULL; break; } - if (regcomp(&re, map->cpuid, REG_EXTENDED) != 0) { - /* Warn unable to generate match particular string. */ - pr_info("Invalid regular expression %s\n", map->cpuid); + if (!strcmp_cpuid_str(map->cpuid, cpuid)) break; - } - - match = !regexec(&re, cpuid, 1, pmatch, 0); - regfree(&re); - if (match) { - size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); - - /* Verify the entire string matched. */ - if (match_len == strlen(cpuid)) - break; - } } free(cpuid); return map; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2da4d0456a03..e8514f651865 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -111,17 +111,20 @@ struct sort_entry sort_thread = { /* --sort comm */ +/* + * We can't use pointer comparison in functions below, + * because it gives different results based on pointer + * values, which could break some sorting assumptions. + */ static int64_t sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) { - /* Compare the addr that should be unique among comm */ return strcmp(comm__str(right->comm), comm__str(left->comm)); } static int64_t sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) { - /* Compare the addr that should be unique among comm */ return strcmp(comm__str(right->comm), comm__str(left->comm)); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index dbc6f7134f61..2f44e386a0e8 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -90,6 +90,8 @@ struct perf_stat_config { bool scale; FILE *output; unsigned int interval; + unsigned int timeout; + int times; struct runtime_stat *stats; int stats_num; }; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index cc065d4bfafc..a1a312d99f30 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1582,7 +1582,7 @@ int dso__load(struct dso *dso, struct map *map) bool next_slot = false; bool is_reg; bool nsexit; - int sirc; + int sirc = -1; enum dso_binary_type symtab_type = binary_type_symtab[i]; @@ -1600,16 +1600,14 @@ int dso__load(struct dso *dso, struct map *map) nsinfo__mountns_exit(&nsc); is_reg = is_regular_file(name); - sirc = symsrc__init(ss, dso, name, symtab_type); + if (is_reg) + sirc = symsrc__init(ss, dso, name, symtab_type); if (nsexit) nsinfo__mountns_enter(dso->nsinfo, &nsc); - if (!is_reg || sirc < 0) { - if (sirc >= 0) - symsrc__destroy(ss); + if (!is_reg || sirc < 0) continue; - } if (!syms_ss && symsrc__has_symtab(ss)) { syms_ss = ss; @@ -1960,8 +1958,7 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { dso->binary_type = DSO_BINARY_TYPE__GUEST_KALLSYMS; - machine__mmap_name(machine, path, sizeof(path)); - dso__set_long_name(dso, strdup(path), true); + dso__set_long_name(dso, machine->mmap_name, false); map__fixup_start(map); map__fixup_end(map); } diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 303bdb84ab5a..895122d638dd 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -30,6 +30,14 @@ static const char **syscalltbl_native = syscalltbl_x86_64; #include <asm/syscalls_64.c> const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID; static const char **syscalltbl_native = syscalltbl_s390_64; +#elif defined(__powerpc64__) +#include <asm/syscalls_64.c> +const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_powerpc_64; +#elif defined(__powerpc__) +#include <asm/syscalls_32.c> +const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID; +static const char **syscalltbl_native = syscalltbl_powerpc_32; #endif struct syscall { diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 3e1038f6491c..729dad8f412d 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -323,7 +323,7 @@ out_free_threads: } struct thread_map *thread_map__new_str(const char *pid, const char *tid, - uid_t uid, bool per_thread) + uid_t uid, bool all_threads) { if (pid) return thread_map__new_by_pid_str(pid); @@ -331,7 +331,7 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid, if (!tid && uid != UINT_MAX) return thread_map__new_by_uid(uid); - if (per_thread) + if (all_threads) return thread_map__new_all_cpus(); return thread_map__new_by_tid_str(tid); diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 0a806b99e73c..5ec91cfd1869 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -31,7 +31,7 @@ struct thread_map *thread_map__get(struct thread_map *map); void thread_map__put(struct thread_map *map); struct thread_map *thread_map__new_str(const char *pid, - const char *tid, uid_t uid, bool per_thread); + const char *tid, uid_t uid, bool all_threads); struct thread_map *thread_map__new_by_tid_str(const char *tid_str); |