summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/trace/coresight.txt51
-rw-r--r--tools/arch/powerpc/include/uapi/asm/unistd.h402
-rw-r--r--tools/lib/api/fs/fs.c44
-rw-r--r--tools/lib/api/fs/fs.h2
-rw-r--r--tools/lib/symbol/kallsyms.c4
-rw-r--r--tools/perf/Documentation/perf-annotate.txt6
-rw-r--r--tools/perf/Documentation/perf-kmem.txt6
-rw-r--r--tools/perf/Documentation/perf-mem.txt4
-rw-r--r--tools/perf/Documentation/perf-report.txt5
-rw-r--r--tools/perf/Documentation/perf-script.txt3
-rw-r--r--tools/perf/Documentation/perf-stat.txt10
-rw-r--r--tools/perf/Makefile.config2
-rw-r--r--tools/perf/arch/arm/util/auxtrace.c2
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c51
-rw-r--r--tools/perf/arch/powerpc/Makefile25
-rwxr-xr-xtools/perf/arch/powerpc/entry/syscalls/mksyscalltbl37
-rw-r--r--tools/perf/arch/s390/annotate/instructions.c27
-rw-r--r--tools/perf/arch/s390/util/header.c148
-rw-r--r--tools/perf/builtin-record.c2
-rw-r--r--tools/perf/builtin-report.c7
-rw-r--r--tools/perf/builtin-script.c17
-rw-r--r--tools/perf/builtin-stat.c53
-rwxr-xr-xtools/perf/check-headers.sh1
-rw-r--r--tools/perf/tests/code-reading.c33
-rw-r--r--tools/perf/tests/dwarf-unwind.c46
-rw-r--r--tools/perf/tests/shell/lib/probe_vfs_getname.sh2
-rwxr-xr-xtools/perf/tests/shell/trace+probe_libc_inet_pton.sh6
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c4
-rw-r--r--tools/perf/ui/browsers/annotate.c9
-rw-r--r--tools/perf/util/build-id.c10
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c74
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.h2
-rw-r--r--tools/perf/util/cs-etm.c478
-rw-r--r--tools/perf/util/event.c16
-rw-r--r--tools/perf/util/evlist.c21
-rw-r--r--tools/perf/util/header.h1
-rw-r--r--tools/perf/util/hist.c4
-rw-r--r--tools/perf/util/hist.h1
-rw-r--r--tools/perf/util/machine.c145
-rw-r--r--tools/perf/util/machine.h6
-rw-r--r--tools/perf/util/pmu.c47
-rw-r--r--tools/perf/util/sort.c7
-rw-r--r--tools/perf/util/stat.h2
-rw-r--r--tools/perf/util/symbol.c13
-rw-r--r--tools/perf/util/syscalltbl.c8
-rw-r--r--tools/perf/util/thread_map.c4
-rw-r--r--tools/perf/util/thread_map.h2
47 files changed, 1577 insertions, 273 deletions
diff --git a/Documentation/trace/coresight.txt b/Documentation/trace/coresight.txt
index a33c88cd5d1d..6f0120c3a4f1 100644
--- a/Documentation/trace/coresight.txt
+++ b/Documentation/trace/coresight.txt
@@ -330,3 +330,54 @@ Details on how to use the generic STM API can be found here [2].
[1]. Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
[2]. Documentation/trace/stm.txt
+
+
+Using perf tools
+----------------
+
+perf can be used to record and analyze trace of programs.
+
+Execution can be recorded using 'perf record' with the cs_etm event,
+specifying the name of the sink to record to, e.g:
+
+ perf record -e cs_etm/@20070000.etr/u --per-thread
+
+The 'perf report' and 'perf script' commands can be used to analyze execution,
+synthesizing instruction and branch events from the instruction trace.
+'perf inject' can be used to replace the trace data with the synthesized events.
+The --itrace option controls the type and frequency of synthesized events
+(see perf documentation).
+
+Note that only 64-bit programs are currently supported - further work is
+required to support instruction decode of 32-bit Arm programs.
+
+
+Generating coverage files for Feedback Directed Optimization: AutoFDO
+---------------------------------------------------------------------
+
+'perf inject' accepts the --itrace option in which case tracing data is
+removed and replaced with the synthesized events. e.g.
+
+ perf inject --itrace --strip -i perf.data -o perf.data.new
+
+Below is an example of using ARM ETM for autoFDO. It requires autofdo
+(https://github.com/google/autofdo) and gcc version 5. The bubble
+sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial).
+
+ $ gcc-5 -O3 sort.c -o sort
+ $ taskset -c 2 ./sort
+ Bubble sorting array of 30000 elements
+ 5910 ms
+
+ $ perf record -e cs_etm/@20070000.etr/u --per-thread taskset -c 2 ./sort
+ Bubble sorting array of 30000 elements
+ 12543 ms
+ [ perf record: Woken up 35 times to write data ]
+ [ perf record: Captured and wrote 69.640 MB perf.data ]
+
+ $ perf inject -i perf.data -o inj.data --itrace=il64 --strip
+ $ create_gcov --binary=./sort --profile=inj.data --gcov=sort.gcov -gcov_version=1
+ $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
+ $ taskset -c 2 ./sort_autofdo
+ Bubble sorting array of 30000 elements
+ 5806 ms
diff --git a/tools/arch/powerpc/include/uapi/asm/unistd.h b/tools/arch/powerpc/include/uapi/asm/unistd.h
new file mode 100644
index 000000000000..389c36fd8299
--- /dev/null
+++ b/tools/arch/powerpc/include/uapi/asm/unistd.h
@@ -0,0 +1,402 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * This file contains the system call numbers.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _UAPI_ASM_POWERPC_UNISTD_H_
+#define _UAPI_ASM_POWERPC_UNISTD_H_
+
+
+#define __NR_restart_syscall 0
+#define __NR_exit 1
+#define __NR_fork 2
+#define __NR_read 3
+#define __NR_write 4
+#define __NR_open 5
+#define __NR_close 6
+#define __NR_waitpid 7
+#define __NR_creat 8
+#define __NR_link 9
+#define __NR_unlink 10
+#define __NR_execve 11
+#define __NR_chdir 12
+#define __NR_time 13
+#define __NR_mknod 14
+#define __NR_chmod 15
+#define __NR_lchown 16
+#define __NR_break 17
+#define __NR_oldstat 18
+#define __NR_lseek 19
+#define __NR_getpid 20
+#define __NR_mount 21
+#define __NR_umount 22
+#define __NR_setuid 23
+#define __NR_getuid 24
+#define __NR_stime 25
+#define __NR_ptrace 26
+#define __NR_alarm 27
+#define __NR_oldfstat 28
+#define __NR_pause 29
+#define __NR_utime 30
+#define __NR_stty 31
+#define __NR_gtty 32
+#define __NR_access 33
+#define __NR_nice 34
+#define __NR_ftime 35
+#define __NR_sync 36
+#define __NR_kill 37
+#define __NR_rename 38
+#define __NR_mkdir 39
+#define __NR_rmdir 40
+#define __NR_dup 41
+#define __NR_pipe 42
+#define __NR_times 43
+#define __NR_prof 44
+#define __NR_brk 45
+#define __NR_setgid 46
+#define __NR_getgid 47
+#define __NR_signal 48
+#define __NR_geteuid 49
+#define __NR_getegid 50
+#define __NR_acct 51
+#define __NR_umount2 52
+#define __NR_lock 53
+#define __NR_ioctl 54
+#define __NR_fcntl 55
+#define __NR_mpx 56
+#define __NR_setpgid 57
+#define __NR_ulimit 58
+#define __NR_oldolduname 59
+#define __NR_umask 60
+#define __NR_chroot 61
+#define __NR_ustat 62
+#define __NR_dup2 63
+#define __NR_getppid 64
+#define __NR_getpgrp 65
+#define __NR_setsid 66
+#define __NR_sigaction 67
+#define __NR_sgetmask 68
+#define __NR_ssetmask 69
+#define __NR_setreuid 70
+#define __NR_setregid 71
+#define __NR_sigsuspend 72
+#define __NR_sigpending 73
+#define __NR_sethostname 74
+#define __NR_setrlimit 75
+#define __NR_getrlimit 76
+#define __NR_getrusage 77
+#define __NR_gettimeofday 78
+#define __NR_settimeofday 79
+#define __NR_getgroups 80
+#define __NR_setgroups 81
+#define __NR_select 82
+#define __NR_symlink 83
+#define __NR_oldlstat 84
+#define __NR_readlink 85
+#define __NR_uselib 86
+#define __NR_swapon 87
+#define __NR_reboot 88
+#define __NR_readdir 89
+#define __NR_mmap 90
+#define __NR_munmap 91
+#define __NR_truncate 92
+#define __NR_ftruncate 93
+#define __NR_fchmod 94
+#define __NR_fchown 95
+#define __NR_getpriority 96
+#define __NR_setpriority 97
+#define __NR_profil 98
+#define __NR_statfs 99
+#define __NR_fstatfs 100
+#define __NR_ioperm 101
+#define __NR_socketcall 102
+#define __NR_syslog 103
+#define __NR_setitimer 104
+#define __NR_getitimer 105
+#define __NR_stat 106
+#define __NR_lstat 107
+#define __NR_fstat 108
+#define __NR_olduname 109
+#define __NR_iopl 110
+#define __NR_vhangup 111
+#define __NR_idle 112
+#define __NR_vm86 113
+#define __NR_wait4 114
+#define __NR_swapoff 115
+#define __NR_sysinfo 116
+#define __NR_ipc 117
+#define __NR_fsync 118
+#define __NR_sigreturn 119
+#define __NR_clone 120
+#define __NR_setdomainname 121
+#define __NR_uname 122
+#define __NR_modify_ldt 123
+#define __NR_adjtimex 124
+#define __NR_mprotect 125
+#define __NR_sigprocmask 126
+#define __NR_create_module 127
+#define __NR_init_module 128
+#define __NR_delete_module 129
+#define __NR_get_kernel_syms 130
+#define __NR_quotactl 131
+#define __NR_getpgid 132
+#define __NR_fchdir 133
+#define __NR_bdflush 134
+#define __NR_sysfs 135
+#define __NR_personality 136
+#define __NR_afs_syscall 137 /* Syscall for Andrew File System */
+#define __NR_setfsuid 138
+#define __NR_setfsgid 139
+#define __NR__llseek 140
+#define __NR_getdents 141
+#define __NR__newselect 142
+#define __NR_flock 143
+#define __NR_msync 144
+#define __NR_readv 145
+#define __NR_writev 146
+#define __NR_getsid 147
+#define __NR_fdatasync 148
+#define __NR__sysctl 149
+#define __NR_mlock 150
+#define __NR_munlock 151
+#define __NR_mlockall 152
+#define __NR_munlockall 153
+#define __NR_sched_setparam 154
+#define __NR_sched_getparam 155
+#define __NR_sched_setscheduler 156
+#define __NR_sched_getscheduler 157
+#define __NR_sched_yield 158
+#define __NR_sched_get_priority_max 159
+#define __NR_sched_get_priority_min 160
+#define __NR_sched_rr_get_interval 161
+#define __NR_nanosleep 162
+#define __NR_mremap 163
+#define __NR_setresuid 164
+#define __NR_getresuid 165
+#define __NR_query_module 166
+#define __NR_poll 167
+#define __NR_nfsservctl 168
+#define __NR_setresgid 169
+#define __NR_getresgid 170
+#define __NR_prctl 171
+#define __NR_rt_sigreturn 172
+#define __NR_rt_sigaction 173
+#define __NR_rt_sigprocmask 174
+#define __NR_rt_sigpending 175
+#define __NR_rt_sigtimedwait 176
+#define __NR_rt_sigqueueinfo 177
+#define __NR_rt_sigsuspend 178
+#define __NR_pread64 179
+#define __NR_pwrite64 180
+#define __NR_chown 181
+#define __NR_getcwd 182
+#define __NR_capget 183
+#define __NR_capset 184
+#define __NR_sigaltstack 185
+#define __NR_sendfile 186
+#define __NR_getpmsg 187 /* some people actually want streams */
+#define __NR_putpmsg 188 /* some people actually want streams */
+#define __NR_vfork 189
+#define __NR_ugetrlimit 190 /* SuS compliant getrlimit */
+#define __NR_readahead 191
+#ifndef __powerpc64__ /* these are 32-bit only */
+#define __NR_mmap2 192
+#define __NR_truncate64 193
+#define __NR_ftruncate64 194
+#define __NR_stat64 195
+#define __NR_lstat64 196
+#define __NR_fstat64 197
+#endif
+#define __NR_pciconfig_read 198
+#define __NR_pciconfig_write 199
+#define __NR_pciconfig_iobase 200
+#define __NR_multiplexer 201
+#define __NR_getdents64 202
+#define __NR_pivot_root 203
+#ifndef __powerpc64__
+#define __NR_fcntl64 204
+#endif
+#define __NR_madvise 205
+#define __NR_mincore 206
+#define __NR_gettid 207
+#define __NR_tkill 208
+#define __NR_setxattr 209
+#define __NR_lsetxattr 210
+#define __NR_fsetxattr 211
+#define __NR_getxattr 212
+#define __NR_lgetxattr 213
+#define __NR_fgetxattr 214
+#define __NR_listxattr 215
+#define __NR_llistxattr 216
+#define __NR_flistxattr 217
+#define __NR_removexattr 218
+#define __NR_lremovexattr 219
+#define __NR_fremovexattr 220
+#define __NR_futex 221
+#define __NR_sched_setaffinity 222
+#define __NR_sched_getaffinity 223
+/* 224 currently unused */
+#define __NR_tuxcall 225
+#ifndef __powerpc64__
+#define __NR_sendfile64 226
+#endif
+#define __NR_io_setup 227
+#define __NR_io_destroy 228
+#define __NR_io_getevents 229
+#define __NR_io_submit 230
+#define __NR_io_cancel 231
+#define __NR_set_tid_address 232
+#define __NR_fadvise64 233
+#define __NR_exit_group 234
+#define __NR_lookup_dcookie 235
+#define __NR_epoll_create 236
+#define __NR_epoll_ctl 237
+#define __NR_epoll_wait 238
+#define __NR_remap_file_pages 239
+#define __NR_timer_create 240
+#define __NR_timer_settime 241
+#define __NR_timer_gettime 242
+#define __NR_timer_getoverrun 243
+#define __NR_timer_delete 244
+#define __NR_clock_settime 245
+#define __NR_clock_gettime 246
+#define __NR_clock_getres 247
+#define __NR_clock_nanosleep 248
+#define __NR_swapcontext 249
+#define __NR_tgkill 250
+#define __NR_utimes 251
+#define __NR_statfs64 252
+#define __NR_fstatfs64 253
+#ifndef __powerpc64__
+#define __NR_fadvise64_64 254
+#endif
+#define __NR_rtas 255
+#define __NR_sys_debug_setcontext 256
+/* Number 257 is reserved for vserver */
+#define __NR_migrate_pages 258
+#define __NR_mbind 259
+#define __NR_get_mempolicy 260
+#define __NR_set_mempolicy 261
+#define __NR_mq_open 262
+#define __NR_mq_unlink 263
+#define __NR_mq_timedsend 264
+#define __NR_mq_timedreceive 265
+#define __NR_mq_notify 266
+#define __NR_mq_getsetattr 267
+#define __NR_kexec_load 268
+#define __NR_add_key 269
+#define __NR_request_key 270
+#define __NR_keyctl 271
+#define __NR_waitid 272
+#define __NR_ioprio_set 273
+#define __NR_ioprio_get 274
+#define __NR_inotify_init 275
+#define __NR_inotify_add_watch 276
+#define __NR_inotify_rm_watch 277
+#define __NR_spu_run 278
+#define __NR_spu_create 279
+#define __NR_pselect6 280
+#define __NR_ppoll 281
+#define __NR_unshare 282
+#define __NR_splice 283
+#define __NR_tee 284
+#define __NR_vmsplice 285
+#define __NR_openat 286
+#define __NR_mkdirat 287
+#define __NR_mknodat 288
+#define __NR_fchownat 289
+#define __NR_futimesat 290
+#ifdef __powerpc64__
+#define __NR_newfstatat 291
+#else
+#define __NR_fstatat64 291
+#endif
+#define __NR_unlinkat 292
+#define __NR_renameat 293
+#define __NR_linkat 294
+#define __NR_symlinkat 295
+#define __NR_readlinkat 296
+#define __NR_fchmodat 297
+#define __NR_faccessat 298
+#define __NR_get_robust_list 299
+#define __NR_set_robust_list 300
+#define __NR_move_pages 301
+#define __NR_getcpu 302
+#define __NR_epoll_pwait 303
+#define __NR_utimensat 304
+#define __NR_signalfd 305
+#define __NR_timerfd_create 306
+#define __NR_eventfd 307
+#define __NR_sync_file_range2 308
+#define __NR_fallocate 309
+#define __NR_subpage_prot 310
+#define __NR_timerfd_settime 311
+#define __NR_timerfd_gettime 312
+#define __NR_signalfd4 313
+#define __NR_eventfd2 314
+#define __NR_epoll_create1 315
+#define __NR_dup3 316
+#define __NR_pipe2 317
+#define __NR_inotify_init1 318
+#define __NR_perf_event_open 319
+#define __NR_preadv 320
+#define __NR_pwritev 321
+#define __NR_rt_tgsigqueueinfo 322
+#define __NR_fanotify_init 323
+#define __NR_fanotify_mark 324
+#define __NR_prlimit64 325
+#define __NR_socket 326
+#define __NR_bind 327
+#define __NR_connect 328
+#define __NR_listen 329
+#define __NR_accept 330
+#define __NR_getsockname 331
+#define __NR_getpeername 332
+#define __NR_socketpair 333
+#define __NR_send 334
+#define __NR_sendto 335
+#define __NR_recv 336
+#define __NR_recvfrom 337
+#define __NR_shutdown 338
+#define __NR_setsockopt 339
+#define __NR_getsockopt 340
+#define __NR_sendmsg 341
+#define __NR_recvmsg 342
+#define __NR_recvmmsg 343
+#define __NR_accept4 344
+#define __NR_name_to_handle_at 345
+#define __NR_open_by_handle_at 346
+#define __NR_clock_adjtime 347
+#define __NR_syncfs 348
+#define __NR_sendmmsg 349
+#define __NR_setns 350
+#define __NR_process_vm_readv 351
+#define __NR_process_vm_writev 352
+#define __NR_finit_module 353
+#define __NR_kcmp 354
+#define __NR_sched_setattr 355
+#define __NR_sched_getattr 356
+#define __NR_renameat2 357
+#define __NR_seccomp 358
+#define __NR_getrandom 359
+#define __NR_memfd_create 360
+#define __NR_bpf 361
+#define __NR_execveat 362
+#define __NR_switch_endian 363
+#define __NR_userfaultfd 364
+#define __NR_membarrier 365
+#define __NR_mlock2 378
+#define __NR_copy_file_range 379
+#define __NR_preadv2 380
+#define __NR_pwritev2 381
+#define __NR_kexec_file_load 382
+#define __NR_statx 383
+#define __NR_pkey_alloc 384
+#define __NR_pkey_free 385
+#define __NR_pkey_mprotect 386
+
+#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index b24afc0e6e81..6a12bbf39f7b 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -315,12 +315,8 @@ int filename__read_int(const char *filename, int *value)
return err;
}
-/*
- * Parses @value out of @filename with strtoull.
- * By using 0 for base, the strtoull detects the
- * base automatically (see man strtoull).
- */
-int filename__read_ull(const char *filename, unsigned long long *value)
+static int filename__read_ull_base(const char *filename,
+ unsigned long long *value, int base)
{
char line[64];
int fd = open(filename, O_RDONLY), err = -1;
@@ -329,7 +325,7 @@ int filename__read_ull(const char *filename, unsigned long long *value)
return -1;
if (read(fd, line, sizeof(line)) > 0) {
- *value = strtoull(line, NULL, 0);
+ *value = strtoull(line, NULL, base);
if (*value != ULLONG_MAX)
err = 0;
}
@@ -338,6 +334,25 @@ int filename__read_ull(const char *filename, unsigned long long *value)
return err;
}
+/*
+ * Parses @value out of @filename with strtoull.
+ * By using 16 for base to treat the number as hex.
+ */
+int filename__read_xll(const char *filename, unsigned long long *value)
+{
+ return filename__read_ull_base(filename, value, 16);
+}
+
+/*
+ * Parses @value out of @filename with strtoull.
+ * By using 0 for base, the strtoull detects the
+ * base automatically (see man strtoull).
+ */
+int filename__read_ull(const char *filename, unsigned long long *value)
+{
+ return filename__read_ull_base(filename, value, 0);
+}
+
#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */
int filename__read_str(const char *filename, char **buf, size_t *sizep)
@@ -417,7 +432,8 @@ int procfs__read_str(const char *entry, char **buf, size_t *sizep)
return filename__read_str(path, buf, sizep);
}
-int sysfs__read_ull(const char *entry, unsigned long long *value)
+static int sysfs__read_ull_base(const char *entry,
+ unsigned long long *value, int base)
{
char path[PATH_MAX];
const char *sysfs = sysfs__mountpoint();
@@ -427,7 +443,17 @@ int sysfs__read_ull(const char *entry, unsigned long long *value)
snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
- return filename__read_ull(path, value);
+ return filename__read_ull_base(path, value, base);
+}
+
+int sysfs__read_xll(const char *entry, unsigned long long *value)
+{
+ return sysfs__read_ull_base(entry, value, 16);
+}
+
+int sysfs__read_ull(const char *entry, unsigned long long *value)
+{
+ return sysfs__read_ull_base(entry, value, 0);
}
int sysfs__read_int(const char *entry, int *value)
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index dda49deefb52..92d03b8396b1 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -30,6 +30,7 @@ FS(bpf_fs)
int filename__read_int(const char *filename, int *value);
int filename__read_ull(const char *filename, unsigned long long *value);
+int filename__read_xll(const char *filename, unsigned long long *value);
int filename__read_str(const char *filename, char **buf, size_t *sizep);
int filename__write_int(const char *filename, int value);
@@ -39,6 +40,7 @@ int procfs__read_str(const char *entry, char **buf, size_t *sizep);
int sysctl__read_int(const char *sysctl, int *value);
int sysfs__read_int(const char *entry, int *value);
int sysfs__read_ull(const char *entry, unsigned long long *value);
+int sysfs__read_xll(const char *entry, unsigned long long *value);
int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
int sysfs__read_bool(const char *entry, bool *value);
diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c
index 914cb8e3d40b..689b6a130dd7 100644
--- a/tools/lib/symbol/kallsyms.c
+++ b/tools/lib/symbol/kallsyms.c
@@ -38,6 +38,10 @@ int kallsyms__parse(const char *filename, void *arg,
len = hex2u64(line, &start);
+ /* Skip the line if we failed to parse the address. */
+ if (!len)
+ continue;
+
len++;
if (len + 2 >= line_len)
continue;
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index c635eab6af54..292809c3c0ca 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -21,7 +21,7 @@ If there is no debug info in the object, then annotated assembly is displayed.
OPTIONS
-------
-i::
---input=::
+--input=<file>::
Input file name. (default: perf.data unless stdin is a fifo)
-d::
@@ -69,7 +69,7 @@ OPTIONS
--stdio:: Use the stdio interface.
---stdio-color::
+--stdio-color=<mode>::
'always', 'never' or 'auto', allowing configuring color output
via the command line, in addition to via "color.ui" .perfconfig.
Use '--stdio-color always' to generate color even when redirecting
@@ -84,7 +84,7 @@ OPTIONS
--gtk:: Use the GTK interface.
-C::
---cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+--cpu=<cpu>:: Only report samples for the list of CPUs provided. Multiple CPUs can
be provided as a comma-separated list with no space: 0,1. Ranges of
CPUs are specified with -: 0-2. Default is to report samples on all
CPUs.
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
index 479fc3261a50..85b8ac695c87 100644
--- a/tools/perf/Documentation/perf-kmem.txt
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -25,6 +25,10 @@ OPTIONS
--input=<file>::
Select the input file (default: perf.data unless stdin is a fifo)
+-f::
+--force::
+ Don't do ownership validation
+
-v::
--verbose::
Be more verbose. (show symbol address, etc)
@@ -61,7 +65,7 @@ OPTIONS
default, but this option shows live (currently allocated) pages
instead. (This option works with --page option only)
---time::
+--time=<start>,<stop>::
Only analyze samples within given time window: <start>,<stop>. Times
have the format seconds.microseconds. If start is not given (i.e., time
string is ',x.y') then analysis starts at the beginning of the file. If
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 4be08a1e3f8d..b0211410969b 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -28,6 +28,10 @@ OPTIONS
<command>...::
Any command you can specify in a shell.
+-f::
+--force::
+ Don't do ownership validation
+
-t::
--type=::
Select the memory operation type: load or store (default: load,store)
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 907e505b6309..cba16d8a970e 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -354,7 +354,8 @@ OPTIONS
Path to objdump binary.
--group::
- Show event group information together.
+ Show event group information together. It forces group output also
+ if there are no groups defined in data file.
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
@@ -367,7 +368,7 @@ OPTIONS
Use the data addresses of samples in addition to instruction addresses
to build the histograms. To generate meaningful output, the perf.data
file must have been obtained using perf record -d -W and using a
- special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See
+ special event -e cpu/mem-loads/p or -e cpu/mem-stores/p. See
'perf mem' for simpler access.
--percent-limit::
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 7730c1d2b5d3..36ec0257f8d3 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -303,6 +303,9 @@ OPTIONS
--show-lost-events
Display lost events i.e. events of type PERF_RECORD_LOST.
+--show-round-events
+ Display finished round events i.e. events of type PERF_RECORD_FINISHED_ROUND.
+
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 823fce7674bb..2bbe79a50d3c 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -146,6 +146,16 @@ Print count deltas every N milliseconds (minimum: 10ms)
The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution.
example: 'perf stat -I 1000 -e cycles -a sleep 5'
+--interval-count times::
+Print count deltas for fixed number of times.
+This option should be used together with "-I" option.
+ example: 'perf stat -I 1000 --interval-count 2 -e cycles -a'
+
+--timeout msecs::
+Stop the 'perf stat' session and print count deltas after N milliseconds (minimum: 10 ms).
+This option is not supported with the "-I" option.
+ example: 'perf stat --time 2000 -e cycles -a'
+
--metric-only::
Only print computed metrics. Print them in a single line.
Don't show any raw values. Not supported with --per-thread.
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 0dfdaa9fa81e..577a5d2988fe 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -27,6 +27,8 @@ NO_SYSCALL_TABLE := 1
# Additional ARCH settings for ppc
ifeq ($(SRCARCH),powerpc)
NO_PERF_REGS := 0
+ NO_SYSCALL_TABLE := 0
+ CFLAGS += -I$(OUTPUT)arch/powerpc/include/generated
LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
endif
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
index 2323581b157d..fa639e3e52ac 100644
--- a/tools/perf/arch/arm/util/auxtrace.c
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -68,7 +68,7 @@ struct auxtrace_record
bool found_spe = false;
static struct perf_pmu **arm_spe_pmus = NULL;
static int nr_spes = 0;
- int i;
+ int i = 0;
if (!evlist)
return NULL;
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index fbfc055d3f4d..5c655ad4621e 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -298,12 +298,17 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
{
int i;
int etmv3 = 0, etmv4 = 0;
- const struct cpu_map *cpus = evlist->cpus;
+ struct cpu_map *event_cpus = evlist->cpus;
+ struct cpu_map *online_cpus = cpu_map__new(NULL);
/* cpu map is not empty, we have specific CPUs to work with */
- if (!cpu_map__empty(cpus)) {
- for (i = 0; i < cpu_map__nr(cpus); i++) {
- if (cs_etm_is_etmv4(itr, cpus->map[i]))
+ if (!cpu_map__empty(event_cpus)) {
+ for (i = 0; i < cpu__max_cpu(); i++) {
+ if (!cpu_map__has(event_cpus, i) ||
+ !cpu_map__has(online_cpus, i))
+ continue;
+
+ if (cs_etm_is_etmv4(itr, i))
etmv4++;
else
etmv3++;
@@ -311,6 +316,9 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
} else {
/* get configuration for all CPUs in the system */
for (i = 0; i < cpu__max_cpu(); i++) {
+ if (!cpu_map__has(online_cpus, i))
+ continue;
+
if (cs_etm_is_etmv4(itr, i))
etmv4++;
else
@@ -318,6 +326,8 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
}
}
+ cpu_map__put(online_cpus);
+
return (CS_ETM_HEADER_SIZE +
(etmv4 * CS_ETMV4_PRIV_SIZE) +
(etmv3 * CS_ETMV3_PRIV_SIZE));
@@ -447,7 +457,9 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
int i;
u32 offset;
u64 nr_cpu, type;
- const struct cpu_map *cpus = session->evlist->cpus;
+ struct cpu_map *cpu_map;
+ struct cpu_map *event_cpus = session->evlist->cpus;
+ struct cpu_map *online_cpus = cpu_map__new(NULL);
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
@@ -458,8 +470,21 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
if (!session->evlist->nr_mmaps)
return -EINVAL;
- /* If the cpu_map is empty all CPUs are involved */
- nr_cpu = cpu_map__empty(cpus) ? cpu__max_cpu() : cpu_map__nr(cpus);
+ /* If the cpu_map is empty all online CPUs are involved */
+ if (cpu_map__empty(event_cpus)) {
+ cpu_map = online_cpus;
+ } else {
+ /* Make sure all specified CPUs are online */
+ for (i = 0; i < cpu_map__nr(event_cpus); i++) {
+ if (cpu_map__has(event_cpus, i) &&
+ !cpu_map__has(online_cpus, i))
+ return -EINVAL;
+ }
+
+ cpu_map = event_cpus;
+ }
+
+ nr_cpu = cpu_map__nr(cpu_map);
/* Get PMU type as dynamically assigned by the core */
type = cs_etm_pmu->type;
@@ -472,15 +497,11 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
offset = CS_ETM_SNAPSHOT + 1;
- /* cpu map is not empty, we have specific CPUs to work with */
- if (!cpu_map__empty(cpus)) {
- for (i = 0; i < cpu_map__nr(cpus) && offset < priv_size; i++)
- cs_etm_get_metadata(cpus->map[i], &offset, itr, info);
- } else {
- /* get configuration for all CPUs in the system */
- for (i = 0; i < cpu__max_cpu(); i++)
+ for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++)
+ if (cpu_map__has(cpu_map, i))
cs_etm_get_metadata(i, &offset, itr, info);
- }
+
+ cpu_map__put(online_cpus);
return 0;
}
diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 42dab7c8f508..a111239df182 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -6,3 +6,28 @@ endif
HAVE_KVM_STAT_SUPPORT := 1
PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
PERF_HAVE_JITDUMP := 1
+
+#
+# Syscall table generation for perf
+#
+
+out := $(OUTPUT)arch/powerpc/include/generated/asm
+header32 := $(out)/syscalls_32.c
+header64 := $(out)/syscalls_64.c
+sysdef := $(srctree)/tools/arch/powerpc/include/uapi/asm/unistd.h
+sysprf := $(srctree)/tools/perf/arch/powerpc/entry/syscalls/
+systbl := $(sysprf)/mksyscalltbl
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+$(header64): $(sysdef) $(systbl)
+ $(Q)$(SHELL) '$(systbl)' '64' '$(CC)' $(sysdef) > $@
+
+$(header32): $(sysdef) $(systbl)
+ $(Q)$(SHELL) '$(systbl)' '32' '$(CC)' $(sysdef) > $@
+
+clean::
+ $(call QUIET_CLEAN, powerpc) $(RM) $(header32) $(header64)
+
+archheaders: $(header32) $(header64)
diff --git a/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl b/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl
new file mode 100755
index 000000000000..ef52e1dd694b
--- /dev/null
+++ b/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl
@@ -0,0 +1,37 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generate system call table for perf. Derived from
+# s390 script.
+#
+# Copyright IBM Corp. 2017
+# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
+
+wordsize=$1
+gcc=$2
+input=$3
+
+if ! test -r $input; then
+ echo "Could not read input file" >&2
+ exit 1
+fi
+
+create_table()
+{
+ local wordsize=$1
+ local max_nr
+
+ echo "static const char *syscalltbl_powerpc_${wordsize}[] = {"
+ while read sc nr; do
+ printf '\t[%d] = "%s",\n' $nr $sc
+ max_nr=$nr
+ done
+ echo '};'
+ echo "#define SYSCALLTBL_POWERPC_${wordsize}_MAX_ID $max_nr"
+}
+
+$gcc -m${wordsize} -E -dM -x c $input \
+ |sed -ne 's/^#define __NR_//p' \
+ |sort -t' ' -k2 -nu \
+ |create_table ${wordsize}
diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c
index 8c72b44444cb..01df9d8303e1 100644
--- a/tools/perf/arch/s390/annotate/instructions.c
+++ b/tools/perf/arch/s390/annotate/instructions.c
@@ -23,12 +23,37 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na
return ops;
}
+static int s390__cpuid_parse(struct arch *arch, char *cpuid)
+{
+ unsigned int family;
+ char model[16], model_c[16], cpumf_v[16], cpumf_a[16];
+ int ret;
+
+ /*
+ * cpuid string format:
+ * "IBM,family,model-capacity,model[,cpum_cf-version,cpum_cf-authorization]"
+ */
+ ret = sscanf(cpuid, "%*[^,],%u,%[^,],%[^,],%[^,],%s", &family, model_c,
+ model, cpumf_v, cpumf_a);
+ if (ret >= 2) {
+ arch->family = family;
+ arch->model = 0;
+ return 0;
+ }
+
+ return -1;
+}
+
static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
{
+ int err = 0;
+
if (!arch->initialized) {
arch->initialized = true;
arch->associate_instruction_ops = s390__associate_ins_ops;
+ if (cpuid)
+ err = s390__cpuid_parse(arch, cpuid);
}
- return 0;
+ return err;
}
diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c
index 9fa6c3e5782c..231294b80dc4 100644
--- a/tools/perf/arch/s390/util/header.c
+++ b/tools/perf/arch/s390/util/header.c
@@ -1,8 +1,9 @@
/*
* Implementation of get_cpuid().
*
- * Copyright 2014 IBM Corp.
+ * Copyright IBM Corp. 2014, 2018
* Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
+ * Thomas Richter <tmricht@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -13,16 +14,153 @@
#include <unistd.h>
#include <stdio.h>
#include <string.h>
+#include <ctype.h>
#include "../../util/header.h"
+#include "../../util/util.h"
+
+#define SYSINFO_MANU "Manufacturer:"
+#define SYSINFO_TYPE "Type:"
+#define SYSINFO_MODEL "Model:"
+#define SRVLVL_CPUMF "CPU-MF:"
+#define SRVLVL_VERSION "version="
+#define SRVLVL_AUTHORIZATION "authorization="
+#define SYSINFO "/proc/sysinfo"
+#define SRVLVL "/proc/service_levels"
int get_cpuid(char *buffer, size_t sz)
{
- const char *cpuid = "IBM/S390";
+ char *cp, *line = NULL, *line2;
+ char type[8], model[33], version[8], manufacturer[32], authorization[8];
+ int tpsize = 0, mdsize = 0, vssize = 0, mfsize = 0, atsize = 0;
+ int read;
+ unsigned long line_sz;
+ size_t nbytes;
+ FILE *sysinfo;
+
+ /*
+ * Scan /proc/sysinfo line by line and read out values for
+ * Manufacturer:, Type: and Model:, for example:
+ * Manufacturer: IBM
+ * Type: 2964
+ * Model: 702 N96
+ * The first word is the Model Capacity and the second word is
+ * Model (can be omitted). Both words have a maximum size of 16
+ * bytes.
+ */
+ memset(manufacturer, 0, sizeof(manufacturer));
+ memset(type, 0, sizeof(type));
+ memset(model, 0, sizeof(model));
+ memset(version, 0, sizeof(version));
+ memset(authorization, 0, sizeof(authorization));
+
+ sysinfo = fopen(SYSINFO, "r");
+ if (sysinfo == NULL)
+ return -1;
+
+ while ((read = getline(&line, &line_sz, sysinfo)) != -1) {
+ if (!strncmp(line, SYSINFO_MANU, strlen(SYSINFO_MANU))) {
+ line2 = line + strlen(SYSINFO_MANU);
+
+ while ((cp = strtok_r(line2, "\n ", &line2))) {
+ mfsize += scnprintf(manufacturer + mfsize,
+ sizeof(manufacturer) - mfsize, "%s", cp);
+ }
+ }
+
+ if (!strncmp(line, SYSINFO_TYPE, strlen(SYSINFO_TYPE))) {
+ line2 = line + strlen(SYSINFO_TYPE);
- if (strlen(cpuid) + 1 > sz)
+ while ((cp = strtok_r(line2, "\n ", &line2))) {
+ tpsize += scnprintf(type + tpsize,
+ sizeof(type) - tpsize, "%s", cp);
+ }
+ }
+
+ if (!strncmp(line, SYSINFO_MODEL, strlen(SYSINFO_MODEL))) {
+ line2 = line + strlen(SYSINFO_MODEL);
+
+ while ((cp = strtok_r(line2, "\n ", &line2))) {
+ mdsize += scnprintf(model + mdsize, sizeof(type) - mdsize,
+ "%s%s", model[0] ? "," : "", cp);
+ }
+ break;
+ }
+ }
+ fclose(sysinfo);
+
+ /* Missing manufacturer, type or model information should not happen */
+ if (!manufacturer[0] || !type[0] || !model[0])
return -1;
- strcpy(buffer, cpuid);
- return 0;
+ /*
+ * Scan /proc/service_levels and return the CPU-MF counter facility
+ * version number and authorization level.
+ * Optional, does not exist on z/VM guests.
+ */
+ sysinfo = fopen(SRVLVL, "r");
+ if (sysinfo == NULL)
+ goto skip_sysinfo;
+ while ((read = getline(&line, &line_sz, sysinfo)) != -1) {
+ if (strncmp(line, SRVLVL_CPUMF, strlen(SRVLVL_CPUMF)))
+ continue;
+
+ line2 = line + strlen(SRVLVL_CPUMF);
+ while ((cp = strtok_r(line2, "\n ", &line2))) {
+ if (!strncmp(cp, SRVLVL_VERSION,
+ strlen(SRVLVL_VERSION))) {
+ char *sep = strchr(cp, '=');
+
+ vssize += scnprintf(version + vssize,
+ sizeof(version) - vssize, "%s", sep + 1);
+ }
+ if (!strncmp(cp, SRVLVL_AUTHORIZATION,
+ strlen(SRVLVL_AUTHORIZATION))) {
+ char *sep = strchr(cp, '=');
+
+ atsize += scnprintf(authorization + atsize,
+ sizeof(authorization) - atsize, "%s", sep + 1);
+ }
+ }
+ }
+ fclose(sysinfo);
+
+skip_sysinfo:
+ free(line);
+
+ if (version[0] && authorization[0] )
+ nbytes = snprintf(buffer, sz, "%s,%s,%s,%s,%s",
+ manufacturer, type, model, version,
+ authorization);
+ else
+ nbytes = snprintf(buffer, sz, "%s,%s,%s", manufacturer, type,
+ model);
+ return (nbytes >= sz) ? -1 : 0;
+}
+
+char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+{
+ char *buf = malloc(128);
+
+ if (buf && get_cpuid(buf, 128) < 0)
+ zfree(&buf);
+ return buf;
+}
+
+/*
+ * Compare the cpuid string returned by get_cpuid() function
+ * with the name generated by the jevents file read from
+ * pmu-events/arch/s390/mapfile.csv.
+ *
+ * Parameter mapcpuid is the cpuid as stored in the
+ * pmu-events/arch/s390/mapfile.csv. This is just the type number.
+ * Parameter cpuid is the cpuid returned by function get_cpuid().
+ */
+int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
+{
+ char *cp = strchr(cpuid, ',');
+
+ if (cp == NULL)
+ return -1;
+ return strncmp(cp + 1, mapcpuid, strlen(mapcpuid));
}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index bf4ca749d1ac..907267206973 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1803,7 +1803,7 @@ int cmd_record(int argc, const char **argv)
err = target__validate(&rec->opts.target);
if (err) {
target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
- ui__warning("%s", errbuf);
+ ui__warning("%s\n", errbuf);
}
err = target__parse_uid(&rec->opts.target);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 4ad5dc649716..1eedb1815c4c 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -614,6 +614,7 @@ static int stats_print(struct report *rep)
static void tasks_setup(struct report *rep)
{
memset(&rep->tool, 0, sizeof(rep->tool));
+ rep->tool.ordered_events = true;
if (rep->mmaps_mode) {
rep->tool.mmap = perf_event__process_mmap;
rep->tool.mmap2 = perf_event__process_mmap2;
@@ -937,6 +938,7 @@ int cmd_report(int argc, const char **argv)
"perf report [<options>]",
NULL
};
+ bool group_set = false;
struct report report = {
.tool = {
.sample = process_sample_event,
@@ -1056,7 +1058,7 @@ int cmd_report(int argc, const char **argv)
"Specify disassembler style (e.g. -M intel for intel syntax)"),
OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
"Show a column with the sum of periods"),
- OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
+ OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &group_set,
"Show event group information together"),
OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "",
"use branch records for per branch histogram filling",
@@ -1173,6 +1175,9 @@ repeat:
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);
+ if (group_set && !session->evlist->nr_groups)
+ perf_evlist__set_leader(session->evlist);
+
if (itrace_synth_opts.last_branch)
has_br_stack = true;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ab19a6ee4093..cce926aeb0c0 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1489,6 +1489,7 @@ struct perf_script {
bool show_switch_events;
bool show_namespace_events;
bool show_lost_events;
+ bool show_round_events;
bool allocated;
bool per_event_dump;
struct cpu_map *cpus;
@@ -2104,6 +2105,16 @@ process_lost_event(struct perf_tool *tool,
return 0;
}
+static int
+process_finished_round_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct ordered_events *oe __maybe_unused)
+
+{
+ perf_event__fprintf(event, stdout);
+ return 0;
+}
+
static void sig_handler(int sig __maybe_unused)
{
session_done = 1;
@@ -2200,6 +2211,10 @@ static int __cmd_script(struct perf_script *script)
script->tool.namespaces = process_namespaces_event;
if (script->show_lost_events)
script->tool.lost = process_lost_event;
+ if (script->show_round_events) {
+ script->tool.ordered_events = false;
+ script->tool.finished_round = process_finished_round_event;
+ }
if (perf_script__setup_per_event_dump(script)) {
pr_err("Couldn't create the per event dump files\n");
@@ -3139,6 +3154,8 @@ int cmd_script(int argc, const char **argv)
"Show namespace events (if recorded)"),
OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events,
"Show lost events (if recorded)"),
+ OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events,
+ "Show round events (if recorded)"),
OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump,
"Dump trace output to files named by the monitored events"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 98bf9d32f222..2d49eccf98f2 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -168,6 +168,7 @@ static struct timespec ref_time;
static struct cpu_map *aggr_map;
static aggr_get_id_t aggr_get_id;
static bool append_file;
+static bool interval_count;
static const char *output_name;
static int output_fd;
static int print_free_counters_hint;
@@ -571,6 +572,8 @@ static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
static int __run_perf_stat(int argc, const char **argv)
{
int interval = stat_config.interval;
+ int times = stat_config.times;
+ int timeout = stat_config.timeout;
char msg[BUFSIZ];
unsigned long long t0, t1;
struct perf_evsel *counter;
@@ -584,6 +587,9 @@ static int __run_perf_stat(int argc, const char **argv)
if (interval) {
ts.tv_sec = interval / USEC_PER_MSEC;
ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
+ } else if (timeout) {
+ ts.tv_sec = timeout / USEC_PER_MSEC;
+ ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
} else {
ts.tv_sec = 1;
ts.tv_nsec = 0;
@@ -696,10 +702,14 @@ try_again:
perf_evlist__start_workload(evsel_list);
enable_counters();
- if (interval) {
+ if (interval || timeout) {
while (!waitpid(child_pid, &status, WNOHANG)) {
nanosleep(&ts, NULL);
+ if (timeout)
+ break;
process_interval();
+ if (interval_count && !(--times))
+ break;
}
}
waitpid(child_pid, &status, 0);
@@ -716,8 +726,13 @@ try_again:
enable_counters();
while (!done) {
nanosleep(&ts, NULL);
- if (interval)
+ if (timeout)
+ break;
+ if (interval) {
process_interval();
+ if (interval_count && !(--times))
+ break;
+ }
}
}
@@ -1891,6 +1906,10 @@ static const struct option stat_options[] = {
"command to run after to the measured command"),
OPT_UINTEGER('I', "interval-print", &stat_config.interval,
"print counts at regular interval in ms (>= 10)"),
+ OPT_INTEGER(0, "interval-count", &stat_config.times,
+ "print counts for fixed number of times"),
+ OPT_UINTEGER(0, "timeout", &stat_config.timeout,
+ "stop workload and print counts after a timeout period in ms (>= 10ms)"),
OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
@@ -2688,7 +2707,7 @@ int cmd_stat(int argc, const char **argv)
int status = -EINVAL, run_idx;
const char *mode;
FILE *output = stderr;
- unsigned int interval;
+ unsigned int interval, timeout;
const char * const stat_subcommands[] = { "record", "report" };
setlocale(LC_ALL, "");
@@ -2719,6 +2738,7 @@ int cmd_stat(int argc, const char **argv)
return __cmd_report(argc, argv);
interval = stat_config.interval;
+ timeout = stat_config.timeout;
/*
* For record command the -o is already taken care of.
@@ -2871,6 +2891,33 @@ int cmd_stat(int argc, const char **argv)
"Please proceed with caution.\n");
}
+ if (stat_config.times && interval)
+ interval_count = true;
+ else if (stat_config.times && !interval) {
+ pr_err("interval-count option should be used together with "
+ "interval-print.\n");
+ parse_options_usage(stat_usage, stat_options, "interval-count", 0);
+ parse_options_usage(stat_usage, stat_options, "I", 1);
+ goto out;
+ }
+
+ if (timeout && timeout < 100) {
+ if (timeout < 10) {
+ pr_err("timeout must be >= 10ms.\n");
+ parse_options_usage(stat_usage, stat_options, "timeout", 0);
+ goto out;
+ } else
+ pr_warning("timeout < 100ms. "
+ "The overhead percentage could be high in some cases. "
+ "Please proceed with caution.\n");
+ }
+ if (timeout && interval) {
+ pr_err("timeout option is not supported with interval-print.\n");
+ parse_options_usage(stat_usage, stat_options, "timeout", 0);
+ parse_options_usage(stat_usage, stat_options, "I", 1);
+ goto out;
+ }
+
if (perf_evlist__alloc_stats(evsel_list, interval))
goto out;
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 790ec25919a0..bf206ffe5c45 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -42,6 +42,7 @@ arch/parisc/include/uapi/asm/errno.h
arch/powerpc/include/uapi/asm/errno.h
arch/sparc/include/uapi/asm/errno.h
arch/x86/include/uapi/asm/errno.h
+arch/powerpc/include/uapi/asm/unistd.h
include/asm-generic/bitops/arch_hweight.h
include/asm-generic/bitops/const_hweight.h
include/asm-generic/bitops/__fls.h
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 3bf7b145b826..c7115d369511 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -482,6 +482,34 @@ static void fs_something(void)
}
}
+static const char *do_determine_event(bool excl_kernel)
+{
+ const char *event = excl_kernel ? "cycles:u" : "cycles";
+
+#ifdef __s390x__
+ char cpuid[128], model[16], model_c[16], cpum_cf_v[16];
+ unsigned int family;
+ int ret, cpum_cf_a;
+
+ if (get_cpuid(cpuid, sizeof(cpuid)))
+ goto out_clocks;
+ ret = sscanf(cpuid, "%*[^,],%u,%[^,],%[^,],%[^,],%x", &family, model_c,
+ model, cpum_cf_v, &cpum_cf_a);
+ if (ret != 5) /* Not available */
+ goto out_clocks;
+ if (excl_kernel && (cpum_cf_a & 4))
+ return event;
+ if (!excl_kernel && (cpum_cf_a & 2))
+ return event;
+
+ /* Fall through: missing authorization */
+out_clocks:
+ event = excl_kernel ? "cpu-clock:u" : "cpu-clock";
+
+#endif
+ return event;
+}
+
static void do_something(void)
{
fs_something();
@@ -592,10 +620,7 @@ static int do_test_code_reading(bool try_kcore)
perf_evlist__set_maps(evlist, cpus, threads);
- if (excl_kernel)
- str = "cycles:u";
- else
- str = "cycles";
+ str = do_determine_event(excl_kernel);
pr_debug("Parsing event '%s'\n", str);
ret = parse_events(evlist, str, NULL);
if (ret < 0) {
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 260418969120..2f008067d989 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -37,6 +37,19 @@ static int init_live_machine(struct machine *machine)
mmap_handler, machine, true, 500);
}
+/*
+ * We need to keep these functions global, despite the
+ * fact that they are used only locally in this object,
+ * in order to keep them around even if the binary is
+ * stripped. If they are gone, the unwind check for
+ * symbol fails.
+ */
+int test_dwarf_unwind__thread(struct thread *thread);
+int test_dwarf_unwind__compare(void *p1, void *p2);
+int test_dwarf_unwind__krava_3(struct thread *thread);
+int test_dwarf_unwind__krava_2(struct thread *thread);
+int test_dwarf_unwind__krava_1(struct thread *thread);
+
#define MAX_STACK 8
static int unwind_entry(struct unwind_entry *entry, void *arg)
@@ -45,12 +58,12 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
char *symbol = entry->sym ? entry->sym->name : NULL;
static const char *funcs[MAX_STACK] = {
"test__arch_unwind_sample",
- "unwind_thread",
- "compare",
+ "test_dwarf_unwind__thread",
+ "test_dwarf_unwind__compare",
"bsearch",
- "krava_3",
- "krava_2",
- "krava_1",
+ "test_dwarf_unwind__krava_3",
+ "test_dwarf_unwind__krava_2",
+ "test_dwarf_unwind__krava_1",
"test__dwarf_unwind"
};
/*
@@ -77,7 +90,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
return strcmp((const char *) symbol, funcs[idx]);
}
-static noinline int unwind_thread(struct thread *thread)
+noinline int test_dwarf_unwind__thread(struct thread *thread)
{
struct perf_sample sample;
unsigned long cnt = 0;
@@ -108,7 +121,7 @@ static noinline int unwind_thread(struct thread *thread)
static int global_unwind_retval = -INT_MAX;
-static noinline int compare(void *p1, void *p2)
+noinline int test_dwarf_unwind__compare(void *p1, void *p2)
{
/* Any possible value should be 'thread' */
struct thread *thread = *(struct thread **)p1;
@@ -117,17 +130,17 @@ static noinline int compare(void *p1, void *p2)
/* Call unwinder twice for both callchain orders. */
callchain_param.order = ORDER_CALLER;
- global_unwind_retval = unwind_thread(thread);
+ global_unwind_retval = test_dwarf_unwind__thread(thread);
if (!global_unwind_retval) {
callchain_param.order = ORDER_CALLEE;
- global_unwind_retval = unwind_thread(thread);
+ global_unwind_retval = test_dwarf_unwind__thread(thread);
}
}
return p1 - p2;
}
-static noinline int krava_3(struct thread *thread)
+noinline int test_dwarf_unwind__krava_3(struct thread *thread)
{
struct thread *array[2] = {thread, thread};
void *fp = &bsearch;
@@ -141,18 +154,19 @@ static noinline int krava_3(struct thread *thread)
size_t, int (*)(void *, void *));
_bsearch = fp;
- _bsearch(array, &thread, 2, sizeof(struct thread **), compare);
+ _bsearch(array, &thread, 2, sizeof(struct thread **),
+ test_dwarf_unwind__compare);
return global_unwind_retval;
}
-static noinline int krava_2(struct thread *thread)
+noinline int test_dwarf_unwind__krava_2(struct thread *thread)
{
- return krava_3(thread);
+ return test_dwarf_unwind__krava_3(thread);
}
-static noinline int krava_1(struct thread *thread)
+noinline int test_dwarf_unwind__krava_1(struct thread *thread)
{
- return krava_2(thread);
+ return test_dwarf_unwind__krava_2(thread);
}
int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unused)
@@ -189,7 +203,7 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu
goto out;
}
- err = krava_1(thread);
+ err = test_dwarf_unwind__krava_1(thread);
thread__put(thread);
out:
diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
index 30a950c9d407..1c16e56cd93e 100644
--- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
@@ -5,7 +5,7 @@ had_vfs_getname=$?
cleanup_probe_vfs_getname() {
if [ $had_vfs_getname -eq 1 ] ; then
- perf probe -q -d probe:vfs_getname
+ perf probe -q -d probe:vfs_getname*
fi
}
diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
index c446c894b297..8c4ab0b390c0 100755
--- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
@@ -21,12 +21,12 @@ trace_libc_inet_pton_backtrace() {
expected[3]=".*packets transmitted.*"
expected[4]="rtt min.*"
expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)"
- expected[6]=".*inet_pton[[:space:]]\($libc\)$"
+ expected[6]=".*inet_pton[[:space:]]\($libc|inlined\)$"
case "$(uname -m)" in
s390x)
eventattr='call-graph=dwarf'
- expected[7]="gaih_inet[[:space:]]\(inlined\)$"
- expected[8]="__GI_getaddrinfo[[:space:]]\(inlined\)$"
+ expected[7]="gaih_inet.*[[:space:]]\($libc|inlined\)$"
+ expected[8]="__GI_getaddrinfo[[:space:]]\($libc|inlined\)$"
expected[9]="main[[:space:]]\(.*/bin/ping.*\)$"
expected[10]="__libc_start_main[[:space:]]\($libc\)$"
expected[11]="_start[[:space:]]\(.*/bin/ping.*\)$"
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index f6789fb029d6..1e5adb65632a 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -56,7 +56,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
* be compacted against the list of modules found in the "vmlinux"
* code and with the one got from /proc/modules from the "kallsyms" code.
*/
- if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true) <= 0) {
+ if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type) <= 0) {
pr_debug("dso__load_kallsyms ");
goto out;
}
@@ -125,7 +125,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
if (pair && UM(pair->start) == mem_start) {
next_pair:
- if (strcmp(sym->name, pair->name) == 0) {
+ if (arch__compare_symbol_names(sym->name, pair->name) == 0) {
/*
* kallsyms don't have the symbol end, so we
* set that by using the next symbol start - 1,
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 286427975112..e2f666391ac4 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -319,6 +319,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
struct map_symbol *ms = ab->b.priv;
struct symbol *sym = ms->sym;
u8 pcnt_width = annotate_browser__pcnt_width(ab);
+ int width = 0;
/* PLT symbols contain external offsets */
if (strstr(sym->name, "@plt"))
@@ -340,13 +341,17 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
to = (u64)btarget->idx;
}
+ if (ab->have_cycles)
+ width = IPC_WIDTH + CYCLES_WIDTH;
+
ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS);
- __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width,
+ __ui_browser__line_arrow(browser,
+ pcnt_width + 2 + ab->addr_width + width,
from, to);
if (is_fused(ab, cursor)) {
ui_browser__mark_fused(browser,
- pcnt_width + 3 + ab->addr_width,
+ pcnt_width + 3 + ab->addr_width + width,
from - 1,
to > from ? true : false);
}
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 7f8553630c4d..537eadd81914 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -316,7 +316,6 @@ static int machine__write_buildid_table(struct machine *machine,
struct feat_fd *fd)
{
int err = 0;
- char nm[PATH_MAX];
struct dso *pos;
u16 kmisc = PERF_RECORD_MISC_KERNEL,
umisc = PERF_RECORD_MISC_USER;
@@ -338,9 +337,8 @@ static int machine__write_buildid_table(struct machine *machine,
name = pos->short_name;
name_len = pos->short_name_len;
} else if (dso__is_kcore(pos)) {
- machine__mmap_name(machine, nm, sizeof(nm));
- name = nm;
- name_len = strlen(nm);
+ name = machine->mmap_name;
+ name_len = strlen(name);
} else {
name = pos->long_name;
name_len = pos->long_name_len;
@@ -813,12 +811,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine)
bool is_kallsyms = dso__is_kallsyms(dso);
bool is_vdso = dso__is_vdso(dso);
const char *name = dso->long_name;
- char nm[PATH_MAX];
if (dso__is_kcore(dso)) {
is_kallsyms = true;
- machine__mmap_name(machine, nm, sizeof(nm));
- name = nm;
+ name = machine->mmap_name;
}
return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name,
dso->nsinfo, is_kallsyms, is_vdso);
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 1fb01849f1c7..640af88331b4 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -78,6 +78,8 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder)
{
ocsd_datapath_resp_t dp_ret;
+ decoder->prev_return = OCSD_RESP_CONT;
+
dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET,
0, 0, NULL, NULL);
if (OCSD_DATA_RESP_IS_FATAL(dp_ret))
@@ -253,16 +255,16 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
decoder->packet_count = 0;
for (i = 0; i < MAX_BUFFER; i++) {
decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL;
- decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL;
- decoder->packet_buffer[i].exc = false;
- decoder->packet_buffer[i].exc_ret = false;
- decoder->packet_buffer[i].cpu = INT_MIN;
+ decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL;
+ decoder->packet_buffer[i].last_instr_taken_branch = false;
+ decoder->packet_buffer[i].exc = false;
+ decoder->packet_buffer[i].exc_ret = false;
+ decoder->packet_buffer[i].cpu = INT_MIN;
}
}
static ocsd_datapath_resp_t
cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
- const ocsd_generic_trace_elem *elem,
const u8 trace_chan_id,
enum cs_etm_sample_type sample_type)
{
@@ -278,18 +280,16 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
return OCSD_RESP_FATAL_SYS_ERR;
et = decoder->tail;
+ et = (et + 1) & (MAX_BUFFER - 1);
+ decoder->tail = et;
+ decoder->packet_count++;
+
decoder->packet_buffer[et].sample_type = sample_type;
- decoder->packet_buffer[et].start_addr = elem->st_addr;
- decoder->packet_buffer[et].end_addr = elem->en_addr;
decoder->packet_buffer[et].exc = false;
decoder->packet_buffer[et].exc_ret = false;
decoder->packet_buffer[et].cpu = *((int *)inode->priv);
-
- /* Wrap around if need be */
- et = (et + 1) & (MAX_BUFFER - 1);
-
- decoder->tail = et;
- decoder->packet_count++;
+ decoder->packet_buffer[et].start_addr = 0xdeadbeefdeadbeefUL;
+ decoder->packet_buffer[et].end_addr = 0xdeadbeefdeadbeefUL;
if (decoder->packet_count == MAX_BUFFER - 1)
return OCSD_RESP_WAIT;
@@ -297,6 +297,47 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
return OCSD_RESP_CONT;
}
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
+ const ocsd_generic_trace_elem *elem,
+ const uint8_t trace_chan_id)
+{
+ int ret = 0;
+ struct cs_etm_packet *packet;
+
+ ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
+ CS_ETM_RANGE);
+ if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
+ return ret;
+
+ packet = &decoder->packet_buffer[decoder->tail];
+
+ packet->start_addr = elem->st_addr;
+ packet->end_addr = elem->en_addr;
+ switch (elem->last_i_type) {
+ case OCSD_INSTR_BR:
+ case OCSD_INSTR_BR_INDIRECT:
+ packet->last_instr_taken_branch = elem->last_instr_exec;
+ break;
+ case OCSD_INSTR_ISB:
+ case OCSD_INSTR_DSB_DMB:
+ case OCSD_INSTR_OTHER:
+ default:
+ packet->last_instr_taken_branch = false;
+ break;
+ }
+
+ return ret;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_trace_on(struct cs_etm_decoder *decoder,
+ const uint8_t trace_chan_id)
+{
+ return cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
+ CS_ETM_TRACE_ON);
+}
+
static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
const void *context,
const ocsd_trc_index_t indx __maybe_unused,
@@ -313,12 +354,13 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
decoder->trace_on = false;
break;
case OCSD_GEN_TRC_ELEM_TRACE_ON:
+ resp = cs_etm_decoder__buffer_trace_on(decoder,
+ trace_chan_id);
decoder->trace_on = true;
break;
case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
- resp = cs_etm_decoder__buffer_packet(decoder, elem,
- trace_chan_id,
- CS_ETM_RANGE);
+ resp = cs_etm_decoder__buffer_range(decoder, elem,
+ trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_EXCEPTION:
decoder->packet_buffer[decoder->tail].exc = true;
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 3d2e6205d186..743f5f444304 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -24,12 +24,14 @@ struct cs_etm_buffer {
enum cs_etm_sample_type {
CS_ETM_RANGE = 1 << 0,
+ CS_ETM_TRACE_ON = 1 << 1,
};
struct cs_etm_packet {
enum cs_etm_sample_type sample_type;
u64 start_addr;
u64 end_addr;
+ u8 last_instr_taken_branch;
u8 exc;
u8 exc_ret;
int cpu;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index b9f0a53dfa65..1b0d422373be 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -32,6 +32,14 @@
#define MAX_TIMESTAMP (~0ULL)
+/*
+ * A64 instructions are always 4 bytes
+ *
+ * Only A64 is supported, so can use this constant for converting between
+ * addresses and instruction counts, calculting offsets etc
+ */
+#define A64_INSTR_SIZE 4
+
struct cs_etm_auxtrace {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
@@ -45,11 +53,15 @@ struct cs_etm_auxtrace {
u8 snapshot_mode;
u8 data_queued;
u8 sample_branches;
+ u8 sample_instructions;
int num_cpu;
u32 auxtrace_type;
u64 branches_sample_type;
u64 branches_id;
+ u64 instructions_sample_type;
+ u64 instructions_sample_period;
+ u64 instructions_id;
u64 **metadata;
u64 kernel_start;
unsigned int pmu_type;
@@ -68,6 +80,12 @@ struct cs_etm_queue {
u64 time;
u64 timestamp;
u64 offset;
+ u64 period_instructions;
+ struct branch_stack *last_branch;
+ struct branch_stack *last_branch_rb;
+ size_t last_branch_pos;
+ struct cs_etm_packet *prev_packet;
+ struct cs_etm_packet *packet;
};
static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
@@ -174,6 +192,16 @@ static void cs_etm__free_queue(void *priv)
{
struct cs_etm_queue *etmq = priv;
+ if (!etmq)
+ return;
+
+ thread__zput(etmq->thread);
+ cs_etm_decoder__free(etmq->decoder);
+ zfree(&etmq->event_buf);
+ zfree(&etmq->last_branch);
+ zfree(&etmq->last_branch_rb);
+ zfree(&etmq->prev_packet);
+ zfree(&etmq->packet);
free(etmq);
}
@@ -270,11 +298,35 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
struct cs_etm_decoder_params d_params;
struct cs_etm_trace_params *t_params;
struct cs_etm_queue *etmq;
+ size_t szp = sizeof(struct cs_etm_packet);
etmq = zalloc(sizeof(*etmq));
if (!etmq)
return NULL;
+ etmq->packet = zalloc(szp);
+ if (!etmq->packet)
+ goto out_free;
+
+ if (etm->synth_opts.last_branch || etm->sample_branches) {
+ etmq->prev_packet = zalloc(szp);
+ if (!etmq->prev_packet)
+ goto out_free;
+ }
+
+ if (etm->synth_opts.last_branch) {
+ size_t sz = sizeof(struct branch_stack);
+
+ sz += etm->synth_opts.last_branch_sz *
+ sizeof(struct branch_entry);
+ etmq->last_branch = zalloc(sz);
+ if (!etmq->last_branch)
+ goto out_free;
+ etmq->last_branch_rb = zalloc(sz);
+ if (!etmq->last_branch_rb)
+ goto out_free;
+ }
+
etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
if (!etmq->event_buf)
goto out_free;
@@ -329,6 +381,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
goto out_free_decoder;
etmq->offset = 0;
+ etmq->period_instructions = 0;
return etmq;
@@ -336,6 +389,10 @@ out_free_decoder:
cs_etm_decoder__free(etmq->decoder);
out_free:
zfree(&etmq->event_buf);
+ zfree(&etmq->last_branch);
+ zfree(&etmq->last_branch_rb);
+ zfree(&etmq->prev_packet);
+ zfree(&etmq->packet);
free(etmq);
return NULL;
@@ -389,6 +446,129 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
return 0;
}
+static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
+{
+ struct branch_stack *bs_src = etmq->last_branch_rb;
+ struct branch_stack *bs_dst = etmq->last_branch;
+ size_t nr = 0;
+
+ /*
+ * Set the number of records before early exit: ->nr is used to
+ * determine how many branches to copy from ->entries.
+ */
+ bs_dst->nr = bs_src->nr;
+
+ /*
+ * Early exit when there is nothing to copy.
+ */
+ if (!bs_src->nr)
+ return;
+
+ /*
+ * As bs_src->entries is a circular buffer, we need to copy from it in
+ * two steps. First, copy the branches from the most recently inserted
+ * branch ->last_branch_pos until the end of bs_src->entries buffer.
+ */
+ nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos;
+ memcpy(&bs_dst->entries[0],
+ &bs_src->entries[etmq->last_branch_pos],
+ sizeof(struct branch_entry) * nr);
+
+ /*
+ * If we wrapped around at least once, the branches from the beginning
+ * of the bs_src->entries buffer and until the ->last_branch_pos element
+ * are older valid branches: copy them over. The total number of
+ * branches copied over will be equal to the number of branches asked by
+ * the user in last_branch_sz.
+ */
+ if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
+ memcpy(&bs_dst->entries[nr],
+ &bs_src->entries[0],
+ sizeof(struct branch_entry) * etmq->last_branch_pos);
+ }
+}
+
+static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
+{
+ etmq->last_branch_pos = 0;
+ etmq->last_branch_rb->nr = 0;
+}
+
+static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
+{
+ /*
+ * The packet records the execution range with an exclusive end address
+ *
+ * A64 instructions are constant size, so the last executed
+ * instruction is A64_INSTR_SIZE before the end address
+ * Will need to do instruction level decode for T32 instructions as
+ * they can be variable size (not yet supported).
+ */
+ return packet->end_addr - A64_INSTR_SIZE;
+}
+
+static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
+{
+ /*
+ * Only A64 instructions are currently supported, so can get
+ * instruction count by dividing.
+ * Will need to do instruction level decode for T32 instructions as
+ * they can be variable size (not yet supported).
+ */
+ return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
+}
+
+static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
+ u64 offset)
+{
+ /*
+ * Only A64 instructions are currently supported, so can get
+ * instruction address by muliplying.
+ * Will need to do instruction level decode for T32 instructions as
+ * they can be variable size (not yet supported).
+ */
+ return packet->start_addr + offset * A64_INSTR_SIZE;
+}
+
+static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
+{
+ struct branch_stack *bs = etmq->last_branch_rb;
+ struct branch_entry *be;
+
+ /*
+ * The branches are recorded in a circular buffer in reverse
+ * chronological order: we start recording from the last element of the
+ * buffer down. After writing the first element of the stack, move the
+ * insert position back to the end of the buffer.
+ */
+ if (!etmq->last_branch_pos)
+ etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
+
+ etmq->last_branch_pos -= 1;
+
+ be = &bs->entries[etmq->last_branch_pos];
+ be->from = cs_etm__last_executed_instr(etmq->prev_packet);
+ be->to = etmq->packet->start_addr;
+ /* No support for mispredict */
+ be->flags.mispred = 0;
+ be->flags.predicted = 1;
+
+ /*
+ * Increment bs->nr until reaching the number of last branches asked by
+ * the user on the command line.
+ */
+ if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
+ bs->nr += 1;
+}
+
+static int cs_etm__inject_event(union perf_event *event,
+ struct perf_sample *sample, u64 type)
+{
+ event->header.size = perf_event__sample_event_size(sample, type, 0);
+ return perf_event__synthesize_sample(event, type, 0, sample);
+}
+
+
static int
cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
{
@@ -453,35 +633,105 @@ static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
}
}
+static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
+ u64 addr, u64 period)
+{
+ int ret = 0;
+ struct cs_etm_auxtrace *etm = etmq->etm;
+ union perf_event *event = etmq->event_buf;
+ struct perf_sample sample = {.ip = 0,};
+
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.misc = PERF_RECORD_MISC_USER;
+ event->sample.header.size = sizeof(struct perf_event_header);
+
+ sample.ip = addr;
+ sample.pid = etmq->pid;
+ sample.tid = etmq->tid;
+ sample.id = etmq->etm->instructions_id;
+ sample.stream_id = etmq->etm->instructions_id;
+ sample.period = period;
+ sample.cpu = etmq->packet->cpu;
+ sample.flags = 0;
+ sample.insn_len = 1;
+ sample.cpumode = event->header.misc;
+
+ if (etm->synth_opts.last_branch) {
+ cs_etm__copy_last_branch_rb(etmq);
+ sample.branch_stack = etmq->last_branch;
+ }
+
+ if (etm->synth_opts.inject) {
+ ret = cs_etm__inject_event(event, &sample,
+ etm->instructions_sample_type);
+ if (ret)
+ return ret;
+ }
+
+ ret = perf_session__deliver_synth_event(etm->session, event, &sample);
+
+ if (ret)
+ pr_err(
+ "CS ETM Trace: failed to deliver instruction event, error %d\n",
+ ret);
+
+ if (etm->synth_opts.last_branch)
+ cs_etm__reset_last_branch_rb(etmq);
+
+ return ret;
+}
+
/*
* The cs etm packet encodes an instruction range between a branch target
* and the next taken branch. Generate sample accordingly.
*/
-static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
- struct cs_etm_packet *packet)
+static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
{
int ret = 0;
struct cs_etm_auxtrace *etm = etmq->etm;
struct perf_sample sample = {.ip = 0,};
union perf_event *event = etmq->event_buf;
- u64 start_addr = packet->start_addr;
- u64 end_addr = packet->end_addr;
+ struct dummy_branch_stack {
+ u64 nr;
+ struct branch_entry entries;
+ } dummy_bs;
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = PERF_RECORD_MISC_USER;
event->sample.header.size = sizeof(struct perf_event_header);
- sample.ip = start_addr;
+ sample.ip = cs_etm__last_executed_instr(etmq->prev_packet);
sample.pid = etmq->pid;
sample.tid = etmq->tid;
- sample.addr = end_addr;
+ sample.addr = etmq->packet->start_addr;
sample.id = etmq->etm->branches_id;
sample.stream_id = etmq->etm->branches_id;
sample.period = 1;
- sample.cpu = packet->cpu;
+ sample.cpu = etmq->packet->cpu;
sample.flags = 0;
sample.cpumode = PERF_RECORD_MISC_USER;
+ /*
+ * perf report cannot handle events without a branch stack
+ */
+ if (etm->synth_opts.last_branch) {
+ dummy_bs = (struct dummy_branch_stack){
+ .nr = 1,
+ .entries = {
+ .from = sample.ip,
+ .to = sample.addr,
+ },
+ };
+ sample.branch_stack = (struct branch_stack *)&dummy_bs;
+ }
+
+ if (etm->synth_opts.inject) {
+ ret = cs_etm__inject_event(event, &sample,
+ etm->branches_sample_type);
+ if (ret)
+ return ret;
+ }
+
ret = perf_session__deliver_synth_event(etm->session, event, &sample);
if (ret)
@@ -578,6 +828,24 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
etm->sample_branches = true;
etm->branches_sample_type = attr.sample_type;
etm->branches_id = id;
+ id += 1;
+ attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
+ }
+
+ if (etm->synth_opts.last_branch)
+ attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+
+ if (etm->synth_opts.instructions) {
+ attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+ attr.sample_period = etm->synth_opts.period;
+ etm->instructions_sample_period = attr.sample_period;
+ err = cs_etm__synth_event(session, &attr, id);
+ if (err)
+ return err;
+ etm->sample_instructions = true;
+ etm->instructions_sample_type = attr.sample_type;
+ etm->instructions_id = id;
+ id += 1;
}
return 0;
@@ -585,25 +853,108 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
static int cs_etm__sample(struct cs_etm_queue *etmq)
{
+ struct cs_etm_auxtrace *etm = etmq->etm;
+ struct cs_etm_packet *tmp;
int ret;
- struct cs_etm_packet packet;
+ u64 instrs_executed;
- while (1) {
- ret = cs_etm_decoder__get_packet(etmq->decoder, &packet);
- if (ret <= 0)
+ instrs_executed = cs_etm__instr_count(etmq->packet);
+ etmq->period_instructions += instrs_executed;
+
+ /*
+ * Record a branch when the last instruction in
+ * PREV_PACKET is a branch.
+ */
+ if (etm->synth_opts.last_branch &&
+ etmq->prev_packet &&
+ etmq->prev_packet->sample_type == CS_ETM_RANGE &&
+ etmq->prev_packet->last_instr_taken_branch)
+ cs_etm__update_last_branch_rb(etmq);
+
+ if (etm->sample_instructions &&
+ etmq->period_instructions >= etm->instructions_sample_period) {
+ /*
+ * Emit instruction sample periodically
+ * TODO: allow period to be defined in cycles and clock time
+ */
+
+ /* Get number of instructions executed after the sample point */
+ u64 instrs_over = etmq->period_instructions -
+ etm->instructions_sample_period;
+
+ /*
+ * Calculate the address of the sampled instruction (-1 as
+ * sample is reported as though instruction has just been
+ * executed, but PC has not advanced to next instruction)
+ */
+ u64 offset = (instrs_executed - instrs_over - 1);
+ u64 addr = cs_etm__instr_addr(etmq->packet, offset);
+
+ ret = cs_etm__synth_instruction_sample(
+ etmq, addr, etm->instructions_sample_period);
+ if (ret)
+ return ret;
+
+ /* Carry remaining instructions into next sample period */
+ etmq->period_instructions = instrs_over;
+ }
+
+ if (etm->sample_branches &&
+ etmq->prev_packet &&
+ etmq->prev_packet->sample_type == CS_ETM_RANGE &&
+ etmq->prev_packet->last_instr_taken_branch) {
+ ret = cs_etm__synth_branch_sample(etmq);
+ if (ret)
return ret;
+ }
+ if (etm->sample_branches || etm->synth_opts.last_branch) {
/*
- * If the packet contains an instruction range, generate an
- * instruction sequence event.
+ * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
+ * the next incoming packet.
*/
- if (packet.sample_type & CS_ETM_RANGE)
- cs_etm__synth_branch_sample(etmq, &packet);
+ tmp = etmq->packet;
+ etmq->packet = etmq->prev_packet;
+ etmq->prev_packet = tmp;
}
return 0;
}
+static int cs_etm__flush(struct cs_etm_queue *etmq)
+{
+ int err = 0;
+ struct cs_etm_packet *tmp;
+
+ if (etmq->etm->synth_opts.last_branch &&
+ etmq->prev_packet &&
+ etmq->prev_packet->sample_type == CS_ETM_RANGE) {
+ /*
+ * Generate a last branch event for the branches left in the
+ * circular buffer at the end of the trace.
+ *
+ * Use the address of the end of the last reported execution
+ * range
+ */
+ u64 addr = cs_etm__last_executed_instr(etmq->prev_packet);
+
+ err = cs_etm__synth_instruction_sample(
+ etmq, addr,
+ etmq->period_instructions);
+ etmq->period_instructions = 0;
+
+ /*
+ * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
+ * the next incoming packet.
+ */
+ tmp = etmq->packet;
+ etmq->packet = etmq->prev_packet;
+ etmq->prev_packet = tmp;
+ }
+
+ return err;
+}
+
static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
{
struct cs_etm_auxtrace *etm = etmq->etm;
@@ -615,45 +966,72 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
etm->kernel_start = machine__kernel_start(etm->machine);
/* Go through each buffer in the queue and decode them one by one */
-more:
- buffer_used = 0;
- memset(&buffer, 0, sizeof(buffer));
- err = cs_etm__get_trace(&buffer, etmq);
- if (err <= 0)
- return err;
- /*
- * We cannot assume consecutive blocks in the data file are contiguous,
- * reset the decoder to force re-sync.
- */
- err = cs_etm_decoder__reset(etmq->decoder);
- if (err != 0)
- return err;
-
- /* Run trace decoder until buffer consumed or end of trace */
- do {
- processed = 0;
-
- err = cs_etm_decoder__process_data_block(
- etmq->decoder,
- etmq->offset,
- &buffer.buf[buffer_used],
- buffer.len - buffer_used,
- &processed);
-
- if (err)
+ while (1) {
+ buffer_used = 0;
+ memset(&buffer, 0, sizeof(buffer));
+ err = cs_etm__get_trace(&buffer, etmq);
+ if (err <= 0)
return err;
-
- etmq->offset += processed;
- buffer_used += processed;
-
/*
- * Nothing to do with an error condition, let's hope the next
- * chunk will be better.
+ * We cannot assume consecutive blocks in the data file are
+ * contiguous, reset the decoder to force re-sync.
*/
- err = cs_etm__sample(etmq);
- } while (buffer.len > buffer_used);
+ err = cs_etm_decoder__reset(etmq->decoder);
+ if (err != 0)
+ return err;
+
+ /* Run trace decoder until buffer consumed or end of trace */
+ do {
+ processed = 0;
+ err = cs_etm_decoder__process_data_block(
+ etmq->decoder,
+ etmq->offset,
+ &buffer.buf[buffer_used],
+ buffer.len - buffer_used,
+ &processed);
+ if (err)
+ return err;
+
+ etmq->offset += processed;
+ buffer_used += processed;
+
+ /* Process each packet in this chunk */
+ while (1) {
+ err = cs_etm_decoder__get_packet(etmq->decoder,
+ etmq->packet);
+ if (err <= 0)
+ /*
+ * Stop processing this chunk on
+ * end of data or error
+ */
+ break;
+
+ switch (etmq->packet->sample_type) {
+ case CS_ETM_RANGE:
+ /*
+ * If the packet contains an instruction
+ * range, generate instruction sequence
+ * events.
+ */
+ cs_etm__sample(etmq);
+ break;
+ case CS_ETM_TRACE_ON:
+ /*
+ * Discontinuity in trace, flush
+ * previous branch stack
+ */
+ cs_etm__flush(etmq);
+ break;
+ default:
+ break;
+ }
+ }
+ } while (buffer.len > buffer_used);
-goto more;
+ if (err == 0)
+ /* Flush any remaining branch stack entries */
+ err = cs_etm__flush(etmq);
+ }
return err;
}
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 44e603c27944..f0a6cbd033cc 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -894,8 +894,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
struct machine *machine)
{
size_t size;
- const char *mmap_name;
- char name_buff[PATH_MAX];
struct map *map = machine__kernel_map(machine);
struct kmap *kmap;
int err;
@@ -918,7 +916,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
return -1;
}
- mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff));
if (machine__is_host(machine)) {
/*
* kernel uses PERF_RECORD_MISC_USER for user space maps,
@@ -931,7 +928,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
kmap = map__kmap(map);
size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
- "%s%s", mmap_name, kmap->ref_reloc_sym->name) + 1;
+ "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
size = PERF_ALIGN(size, sizeof(u64));
event->mmap.header.type = PERF_RECORD_MMAP;
event->mmap.header.size = (sizeof(event->mmap) -
@@ -1591,17 +1588,6 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
return -1;
dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
- /*
- * Have we already created the kernel maps for this machine?
- *
- * This should have happened earlier, when we processed the kernel MMAP
- * events, but for older perf.data files there was no such thing, so do
- * it now.
- */
- if (sample->cpumode == PERF_RECORD_MISC_KERNEL &&
- machine__kernel_map(machine) == NULL)
- machine__create_kernel_maps(machine);
-
thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al);
dump_printf(" ...... dso: %s\n",
al->map ? al->map->dso->long_name :
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e5fc14e53c05..7b7d535396f7 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1086,11 +1086,30 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
+ bool all_threads = (target->per_thread && target->system_wide);
struct cpu_map *cpus;
struct thread_map *threads;
+ /*
+ * If specify '-a' and '--per-thread' to perf record, perf record
+ * will override '--per-thread'. target->per_thread = false and
+ * target->system_wide = true.
+ *
+ * If specify '--per-thread' only to perf record,
+ * target->per_thread = true and target->system_wide = false.
+ *
+ * So target->per_thread && target->system_wide is false.
+ * For perf record, thread_map__new_str doesn't call
+ * thread_map__new_all_cpus. That will keep perf record's
+ * current behavior.
+ *
+ * For perf stat, it allows the case that target->per_thread and
+ * target->system_wide are all true. It means to collect system-wide
+ * per-thread data. thread_map__new_str will call
+ * thread_map__new_all_cpus to enumerate all threads.
+ */
threads = thread_map__new_str(target->pid, target->tid, target->uid,
- target->per_thread);
+ all_threads);
if (!threads)
return -1;
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index f28aaaa3a440..942bdec6d70d 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -174,4 +174,5 @@ int write_padded(struct feat_fd *fd, const void *bf,
int get_cpuid(char *buffer, size_t sz);
char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused);
+int strcmp_cpuid_str(const char *s1, const char *s2);
#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b6140950301e..44a8456cea10 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -879,7 +879,7 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter,
* cumulated only one time to prevent entries more than 100%
* overhead.
*/
- he_cache = malloc(sizeof(*he_cache) * (iter->max_stack + 1));
+ he_cache = malloc(sizeof(*he_cache) * (callchain_cursor.nr + 1));
if (he_cache == NULL)
return -ENOMEM;
@@ -1045,8 +1045,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
if (err)
return err;
- iter->max_stack = max_stack_depth;
-
err = iter->ops->prepare_entry(iter, al);
if (err)
goto out;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 02721b579746..e869cad4d89f 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -107,7 +107,6 @@ struct hist_entry_iter {
int curr;
bool hide_unresolved;
- int max_stack;
struct perf_evsel *evsel;
struct perf_sample *sample;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b05a67464c03..fe27ef55cbb9 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -48,8 +48,31 @@ static void machine__threads_init(struct machine *machine)
}
}
+static int machine__set_mmap_name(struct machine *machine)
+{
+ if (machine__is_host(machine)) {
+ if (symbol_conf.vmlinux_name)
+ machine->mmap_name = strdup(symbol_conf.vmlinux_name);
+ else
+ machine->mmap_name = strdup("[kernel.kallsyms]");
+ } else if (machine__is_default_guest(machine)) {
+ if (symbol_conf.default_guest_vmlinux_name)
+ machine->mmap_name = strdup(symbol_conf.default_guest_vmlinux_name);
+ else
+ machine->mmap_name = strdup("[guest.kernel.kallsyms]");
+ } else {
+ if (asprintf(&machine->mmap_name, "[guest.kernel.kallsyms.%d]",
+ machine->pid) < 0)
+ machine->mmap_name = NULL;
+ }
+
+ return machine->mmap_name ? 0 : -ENOMEM;
+}
+
int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
{
+ int err = -ENOMEM;
+
memset(machine, 0, sizeof(*machine));
map_groups__init(&machine->kmaps, machine);
RB_CLEAR_NODE(&machine->rb_node);
@@ -73,13 +96,16 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
if (machine->root_dir == NULL)
return -ENOMEM;
+ if (machine__set_mmap_name(machine))
+ goto out;
+
if (pid != HOST_KERNEL_ID) {
struct thread *thread = machine__findnew_thread(machine, -1,
pid);
char comm[64];
if (thread == NULL)
- return -ENOMEM;
+ goto out;
snprintf(comm, sizeof(comm), "[guest/%d]", pid);
thread__set_comm(thread, comm, 0);
@@ -87,7 +113,13 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
}
machine->current_tid = NULL;
+ err = 0;
+out:
+ if (err) {
+ zfree(&machine->root_dir);
+ zfree(&machine->mmap_name);
+ }
return 0;
}
@@ -119,7 +151,7 @@ struct machine *machine__new_kallsyms(void)
* ask for not using the kcore parsing code, once this one is fixed
* to create a map per module.
*/
- if (machine && __machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION, true) <= 0) {
+ if (machine && machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION) <= 0) {
machine__delete(machine);
machine = NULL;
}
@@ -180,6 +212,7 @@ void machine__exit(struct machine *machine)
dsos__exit(&machine->dsos);
machine__exit_vdso(machine);
zfree(&machine->root_dir);
+ zfree(&machine->mmap_name);
zfree(&machine->current_tid);
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
@@ -322,20 +355,6 @@ void machines__process_guests(struct machines *machines,
}
}
-char *machine__mmap_name(struct machine *machine, char *bf, size_t size)
-{
- if (machine__is_host(machine))
- snprintf(bf, size, "[%s]", "kernel.kallsyms");
- else if (machine__is_default_guest(machine))
- snprintf(bf, size, "[%s]", "guest.kernel.kallsyms");
- else {
- snprintf(bf, size, "[%s.%d]", "guest.kernel.kallsyms",
- machine->pid);
- }
-
- return bf;
-}
-
void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
{
struct rb_node *node;
@@ -771,25 +790,13 @@ size_t machine__fprintf(struct machine *machine, FILE *fp)
static struct dso *machine__get_kernel(struct machine *machine)
{
- const char *vmlinux_name = NULL;
+ const char *vmlinux_name = machine->mmap_name;
struct dso *kernel;
if (machine__is_host(machine)) {
- vmlinux_name = symbol_conf.vmlinux_name;
- if (!vmlinux_name)
- vmlinux_name = DSO__NAME_KALLSYMS;
-
kernel = machine__findnew_kernel(machine, vmlinux_name,
"[kernel]", DSO_TYPE_KERNEL);
} else {
- char bf[PATH_MAX];
-
- if (machine__is_default_guest(machine))
- vmlinux_name = symbol_conf.default_guest_vmlinux_name;
- if (!vmlinux_name)
- vmlinux_name = machine__mmap_name(machine, bf,
- sizeof(bf));
-
kernel = machine__findnew_kernel(machine, vmlinux_name,
"[guest.kernel]",
DSO_TYPE_GUEST_KERNEL);
@@ -849,13 +856,10 @@ static int machine__get_running_kernel_start(struct machine *machine,
return 0;
}
-int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
+static int
+__machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
{
int type;
- u64 start = 0;
-
- if (machine__get_running_kernel_start(machine, NULL, &start))
- return -1;
/* In case of renewal the kernel map, destroy previous one */
machine__destroy_kernel_maps(machine);
@@ -864,7 +868,7 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
struct kmap *kmap;
struct map *map;
- machine->vmlinux_maps[type] = map__new2(start, kernel, type);
+ machine->vmlinux_maps[type] = map__new2(0, kernel, type);
if (machine->vmlinux_maps[type] == NULL)
return -1;
@@ -987,11 +991,11 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid)
return machine__create_kernel_maps(machine);
}
-int __machine__load_kallsyms(struct machine *machine, const char *filename,
- enum map_type type, bool no_kcore)
+int machine__load_kallsyms(struct machine *machine, const char *filename,
+ enum map_type type)
{
struct map *map = machine__kernel_map(machine);
- int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore);
+ int ret = __dso__load_kallsyms(map->dso, filename, map, true);
if (ret > 0) {
dso__set_loaded(map->dso, type);
@@ -1006,12 +1010,6 @@ int __machine__load_kallsyms(struct machine *machine, const char *filename,
return ret;
}
-int machine__load_kallsyms(struct machine *machine, const char *filename,
- enum map_type type)
-{
- return __machine__load_kallsyms(machine, filename, type, false);
-}
-
int machine__load_vmlinux_path(struct machine *machine, enum map_type type)
{
struct map *map = machine__kernel_map(machine);
@@ -1215,6 +1213,24 @@ static int machine__create_modules(struct machine *machine)
return 0;
}
+static void machine__set_kernel_mmap(struct machine *machine,
+ u64 start, u64 end)
+{
+ int i;
+
+ for (i = 0; i < MAP__NR_TYPES; i++) {
+ machine->vmlinux_maps[i]->start = start;
+ machine->vmlinux_maps[i]->end = end;
+
+ /*
+ * Be a bit paranoid here, some perf.data file came with
+ * a zero sized synthesized MMAP event for the kernel.
+ */
+ if (machine->vmlinux_maps[i]->end == 0)
+ machine->vmlinux_maps[i]->end = ~0ULL;
+ }
+}
+
int machine__create_kernel_maps(struct machine *machine)
{
struct dso *kernel = machine__get_kernel(machine);
@@ -1239,40 +1255,22 @@ int machine__create_kernel_maps(struct machine *machine)
"continuing anyway...\n", machine->pid);
}
- /*
- * Now that we have all the maps created, just set the ->end of them:
- */
- map_groups__fixup_end(&machine->kmaps);
-
if (!machine__get_running_kernel_start(machine, &name, &addr)) {
if (name &&
maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
machine__destroy_kernel_maps(machine);
return -1;
}
+ machine__set_kernel_mmap(machine, addr, 0);
}
+ /*
+ * Now that we have all the maps created, just set the ->end of them:
+ */
+ map_groups__fixup_end(&machine->kmaps);
return 0;
}
-static void machine__set_kernel_mmap_len(struct machine *machine,
- union perf_event *event)
-{
- int i;
-
- for (i = 0; i < MAP__NR_TYPES; i++) {
- machine->vmlinux_maps[i]->start = event->mmap.start;
- machine->vmlinux_maps[i]->end = (event->mmap.start +
- event->mmap.len);
- /*
- * Be a bit paranoid here, some perf.data file came with
- * a zero sized synthesized MMAP event for the kernel.
- */
- if (machine->vmlinux_maps[i]->end == 0)
- machine->vmlinux_maps[i]->end = ~0ULL;
- }
-}
-
static bool machine__uses_kcore(struct machine *machine)
{
struct dso *dso;
@@ -1289,7 +1287,6 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
union perf_event *event)
{
struct map *map;
- char kmmap_prefix[PATH_MAX];
enum dso_kernel_type kernel_type;
bool is_kernel_mmap;
@@ -1297,15 +1294,14 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
if (machine__uses_kcore(machine))
return 0;
- machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix));
if (machine__is_host(machine))
kernel_type = DSO_TYPE_KERNEL;
else
kernel_type = DSO_TYPE_GUEST_KERNEL;
is_kernel_mmap = memcmp(event->mmap.filename,
- kmmap_prefix,
- strlen(kmmap_prefix) - 1) == 0;
+ machine->mmap_name,
+ strlen(machine->mmap_name) - 1) == 0;
if (event->mmap.filename[0] == '/' ||
(!is_kernel_mmap && event->mmap.filename[0] == '[')) {
map = machine__findnew_module_map(machine, event->mmap.start,
@@ -1316,7 +1312,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
map->end = map->start + event->mmap.len;
} else if (is_kernel_mmap) {
const char *symbol_name = (event->mmap.filename +
- strlen(kmmap_prefix));
+ strlen(machine->mmap_name));
/*
* Should be there already, from the build-id table in
* the header.
@@ -1357,7 +1353,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
up_read(&machine->dsos.lock);
if (kernel == NULL)
- kernel = machine__findnew_dso(machine, kmmap_prefix);
+ kernel = machine__findnew_dso(machine, machine->mmap_name);
if (kernel == NULL)
goto out_problem;
@@ -1370,7 +1366,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
if (strstr(kernel->long_name, "vmlinux"))
dso__set_short_name(kernel, "[kernel.vmlinux]", false);
- machine__set_kernel_mmap_len(machine, event);
+ machine__set_kernel_mmap(machine, event->mmap.start,
+ event->mmap.start + event->mmap.len);
/*
* Avoid using a zero address (kptr_restrict) for the ref reloc
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 5ce860b64c74..66cc200ef86f 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -43,6 +43,7 @@ struct machine {
bool comm_exec;
bool kptr_restrict_warned;
char *root_dir;
+ char *mmap_name;
struct threads threads[THREADS__TABLE_SIZE];
struct vdso_info *vdso_info;
struct perf_env *env;
@@ -142,8 +143,6 @@ struct machine *machines__find(struct machines *machines, pid_t pid);
struct machine *machines__findnew(struct machines *machines, pid_t pid);
void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
-char *machine__mmap_name(struct machine *machine, char *bf, size_t size);
-
void machines__set_comm_exec(struct machines *machines, bool comm_exec);
struct machine *machine__new_host(void);
@@ -226,8 +225,6 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
const char *filename);
int arch__fix_module_text_start(u64 *start, const char *name);
-int __machine__load_kallsyms(struct machine *machine, const char *filename,
- enum map_type type, bool no_kcore);
int machine__load_kallsyms(struct machine *machine, const char *filename,
enum map_type type);
int machine__load_vmlinux_path(struct machine *machine, enum map_type type);
@@ -239,7 +236,6 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
bool (skip)(struct dso *dso, int parm), int parm);
void machine__destroy_kernel_maps(struct machine *machine);
-int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel);
int machine__create_kernel_maps(struct machine *machine);
int machines__create_kernel_maps(struct machines *machines, pid_t pid);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 57e38fdf0b34..1111d5bf15ca 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -576,6 +576,34 @@ char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
return NULL;
}
+/* Return zero when the cpuid from the mapfile.csv matches the
+ * cpuid string generated on this platform.
+ * Otherwise return non-zero.
+ */
+int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
+{
+ regex_t re;
+ regmatch_t pmatch[1];
+ int match;
+
+ if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) {
+ /* Warn unable to generate match particular string. */
+ pr_info("Invalid regular expression %s\n", mapcpuid);
+ return 1;
+ }
+
+ match = !regexec(&re, cpuid, 1, pmatch, 0);
+ regfree(&re);
+ if (match) {
+ size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so);
+
+ /* Verify the entire string matched. */
+ if (match_len == strlen(cpuid))
+ return 0;
+ }
+ return 1;
+}
+
static char *perf_pmu__getcpuid(struct perf_pmu *pmu)
{
char *cpuid;
@@ -610,31 +638,14 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu)
i = 0;
for (;;) {
- regex_t re;
- regmatch_t pmatch[1];
- int match;
-
map = &pmu_events_map[i++];
if (!map->table) {
map = NULL;
break;
}
- if (regcomp(&re, map->cpuid, REG_EXTENDED) != 0) {
- /* Warn unable to generate match particular string. */
- pr_info("Invalid regular expression %s\n", map->cpuid);
+ if (!strcmp_cpuid_str(map->cpuid, cpuid))
break;
- }
-
- match = !regexec(&re, cpuid, 1, pmatch, 0);
- regfree(&re);
- if (match) {
- size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so);
-
- /* Verify the entire string matched. */
- if (match_len == strlen(cpuid))
- break;
- }
}
free(cpuid);
return map;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 2da4d0456a03..e8514f651865 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -111,17 +111,20 @@ struct sort_entry sort_thread = {
/* --sort comm */
+/*
+ * We can't use pointer comparison in functions below,
+ * because it gives different results based on pointer
+ * values, which could break some sorting assumptions.
+ */
static int64_t
sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
{
- /* Compare the addr that should be unique among comm */
return strcmp(comm__str(right->comm), comm__str(left->comm));
}
static int64_t
sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
{
- /* Compare the addr that should be unique among comm */
return strcmp(comm__str(right->comm), comm__str(left->comm));
}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index dbc6f7134f61..2f44e386a0e8 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -90,6 +90,8 @@ struct perf_stat_config {
bool scale;
FILE *output;
unsigned int interval;
+ unsigned int timeout;
+ int times;
struct runtime_stat *stats;
int stats_num;
};
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index cc065d4bfafc..a1a312d99f30 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1582,7 +1582,7 @@ int dso__load(struct dso *dso, struct map *map)
bool next_slot = false;
bool is_reg;
bool nsexit;
- int sirc;
+ int sirc = -1;
enum dso_binary_type symtab_type = binary_type_symtab[i];
@@ -1600,16 +1600,14 @@ int dso__load(struct dso *dso, struct map *map)
nsinfo__mountns_exit(&nsc);
is_reg = is_regular_file(name);
- sirc = symsrc__init(ss, dso, name, symtab_type);
+ if (is_reg)
+ sirc = symsrc__init(ss, dso, name, symtab_type);
if (nsexit)
nsinfo__mountns_enter(dso->nsinfo, &nsc);
- if (!is_reg || sirc < 0) {
- if (sirc >= 0)
- symsrc__destroy(ss);
+ if (!is_reg || sirc < 0)
continue;
- }
if (!syms_ss && symsrc__has_symtab(ss)) {
syms_ss = ss;
@@ -1960,8 +1958,7 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map)
pr_debug("Using %s for symbols\n", kallsyms_filename);
if (err > 0 && !dso__is_kcore(dso)) {
dso->binary_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
- machine__mmap_name(machine, path, sizeof(path));
- dso__set_long_name(dso, strdup(path), true);
+ dso__set_long_name(dso, machine->mmap_name, false);
map__fixup_start(map);
map__fixup_end(map);
}
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 303bdb84ab5a..895122d638dd 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -30,6 +30,14 @@ static const char **syscalltbl_native = syscalltbl_x86_64;
#include <asm/syscalls_64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID;
static const char **syscalltbl_native = syscalltbl_s390_64;
+#elif defined(__powerpc64__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_powerpc_64;
+#elif defined(__powerpc__)
+#include <asm/syscalls_32.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_powerpc_32;
#endif
struct syscall {
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index 3e1038f6491c..729dad8f412d 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -323,7 +323,7 @@ out_free_threads:
}
struct thread_map *thread_map__new_str(const char *pid, const char *tid,
- uid_t uid, bool per_thread)
+ uid_t uid, bool all_threads)
{
if (pid)
return thread_map__new_by_pid_str(pid);
@@ -331,7 +331,7 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid,
if (!tid && uid != UINT_MAX)
return thread_map__new_by_uid(uid);
- if (per_thread)
+ if (all_threads)
return thread_map__new_all_cpus();
return thread_map__new_by_tid_str(tid);
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index 0a806b99e73c..5ec91cfd1869 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -31,7 +31,7 @@ struct thread_map *thread_map__get(struct thread_map *map);
void thread_map__put(struct thread_map *map);
struct thread_map *thread_map__new_str(const char *pid,
- const char *tid, uid_t uid, bool per_thread);
+ const char *tid, uid_t uid, bool all_threads);
struct thread_map *thread_map__new_by_tid_str(const char *tid_str);