From 75be989a7a18e9666efd92b846ee48bed79e8086 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 14 Feb 2015 14:50:11 -0300 Subject: perf evlist: Adopt events_stats from perf_session For tools that don't deal with perf.data files, thus do not need to use perf_session. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-kglq67gvauq9tak02a4se00r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/perf/builtin-sched.c') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 891c3930080e..7ce296618717 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1473,9 +1473,9 @@ static int perf_sched__read_events(struct perf_sched *sched, goto out_delete; } - sched->nr_events = session->stats.nr_events[0]; - sched->nr_lost_events = session->stats.total_lost; - sched->nr_lost_chunks = session->stats.nr_events[PERF_RECORD_LOST]; + sched->nr_events = session->evlist->stats.nr_events[0]; + sched->nr_lost_events = session->evlist->stats.total_lost; + sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; } if (psession) -- cgit v1.2.3 From f3b623b8490af7a9b819cbcf2d99ab4597ece94b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 2 Mar 2015 22:21:35 -0300 Subject: perf tools: Reference count struct thread We need to do that to stop accumulating entries in the dead_threads linked list, i.e. we were keeping references to threads in struct hists that continue to exist even after a thread exited and was removed from the machine threads rbtree. We still keep the dead_threads list, but just for debugging, allowing us to iterate at any given point over the threads that still are referenced by things like struct hist_entry. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-3ejvfyed0r7ue61dkurzjux4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 2 +- tools/perf/builtin-trace.c | 7 ++++++- tools/perf/ui/browsers/hists.c | 6 +++--- tools/perf/util/build-id.c | 5 +++-- tools/perf/util/hist.c | 2 ++ tools/perf/util/hist.h | 2 +- tools/perf/util/machine.c | 44 ++++++++++++++++++++++-------------------- tools/perf/util/machine.h | 1 - tools/perf/util/session.c | 6 ------ tools/perf/util/thread.c | 14 ++++++++++++++ tools/perf/util/thread.h | 13 +++++++++++++ 11 files changed, 66 insertions(+), 36 deletions(-) (limited to 'tools/perf/builtin-sched.c') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 7ce296618717..e00e2eaf89da 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -831,7 +831,7 @@ static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) return -1; } - atoms->thread = thread; + atoms->thread = thread__get(thread); INIT_LIST_HEAD(&atoms->work_list); __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid); return 0; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d95a8f4d988c..211614fba217 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1741,7 +1741,10 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, } else ttrace->entry_pending = true; - trace->current = thread; + if (trace->current != thread) { + thread__put(trace->current); + trace->current = thread__get(thread); + } return 0; } @@ -2274,6 +2277,8 @@ next_event: } out_disable: + thread__zput(trace->current); + perf_evlist__disable(evlist); if (!err) { diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 788506eef567..ad312d91caed 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1467,7 +1467,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, perf_hpp__set_user_width(symbol_conf.col_width_list_str); while (1) { - const struct thread *thread = NULL; + struct thread *thread = NULL; const struct dso *dso = NULL; int choice = 0, annotate = -2, zoom_dso = -2, zoom_thread = -2, @@ -1754,13 +1754,13 @@ zoom_thread: pstack__remove(fstack, &browser->hists->thread_filter); zoom_out_thread: ui_helpline__pop(); - browser->hists->thread_filter = NULL; + thread__zput(browser->hists->thread_filter); perf_hpp__set_elide(HISTC_THREAD, false); } else { ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", thread->comm_set ? thread__comm_str(thread) : "", thread->tid); - browser->hists->thread_filter = thread; + browser->hists->thread_filter = thread__get(thread); perf_hpp__set_elide(HISTC_THREAD, false); pstack__push(fstack, &browser->hists->thread_filter); } diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index ffdc338df925..a19674666b4e 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -61,8 +61,9 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused, if (thread) { rb_erase(&thread->rb_node, &machine->threads); - machine->last_match = NULL; - thread__delete(thread); + if (machine->last_match == thread) + thread__zput(machine->last_match); + thread__put(thread); } return 0; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 70b48a65064c..95f5ab707b74 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -355,6 +355,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, callchain_init(he->callchain); INIT_LIST_HEAD(&he->pairs.node); + thread__get(he->thread); } return he; @@ -941,6 +942,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) void hist_entry__delete(struct hist_entry *he) { + thread__zput(he->thread); zfree(&he->branch_info); zfree(&he->mem_info); zfree(&he->stat_acc); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 2b690d028907..e988c9fcd1bc 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -60,7 +60,7 @@ struct hists { struct rb_root entries_collapsed; u64 nr_entries; u64 nr_non_filtered_entries; - const struct thread *thread_filter; + struct thread *thread_filter; const struct dso *dso_filter; const char *uid_filter_str; const char *symbol_filter_str; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 9e0f60a7e7b3..24f8c978cfd4 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -14,6 +14,8 @@ #include "unwind.h" #include "linux/hash.h" +static void machine__remove_thread(struct machine *machine, struct thread *th); + static void dsos__init(struct dsos *dsos) { INIT_LIST_HEAD(&dsos->head); @@ -89,16 +91,6 @@ static void dsos__delete(struct dsos *dsos) } } -void machine__delete_dead_threads(struct machine *machine) -{ - struct thread *n, *t; - - list_for_each_entry_safe(t, n, &machine->dead_threads, node) { - list_del(&t->node); - thread__delete(t); - } -} - void machine__delete_threads(struct machine *machine) { struct rb_node *nd = rb_first(&machine->threads); @@ -106,9 +98,8 @@ void machine__delete_threads(struct machine *machine) while (nd) { struct thread *t = rb_entry(nd, struct thread, rb_node); - rb_erase(&t->rb_node, &machine->threads); nd = rb_next(nd); - thread__delete(t); + machine__remove_thread(machine, t); } } @@ -361,9 +352,13 @@ static struct thread *__machine__findnew_thread(struct machine *machine, * the full rbtree: */ th = machine->last_match; - if (th && th->tid == tid) { - machine__update_thread_pid(machine, th, pid); - return th; + if (th != NULL) { + if (th->tid == tid) { + machine__update_thread_pid(machine, th, pid); + return th; + } + + thread__zput(machine->last_match); } while (*p != NULL) { @@ -371,7 +366,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine, th = rb_entry(parent, struct thread, rb_node); if (th->tid == tid) { - machine->last_match = th; + machine->last_match = thread__get(th); machine__update_thread_pid(machine, th, pid); return th; } @@ -403,8 +398,11 @@ static struct thread *__machine__findnew_thread(struct machine *machine, thread__delete(th); return NULL; } - - machine->last_match = th; + /* + * It is now in the rbtree, get a ref + */ + thread__get(th); + machine->last_match = thread__get(th); } return th; @@ -1238,13 +1236,17 @@ out_problem: static void machine__remove_thread(struct machine *machine, struct thread *th) { - machine->last_match = NULL; + if (machine->last_match == th) + thread__zput(machine->last_match); + rb_erase(&th->rb_node, &machine->threads); /* - * We may have references to this thread, for instance in some hist_entry - * instances, so just move them to a separate list. + * Move it first to the dead_threads list, then drop the reference, + * if this is the last reference, then the thread__delete destructor + * will be called and we will remove it from the dead_threads list. */ list_add_tail(&th->node, &machine->dead_threads); + thread__put(th); } int machine__process_fork_event(struct machine *machine, union perf_event *event, diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index e8b7779a0a3f..e2faf3b47e7b 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -118,7 +118,6 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); int machine__init(struct machine *machine, const char *root_dir, pid_t pid); void machine__exit(struct machine *machine); -void machine__delete_dead_threads(struct machine *machine); void machine__delete_threads(struct machine *machine); void machine__delete(struct machine *machine); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e4f166981ff0..ed4e5cf2bd9d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -138,11 +138,6 @@ struct perf_session *perf_session__new(struct perf_data_file *file, return NULL; } -static void perf_session__delete_dead_threads(struct perf_session *session) -{ - machine__delete_dead_threads(&session->machines.host); -} - static void perf_session__delete_threads(struct perf_session *session) { machine__delete_threads(&session->machines.host); @@ -167,7 +162,6 @@ static void perf_session_env__delete(struct perf_session_env *env) void perf_session__delete(struct perf_session *session) { perf_session__destroy_kernel_maps(session); - perf_session__delete_dead_threads(session); perf_session__delete_threads(session); perf_session_env__delete(&session->header.env); machines__exit(&session->machines); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 9ebc8b1f9be5..a5dbba95107f 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -82,6 +82,20 @@ void thread__delete(struct thread *thread) free(thread); } +struct thread *thread__get(struct thread *thread) +{ + ++thread->refcnt; + return thread; +} + +void thread__put(struct thread *thread) +{ + if (thread && --thread->refcnt == 0) { + list_del_init(&thread->node); + thread__delete(thread); + } +} + struct comm *thread__comm(const struct thread *thread) { if (list_empty(&thread->comm_list)) diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 160fd066a7d1..783b6688d2f7 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -20,6 +20,7 @@ struct thread { pid_t tid; pid_t ppid; int cpu; + int refcnt; char shortname[3]; bool comm_set; bool dead; /* if set thread has exited */ @@ -37,6 +38,18 @@ struct comm; struct thread *thread__new(pid_t pid, pid_t tid); int thread__init_map_groups(struct thread *thread, struct machine *machine); void thread__delete(struct thread *thread); + +struct thread *thread__get(struct thread *thread); +void thread__put(struct thread *thread); + +static inline void __thread__zput(struct thread **thread) +{ + thread__put(*thread); + *thread = NULL; +} + +#define thread__zput(thread) __thread__zput(&thread) + static inline void thread__exited(struct thread *thread) { thread->dead = true; -- cgit v1.2.3 From ae536acfacb65a4a9858c32b12361e09f84f4157 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 2 Mar 2015 22:28:41 -0300 Subject: perf sched: No need to keep the session around We were keeping the session around just because we kept pointers to struct thread instances, but now we reference count them, so no need for deferring the perf_session__delete call to after we traverse the work_list entries. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-9agtck6jdr3rebdp39z1lo0e@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) (limited to 'tools/perf/builtin-sched.c') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index e00e2eaf89da..a3ebf1d3c29d 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1439,8 +1439,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_ return err; } -static int perf_sched__read_events(struct perf_sched *sched, - struct perf_session **psession) +static int perf_sched__read_events(struct perf_sched *sched) { const struct perf_evsel_str_handler handlers[] = { { "sched:sched_switch", process_sched_switch_event, }, @@ -1454,6 +1453,7 @@ static int perf_sched__read_events(struct perf_sched *sched, .path = input_name, .mode = PERF_DATA_MODE_READ, }; + int rc = -1; session = perf_session__new(&file, false, &sched->tool); if (session == NULL) { @@ -1478,16 +1478,10 @@ static int perf_sched__read_events(struct perf_sched *sched, sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; } - if (psession) - *psession = session; - else - perf_session__delete(session); - - return 0; - + rc = 0; out_delete: perf_session__delete(session); - return -1; + return rc; } static void print_bad_events(struct perf_sched *sched) @@ -1515,12 +1509,10 @@ static void print_bad_events(struct perf_sched *sched) static int perf_sched__lat(struct perf_sched *sched) { struct rb_node *next; - struct perf_session *session; setup_pager(); - /* save session -- references to threads are held in work_list */ - if (perf_sched__read_events(sched, &session)) + if (perf_sched__read_events(sched)) return -1; perf_sched__sort_lat(sched); @@ -1537,6 +1529,7 @@ static int perf_sched__lat(struct perf_sched *sched) work_list = rb_entry(next, struct work_atoms, node); output_lat_thread(sched, work_list); next = rb_next(next); + thread__zput(work_list->thread); } printf(" -----------------------------------------------------------------------------------------------------------------\n"); @@ -1548,7 +1541,6 @@ static int perf_sched__lat(struct perf_sched *sched) print_bad_events(sched); printf("\n"); - perf_session__delete(session); return 0; } @@ -1557,7 +1549,7 @@ static int perf_sched__map(struct perf_sched *sched) sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); setup_pager(); - if (perf_sched__read_events(sched, NULL)) + if (perf_sched__read_events(sched)) return -1; print_bad_events(sched); return 0; @@ -1572,7 +1564,7 @@ static int perf_sched__replay(struct perf_sched *sched) test_calibrations(sched); - if (perf_sched__read_events(sched, NULL)) + if (perf_sched__read_events(sched)) return -1; printf("nr_run_events: %ld\n", sched->nr_run_events); -- cgit v1.2.3 From b7b61cbebd789a3dbca522e3fdb727fe5c95593f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Mar 2015 11:58:45 -0300 Subject: perf ordered_events: Shorten function signatures By keeping pointers to machines, evlist and tool in ordered_events. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-0c6huyaf59mqtm2ek9pmposl@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-buildid-list.c | 2 +- tools/perf/builtin-diff.c | 2 +- tools/perf/builtin-inject.c | 6 ++-- tools/perf/builtin-kmem.c | 2 +- tools/perf/builtin-kvm.c | 6 ++-- tools/perf/builtin-lock.c | 2 +- tools/perf/builtin-mem.c | 2 +- tools/perf/builtin-record.c | 4 +-- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-sched.c | 2 +- tools/perf/builtin-script.c | 2 +- tools/perf/builtin-timechart.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/util/data-convert-bt.c | 4 +-- tools/perf/util/ordered-events.c | 23 +++++++------- tools/perf/util/ordered-events.h | 10 +++--- tools/perf/util/session.c | 65 ++++++++++++++++----------------------- tools/perf/util/session.h | 11 +++---- 19 files changed, 68 insertions(+), 83 deletions(-) (limited to 'tools/perf/builtin-sched.c') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 747f86103599..71bf7451c0ca 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -208,7 +208,7 @@ static int __cmd_annotate(struct perf_annotate *ann) goto out; } - ret = perf_session__process_events(session, &ann->tool); + ret = perf_session__process_events(session); if (ret) goto out; diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index ed3873b3e238..feb420f74c2d 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -74,7 +74,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID */ if (with_hits || perf_data_file__is_pipe(&file)) - perf_session__process_events(session, &build_id__mark_dso_hit_ops); + perf_session__process_events(session); perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits); perf_session__delete(session); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 74aada554b12..f800fc95f5d7 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -747,7 +747,7 @@ static int __cmd_diff(void) goto out_delete; } - ret = perf_session__process_events(d->session, &tool); + ret = perf_session__process_events(d->session); if (ret) { pr_err("Failed to process %s\n", d->file.path); goto out_delete; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index a13641e066f5..2563f07ec0e5 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -359,8 +359,6 @@ static int __cmd_inject(struct perf_inject *inject) } else if (inject->sched_stat) { struct perf_evsel *evsel; - inject->tool.ordered_events = true; - evlist__for_each(session->evlist, evsel) { const char *name = perf_evsel__name(evsel); @@ -379,7 +377,7 @@ static int __cmd_inject(struct perf_inject *inject) if (!file_out->is_pipe) lseek(fd, session->header.data_offset, SEEK_SET); - ret = perf_session__process_events(session, &inject->tool); + ret = perf_session__process_events(session); if (!file_out->is_pipe) { if (inject->build_ids) @@ -458,6 +456,8 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) return -1; } + inject.tool.ordered_events = inject.sched_stat; + file.path = inject.input_name; inject.session = perf_session__new(&file, true, &inject.tool); if (inject.session == NULL) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index f295141025bc..62f165a9fa40 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -426,7 +426,7 @@ static int __cmd_kmem(struct perf_session *session) } setup_pager(); - err = perf_session__process_events(session, &perf_kmem); + err = perf_session__process_events(session); if (err != 0) goto out; sort_result(); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 0894a817f67e..802b8f53fa9a 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -730,9 +730,9 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, return -1; } - err = perf_session_queue_event(kvm->session, event, &kvm->tool, &sample, 0); + err = perf_session__queue_event(kvm->session, event, &sample, 0); /* - * FIXME: Here we can't consume the event, as perf_session_queue_event will + * FIXME: Here we can't consume the event, as perf_session__queue_event will * point to it, and it'll get possibly overwritten by the kernel. */ perf_evlist__mmap_consume(kvm->evlist, idx); @@ -1066,7 +1066,7 @@ static int read_events(struct perf_kvm_stat *kvm) if (ret < 0) return ret; - return perf_session__process_events(kvm->session, &kvm->tool); + return perf_session__process_events(kvm->session); } static int parse_target_str(struct perf_kvm_stat *kvm) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index e7ec71589da6..7893a9bba2a7 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -878,7 +878,7 @@ static int __cmd_report(bool display_info) if (select_key()) goto out_delete; - err = perf_session__process_events(session, &eops); + err = perf_session__process_events(session); if (err) goto out_delete; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 9b5663950a4d..46c69318de84 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -141,7 +141,7 @@ static int report_raw_events(struct perf_mem *mem) printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); - err = perf_session__process_events(session, &mem->tool); + err = perf_session__process_events(session); if (err) return err; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4fdad06d37db..5a2ff510b75b 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -225,7 +225,7 @@ static int process_buildids(struct record *rec) */ symbol_conf.ignore_vmlinux_buildid = true; - return perf_session__process_events(session, &rec->tool); + return perf_session__process_events(session); } static void perf_event__synthesize_guest_os(struct machine *machine, void *data) @@ -343,7 +343,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGINT, sig_handler); signal(SIGTERM, sig_handler); - session = perf_session__new(file, false, NULL); + session = perf_session__new(file, false, tool); if (session == NULL) { pr_err("Perf session creation failed.\n"); return -1; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fb350343b1d7..52f74e1367e9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -482,7 +482,7 @@ static int __cmd_report(struct report *rep) if (ret) return ret; - ret = perf_session__process_events(session, &rep->tool); + ret = perf_session__process_events(session); if (ret) return ret; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index a3ebf1d3c29d..3b3a5bb97059 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1467,7 +1467,7 @@ static int perf_sched__read_events(struct perf_sched *sched) goto out_delete; if (perf_session__has_traces(session, "record -R")) { - int err = perf_session__process_events(session, &sched->tool); + int err = perf_session__process_events(session); if (err) { pr_err("Failed to process events, error %d", err); goto out_delete; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ce304dfd962a..c7e6750923ef 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -800,7 +800,7 @@ static int __cmd_script(struct perf_script *script) script->tool.mmap2 = process_mmap2_event; } - ret = perf_session__process_events(script->session, &script->tool); + ret = perf_session__process_events(script->session); if (debug_mode) pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index f3bb1a4bf060..51440d1fc722 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1623,7 +1623,7 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) goto out_delete; } - ret = perf_session__process_events(session, &tchart->tool); + ret = perf_session__process_events(session); if (ret) goto out_delete; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 211614fba217..6969ba98ff2f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2408,7 +2408,7 @@ static int trace__replay(struct trace *trace) setup_pager(); - err = perf_session__process_events(session, &trace->tool); + err = perf_session__process_events(session); if (err) pr_err("Failed to process events, error %d", err); diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index e372e03ff480..1afd381b2346 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -579,7 +579,7 @@ int bt_convert__perf2ctf(const char *input, const char *path) return -1; /* perf.data session */ - session = perf_session__new(&file, 0, NULL); + session = perf_session__new(&file, 0, &c.tool); if (!session) goto free_writer; @@ -591,7 +591,7 @@ int bt_convert__perf2ctf(const char *input, const char *path) if (setup_events(cw, session)) goto free_session; - err = perf_session__process_events(session, &c.tool); + err = perf_session__process_events(session); if (!err) err = bt_ctf_stream_flush(cw->stream); diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index e6ab630dd374..bad46ce16591 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -153,10 +153,7 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve free_dup_event(oe, event->event); } -static int __ordered_events__flush(struct ordered_events *oe, - struct machines *machines, - struct perf_evlist *evlist, - struct perf_tool *tool) +static int __ordered_events__flush(struct ordered_events *oe) { struct list_head *head = &oe->events; struct ordered_event *tmp, *iter; @@ -180,12 +177,12 @@ static int __ordered_events__flush(struct ordered_events *oe, if (iter->timestamp > limit) break; - ret = perf_evlist__parse_sample(evlist, iter->event, &sample); + ret = perf_evlist__parse_sample(oe->evlist, iter->event, &sample); if (ret) pr_err("Can't parse sample, err = %d\n", ret); else { - ret = machines__deliver_event(machines, evlist, iter->event, - &sample, tool, iter->file_offset); + ret = machines__deliver_event(oe->machines, oe->evlist, iter->event, + &sample, oe->tool, iter->file_offset); if (ret) return ret; } @@ -205,9 +202,7 @@ static int __ordered_events__flush(struct ordered_events *oe, return 0; } -int ordered_events__flush(struct ordered_events *oe, struct machines *machines, - struct perf_evlist *evlist, struct perf_tool *tool, - enum oe_flush how) +int ordered_events__flush(struct ordered_events *oe, enum oe_flush how) { static const char * const str[] = { "NONE", @@ -252,7 +247,7 @@ int ordered_events__flush(struct ordered_events *oe, struct machines *machines, str[how], oe->nr_events); pr_oe_time(oe->max_timestamp, "max_timestamp\n"); - err = __ordered_events__flush(oe, machines, evlist, tool); + err = __ordered_events__flush(oe); if (!err) { if (how == OE_FLUSH__ROUND) @@ -268,13 +263,17 @@ int ordered_events__flush(struct ordered_events *oe, struct machines *machines, return err; } -void ordered_events__init(struct ordered_events *oe) +void ordered_events__init(struct ordered_events *oe, struct machines *machines, + struct perf_evlist *evlist, struct perf_tool *tool) { INIT_LIST_HEAD(&oe->events); INIT_LIST_HEAD(&oe->cache); INIT_LIST_HEAD(&oe->to_free); oe->max_alloc_size = (u64) -1; oe->cur_alloc_size = 0; + oe->evlist = evlist; + oe->machines = machines; + oe->tool = tool; } void ordered_events__free(struct ordered_events *oe) diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index e09f2433c6d6..ef7d73ecb0d0 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -32,6 +32,9 @@ struct ordered_events { struct list_head to_free; struct ordered_event *buffer; struct ordered_event *last; + struct machines *machines; + struct perf_evlist *evlist; + struct perf_tool *tool; int buffer_idx; unsigned int nr_events; enum oe_flush last_flush_type; @@ -41,10 +44,9 @@ struct ordered_events { struct ordered_event *ordered_events__new(struct ordered_events *oe, u64 timestamp, union perf_event *event); void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event); -int ordered_events__flush(struct ordered_events *oe, struct machines *machines, - struct perf_evlist *evlist, struct perf_tool *tool, - enum oe_flush how); -void ordered_events__init(struct ordered_events *oe); +int ordered_events__flush(struct ordered_events *oe, enum oe_flush how); +void ordered_events__init(struct ordered_events *oe, struct machines *machines, + struct perf_evlist *evlsit, struct perf_tool *tool); void ordered_events__free(struct ordered_events *oe); static inline diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 23be146bd2fc..c6dd89f62fc4 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -95,7 +95,6 @@ struct perf_session *perf_session__new(struct perf_data_file *file, goto out; session->repipe = repipe; - ordered_events__init(&session->ordered_events); machines__init(&session->machines); if (file) { @@ -126,7 +125,8 @@ struct perf_session *perf_session__new(struct perf_data_file *file, tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) { dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); tool->ordered_events = false; - } + } else + ordered_events__init(&session->ordered_events, &session->machines, session->evlist, tool); return session; @@ -508,24 +508,19 @@ static perf_event__swap_op perf_event__swap_ops[] = { * Flush every events below timestamp 7 * etc... */ -static int process_finished_round(struct perf_tool *tool, +static int process_finished_round(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct perf_session *session) { struct ordered_events *oe = &session->ordered_events; - struct perf_evlist *evlist = session->evlist; - struct machines *machines = &session->machines; - return ordered_events__flush(oe, machines, evlist, tool, OE_FLUSH__ROUND); + return ordered_events__flush(oe, OE_FLUSH__ROUND); } -int perf_session_queue_event(struct perf_session *s, union perf_event *event, - struct perf_tool *tool, struct perf_sample *sample, - u64 file_offset) +int perf_session__queue_event(struct perf_session *s, union perf_event *event, + struct perf_sample *sample, u64 file_offset) { struct ordered_events *oe = &s->ordered_events; - struct perf_evlist *evlist = s->evlist; - struct machines *machines = &s->machines; u64 timestamp = sample->time; struct ordered_event *new; @@ -543,7 +538,7 @@ int perf_session_queue_event(struct perf_session *s, union perf_event *event, new = ordered_events__new(oe, timestamp, event); if (!new) { - ordered_events__flush(oe, machines, evlist, tool, OE_FLUSH__HALF); + ordered_events__flush(oe, OE_FLUSH__HALF); new = ordered_events__new(oe, timestamp, event); } @@ -948,9 +943,9 @@ int machines__deliver_event(struct machines *machines, static s64 perf_session__process_user_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool, u64 file_offset) { + struct perf_tool *tool = session->ordered_events.tool; int fd = perf_data_file__fd(session->file); int err; @@ -988,15 +983,15 @@ static s64 perf_session__process_user_event(struct perf_session *session, int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool) + struct perf_sample *sample) { struct perf_evlist *evlist = session->evlist; + struct perf_tool *tool = session->ordered_events.tool; events_stats__inc(&evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) - return perf_session__process_user_event(session, event, tool, 0); + return perf_session__process_user_event(session, event, 0); return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0); } @@ -1066,11 +1061,10 @@ out_parse_sample: } static s64 perf_session__process_event(struct perf_session *session, - union perf_event *event, - struct perf_tool *tool, - u64 file_offset) + union perf_event *event, u64 file_offset) { struct perf_evlist *evlist = session->evlist; + struct perf_tool *tool = session->ordered_events.tool; struct perf_sample sample; int ret; @@ -1083,7 +1077,7 @@ static s64 perf_session__process_event(struct perf_session *session, events_stats__inc(&evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) - return perf_session__process_user_event(session, event, tool, file_offset); + return perf_session__process_user_event(session, event, file_offset); /* * For all kernel events we get the sample data @@ -1093,8 +1087,7 @@ static s64 perf_session__process_event(struct perf_session *session, return ret; if (tool->ordered_events) { - ret = perf_session_queue_event(session, event, tool, &sample, - file_offset); + ret = perf_session__queue_event(session, event, &sample, file_offset); if (ret != -ETIME) return ret; } @@ -1173,12 +1166,10 @@ static void perf_tool__warn_about_errors(const struct perf_tool *tool, volatile int session_done; -static int __perf_session__process_pipe_events(struct perf_session *session, - struct perf_tool *tool) +static int __perf_session__process_pipe_events(struct perf_session *session) { struct ordered_events *oe = &session->ordered_events; - struct perf_evlist *evlist = session->evlist; - struct machines *machines = &session->machines; + struct perf_tool *tool = oe->tool; int fd = perf_data_file__fd(session->file); union perf_event *event; uint32_t size, cur_size = 0; @@ -1242,7 +1233,7 @@ more: } } - if ((skip = perf_session__process_event(session, event, tool, head)) < 0) { + if ((skip = perf_session__process_event(session, event, head)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", head, event->header.size, event->header.type); err = -EINVAL; @@ -1258,7 +1249,7 @@ more: goto more; done: /* do the final flush for ordered samples */ - err = ordered_events__flush(oe, machines, evlist, tool, OE_FLUSH__FINAL); + err = ordered_events__flush(oe, OE_FLUSH__FINAL); out_err: free(buf); perf_tool__warn_about_errors(tool, &session->evlist->stats); @@ -1308,11 +1299,10 @@ fetch_mmaped_event(struct perf_session *session, static int __perf_session__process_events(struct perf_session *session, u64 data_offset, u64 data_size, - u64 file_size, struct perf_tool *tool) + u64 file_size) { struct ordered_events *oe = &session->ordered_events; - struct perf_evlist *evlist = session->evlist; - struct machines *machines = &session->machines; + struct perf_tool *tool = oe->tool; int fd = perf_data_file__fd(session->file); u64 head, page_offset, file_offset, file_pos, size; int err, mmap_prot, mmap_flags, map_idx = 0; @@ -1381,8 +1371,7 @@ more: size = event->header.size; if (size < sizeof(struct perf_event_header) || - (skip = perf_session__process_event(session, event, tool, file_pos)) - < 0) { + (skip = perf_session__process_event(session, event, file_pos)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", file_offset + head, event->header.size, event->header.type); @@ -1406,7 +1395,7 @@ more: out: /* do the final flush for ordered samples */ - err = ordered_events__flush(oe, machines, evlist, tool, OE_FLUSH__FINAL); + err = ordered_events__flush(oe, OE_FLUSH__FINAL); out_err: ui_progress__finish(); perf_tool__warn_about_errors(tool, &session->evlist->stats); @@ -1415,8 +1404,7 @@ out_err: return err; } -int perf_session__process_events(struct perf_session *session, - struct perf_tool *tool) +int perf_session__process_events(struct perf_session *session) { u64 size = perf_data_file__size(session->file); int err; @@ -1427,10 +1415,9 @@ int perf_session__process_events(struct perf_session *session, if (!perf_data_file__is_pipe(session->file)) err = __perf_session__process_events(session, session->header.data_offset, - session->header.data_size, - size, tool); + session->header.data_size, size); else - err = __perf_session__process_pipe_events(session, tool); + err = __perf_session__process_pipe_events(session); return err; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index c08fa6be5bf3..06e0777e9803 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -48,12 +48,10 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, union perf_event **event_ptr, struct perf_sample *sample); -int perf_session__process_events(struct perf_session *session, - struct perf_tool *tool); +int perf_session__process_events(struct perf_session *session); -int perf_session_queue_event(struct perf_session *s, union perf_event *event, - struct perf_tool *tool, struct perf_sample *sample, - u64 file_offset); +int perf_session__queue_event(struct perf_session *s, union perf_event *event, + struct perf_sample *sample, u64 file_offset); void perf_tool__fill_defaults(struct perf_tool *tool); @@ -126,8 +124,7 @@ extern volatile int session_done; int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool); + struct perf_sample *sample); int perf_event__process_id_index(struct perf_tool *tool, union perf_event *event, -- cgit v1.2.3 From 0755bc4dc77a876aa60d4b3d33b5f6506f21f91b Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 31 Mar 2015 21:46:28 +0800 Subject: perf sched replay: Use struct task_desc instead of struct task_task for correct meaning There is no struct task_task at all, thus it is a typo error in the old commits, now fix it to what it should be in order to avoid unnecessary misunderstanding. Signed-off-by: Yunlong Song Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1427809596-29559-2-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/builtin-sched.c') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 3b3a5bb97059..a1893e8dfe17 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -346,7 +346,7 @@ static struct task_desc *register_pid(struct perf_sched *sched, sched->pid_to_task[pid] = task; sched->nr_tasks++; - sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_task *)); + sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *)); BUG_ON(!sched->tasks); sched->tasks[task->nr] = task; -- cgit v1.2.3 From a35e27d0e5d801ff75481a8f639bb4d59ea1aafa Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 31 Mar 2015 21:46:29 +0800 Subject: perf sched replay: Increase the MAX_PID value to fix assertion failure problem Current MAX_PID is only 65536, which will cause assertion failure problem when CPU cores are more than 64 in x86_64. This is because the pid_max value in x86_64 is at least PIDS_PER_CPU_DEFAULT * num_possible_cpus() (see function pidmap_init defined in kernel/pid.c), where PIDS_PER_CPU_DEFAULT is 1024 (defined in include/linux/threads.h). Thus for MAX_PID = 65536, the correspoinding CPU cores are 65536/1024=64. This is obviously not enough at all for x86_64, and will cause an assertion failure problem due to BUG_ON(pid >= MAX_PID) in the codes. We increase MAX_PID value from 65536 to 1024*1000, which can be used in x86_64 with 1000 cores. This number is finally decided according to the limitation of stack size of calling process. Use 'ulimit -a', the result shows the stack size of any process is 8192 Kbytes, which is defined in include/uapi/linux/resource.h (#define _STK_LIM (8*1024*1024)). Thus we choose a large enough value for MAX_PID, and make it satisfy to the limitation of the stack size, i.e., making the perf process take up a memory space just smaller than 8192 Kbytes. We have calculated and tested that 1024*1000 is OK for MAX_PID. This means perf sched replay can now be used with at most 1000 cores in x86_64 without any assertion failure problem. Example: Test environment: x86_64 with 160 cores $ cat /proc/sys/kernel/pid_max 163840 Before this patch: $ perf sched replay run measurement overhead: 240 nsecs sleep measurement overhead: 55379 nsecs the run test took 1000004 nsecs the sleep test took 1059424 nsecs perf: builtin-sched.c:330: register_pid: Assertion `!(pid >= 65536)' failed. Aborted After this patch: $ perf sched replay run measurement overhead: 221 nsecs sleep measurement overhead: 55397 nsecs the run test took 999920 nsecs the sleep test took 1053313 nsecs nr_run_events: 10 nr_sleep_events: 1562 nr_wakeup_events: 5 task 0 ( :1: 1), nr_events: 1 task 1 ( :2: 2), nr_events: 1 task 2 ( :3: 3), nr_events: 1 task 3 ( :5: 5), nr_events: 1 ... Signed-off-by: Yunlong Song Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1427809596-29559-3-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/builtin-sched.c') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index a1893e8dfe17..c46610447ede 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -28,7 +28,7 @@ #define MAX_CPUS 4096 #define COMM_LEN 20 #define SYM_LEN 129 -#define MAX_PID 65536 +#define MAX_PID 1024000 struct sched_atom; -- cgit v1.2.3 From cb06ac256a16fc1a5ab063107c2b35b3b9e95102 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 31 Mar 2015 21:46:30 +0800 Subject: perf sched replay: Alloc the memory of pid_to_task dynamically to adapt to the unexpected change of pid_max The current memory allocation of struct task_desc *pid_to_task[MAX_PID] is in a permanent and preset way, and it has two problems: Problem 1: If the pid_max, which is the max number of pids in the system, is much smaller than MAX_PID (1024*1000), then it causes a waste of stack memory. This may happen in the case where the number of cpu cores is much smaller than 1000. Problem 2: If the pid_max is changed from the default value to a value larger than MAX_PID, then it will cause assertion failure problem. The maximum value of pid_max can be set to pid_max_max (see pidmap_init defined in kernel/pid.c), which equals to PID_MAX_LIMIT. In x86_64, PID_MAX_LIMIT is 4*1024*1024 (defined in include/linux/threads.h). This value is much larger than MAX_PID, and will take up 32768 Kbytes (4*1024*1024*8/1024) for memory allocation of pid_to_task, which is much larger than the default 8192 Kbytes of the stack size of calling process. Due to these two problems, we use calloc to allocate the memory of pid_to_task dynamically. Example: Test environment: x86_64 with 160 cores $ cat /proc/sys/kernel/pid_max 163840 $ echo 1025000 > /proc/sys/kernel/pid_max $ cat /proc/sys/kernel/pid_max 1025000 Run some applications until the pid of some process is greater than the value of MAX_PID (1024*1000). Before this patch: $ perf sched replay run measurement overhead: 221 nsecs sleep measurement overhead: 55480 nsecs the run test took 1000008 nsecs the sleep test took 1063151 nsecs perf: builtin-sched.c:330: register_pid: Assertion `!(pid >= 1024000)' failed. Aborted After this patch: $ perf sched replay run measurement overhead: 221 nsecs sleep measurement overhead: 55435 nsecs the run test took 1000004 nsecs the sleep test took 1059312 nsecs nr_run_events: 10 nr_sleep_events: 1562 nr_wakeup_events: 5 task 0 ( :1: 1), nr_events: 1 task 1 ( :2: 2), nr_events: 1 task 2 ( :3: 3), nr_events: 1 task 3 ( :5: 5), nr_events: 1 ... Signed-off-by: Yunlong Song Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1427809596-29559-4-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'tools/perf/builtin-sched.c') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index c46610447ede..20d887b222e4 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -23,6 +23,7 @@ #include #include #include +#include #define PR_SET_NAME 15 /* Set process name */ #define MAX_CPUS 4096 @@ -124,7 +125,7 @@ struct perf_sched { struct perf_tool tool; const char *sort_order; unsigned long nr_tasks; - struct task_desc *pid_to_task[MAX_PID]; + struct task_desc **pid_to_task; struct task_desc **tasks; const struct trace_sched_handler *tp_handler; pthread_mutex_t start_work_mutex; @@ -326,8 +327,14 @@ static struct task_desc *register_pid(struct perf_sched *sched, unsigned long pid, const char *comm) { struct task_desc *task; + static int pid_max; - BUG_ON(pid >= MAX_PID); + if (sched->pid_to_task == NULL) { + if (sysctl__read_int("kernel/pid_max", &pid_max) < 0) + pid_max = MAX_PID; + BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL); + } + BUG_ON(pid >= (unsigned long)pid_max); task = sched->pid_to_task[pid]; -- cgit v1.2.3 From 3a423a5c36d1a28a258beaa7db855568b82d07ab Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 31 Mar 2015 21:46:31 +0800 Subject: perf sched replay: Realloc the memory of pid_to_task stepwise to adapt to the different pid_max configurations Although the memory of pid_to_task can be allocated via calloc according to the value of /proc/sys/kernel/pid_max, it cannot handle the case when pid_max is changed after 'perf sched record' has created its perf.data. If the new pid_max configured in 'perf sched replay' is smaller than the old pid_max configured in 'perf sched record', then it will cause the assertion failure problem. To solve this problem, we realloc the memory of pid_to_task stepwise once the passed-in pid parameter in register_pid is larger than the current pid_max. Example: Test environment: x86_64 with 160 cores $ cat /proc/sys/kernel/pid_max 163840 $ perf sched record ls $ echo 5000 > /proc/sys/kernel/pid_max $ cat /proc/sys/kernel/pid_max 5000 Before this patch: $ perf sched replay run measurement overhead: 221 nsecs sleep measurement overhead: 55356 nsecs the run test took 1000011 nsecs the sleep test took 1060940 nsecs perf: builtin-sched.c:337: register_pid: Assertion `!(pid >= (unsigned long)pid_max)' failed. Aborted After this patch: $ perf sched replay run measurement overhead: 221 nsecs sleep measurement overhead: 55611 nsecs the run test took 1000026 nsecs the sleep test took 1060486 nsecs nr_run_events: 10 nr_sleep_events: 1562 nr_wakeup_events: 5 task 0 ( :1: 1), nr_events: 1 task 1 ( :2: 2), nr_events: 1 task 2 ( :3: 3), nr_events: 1 task 3 ( :5: 5), nr_events: 1 ... Signed-off-by: Yunlong Song Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1427809596-29559-5-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-sched.c') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 20d887b222e4..dd714818fa4d 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -334,7 +334,12 @@ static struct task_desc *register_pid(struct perf_sched *sched, pid_max = MAX_PID; BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL); } - BUG_ON(pid >= (unsigned long)pid_max); + if (pid >= (unsigned long)pid_max) { + BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) * + sizeof(struct task_desc *))) == NULL); + while (pid >= (unsigned long)pid_max) + sched->pid_to_task[pid_max++] = NULL; + } task = sched->pid_to_task[pid]; -- cgit v1.2.3 From 08097abc11bcee21355dd857852a807b2a30b79f Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 31 Mar 2015 21:46:32 +0800 Subject: perf sched replay: Fix the segmentation fault problem caused by pr_err in threads The pr_err in self_open_counters() prints error message to stderr. Unlike stdout, stderr uses memory buffer on the stack of each calling process. The pr_err in self_open_counters() works in a thread called thread_func created in function create_tasks, which concurrently creates sched->nr_tasks threads. If the error happens and pr_err prints the error message in each of these threads, the stack size of the perf process (default is 8192 kbytes) will quickly run out and the segmentation fault will happen then. To solve this problem, pr_err with self_open_counters() should be moved from newly created threads to the old main thread of the perf process. Then the pr_err can work in a stable situation without the strange segmentation fault problem. Example: Test environment: x86_64 with 160 cores Before this patch: $ perf sched replay ... task 1549 ( :163132: 163132), nr_events: 1 task 1550 ( :163540: 163540), nr_events: 1 task 1551 ( : 0), nr_events: 10 Segmentation fault After this patch: $ perf sched replay ... task 1549 ( :163132: 163132), nr_events: 1 task 1550 ( :163540: 163540), nr_events: 1 task 1551 ( : 0), nr_events: 10 ... As shown above, the result continues without any segmentation fault. Signed-off-by: Yunlong Song Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1427809596-29559-6-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/perf/builtin-sched.c') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index dd714818fa4d..7fe3b3cb4cc8 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -472,6 +472,7 @@ static u64 get_cpu_usage_nsec_self(int fd) struct sched_thread_parms { struct task_desc *task; struct perf_sched *sched; + int fd; }; static void *thread_func(void *ctx) @@ -482,13 +483,12 @@ static void *thread_func(void *ctx) u64 cpu_usage_0, cpu_usage_1; unsigned long i, ret; char comm2[22]; - int fd; + int fd = parms->fd; zfree(&parms); sprintf(comm2, ":%s", this_task->comm); prctl(PR_SET_NAME, comm2); - fd = self_open_counters(); if (fd < 0) return NULL; again: @@ -540,6 +540,7 @@ static void create_tasks(struct perf_sched *sched) BUG_ON(parms == NULL); parms->task = task = sched->tasks[i]; parms->sched = sched; + parms->fd = self_open_counters(); sem_init(&task->sleep_sem, 0, 0); sem_init(&task->ready_for_work, 0, 0); sem_init(&task->work_done_sem, 0, 0); -- cgit v1.2.3 From 1aff59be53ef37aa9943fb5f772f03148f789bb6 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 31 Mar 2015 21:46:33 +0800 Subject: perf sched replay: Handle the dead halt of sem_wait when create_tasks() fails for any task Since there is sem_wait for each task in the wait_for_tasks(), e.g. sem_wait(&task->work_done_sem). The sem_wait can continue only when work_done_sem is greater than 0, or it will be blocked. For perf sched replay, one task may sem_post the work_done_sem of another task, which causes the work_done_sem of that task processed in a reasonable sequence, e.g. sem_post, sem_wait, sem_wait, sem_post... This sequence simulates the sched process of the running tasks at the time when perf sched record runs. As a result, all the tasks are required and their threads must be successfully created. If any one (task A) of the tasks fails to create its thread, then another task (task B), whose work_done_sem needs sem_post from that failed task A, may likely block itself due to seg_wait. And this is a dead halt, since task B's thread_func cannot continue at all. To solve this problem, perf sched replay should exit once any task fails to create its thread. Example: Test environment: x86_64 with 160 cores Before this patch: $ perf sched replay ... Error: sys_perf_event_open() syscall returned with -1 (Too many open files) ------------------------------------------------------------ <- dead halt After this patch: $ perf sched replay ... task 1551 ( : 0), nr_events: 10 Error: sys_perf_event_open() syscall returned with -1 (Too many open files) $ As shown above, perf sched replay finishes the process after printing an error message and does not block itself. Signed-off-by: Yunlong Song Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1427809596-29559-7-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-sched.c') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 7fe3b3cb4cc8..3261300c08f0 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -451,10 +451,12 @@ static int self_open_counters(void) fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag()); - if (fd < 0) + if (fd < 0) { pr_err("Error: sys_perf_event_open() syscall returned " "with %d (%s)\n", fd, strerror_r(errno, sbuf, sizeof(sbuf))); + exit(EXIT_FAILURE); + } return fd; } -- cgit v1.2.3 From 939cda521a24ae4dbf3beec983abd519bce56231 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 31 Mar 2015 21:46:34 +0800 Subject: perf sched replay: Fix the EMFILE error caused by the limitation of the maximum open files The soft maximum number of open files for a calling process is 1024, which is defined as INR_OPEN_CUR in include/uapi/linux/fs.h, and the hard maximum number of open files for a calling process is 4096, which is defined as INR_OPEN_MAX in include/uapi/linux/fs.h. Both INR_OPEN_CUR and INR_OPEN_MAX are used to limit the value of RLIMIT_NOFILE in include/asm-generic/resource.h. And the soft maximum number finally decides the limitation of the maximum files which are allowed to be opened. That is to say a process can use at most 1024 file descriptors for its o pened files, or an EMFILE error will happen. This error can be fixed by increasing the soft maximum number, under the constraint that the soft maximum number can not exceed the hard maximum number, or both soft and hard maximum number should be increased simultaneously with privilege. For perf sched replay, it uses sys_perf_event_open to create the file descriptor for each of the tasks in order to handle information of perf events. That is to say each task needs a unique file descriptor. In x86_64, there may be over 1024 or 4096 tasks correspoinding to the record in perf.data, which causes that no enough file descriptors can be used. As a result, EMFILE error happens and stops the replay process. To solve this problem, we adaptively increase the soft and hard maximum number of open files with a '-f' option. Example: Test environment: x86_64 with 160 cores $ cat /proc/sys/kernel/pid_max 163840 $ cat /proc/sys/fs/file-max 6815744 $ ulimit -Sn 1024 $ ulimit -Hn 4096 Before this patch: $ perf sched replay ... task 1549 ( :163132: 163132), nr_events: 1 task 1550 ( :163540: 163540), nr_events: 1 task 1551 ( : 0), nr_events: 10 Error: sys_perf_event_open() syscall returned with -1 (Too many open files) After this patch: $ perf sched replay ... task 1549 ( :163132: 163132), nr_events: 1 task 1550 ( :163540: 163540), nr_events: 1 task 1551 ( : 0), nr_events: 10 Error: sys_perf_event_open() syscall returned with -1 (Too many open files) Have a try with -f option $ perf sched replay -f ... task 1549 ( :163132: 163132), nr_events: 1 task 1550 ( :163540: 163540), nr_events: 1 task 1551 ( : 0), nr_events: 10 ------------------------------------------------------------ #1 : 54.401, ravg: 54.40, cpu: 3285.21 / 3285.21 #2 : 199.548, ravg: 68.92, cpu: 4999.65 / 3456.66 #3 : 170.483, ravg: 79.07, cpu: 1349.94 / 3245.99 #4 : 192.034, ravg: 90.37, cpu: 1322.88 / 3053.67 #5 : 182.929, ravg: 99.62, cpu: 1406.51 / 2888.96 #6 : 152.974, ravg: 104.96, cpu: 1167.54 / 2716.82 #7 : 155.579, ravg: 110.02, cpu: 2992.53 / 2744.39 #8 : 130.557, ravg: 112.08, cpu: 1126.43 / 2582.59 #9 : 138.520, ravg: 114.72, cpu: 1253.22 / 2449.65 #10 : 134.328, ravg: 116.68, cpu: 1587.95 / 2363.48 Signed-off-by: Yunlong Song Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1427809596-29559-8-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'tools/perf/builtin-sched.c') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 3261300c08f0..5ab58c6d2467 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -170,6 +170,7 @@ struct perf_sched { u64 cpu_last_switched[MAX_CPUS]; struct rb_root atom_root, sorted_atom_root; struct list_head sort_list, cmp_pid; + bool force; }; static u64 get_nsecs(void) @@ -437,24 +438,43 @@ static u64 get_cpu_usage_nsec_parent(void) return sum; } -static int self_open_counters(void) +static int self_open_counters(struct perf_sched *sched, unsigned long cur_task) { struct perf_event_attr attr; - char sbuf[STRERR_BUFSIZE]; + char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE]; int fd; + struct rlimit limit; + bool need_privilege = false; memset(&attr, 0, sizeof(attr)); attr.type = PERF_TYPE_SOFTWARE; attr.config = PERF_COUNT_SW_TASK_CLOCK; +force_again: fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag()); if (fd < 0) { + if (errno == EMFILE) { + if (sched->force) { + BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1); + limit.rlim_cur += sched->nr_tasks - cur_task; + if (limit.rlim_cur > limit.rlim_max) { + limit.rlim_max = limit.rlim_cur; + need_privilege = true; + } + if (setrlimit(RLIMIT_NOFILE, &limit) == -1) { + if (need_privilege && errno == EPERM) + strcpy(info, "Need privilege\n"); + } else + goto force_again; + } else + strcpy(info, "Have a try with -f option\n"); + } pr_err("Error: sys_perf_event_open() syscall returned " - "with %d (%s)\n", fd, - strerror_r(errno, sbuf, sizeof(sbuf))); + "with %d (%s)\n%s", fd, + strerror_r(errno, sbuf, sizeof(sbuf)), info); exit(EXIT_FAILURE); } return fd; @@ -542,7 +562,7 @@ static void create_tasks(struct perf_sched *sched) BUG_ON(parms == NULL); parms->task = task = sched->tasks[i]; parms->sched = sched; - parms->fd = self_open_counters(); + parms->fd = self_open_counters(sched, i); sem_init(&task->sleep_sem, 0, 0); sem_init(&task->ready_for_work, 0, 0); sem_init(&task->work_done_sem, 0, 0); @@ -1700,6 +1720,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), + OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), OPT_END() }; const struct option sched_options[] = { -- cgit v1.2.3 From f0dd330fdf07d295ac468660cf60341796d5d501 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 31 Mar 2015 21:46:35 +0800 Subject: perf sched replay: Support using -f to override perf.data file ownership Enable to use perf.data when it is not owned by current user or root. Example: $ ls -al perf.data -rw------- 1 Yunlong.Song Yunlong.Song 5321918 Mar 25 15:14 perf.data $ sudo id uid=0(root) gid=0(root) groups=0(root),64(pkcs11) Before this patch: $ sudo perf sched replay -f run measurement overhead: 98 nsecs sleep measurement overhead: 52909 nsecs the run test took 1000015 nsecs the sleep test took 1054253 nsecs File perf.data not owned by current user or root (use -f to override) As shown above, the -f option does not work at all. After this patch: $ sudo perf sched replay -f run measurement overhead: 221 nsecs sleep measurement overhead: 40514 nsecs the run test took 1000003 nsecs the sleep test took 1056098 nsecs nr_run_events: 10 nr_sleep_events: 1562 nr_wakeup_events: 5 task 0 ( :1: 1), nr_events: 1 task 1 ( :2: 2), nr_events: 1 task 2 ( :3: 3), nr_events: 1 ... ... task 1549 ( :163132: 163132), nr_events: 1 task 1550 ( :163540: 163540), nr_events: 1 task 1551 ( : 0), nr_events: 10 ------------------------------------------------------------ #1 : 50.198, ravg: 50.20, cpu: 2335.18 / 2335.18 #2 : 219.099, ravg: 67.09, cpu: 2835.11 / 2385.17 #3 : 238.626, ravg: 84.24, cpu: 3278.26 / 2474.48 #4 : 200.364, ravg: 95.85, cpu: 2977.41 / 2524.77 #5 : 176.882, ravg: 103.96, cpu: 2801.35 / 2552.43 #6 : 191.093, ravg: 112.67, cpu: 2813.70 / 2578.56 #7 : 189.448, ravg: 120.35, cpu: 2809.21 / 2601.62 #8 : 200.637, ravg: 128.38, cpu: 2849.91 / 2626.45 #9 : 248.338, ravg: 140.37, cpu: 4380.61 / 2801.87 #10 : 511.139, ravg: 177.45, cpu: 3077.73 / 2829.45 As shown above, the -f option really works now. Besides for replay, -f option can also work for latency and map. Signed-off-by: Yunlong Song Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1427809596-29559-9-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/perf/builtin-sched.c') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 5ab58c6d2467..7b7b798b22b2 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1487,6 +1487,7 @@ static int perf_sched__read_events(struct perf_sched *sched) struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = sched->force, }; int rc = -1; -- cgit v1.2.3 From ff5f3bbd40bfb8632f826f1f83223d95363f36af Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 31 Mar 2015 21:46:36 +0800 Subject: perf sched replay: Use replay_repeat to calculate the runavg of cpu usage instead of the default value 10 Since sched->replay_repeat is set to 10 as default, the sched->run_avg, sched->runavg_cpu_usage, and sched->runavg_parent_cpu_usage all use 10 to calculate their value. However, the replay_repeat can be changed to other value by using -r option, so the calculation above should use replay_repeat to achieve more accurate results instead of the default value 10. Signed-off-by: Yunlong Song Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1427809596-29559-10-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/perf/builtin-sched.c') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 7b7b798b22b2..5275bab70313 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -607,13 +607,13 @@ static void wait_for_tasks(struct perf_sched *sched) cpu_usage_1 = get_cpu_usage_nsec_parent(); if (!sched->runavg_cpu_usage) sched->runavg_cpu_usage = sched->cpu_usage; - sched->runavg_cpu_usage = (sched->runavg_cpu_usage * 9 + sched->cpu_usage) / 10; + sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat; sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0; if (!sched->runavg_parent_cpu_usage) sched->runavg_parent_cpu_usage = sched->parent_cpu_usage; - sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * 9 + - sched->parent_cpu_usage)/10; + sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) + + sched->parent_cpu_usage)/sched->replay_repeat; ret = pthread_mutex_lock(&sched->start_work_mutex); BUG_ON(ret); @@ -645,7 +645,7 @@ static void run_one_test(struct perf_sched *sched) sched->sum_fluct += fluct; if (!sched->run_avg) sched->run_avg = delta; - sched->run_avg = (sched->run_avg * 9 + delta) / 10; + sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat; printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / 1000000.0); -- cgit v1.2.3